From 8d3d303c7942ced6a987a52db8977d768dc3605f Mon Sep 17 00:00:00 2001 From: Hamsalekha S Date: Fri, 13 Mar 2015 21:24:58 +0530 Subject: [PATCH] Initial version Change-Id: I7efe9a589cd24edf86e8d086b40c27cbbf8b4017 --- Android.mk | 8 + common/arm/ih264_arm_memory_barrier.s | 77 + common/arm/ih264_deblk_chroma_a9.s | 1337 ++ common/arm/ih264_deblk_luma_a9.s | 1092 ++ common/arm/ih264_default_weighted_pred_a9q.s | 359 + common/arm/ih264_ihadamard_scaling_a9.s | 250 + common/arm/ih264_inter_pred_chroma_a9q.s | 254 + .../ih264_inter_pred_filters_luma_horz_a9q.s | 245 + .../ih264_inter_pred_filters_luma_vert_a9q.s | 301 + .../arm/ih264_inter_pred_luma_bilinear_a9q.s | 398 + common/arm/ih264_inter_pred_luma_copy_a9q.s | 253 + ..._inter_pred_luma_horz_hpel_vert_hpel_a9q.s | 441 + ..._inter_pred_luma_horz_hpel_vert_qpel_a9q.s | 1044 ++ .../arm/ih264_inter_pred_luma_horz_qpel_a9q.s | 266 + ..._inter_pred_luma_horz_qpel_vert_hpel_a9q.s | 505 + ..._inter_pred_luma_horz_qpel_vert_qpel_a9q.s | 355 + .../arm/ih264_inter_pred_luma_vert_qpel_a9q.s | 330 + common/arm/ih264_intra_pred_chroma_a9q.s | 551 + common/arm/ih264_intra_pred_luma_16x16_a9q.s | 520 + common/arm/ih264_intra_pred_luma_4x4_a9q.s | 842 ++ common/arm/ih264_intra_pred_luma_8x8_a9q.s | 1037 ++ common/arm/ih264_iquant_itrans_recon_a9.s | 871 ++ common/arm/ih264_iquant_itrans_recon_dc_a9.s | 399 + common/arm/ih264_itrans_recon_a9.s | 216 + common/arm/ih264_mem_fns_neon.s | 268 + common/arm/ih264_padding_neon.s | 646 + common/arm/ih264_platform_macros.h | 152 + common/arm/ih264_resi_trans_a9.s | 604 + common/arm/ih264_resi_trans_quant_a9.s | 694 + common/arm/ih264_weighted_bi_pred_a9q.s | 642 + common/arm/ih264_weighted_pred_a9q.s | 479 + common/armv8/ih264_deblk_chroma_av8.s | 585 + common/armv8/ih264_deblk_luma_av8.s | 1084 ++ .../armv8/ih264_default_weighted_pred_av8.s | 353 + common/armv8/ih264_ihadamard_scaling_av8.s | 250 + common/armv8/ih264_inter_pred_chroma_av8.s | 392 + 
.../ih264_inter_pred_filters_luma_horz_av8.s | 530 + .../ih264_inter_pred_filters_luma_vert_av8.s | 452 + common/armv8/ih264_inter_pred_luma_copy_av8.s | 267 + ..._inter_pred_luma_horz_hpel_vert_hpel_av8.s | 820 ++ ..._inter_pred_luma_horz_hpel_vert_qpel_av8.s | 1120 ++ .../ih264_inter_pred_luma_horz_qpel_av8.s | 597 + ..._inter_pred_luma_horz_qpel_vert_hpel_av8.s | 910 ++ ..._inter_pred_luma_horz_qpel_vert_qpel_av8.s | 958 ++ .../ih264_inter_pred_luma_vert_qpel_av8.s | 511 + common/armv8/ih264_intra_pred_chroma_av8.s | 574 + .../armv8/ih264_intra_pred_luma_16x16_av8.s | 606 + common/armv8/ih264_intra_pred_luma_4x4_av8.s | 876 ++ common/armv8/ih264_intra_pred_luma_8x8_av8.s | 1084 ++ common/armv8/ih264_iquant_itrans_recon_av8.s | 778 ++ .../armv8/ih264_iquant_itrans_recon_dc_av8.s | 397 + common/armv8/ih264_mem_fns_neon_av8.s | 274 + common/armv8/ih264_neon_macros.s | 41 + common/armv8/ih264_padding_neon_av8.s | 784 ++ common/armv8/ih264_platform_macros.h | 152 + common/armv8/ih264_resi_trans_quant_av8.s | 731 ++ common/armv8/ih264_weighted_bi_pred_av8.s | 574 + common/armv8/ih264_weighted_pred_av8.s | 471 + common/ih264_buf_mgr.c | 696 + common/ih264_buf_mgr.h | 122 + common/ih264_cabac_tables.c | 10869 ++++++++++++++++ common/ih264_cabac_tables.h | 101 + common/ih264_cavlc_tables.c | 282 + common/ih264_cavlc_tables.h | 133 + common/ih264_chroma_intra_pred_filters.c | 478 + common/ih264_common_tables.c | 725 ++ common/ih264_common_tables.h | 136 + common/ih264_deblk_edge_filters.c | 2087 +++ common/ih264_deblk_edge_filters.h | 195 + common/ih264_deblk_tables.c | 119 + common/ih264_deblk_tables.h | 73 + common/ih264_debug.h | 61 + common/ih264_defs.h | 690 + common/ih264_disp_mgr.c | 186 + common/ih264_disp_mgr.h | 70 + common/ih264_dpb_mgr.c | 1176 ++ common/ih264_dpb_mgr.h | 186 + common/ih264_error.h | 68 + common/ih264_ihadamard_scaling.c | 216 + common/ih264_inter_pred_filters.c | 1042 ++ common/ih264_inter_pred_filters.h | 241 + 
common/ih264_intra_pred_filters.h | 331 + common/ih264_iquant_itrans_recon.c | 873 ++ common/ih264_itrans_recon.h | 71 + common/ih264_list.c | 574 + common/ih264_list.h | 93 + common/ih264_luma_intra_pred_filters.c | 1933 +++ common/ih264_macros.h | 110 + common/ih264_mem_fns.c | 176 + common/ih264_mem_fns.h | 126 + common/ih264_padding.c | 331 + common/ih264_padding.h | 74 + common/ih264_resi_trans.h | 70 + common/ih264_resi_trans_quant.c | 814 ++ common/ih264_size_defs.h | 85 + common/ih264_structs.h | 1722 +++ common/ih264_trans_data.c | 312 + common/ih264_trans_data.h | 125 + common/ih264_trans_macros.h | 124 + common/ih264_trans_quant_itrans_iquant.h | 232 + common/ih264_typedefs.h | 64 + common/ih264_weighted_pred.c | 495 + common/ih264_weighted_pred.h | 164 + common/ithread.c | 604 + common/ithread.h | 104 + common/mips/ih264_platform_macros.h | 102 + .../ih264_chroma_intra_pred_filters_ssse3.c | 433 + common/x86/ih264_deblk_chroma_ssse3.c | 1087 ++ common/x86/ih264_deblk_luma_ssse3.c | 2012 +++ common/x86/ih264_ihadamard_scaling_sse42.c | 238 + common/x86/ih264_ihadamard_scaling_ssse3.c | 200 + common/x86/ih264_inter_pred_filters_ssse3.c | 4375 +++++++ .../x86/ih264_iquant_itrans_recon_dc_ssse3.c | 437 + common/x86/ih264_iquant_itrans_recon_sse42.c | 554 + common/x86/ih264_iquant_itrans_recon_ssse3.c | 1035 ++ .../x86/ih264_luma_intra_pred_filters_ssse3.c | 2282 ++++ common/x86/ih264_mem_fns_ssse3.c | 169 + common/x86/ih264_padding_ssse3.c | 335 + common/x86/ih264_platform_macros.h | 114 + common/x86/ih264_resi_trans_quant_sse42.c | 984 ++ common/x86/ih264_weighted_pred_sse42.c | 1349 ++ decoder.arm.mk | 44 + decoder.arm64.mk | 46 + decoder.mips.mk | 6 + decoder.mips64.mk | 6 + decoder.mk | 76 + decoder.x86.mk | 26 + decoder.x86_64.mk | 30 + decoder/arm/ih264d_function_selector.c | 101 + decoder/arm/ih264d_function_selector_a9q.c | 200 + decoder/arm/ih264d_function_selector_av8.c | 191 + decoder/ih264d.h | 482 + decoder/ih264d_api.c | 4680 +++++++ 
decoder/ih264d_bitstrm.c | 181 + decoder/ih264d_bitstrm.h | 195 + decoder/ih264d_cabac.c | 779 ++ decoder/ih264d_cabac.h | 267 + decoder/ih264d_cabac_init_tables.c | 9273 +++++++++++++ decoder/ih264d_compute_bs.c | 2394 ++++ decoder/ih264d_deblocking.c | 2134 +++ decoder/ih264d_deblocking.h | 173 + decoder/ih264d_debug.c | 40 + decoder/ih264d_debug.h | 135 + decoder/ih264d_defs.h | 671 + decoder/ih264d_dpb_manager.h | 173 + decoder/ih264d_dpb_mgr.c | 1987 +++ decoder/ih264d_error_handler.h | 115 + decoder/ih264d_format_conv.c | 838 ++ decoder/ih264d_format_conv.h | 120 + decoder/ih264d_function_selector.h | 75 + decoder/ih264d_function_selector_generic.c | 222 + decoder/ih264d_inter_pred.c | 1614 +++ decoder/ih264d_inter_pred.h | 93 + decoder/ih264d_mb_utils.c | 1496 +++ decoder/ih264d_mb_utils.h | 293 + decoder/ih264d_mem_request.h | 82 + decoder/ih264d_mvpred.c | 1193 ++ decoder/ih264d_mvpred.h | 153 + decoder/ih264d_nal.c | 393 + decoder/ih264d_nal.h | 56 + decoder/ih264d_parse_bslice.c | 1696 +++ decoder/ih264d_parse_cabac.c | 1607 +++ decoder/ih264d_parse_cabac.h | 60 + decoder/ih264d_parse_cavlc.c | 2694 ++++ decoder/ih264d_parse_cavlc.h | 165 + decoder/ih264d_parse_headers.c | 1204 ++ decoder/ih264d_parse_headers.h | 46 + decoder/ih264d_parse_islice.c | 1479 +++ decoder/ih264d_parse_islice.h | 113 + decoder/ih264d_parse_mb_header.c | 1397 ++ decoder/ih264d_parse_mb_header.h | 88 + decoder/ih264d_parse_pslice.c | 1760 +++ decoder/ih264d_parse_slice.c | 1887 +++ decoder/ih264d_parse_slice.h | 47 + decoder/ih264d_process_bslice.c | 2345 ++++ decoder/ih264d_process_bslice.h | 108 + decoder/ih264d_process_intra_mb.c | 2006 +++ decoder/ih264d_process_intra_mb.h | 65 + decoder/ih264d_process_pslice.c | 1139 ++ decoder/ih264d_process_pslice.h | 69 + decoder/ih264d_quant_scaling.c | 274 + decoder/ih264d_quant_scaling.h | 37 + decoder/ih264d_sei.c | 386 + decoder/ih264d_sei.h | 91 + decoder/ih264d_structs.h | 1582 +++ decoder/ih264d_tables.c | 872 ++ 
decoder/ih264d_tables.h | 157 + decoder/ih264d_thread_compute_bs.c | 802 ++ decoder/ih264d_thread_compute_bs.h | 34 + decoder/ih264d_thread_parse_decode.c | 732 ++ decoder/ih264d_thread_parse_decode.h | 48 + decoder/ih264d_transfer_address.h | 45 + decoder/ih264d_utils.c | 2625 ++++ decoder/ih264d_utils.h | 101 + decoder/ih264d_vui.c | 233 + decoder/ih264d_vui.h | 96 + decoder/iv.h | 420 + decoder/ivd.h | 585 + decoder/mips/ih264d_function_selector.c | 66 + decoder/x86/ih264d_function_selector.c | 94 + decoder/x86/ih264d_function_selector_sse42.c | 95 + decoder/x86/ih264d_function_selector_ssse3.c | 181 + encoder.arm.mk | 47 + encoder.arm64.mk | 48 + encoder.mips.mk | 7 + encoder.mips64.mk | 7 + encoder.mk | 90 + encoder.x86.mk | 37 + encoder.x86_64.mk | 35 + .../ih264e_evaluate_intra16x16_modes_a9q.s | 313 + .../arm/ih264e_evaluate_intra4x4_modes_a9q.s | 529 + .../ih264e_evaluate_intra_chroma_modes_a9q.s | 346 + encoder/arm/ih264e_fmt_conv.s | 329 + encoder/arm/ih264e_function_selector.c | 170 + encoder/arm/ih264e_function_selector_a9q.c | 252 + encoder/arm/ih264e_function_selector_av8.c | 259 + encoder/arm/ih264e_half_pel.s | 951 ++ encoder/arm/ih264e_platform_macros.h | 143 + encoder/arm/ime_distortion_metrics_a9q.s | 1353 ++ encoder/arm/ime_platform_macros.h | 51 + .../ih264e_evaluate_intra16x16_modes_av8.s | 592 + .../ih264e_evaluate_intra_chroma_modes_av8.s | 467 + encoder/armv8/ih264e_half_pel_av8.s | 1024 ++ encoder/armv8/ih264e_platform_macros.h | 143 + encoder/armv8/ime_distortion_metrics_av8.s | 978 ++ encoder/armv8/ime_platform_macros.h | 51 + encoder/ih264e.h | 620 + encoder/ih264e_api.c | 5559 ++++++++ encoder/ih264e_bitstream.c | 472 + encoder/ih264e_bitstream.h | 401 + encoder/ih264e_cavlc.c | 1448 ++ encoder/ih264e_cavlc.h | 112 + encoder/ih264e_config.h | 52 + encoder/ih264e_core_coding.c | 2365 ++++ encoder/ih264e_core_coding.h | 653 + encoder/ih264e_deblk.c | 854 ++ encoder/ih264e_deblk.h | 99 + encoder/ih264e_debug.h | 65 + 
encoder/ih264e_defs.h | 538 + encoder/ih264e_encode.c | 580 + encoder/ih264e_encode_header.c | 1187 ++ encoder/ih264e_encode_header.h | 278 + encoder/ih264e_error.h | 229 + encoder/ih264e_fmt_conv.c | 864 ++ encoder/ih264e_fmt_conv.h | 142 + encoder/ih264e_function_selector_generic.c | 259 + encoder/ih264e_globals.c | 261 + encoder/ih264e_globals.h | 192 + encoder/ih264e_half_pel.c | 226 + encoder/ih264e_half_pel.h | 162 + encoder/ih264e_intra_modes_eval.c | 2296 ++++ encoder/ih264e_intra_modes_eval.h | 418 + encoder/ih264e_list.h | 42 + encoder/ih264e_master.h | 132 + encoder/ih264e_mc.c | 320 + encoder/ih264e_mc.h | 104 + encoder/ih264e_me.c | 1153 ++ encoder/ih264e_me.h | 278 + encoder/ih264e_modify_frm_rate.c | 240 + encoder/ih264e_modify_frm_rate.h | 182 + encoder/ih264e_process.c | 2369 ++++ encoder/ih264e_process.h | 364 + encoder/ih264e_rate_control.c | 801 ++ encoder/ih264e_rate_control.h | 351 + encoder/ih264e_rc_mem_interface.c | 395 + encoder/ih264e_rc_mem_interface.h | 179 + encoder/ih264e_statistics.h | 141 + encoder/ih264e_structs.h | 2566 ++++ encoder/ih264e_time_stamp.c | 748 ++ encoder/ih264e_time_stamp.h | 498 + encoder/ih264e_trace.h | 161 + encoder/ih264e_trace_support.h | 61 + encoder/ih264e_utils.c | 1804 +++ encoder/ih264e_utils.h | 327 + encoder/ih264e_version.c | 143 + encoder/ih264e_version.h | 64 + encoder/ime.c | 836 ++ encoder/ime.h | 209 + encoder/ime_defs.h | 59 + encoder/ime_distortion_metrics.c | 1262 ++ encoder/ime_distortion_metrics.h | 170 + encoder/ime_macros.h | 44 + encoder/ime_statistics.h | 86 + encoder/ime_structs.h | 305 + encoder/ime_typedefs.h | 50 + encoder/irc_bit_allocation.c | 859 ++ encoder/irc_bit_allocation.h | 99 + encoder/irc_cbr_buffer_control.c | 653 + encoder/irc_cbr_buffer_control.h | 104 + encoder/irc_cntrl_param.h | 59 + encoder/irc_common.h | 104 + encoder/irc_datatypes.h | 64 + encoder/irc_est_sad.c | 260 + encoder/irc_est_sad.h | 64 + encoder/irc_fixed_point_error_bits.c | 185 + 
encoder/irc_fixed_point_error_bits.h | 64 + encoder/irc_frame_info_collector.c | 177 + encoder/irc_frame_info_collector.h | 109 + encoder/irc_mb_model_based.c | 157 + encoder/irc_mb_model_based.h | 57 + encoder/irc_mem_req_and_acq.h | 179 + encoder/irc_picture_type.c | 1585 +++ encoder/irc_picture_type.h | 95 + encoder/irc_rate_control_api.c | 1600 +++ encoder/irc_rate_control_api.h | 188 + encoder/irc_rate_control_api_structs.h | 93 + encoder/irc_rd_model.c | 565 + encoder/irc_rd_model.h | 98 + encoder/irc_rd_model_struct.h | 75 + encoder/irc_trace_support.h | 61 + encoder/irc_vbr_storage_vbv.c | 368 + encoder/irc_vbr_storage_vbv.h | 119 + encoder/irc_vbr_str_prms.c | 199 + encoder/irc_vbr_str_prms.h | 65 + encoder/ithread.h | 101 + encoder/iv2.h | 386 + encoder/ive2.h | 1445 ++ encoder/mips/ih264e_function_selector.c | 110 + encoder/mips/ih264e_platform_macros.h | 135 + encoder/mips/ime_platform_macros.h | 52 + encoder/x86/ih264e_function_selector.c | 141 + encoder/x86/ih264e_function_selector_sse42.c | 146 + encoder/x86/ih264e_function_selector_ssse3.c | 190 + encoder/x86/ih264e_half_pel_ssse3.c | 487 + encoder/x86/ih264e_intra_modes_eval_ssse3.c | 1259 ++ encoder/x86/ih264e_platform_macros.h | 154 + encoder/x86/ime_distortion_metrics_sse42.c | 1940 +++ encoder/x86/ime_platform_macros.h | 52 + test/Android.mk | 8 + test/decoder.mk | 13 + test/decoder/main.c | 3196 +++++ test/encoder.mk | 13 + test/encoder/app.h | 348 + test/encoder/input.c | 312 + test/encoder/main.c | 2512 ++++ test/encoder/output.c | 109 + test/encoder/psnr.c | 242 + test/encoder/psnr.h | 62 + test/encoder/recon.c | 221 + 339 files changed, 204373 insertions(+) create mode 100755 Android.mk create mode 100755 common/arm/ih264_arm_memory_barrier.s create mode 100755 common/arm/ih264_deblk_chroma_a9.s create mode 100755 common/arm/ih264_deblk_luma_a9.s create mode 100755 common/arm/ih264_default_weighted_pred_a9q.s create mode 100755 common/arm/ih264_ihadamard_scaling_a9.s create mode 100755 
common/arm/ih264_inter_pred_chroma_a9q.s create mode 100755 common/arm/ih264_inter_pred_filters_luma_horz_a9q.s create mode 100755 common/arm/ih264_inter_pred_filters_luma_vert_a9q.s create mode 100755 common/arm/ih264_inter_pred_luma_bilinear_a9q.s create mode 100755 common/arm/ih264_inter_pred_luma_copy_a9q.s create mode 100755 common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s create mode 100755 common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s create mode 100755 common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s create mode 100755 common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s create mode 100755 common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s create mode 100755 common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s create mode 100755 common/arm/ih264_intra_pred_chroma_a9q.s create mode 100755 common/arm/ih264_intra_pred_luma_16x16_a9q.s create mode 100755 common/arm/ih264_intra_pred_luma_4x4_a9q.s create mode 100755 common/arm/ih264_intra_pred_luma_8x8_a9q.s create mode 100755 common/arm/ih264_iquant_itrans_recon_a9.s create mode 100755 common/arm/ih264_iquant_itrans_recon_dc_a9.s create mode 100755 common/arm/ih264_itrans_recon_a9.s create mode 100755 common/arm/ih264_mem_fns_neon.s create mode 100755 common/arm/ih264_padding_neon.s create mode 100755 common/arm/ih264_platform_macros.h create mode 100755 common/arm/ih264_resi_trans_a9.s create mode 100755 common/arm/ih264_resi_trans_quant_a9.s create mode 100755 common/arm/ih264_weighted_bi_pred_a9q.s create mode 100755 common/arm/ih264_weighted_pred_a9q.s create mode 100755 common/armv8/ih264_deblk_chroma_av8.s create mode 100755 common/armv8/ih264_deblk_luma_av8.s create mode 100755 common/armv8/ih264_default_weighted_pred_av8.s create mode 100755 common/armv8/ih264_ihadamard_scaling_av8.s create mode 100755 common/armv8/ih264_inter_pred_chroma_av8.s create mode 100755 common/armv8/ih264_inter_pred_filters_luma_horz_av8.s create mode 100755 
common/armv8/ih264_inter_pred_filters_luma_vert_av8.s create mode 100755 common/armv8/ih264_inter_pred_luma_copy_av8.s create mode 100755 common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s create mode 100755 common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s create mode 100755 common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s create mode 100755 common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s create mode 100755 common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s create mode 100755 common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s create mode 100755 common/armv8/ih264_intra_pred_chroma_av8.s create mode 100755 common/armv8/ih264_intra_pred_luma_16x16_av8.s create mode 100755 common/armv8/ih264_intra_pred_luma_4x4_av8.s create mode 100755 common/armv8/ih264_intra_pred_luma_8x8_av8.s create mode 100755 common/armv8/ih264_iquant_itrans_recon_av8.s create mode 100755 common/armv8/ih264_iquant_itrans_recon_dc_av8.s create mode 100755 common/armv8/ih264_mem_fns_neon_av8.s create mode 100755 common/armv8/ih264_neon_macros.s create mode 100755 common/armv8/ih264_padding_neon_av8.s create mode 100755 common/armv8/ih264_platform_macros.h create mode 100755 common/armv8/ih264_resi_trans_quant_av8.s create mode 100755 common/armv8/ih264_weighted_bi_pred_av8.s create mode 100755 common/armv8/ih264_weighted_pred_av8.s create mode 100755 common/ih264_buf_mgr.c create mode 100755 common/ih264_buf_mgr.h create mode 100755 common/ih264_cabac_tables.c create mode 100755 common/ih264_cabac_tables.h create mode 100755 common/ih264_cavlc_tables.c create mode 100755 common/ih264_cavlc_tables.h create mode 100755 common/ih264_chroma_intra_pred_filters.c create mode 100755 common/ih264_common_tables.c create mode 100755 common/ih264_common_tables.h create mode 100755 common/ih264_deblk_edge_filters.c create mode 100755 common/ih264_deblk_edge_filters.h create mode 100755 common/ih264_deblk_tables.c create mode 100755 common/ih264_deblk_tables.h 
create mode 100755 common/ih264_debug.h create mode 100755 common/ih264_defs.h create mode 100755 common/ih264_disp_mgr.c create mode 100755 common/ih264_disp_mgr.h create mode 100755 common/ih264_dpb_mgr.c create mode 100755 common/ih264_dpb_mgr.h create mode 100755 common/ih264_error.h create mode 100755 common/ih264_ihadamard_scaling.c create mode 100755 common/ih264_inter_pred_filters.c create mode 100755 common/ih264_inter_pred_filters.h create mode 100755 common/ih264_intra_pred_filters.h create mode 100755 common/ih264_iquant_itrans_recon.c create mode 100755 common/ih264_itrans_recon.h create mode 100755 common/ih264_list.c create mode 100755 common/ih264_list.h create mode 100755 common/ih264_luma_intra_pred_filters.c create mode 100755 common/ih264_macros.h create mode 100755 common/ih264_mem_fns.c create mode 100755 common/ih264_mem_fns.h create mode 100755 common/ih264_padding.c create mode 100755 common/ih264_padding.h create mode 100755 common/ih264_resi_trans.h create mode 100755 common/ih264_resi_trans_quant.c create mode 100755 common/ih264_size_defs.h create mode 100755 common/ih264_structs.h create mode 100755 common/ih264_trans_data.c create mode 100755 common/ih264_trans_data.h create mode 100755 common/ih264_trans_macros.h create mode 100755 common/ih264_trans_quant_itrans_iquant.h create mode 100755 common/ih264_typedefs.h create mode 100755 common/ih264_weighted_pred.c create mode 100755 common/ih264_weighted_pred.h create mode 100755 common/ithread.c create mode 100755 common/ithread.h create mode 100755 common/mips/ih264_platform_macros.h create mode 100755 common/x86/ih264_chroma_intra_pred_filters_ssse3.c create mode 100755 common/x86/ih264_deblk_chroma_ssse3.c create mode 100755 common/x86/ih264_deblk_luma_ssse3.c create mode 100755 common/x86/ih264_ihadamard_scaling_sse42.c create mode 100755 common/x86/ih264_ihadamard_scaling_ssse3.c create mode 100755 common/x86/ih264_inter_pred_filters_ssse3.c create mode 100755 
common/x86/ih264_iquant_itrans_recon_dc_ssse3.c create mode 100755 common/x86/ih264_iquant_itrans_recon_sse42.c create mode 100755 common/x86/ih264_iquant_itrans_recon_ssse3.c create mode 100755 common/x86/ih264_luma_intra_pred_filters_ssse3.c create mode 100755 common/x86/ih264_mem_fns_ssse3.c create mode 100755 common/x86/ih264_padding_ssse3.c create mode 100755 common/x86/ih264_platform_macros.h create mode 100755 common/x86/ih264_resi_trans_quant_sse42.c create mode 100755 common/x86/ih264_weighted_pred_sse42.c create mode 100755 decoder.arm.mk create mode 100755 decoder.arm64.mk create mode 100755 decoder.mips.mk create mode 100755 decoder.mips64.mk create mode 100755 decoder.mk create mode 100755 decoder.x86.mk create mode 100755 decoder.x86_64.mk create mode 100755 decoder/arm/ih264d_function_selector.c create mode 100755 decoder/arm/ih264d_function_selector_a9q.c create mode 100755 decoder/arm/ih264d_function_selector_av8.c create mode 100755 decoder/ih264d.h create mode 100755 decoder/ih264d_api.c create mode 100755 decoder/ih264d_bitstrm.c create mode 100755 decoder/ih264d_bitstrm.h create mode 100755 decoder/ih264d_cabac.c create mode 100755 decoder/ih264d_cabac.h create mode 100755 decoder/ih264d_cabac_init_tables.c create mode 100755 decoder/ih264d_compute_bs.c create mode 100755 decoder/ih264d_deblocking.c create mode 100755 decoder/ih264d_deblocking.h create mode 100755 decoder/ih264d_debug.c create mode 100755 decoder/ih264d_debug.h create mode 100755 decoder/ih264d_defs.h create mode 100755 decoder/ih264d_dpb_manager.h create mode 100755 decoder/ih264d_dpb_mgr.c create mode 100755 decoder/ih264d_error_handler.h create mode 100755 decoder/ih264d_format_conv.c create mode 100755 decoder/ih264d_format_conv.h create mode 100755 decoder/ih264d_function_selector.h create mode 100755 decoder/ih264d_function_selector_generic.c create mode 100755 decoder/ih264d_inter_pred.c create mode 100755 decoder/ih264d_inter_pred.h create mode 100755 
decoder/ih264d_mb_utils.c create mode 100755 decoder/ih264d_mb_utils.h create mode 100755 decoder/ih264d_mem_request.h create mode 100755 decoder/ih264d_mvpred.c create mode 100755 decoder/ih264d_mvpred.h create mode 100755 decoder/ih264d_nal.c create mode 100755 decoder/ih264d_nal.h create mode 100755 decoder/ih264d_parse_bslice.c create mode 100755 decoder/ih264d_parse_cabac.c create mode 100755 decoder/ih264d_parse_cabac.h create mode 100755 decoder/ih264d_parse_cavlc.c create mode 100755 decoder/ih264d_parse_cavlc.h create mode 100755 decoder/ih264d_parse_headers.c create mode 100755 decoder/ih264d_parse_headers.h create mode 100755 decoder/ih264d_parse_islice.c create mode 100755 decoder/ih264d_parse_islice.h create mode 100755 decoder/ih264d_parse_mb_header.c create mode 100755 decoder/ih264d_parse_mb_header.h create mode 100755 decoder/ih264d_parse_pslice.c create mode 100755 decoder/ih264d_parse_slice.c create mode 100755 decoder/ih264d_parse_slice.h create mode 100755 decoder/ih264d_process_bslice.c create mode 100755 decoder/ih264d_process_bslice.h create mode 100755 decoder/ih264d_process_intra_mb.c create mode 100755 decoder/ih264d_process_intra_mb.h create mode 100755 decoder/ih264d_process_pslice.c create mode 100755 decoder/ih264d_process_pslice.h create mode 100755 decoder/ih264d_quant_scaling.c create mode 100755 decoder/ih264d_quant_scaling.h create mode 100755 decoder/ih264d_sei.c create mode 100755 decoder/ih264d_sei.h create mode 100755 decoder/ih264d_structs.h create mode 100755 decoder/ih264d_tables.c create mode 100755 decoder/ih264d_tables.h create mode 100755 decoder/ih264d_thread_compute_bs.c create mode 100755 decoder/ih264d_thread_compute_bs.h create mode 100755 decoder/ih264d_thread_parse_decode.c create mode 100755 decoder/ih264d_thread_parse_decode.h create mode 100755 decoder/ih264d_transfer_address.h create mode 100755 decoder/ih264d_utils.c create mode 100755 decoder/ih264d_utils.h create mode 100755 decoder/ih264d_vui.c create 
mode 100755 decoder/ih264d_vui.h create mode 100755 decoder/iv.h create mode 100755 decoder/ivd.h create mode 100755 decoder/mips/ih264d_function_selector.c create mode 100755 decoder/x86/ih264d_function_selector.c create mode 100755 decoder/x86/ih264d_function_selector_sse42.c create mode 100755 decoder/x86/ih264d_function_selector_ssse3.c create mode 100755 encoder.arm.mk create mode 100755 encoder.arm64.mk create mode 100755 encoder.mips.mk create mode 100755 encoder.mips64.mk create mode 100755 encoder.mk create mode 100755 encoder.x86.mk create mode 100755 encoder.x86_64.mk create mode 100755 encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s create mode 100755 encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s create mode 100755 encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s create mode 100755 encoder/arm/ih264e_fmt_conv.s create mode 100755 encoder/arm/ih264e_function_selector.c create mode 100755 encoder/arm/ih264e_function_selector_a9q.c create mode 100755 encoder/arm/ih264e_function_selector_av8.c create mode 100755 encoder/arm/ih264e_half_pel.s create mode 100755 encoder/arm/ih264e_platform_macros.h create mode 100755 encoder/arm/ime_distortion_metrics_a9q.s create mode 100755 encoder/arm/ime_platform_macros.h create mode 100755 encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s create mode 100755 encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s create mode 100755 encoder/armv8/ih264e_half_pel_av8.s create mode 100755 encoder/armv8/ih264e_platform_macros.h create mode 100755 encoder/armv8/ime_distortion_metrics_av8.s create mode 100755 encoder/armv8/ime_platform_macros.h create mode 100755 encoder/ih264e.h create mode 100755 encoder/ih264e_api.c create mode 100755 encoder/ih264e_bitstream.c create mode 100755 encoder/ih264e_bitstream.h create mode 100755 encoder/ih264e_cavlc.c create mode 100755 encoder/ih264e_cavlc.h create mode 100755 encoder/ih264e_config.h create mode 100755 encoder/ih264e_core_coding.c create mode 100755 
encoder/ih264e_core_coding.h create mode 100755 encoder/ih264e_deblk.c create mode 100755 encoder/ih264e_deblk.h create mode 100755 encoder/ih264e_debug.h create mode 100755 encoder/ih264e_defs.h create mode 100755 encoder/ih264e_encode.c create mode 100755 encoder/ih264e_encode_header.c create mode 100755 encoder/ih264e_encode_header.h create mode 100755 encoder/ih264e_error.h create mode 100755 encoder/ih264e_fmt_conv.c create mode 100755 encoder/ih264e_fmt_conv.h create mode 100755 encoder/ih264e_function_selector_generic.c create mode 100755 encoder/ih264e_globals.c create mode 100755 encoder/ih264e_globals.h create mode 100755 encoder/ih264e_half_pel.c create mode 100755 encoder/ih264e_half_pel.h create mode 100755 encoder/ih264e_intra_modes_eval.c create mode 100755 encoder/ih264e_intra_modes_eval.h create mode 100755 encoder/ih264e_list.h create mode 100755 encoder/ih264e_master.h create mode 100755 encoder/ih264e_mc.c create mode 100755 encoder/ih264e_mc.h create mode 100755 encoder/ih264e_me.c create mode 100755 encoder/ih264e_me.h create mode 100755 encoder/ih264e_modify_frm_rate.c create mode 100755 encoder/ih264e_modify_frm_rate.h create mode 100755 encoder/ih264e_process.c create mode 100755 encoder/ih264e_process.h create mode 100755 encoder/ih264e_rate_control.c create mode 100755 encoder/ih264e_rate_control.h create mode 100755 encoder/ih264e_rc_mem_interface.c create mode 100755 encoder/ih264e_rc_mem_interface.h create mode 100755 encoder/ih264e_statistics.h create mode 100755 encoder/ih264e_structs.h create mode 100755 encoder/ih264e_time_stamp.c create mode 100755 encoder/ih264e_time_stamp.h create mode 100755 encoder/ih264e_trace.h create mode 100755 encoder/ih264e_trace_support.h create mode 100755 encoder/ih264e_utils.c create mode 100755 encoder/ih264e_utils.h create mode 100755 encoder/ih264e_version.c create mode 100755 encoder/ih264e_version.h create mode 100755 encoder/ime.c create mode 100755 encoder/ime.h create mode 100755 
encoder/ime_defs.h create mode 100755 encoder/ime_distortion_metrics.c create mode 100755 encoder/ime_distortion_metrics.h create mode 100755 encoder/ime_macros.h create mode 100755 encoder/ime_statistics.h create mode 100755 encoder/ime_structs.h create mode 100755 encoder/ime_typedefs.h create mode 100755 encoder/irc_bit_allocation.c create mode 100755 encoder/irc_bit_allocation.h create mode 100755 encoder/irc_cbr_buffer_control.c create mode 100755 encoder/irc_cbr_buffer_control.h create mode 100755 encoder/irc_cntrl_param.h create mode 100755 encoder/irc_common.h create mode 100755 encoder/irc_datatypes.h create mode 100755 encoder/irc_est_sad.c create mode 100755 encoder/irc_est_sad.h create mode 100755 encoder/irc_fixed_point_error_bits.c create mode 100755 encoder/irc_fixed_point_error_bits.h create mode 100755 encoder/irc_frame_info_collector.c create mode 100755 encoder/irc_frame_info_collector.h create mode 100755 encoder/irc_mb_model_based.c create mode 100755 encoder/irc_mb_model_based.h create mode 100755 encoder/irc_mem_req_and_acq.h create mode 100755 encoder/irc_picture_type.c create mode 100755 encoder/irc_picture_type.h create mode 100755 encoder/irc_rate_control_api.c create mode 100755 encoder/irc_rate_control_api.h create mode 100755 encoder/irc_rate_control_api_structs.h create mode 100755 encoder/irc_rd_model.c create mode 100755 encoder/irc_rd_model.h create mode 100755 encoder/irc_rd_model_struct.h create mode 100755 encoder/irc_trace_support.h create mode 100755 encoder/irc_vbr_storage_vbv.c create mode 100755 encoder/irc_vbr_storage_vbv.h create mode 100755 encoder/irc_vbr_str_prms.c create mode 100755 encoder/irc_vbr_str_prms.h create mode 100755 encoder/ithread.h create mode 100755 encoder/iv2.h create mode 100755 encoder/ive2.h create mode 100755 encoder/mips/ih264e_function_selector.c create mode 100755 encoder/mips/ih264e_platform_macros.h create mode 100755 encoder/mips/ime_platform_macros.h create mode 100755 
encoder/x86/ih264e_function_selector.c create mode 100755 encoder/x86/ih264e_function_selector_sse42.c create mode 100755 encoder/x86/ih264e_function_selector_ssse3.c create mode 100755 encoder/x86/ih264e_half_pel_ssse3.c create mode 100755 encoder/x86/ih264e_intra_modes_eval_ssse3.c create mode 100755 encoder/x86/ih264e_platform_macros.h create mode 100755 encoder/x86/ime_distortion_metrics_sse42.c create mode 100755 encoder/x86/ime_platform_macros.h create mode 100755 test/Android.mk create mode 100755 test/decoder.mk create mode 100755 test/decoder/main.c create mode 100755 test/encoder.mk create mode 100755 test/encoder/app.h create mode 100755 test/encoder/input.c create mode 100755 test/encoder/main.c create mode 100755 test/encoder/output.c create mode 100755 test/encoder/psnr.c create mode 100755 test/encoder/psnr.h create mode 100755 test/encoder/recon.c diff --git a/Android.mk b/Android.mk new file mode 100755 index 0000000..0085832 --- /dev/null +++ b/Android.mk @@ -0,0 +1,8 @@ +LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +# encoder +include $(LOCAL_PATH)/encoder.mk + +# decoder +include $(LOCAL_PATH)/decoder.mk diff --git a/common/arm/ih264_arm_memory_barrier.s b/common/arm/ih264_arm_memory_barrier.s new file mode 100755 index 0000000..523218f --- /dev/null +++ b/common/arm/ih264_arm_memory_barrier.s @@ -0,0 +1,77 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@******************************************************************************* +@* @file +@* ih264_arm_memory_barrier.s +@* +@* @brief +@* Contains function definitions for data synchronization. +@* +@* @author +@* Ittiam +@* +@* @par List of Functions: +@* ih264_arm_dsb(), ih264_arm_dmb() +@* +@* @remarks +@* None +@* +@******************************************************************************* + +.text +.p2align 2 + + +@***************************************************************************** +@* +@* Function Name : ih264_arm_dsb +@* Description : Executes a DSB (data synchronization barrier) and returns +@* Revision History : +@* DD MM YYYY Author(s) Changes +@* 03 07 2008 100355 First version +@* +@***************************************************************************** + + .global ih264_arm_dsb +ih264_arm_dsb: + dsb + bx lr + + + +@***************************************************************************** +@* +@* Function Name : ih264_arm_dmb +@* Description : Executes a DMB (data memory barrier) and returns +@* Revision History : +@* DD MM YYYY Author(s) Changes +@* 03 07 2008 100355 First version +@* +@***************************************************************************** + + .global ih264_arm_dmb + +ih264_arm_dmb: + dmb + bx lr + + + diff --git a/common/arm/ih264_deblk_chroma_a9.s b/common/arm/ih264_deblk_chroma_a9.s new file mode 100755 index 0000000..66102a7 --- /dev/null +++ b/common/arm/ih264_deblk_chroma_a9.s @@ -0,0 +1,1337 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. 
+@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/*****************************************************************************/ +@/* */ +@/* File Name : ih264_deblk_chroma_a9.s */ +@/* */ +@/* Description : Contains function definitions for deblocking chroma */ +@/* edge. Functions are coded in NEON assembly and can */ +@/* be compiled using ARM RVDS. */ +@/* */ +@/* List of Functions : ih264_deblk_chroma_vert_bs4_bp_a9() */ +@/* ih264_deblk_chroma_vert_bslt4_bp_a9() */ +@/* ih264_deblk_chroma_horz_bs4_bp_a9() */ +@/* ih264_deblk_chroma_horz_bslt4_bp_a9() */ +@/* ih264_deblk_chroma_vert_bs4_mbaff_bp_a9() */ +@/* ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9() */ +@/* ih264_deblk_chroma_vert_bs4_a9() */ +@/* ih264_deblk_chroma_vert_bslt4_a9() */ +@/* ih264_deblk_chroma_horz_bs4_a9() */ +@/* ih264_deblk_chroma_horz_bslt4_a9() */ +@/* ih264_deblk_chroma_vert_bs4_mbaff_a9() */ +@/* ih264_deblk_chroma_vert_bslt4_mbaff_a9() */ +@/* */ +@/* Issues / Problems : None */ +@/* */ +@/* Revision History : */ +@/* */ +@/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +@/* 28 11 2013 Ittiam Draft */ +@/* 05 01 2015 Kaushik Added double-call functions for */ +@/* Senthoor vertical deblocking, and high */ +@/* profile functions.
*/ +@/* */ +@/*****************************************************************************/ + + +.text +.p2align 2 + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a chroma block horizontal edge when the +@* boundary strength is set to 4 +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.4 under the title +@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. +@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 +@* +@* @param[in] r1 - src_strd +@* Source stride +@* +@* @param[in] r2 - alpha +@* Alpha Value for the boundary +@* +@* @param[in] r3 - beta +@* Beta Value for the boundary +@* +@* @returns +@* None +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_chroma_horz_bs4_bp_a9 + +ih264_deblk_chroma_horz_bs4_bp_a9: + + stmfd sp!, {r4, lr} @ + vpush {d8 - d15} + sub r0, r0, r1, lsl #1 @R0 = uc_edgePixel pointing to p1 of chroma + vld2.8 {d6, d7}, [r0], r1 @D6 = p1u , D7 = p1v + mov r4, r0 @Keeping a backup of the pointer p0 of chroma + vld2.8 {d4, d5}, [r0], r1 @D4 = p0u , D5 = p0v + vdup.8 q10, r2 @Q10 contains alpha + vld2.8 {d0, d1}, [r0], r1 @D0 = q0u , D1 = q0v + vaddl.u8 q4, d6, d0 @ + vaddl.u8 q5, d7, d1 @Q4,Q5 = q0 + p1 + vmov.i8 d31, #2 @ + vld2.8 {d2, d3}, [r0] @D2 = q1u , D3 = q1v + vabd.u8 q13, q3, q2 @Q13 = ABS(p1 - p0) + vmlal.u8 q4, d2, d31 @ + vmlal.u8 q5, d3, d31 @Q5,Q4 = (X2(q1U) + q0U + p1U) + vabd.u8 q11, q2, q0 @Q11 = ABS(p0 - q0) + vabd.u8 q12, q1, q0 @Q12 = ABS(q1 - q0) + vaddl.u8 q7, d4, d2 @ + vaddl.u8 q14, d5, d3 @Q14,Q7 = P0 + Q1 + vdup.8 q8, r3 @Q8 contains beta + vmlal.u8 q7, d6, d31 @ + vmlal.u8 q14, d7, d31 @Q14,Q7 = (X2(p1U) + p0U + q1U) + vcge.u8 q9, q11, q10 @Q9 = ( ABS(p0 - q0) >= Alpha ) + vcge.u8 q12, q12, q8 @Q12= ( ABS(q1 - q0) >= Beta ) + vcge.u8 q13, q13, q8 @Q13= ( ABS(p1 - p0) >= Beta ) + vrshrn.u16 d8, 
q4, #2 @ + vrshrn.u16 d9, q5, #2 @Q4 = (X2(q1U) + q0U + p1U + 2) >> 2 + vorr q9, q9, q12 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) + vrshrn.u16 d10, q7, #2 @ + vrshrn.u16 d11, q14, #2 @Q5 = (X2(p1U) + p0U + q1U + 2) >> 2 + vorr q9, q9, q13 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta ) + vbit q5, q2, q9 @ + vbit q4, q0, q9 @ + vst2.8 {d10, d11}, [r4], r1 @ + vst2.8 {d8, d9}, [r4] @ + vpop {d8 - d15} + ldmfd sp!, {r4, pc} @ + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a chroma block vertical edge when the +@* boundary strength is set to 4 +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.4 under the title +@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. +@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 +@* +@* @param[in] r1 - src_strd +@* Source stride +@* +@* @param[in] r2 - alpha +@* Alpha Value for the boundary +@* +@* @param[in] r3 - beta +@* Beta Value for the boundary +@* +@* @returns +@* None +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_chroma_vert_bs4_bp_a9 + +ih264_deblk_chroma_vert_bs4_bp_a9: + + stmfd sp!, {r12, r14} + vpush {d8 - d15} + sub r0, r0, #4 @point r0 to p1u of row0. 
+ mov r12, r0 @keep a back up of r0 for buffer write + + vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1 + vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1 + vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1 + vld4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1 + + vld4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1 + vld4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1 + vld4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1 + vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1 + + vdup.8 q11, r2 @Q11 = alpha + vdup.8 q12, r3 @Q12 = beta + vmov.i8 d31, #2 + + vabd.u8 q4, q1, q2 @|p0-q0| + vabd.u8 q5, q3, q2 @|q1-q0| + vabd.u8 q6, q0, q1 @|p1-p0| + vaddl.u8 q7, d2, d6 + vaddl.u8 q8, d3, d7 @(p0 + q1) + vclt.u8 q4, q4, q11 @|p0-q0| < alpha ? + vclt.u8 q5, q5, q12 @|q1-q0| < beta ? + vclt.u8 q6, q6, q12 @|p1-p0| < beta ? + vmlal.u8 q7, d0, d31 + vmlal.u8 q8, d1, d31 @2*p1 + (p0 + q1) + vaddl.u8 q9, d0, d4 + vaddl.u8 q10, d1, d5 @(p1 + q0) + vand.u8 q4, q4, q5 @|p0-q0| < alpha && |q1-q0| < beta + vmlal.u8 q9, d6, d31 + vmlal.u8 q10, d7, d31 @2*q1 + (p1 + q0) + + vrshrn.i16 d14, q7, #2 + vrshrn.i16 d15, q8, #2 @(2*p1 + (p0 + q1) + 2) >> 2 + vand.u8 q4, q4, q6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta + vrshrn.i16 d18, q9, #2 + vrshrn.i16 d19, q10, #2 @(2*q1 + (p1 + q0) + 2) >> 2 + + vbit q1, q7, q4 + vbit q2, q9, q4 + + vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r12], r1 + vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r12], r1 + vst4.16 {d0[2], d2[2], d4[2], d6[2]}, [r12], r1 + vst4.16 {d0[3], d2[3], d4[3], d6[3]}, [r12], r1 + + vst4.16 {d1[0], d3[0], d5[0], d7[0]}, [r12], r1 + vst4.16 {d1[1], d3[1], d5[1], d7[1]}, [r12], r1 + vst4.16 {d1[2], d3[2], d5[2], d7[2]}, [r12], r1 + vst4.16 {d1[3], d3[3], d5[3], d7[3]}, [r12], r1 + vpop {d8 - d15} + ldmfd sp!, {r12, pc} + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a chroma block horizontal edge for cases where the +@* boundary strength is less than 4 +@* +@* @par
Description: +@* This operation is described in Sec. 8.7.2.3 under the title +@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264. +@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 +@* +@* @param[in] r1 - src_strd +@* Source stride +@* +@* @param[in] r2 - alpha +@* Alpha Value for the boundary +@* +@* @param[in] r3 - beta +@* Beta Value for the boundary +@* +@* @param[in] sp(0) - u4_bs +@* Packed Boundary strength array +@* +@* @param[in] sp(4) - pu1_cliptab +@* tc0_table +@* +@* @returns +@* None +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_chroma_horz_bslt4_bp_a9 + +ih264_deblk_chroma_horz_bslt4_bp_a9: + + stmfd sp!, {r4-r6, lr} @ + + ldrd r4, r5, [sp, #0x10] @r4 = u4_bs , r5 = pu1_cliptab + vpush {d8 - d15} + sub r0, r0, r1, lsl #1 @R0 = uc_edgePixelU pointing to p1 of chroma U + rev r4, r4 @ + vmov.32 d12[0], r4 @d12[0] = ui_Bs + vld1.32 d16[0], [r5] @D16[0] contains cliptab + vld2.8 {d6, d7}, [r0], r1 @Q3=p1 + vtbl.8 d14, {d16}, d12 @ + vmovl.u8 q6, d12 @q6 = uc_Bs in each 16 bit scalar + mov r6, r0 @Keeping a backup of the pointer to chroma U P0 + vld2.8 {d4, d5}, [r0], r1 @Q2=p0 + vmov.i8 d30, #1 @ + vdup.8 q10, r2 @Q10 contains alpha + vld2.8 {d0, d1}, [r0], r1 @Q0=q0 + vmovl.u8 q7, d14 @ + vld2.8 {d2, d3}, [r0] @Q1=q1 + vsubl.u8 q5, d1, d5 @ + vsubl.u8 q4, d0, d4 @Q5,Q4 = (q0 - p0) + vabd.u8 q13, q3, q2 @Q13 = ABS(p1 - p0) + vshl.i16 q5, q5, #2 @Q5 = (q0 - p0)<<2 + vabd.u8 q11, q2, q0 @Q11 = ABS(p0 - q0) + vshl.i16 q4, q4, #2 @Q4 = (q0 - p0)<<2 + vsli.16 q7, q7, #8 @ + vabd.u8 q12, q1, q0 @Q12 = ABS(q1 - q0) + vcge.u8 q9, q11, q10 @Q9 = ( ABS(p0 - q0) >= Alpha ) + vsubl.u8 q10, d6, d2 @Q10 = (p1 - q1)L + vsubl.u8 q3, d7, d3 @Q3 = (p1 - q1)H + vdup.8 q8, r3 @Q8 contains beta + vadd.i16 q4, q4, q10 @ + vadd.i16 q5, q5, q3 @Q5,Q4 = [ (q0 - p0)<<2 ] + (p1 - q1) + vcge.u8 q12, q12, q8 @Q12= ( ABS(q1 - q0) >= Beta ) + vcgt.s16 d12,
d12, #0 @Q6 = (us_Bs > 0) + vqrshrn.s16 d8, q4, #3 @ + vqrshrn.s16 d9, q5, #3 @Q4 = i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3 + vadd.i8 d14, d14, d30 @Q7 = C = C0+1 + vcge.u8 q13, q13, q8 @Q13= ( ABS(p1 - p0) >= Beta ) + vorr q9, q9, q12 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) + vabs.s8 q3, q4 @Q3 = ABS (i_macro) + vmov.i8 d15, d14 @ + vmov.i8 d13, d12 @ + vorr q9, q9, q13 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta ) + vmin.u8 q7, q3, q7 @Q7 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro) + vbic q6, q6, q9 @final condition + vcge.s8 q4, q4, #0 @Q4 = (i_macro >= 0) + vand q7, q7, q6 @Making delta zero in places where values should not be filtered + vqadd.u8 q8, q2, q7 @Q8 = p0 + delta + vqsub.u8 q2, q2, q7 @Q2 = p0 - delta + vqadd.u8 q9, q0, q7 @Q9 = q0 + delta + vqsub.u8 q0, q0, q7 @Q0 = q0 - delta + vbif q8, q2, q4 @Q8 = (i_macro >= 0 ) ? (p0+delta) : (p0-delta) + vbif q0, q9, q4 @Q0 = (i_macro >= 0 ) ? (q0-delta) : (q0+delta) + vst2.8 {d16, d17}, [r6], r1 @ + vst2.8 {d0, d1}, [r6] @ + vpop {d8 - d15} + ldmfd sp!, {r4-r6, pc} @ + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a chroma block vertical edge for cases where the +@* boundary strength is less than 4 +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.3 under the title +@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 +@* +@* @param[in] r1 - src_strd +@* Source stride +@* +@* @param[in] r2 - alpha +@* Alpha Value for the boundary +@* +@* @param[in] r3 - beta +@* Beta Value for the boundary +@* +@* @param[in] sp(0) - u4_bs +@* Packed Boundary strength array +@* +@* @param[in] sp(4) - pu1_cliptab +@* tc0_table +@* +@* @returns +@* None +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_chroma_vert_bslt4_bp_a9 + +ih264_deblk_chroma_vert_bslt4_bp_a9: + + stmfd sp!, {r10-r12, r14} + + sub r0, r0, #4 @point r0 to p1u of row0. + ldr r11, [sp, #16] @r11 = ui_Bs + + ldr r10, [sp, #20] @r10 = puc_ClipTab + mov r12, r0 @keep a back up of r0 for buffer write + vpush {d8 - d15} + vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1 + vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1 + vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1 + vld4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1 + + vld4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1 + vld4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1 + vld4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1 + vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1 + + + vdup.8 q11, r2 @Q11 = alpha + vabd.u8 q4, q1, q2 @|p0-q0| + vdup.8 q12, r3 @Q12 = beta + vabd.u8 q5, q3, q2 @|q1-q0| + vabd.u8 q6, q0, q1 @|p1-p0| + vclt.u8 q4, q4, q11 @|p0-q0| < alpha ? + vsubl.u8 q7, d0, d6 + vclt.u8 q5, q5, q12 @|q1-q0| < beta ? + vsubl.u8 q8, d1, d7 @(p1 - q1) + vclt.u8 q6, q6, q12 @|p1-p0| < beta ?
+ vsubl.u8 q9, d4, d2 + vand.u8 q4, q4, q5 @|p0-q0| < alpha && |q1-q0| < beta + vsubl.u8 q10, d5, d3 @(q0 - p0) + vmov.u16 q14, #4 + vld1.32 {d24[0]}, [r10] @Load ClipTable + rev r11, r11 @Blocking strengths + vand.u8 q4, q4, q6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta + + vmov.32 d10[0], r11 + + vmla.s16 q7, q9, q14 + vmla.s16 q8, q10, q14 @4*(q0 - p0) + (p1 - q1) + + vmovl.u8 q5, d10 + + + vsli.u16 d10, d10, #8 + vmovl.u16 q5, d10 + vsli.u32 q5, q5, #16 + vtbl.8 d12, {d24}, d10 + vtbl.8 d13, {d24}, d11 @tC0 + vmov.u8 q12, #1 + vadd.u8 q6, q6, q12 @tC0 + 1 + vcge.u8 q5, q5, q12 @u4_bS > 0 ? + vand.u8 q4, q4, q5 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0 + + @ Q0 - Q3(inputs), + @ Q4 (|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0), + @ Q6 (tC) + + vrshr.s16 q7, q7, #3 + vrshr.s16 q8, q8, #3 @(((q0 - p0) << 2) + (p1 - q1) + 4) >> 3) + + vcgt.s16 q9, q7, #0 + vcgt.s16 q10, q8, #0 + vmovn.i16 d18, q9 + vmovn.i16 d19, q10 @Q9 = sign(delta) + vabs.s16 q7, q7 + vabs.s16 q8, q8 + vmovn.u16 d14, q7 + vmovn.u16 d15, q8 + vmin.u8 q7, q7, q6 @Q7 = |delta| + + vqadd.u8 q10, q1, q7 @p0+|delta| + vqadd.u8 q11, q2, q7 @q0+|delta| + vqsub.u8 q12, q1, q7 @p0-|delta| + vqsub.u8 q13, q2, q7 @q0-|delta| + + vbit q12, q10, q9 @p0 + delta + vbit q11, q13, q9 @q0 - delta + + vbit q1, q12, q4 + vbit q2, q11, q4 + + vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r12], r1 + vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r12], r1 + vst4.16 {d0[2], d2[2], d4[2], d6[2]}, [r12], r1 + vst4.16 {d0[3], d2[3], d4[3], d6[3]}, [r12], r1 + + vst4.16 {d1[0], d3[0], d5[0], d7[0]}, [r12], r1 + vst4.16 {d1[1], d3[1], d5[1], d7[1]}, [r12], r1 + vst4.16 {d1[2], d3[2], d5[2], d7[2]}, [r12], r1 + vst4.16 {d1[3], d3[3], d5[3], d7[3]}, [r12], r1 + vpop {d8 - d15} + ldmfd sp!, {r10-r12, pc} + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a chroma block vertical edge when the +@* boundary 
strength is set to 4 on calling twice +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.4 under the title +@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. +@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 +@* +@* @param[in] r1 - src_strd +@* Source stride +@* +@* @param[in] r2 - alpha +@* Alpha Value for the boundary +@* +@* @param[in] r3 - beta +@* Beta Value for the boundary +@* +@* @returns +@* None +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_chroma_vert_bs4_mbaff_bp_a9 + +ih264_deblk_chroma_vert_bs4_mbaff_bp_a9: + + stmfd sp!, {r12, r14} + vpush {d8 - d15} + sub r0, r0, #4 @point r0 to p1u of row0. + mov r12, r0 @keep a back up of r0 for buffer write + + vld4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0], r1 + vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r0], r1 + vld4.16 {d0[2], d1[2], d2[2], d3[2]}, [r0], r1 + vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0], r1 + + vdup.8 d11, r2 @D11 = alpha + vdup.8 d12, r3 @D12 = beta + vmov.i8 d31, #2 + + vabd.u8 d4, d1, d2 @|p0-q0| + vabd.u8 d5, d3, d2 @|q1-q0| + vabd.u8 d6, d0, d1 @|p1-p0| + vaddl.u8 q14, d1, d3 @(p0 + q1) + vclt.u8 d4, d4, d11 @|p0-q0| < alpha ? + vclt.u8 d5, d5, d12 @|q1-q0| < beta ? + vclt.u8 d6, d6, d12 @|p1-p0| < beta ? 
+ vmlal.u8 q14, d0, d31 @2*p1 + (p0 + q1) + vaddl.u8 q13, d0, d2 @(p1 + q0) + vand.u8 d4, d4, d5 @|p0-q0| < alpha && |q1-q0| < beta + vmlal.u8 q13, d3, d31 @2*q1 + (p1 + q0) + + vrshrn.i16 d7, q14, #2 @(2*p1 + (p0 + q1) + 2) >> 2 + vand.u8 d4, d4, d6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta + vrshrn.i16 d9, q13, #2 @(2*q1 + (p1 + q0) + 2) >> 2 + + vbit d1, d7, d4 + vbit d2, d9, d4 + + vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r12], r1 + vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r12], r1 + vst4.16 {d0[2], d1[2], d2[2], d3[2]}, [r12], r1 + vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r12], r1 + vpop {d8 - d15} + ldmfd sp!, {r12, pc} + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a chroma block vertical edge for cases where the +@* boundary strength is less than 4 on calling twice +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.3 under the title +@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264. +@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 +@* +@* @param[in] r1 - src_strd +@* Source stride +@* +@* @param[in] r2 - alpha +@* Alpha Value for the boundary +@* +@* @param[in] r3 - beta +@* Beta Value for the boundary +@* +@* @param[in] sp(0) - u4_bs +@* Packed Boundary strength array +@* +@* @param[in] sp(4) - pu1_cliptab +@* tc0_table +@* +@* @returns +@* None +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9 + +ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9: + + stmfd sp!, {r10-r12, r14} + + sub r0, r0, #4 @point r0 to p1u of row0.
+ ldr r11, [sp, #16] @r11 = ui_Bs + + ldr r10, [sp, #20] @r10 = puc_ClipTab + mov r12, r0 @keep a back up of r0 for buffer write + vpush {d8 - d15} + vld4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0], r1 + vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r0], r1 + vld4.16 {d0[2], d1[2], d2[2], d3[2]}, [r0], r1 + vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0], r1 + + vdup.8 d11, r2 @D11 = alpha + vabd.u8 d4, d1, d2 @|p0-q0| + vdup.8 d12, r3 @D12 = beta + vabd.u8 d5, d3, d2 @|q1-q0| + vabd.u8 d6, d0, d1 @|p1-p0| + vclt.u8 d4, d4, d11 @|p0-q0| < alpha ? + vclt.u8 d5, d5, d12 @|q1-q0| < beta ? + vsubl.u8 q14, d0, d3 @(p1 - q1) + vclt.u8 d6, d6, d12 @|p1-p0| < beta ? + vand.u8 d4, d4, d5 @|p0-q0| < alpha && |q1-q0| < beta + vsubl.u8 q12, d2, d1 @(q0 - p0) + vmov.u16 q10, #4 + + vld1.32 {d31[0]}, [r10] @Load ClipTable + rev r11, r11 @Blocking strengths + vand.u8 d4, d4, d6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta + vmov.32 d22[0], r11 + vmla.s16 q14, q12, q10 @4*(q0 - p0) + (p1 - q1) + vmovl.u8 q11, d22 + vsli.u16 d22, d22, #8 + vtbl.8 d6, {d31}, d22 @tC0 + vmov.u8 d12, #1 + vadd.u8 d6, d6, d12 @tC0 + 1 + vcge.u8 d5, d22, d12 @u4_bS > 0 ? 
+ vand.u8 d4, d4, d5 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0 + + @ D0 - D3(inputs), + @ D4 (|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0), + @ D6 (tC) + + vrshr.s16 q14, q14, #3 @(((q0 - p0) << 2) + (p1 - q1) + 4) >> 3) + + vcgt.s16 q13, q14, #0 + vmovn.i16 d9, q13 @D9 = sign(delta) + vabs.s16 q14, q14 + vmovn.u16 d7, q14 + vmin.u8 d7, d7, d6 @D7 = |delta| + + vqadd.u8 d10, d1, d7 @p0+|delta| + vqadd.u8 d11, d2, d7 @q0+|delta| + vqsub.u8 d12, d1, d7 @p0-|delta| + vqsub.u8 d13, d2, d7 @q0-|delta| + + vbit d12, d10, d9 @p0 + delta + vbit d11, d13, d9 @q0 - delta + + vbit d1, d12, d4 + vbit d2, d11, d4 + + vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r12], r1 + vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r12], r1 + vst4.16 {d0[2], d1[2], d2[2], d3[2]}, [r12], r1 + vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r12], r1 + vpop {d8 - d15} + ldmfd sp!, {r10-r12, pc} + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a chroma block horizontal edge when the +@* boundary strength is set to 4 in high profile +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.4 under the title +@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. 
+@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 +@* +@* @param[in] r1 - src_strd +@* Source stride +@* +@* @param[in] r2 - alpha_cb +@* Alpha Value for the boundary in U +@* +@* @param[in] r3 - beta_cb +@* Beta Value for the boundary in U +@* +@* @param[in] sp(0) - alpha_cr +@* Alpha Value for the boundary in V +@* +@* @param[in] sp(4) - beta_cr +@* Beta Value for the boundary in V +@* +@* @returns +@* None +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_chroma_horz_bs4_a9 + +ih264_deblk_chroma_horz_bs4_a9: + + stmfd sp!, {r4-r6, lr} @ + + ldr r5, [sp, #16] @R5 = alpha_cr + ldr r6, [sp, #20] @R6 = beta_cr + vpush {d8 - d15} + sub r0, r0, r1, lsl #1 @R0 = uc_edgePixel pointing to p1 of chroma + vld2.8 {d6, d7}, [r0], r1 @D6 = p1u , D7 = p1v + mov r4, r0 @Keeping a backup of the pointer p0 of chroma + vld2.8 {d4, d5}, [r0], r1 @D4 = p0u , D5 = p0v + vdup.8 d20, r2 @D20 contains alpha_cb + vdup.8 d21, r5 @D21 contains alpha_cr + vld2.8 {d0, d1}, [r0], r1 @D0 = q0u , D1 = q0v + vaddl.u8 q4, d6, d0 @ + vaddl.u8 q5, d7, d1 @Q4,Q5 = q0 + p1 + vmov.i8 d31, #2 @ + vld2.8 {d2, d3}, [r0] @D2 = q1u , D3 = q1v + vabd.u8 q13, q3, q2 @Q13 = ABS(p1 - p0) + vmlal.u8 q4, d2, d31 @ + vmlal.u8 q5, d3, d31 @Q5,Q4 = (X2(q1U) + q0U + p1U) + vabd.u8 q11, q2, q0 @Q11 = ABS(p0 - q0) + vabd.u8 q12, q1, q0 @Q12 = ABS(q1 - q0) + vaddl.u8 q7, d4, d2 @ + vaddl.u8 q14, d5, d3 @Q14,Q7 = P0 + Q1 + vdup.8 d16, r3 @D16 contains beta_cb + vdup.8 d17, r6 @D17 contains beta_cr + vmlal.u8 q7, d6, d31 @ + vmlal.u8 q14, d7, d31 @Q14,Q7 = (X2(p1U) + p0U + q1U) + vcge.u8 q9, q11, q10 @Q9 = ( ABS(p0 - q0) >= Alpha ) + vcge.u8 q12, q12, q8 @Q12= ( ABS(q1 - q0) >= Beta ) + vcge.u8 q13, q13, q8 @Q13= ( ABS(p1 - p0) >= Beta ) + vrshrn.u16 d8, q4, #2 @ + vrshrn.u16 d9, q5, #2 @Q4 = (X2(q1U) + q0U + p1U + 2) >> 2 + vorr q9, q9, q12 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) + vrshrn.u16 d10, 
q7, #2 @ + vrshrn.u16 d11, q14, #2 @Q5 = (X2(p1U) + p0U + q1U + 2) >> 2 + vorr q9, q9, q13 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta ) + vbit q5, q2, q9 @ + vbit q4, q0, q9 @ + vst2.8 {d10, d11}, [r4], r1 @ + vst2.8 {d8, d9}, [r4] @ + vpop {d8 - d15} + ldmfd sp!, {r4-r6, pc} @ + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a chroma block vertical edge when the +@* boundary strength is set to 4 in high profile +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.4 under the title +@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. +@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 +@* +@* @param[in] r1 - src_strd +@* Source stride +@* +@* @param[in] r2 - alpha_cb +@* Alpha Value for the boundary in U +@* +@* @param[in] r3 - beta_cb +@* Beta Value for the boundary in U +@* +@* @param[in] sp(0) - alpha_cr +@* Alpha Value for the boundary in V +@* +@* @param[in] sp(4) - beta_cr +@* Beta Value for the boundary in V +@* +@* @returns +@* None +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_chroma_vert_bs4_a9 + +ih264_deblk_chroma_vert_bs4_a9: + + stmfd sp!, {r4, r5, r12, r14} + + sub r0, r0, #4 @point r0 to p1u of row0. 
+ mov r12, r0 @keep a back up of r0 for buffer write + + ldr r4, [sp, #16] @r4 = alpha_cr + ldr r5, [sp, #20] @r5 = beta_cr + add r2, r2, r4, lsl #8 @r2 = (alpha_cr,alpha_cb) + add r3, r3, r5, lsl #8 @r3 = (beta_cr,beta_cb) + vpush {d8 - d15} + vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1 + vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1 + vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1 + vld4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1 + + vld4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1 + vld4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1 + vld4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1 + vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1 + + vdup.16 q11, r2 @Q11 = alpha + vdup.16 q12, r3 @Q12 = beta + vmov.i8 d31, #2 + + vabd.u8 q4, q1, q2 @|p0-q0| + vabd.u8 q5, q3, q2 @|q1-q0| + vabd.u8 q6, q0, q1 @|p1-p0| + vaddl.u8 q7, d2, d6 + vaddl.u8 q8, d3, d7 @(p0 + q1) + vclt.u8 q4, q4, q11 @|p0-q0| < alpha ? + vclt.u8 q5, q5, q12 @|q1-q0| < beta ? + vclt.u8 q6, q6, q12 @|p1-p0| < beta ? + vmlal.u8 q7, d0, d31 + vmlal.u8 q8, d1, d31 @2*p1 + (p0 + q1) + vaddl.u8 q9, d0, d4 + vaddl.u8 q10, d1, d5 @(p1 + q0) + vand.u8 q4, q4, q5 @|p0-q0| < alpha && |q1-q0| < beta + vmlal.u8 q9, d6, d31 + vmlal.u8 q10, d7, d31 @2*q1 + (p1 + q0) + + vrshrn.i16 d14, q7, #2 + vrshrn.i16 d15, q8, #2 @(2*p1 + (p0 + q1) + 2) >> 2 + vand.u8 q4, q4, q6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta + vrshrn.i16 d18, q9, #2 + vrshrn.i16 d19, q10, #2 @(2*q1 + (p1 + q0) + 2) >> 2 + + vbit q1, q7, q4 + vbit q2, q9, q4 + + vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r12], r1 + vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r12], r1 + vst4.16 {d0[2], d2[2], d4[2], d6[2]}, [r12], r1 + vst4.16 {d0[3], d2[3], d4[3], d6[3]}, [r12], r1 + + vst4.16 {d1[0], d3[0], d5[0], d7[0]}, [r12], r1 + vst4.16 {d1[1], d3[1], d5[1], d7[1]}, [r12], r1 + vst4.16 {d1[2], d3[2], d5[2], d7[2]}, [r12], r1 + vst4.16 {d1[3], d3[3], d5[3], d7[3]}, [r12], r1 + vpop {d8 - d15} + ldmfd sp!, {r4, r5, r12, pc} + + + +@/** 
+@******************************************************************************* +@* +@* @brief +@* Performs filtering of a chroma block horizontal edge for cases where the +@* boundary strength is less than 4 in high profile +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.3 under the title +@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264. +@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 +@* +@* @param[in] r1 - src_strd +@* Source stride +@* +@* @param[in] r2 - alpha_cb +@* Alpha Value for the boundary in U +@* +@* @param[in] r3 - beta_cb +@* Beta Value for the boundary in U +@* +@* @param[in] sp(0) - alpha_cr +@* Alpha Value for the boundary in V +@* +@* @param[in] sp(4) - beta_cr +@* Beta Value for the boundary in V +@* +@* @param[in] sp(8) - u4_bs +@* Packed Boundary strength array +@* +@* @param[in] sp(12) - pu1_cliptab_cb +@* tc0_table for U +@* +@* @param[in] sp(16) - pu1_cliptab_cr +@* tc0_table for V +@* +@* @returns +@* None +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_chroma_horz_bslt4_a9 + +ih264_deblk_chroma_horz_bslt4_a9: + + stmfd sp!, {r4-r9, lr} @ + + ldrd r4, r5, [sp, #28] @R4 = alpha_cr , R5 = beta_cr + ldr r7, [sp, #36] @R7 = u4_bs + ldrd r8, r9, [sp, #40] @R8 = pu1_cliptab_cb , R9 = pu1_cliptab_cr + sub r0, r0, r1, lsl #1 @R0 = uc_edgePixelU pointing to p1 of chroma U + vpush {d8 - d15} + rev r7, r7 @ + vmov.32 d12[0], r7 @D12[0] = ui_Bs + + vld1.32 d16[0], [r8] @D16[0] contains cliptab_cb + vld1.32 d17[0], [r9] @D17[0] contains cliptab_cr + vld2.8 {d6, d7}, [r0], r1 @Q3=p1 + vtbl.8 d14, {d16}, d12 @Retrieving cliptab values for U + vtbl.8 d28, {d17}, d12 @Retrieving cliptab values for V + vmovl.u8 q6, d12 @Q6 = uc_Bs in each 16 bit scalar + mov r6, r0 @Keeping a backup of the pointer to chroma U P0 + vld2.8 {d4, d5}, [r0], r1 @Q2=p0 + vmov.i8 d30, #1 @ + vdup.8 d20, r2 @D20 contains
alpha_cb + vdup.8 d21, r4 @D21 contains alpha_cr + vld2.8 {d0, d1}, [r0], r1 @Q0=q0 + vmovl.u8 q7, d14 @ + vmovl.u8 q14, d28 @ + vmov.i16 d15, d28 @D14 has cliptab values for U, D15 for V + vld2.8 {d2, d3}, [r0] @Q1=q1 + vsubl.u8 q5, d1, d5 @ + vsubl.u8 q4, d0, d4 @Q5,Q4 = (q0 - p0) + vabd.u8 q13, q3, q2 @Q13 = ABS(p1 - p0) + vshl.i16 q5, q5, #2 @Q5 = (q0 - p0)<<2 + vabd.u8 q11, q2, q0 @Q11 = ABS(p0 - q0) + vshl.i16 q4, q4, #2 @Q4 = (q0 - p0)<<2 + vsli.16 q7, q7, #8 @ + vabd.u8 q12, q1, q0 @Q12 = ABS(q1 - q0) + vcge.u8 q9, q11, q10 @Q9 = ( ABS(p0 - q0) >= Alpha ) + vsubl.u8 q10, d6, d2 @Q10 = (p1 - q1)L + vsubl.u8 q3, d7, d3 @Q3 = (p1 - q1)H + vdup.8 d16, r3 @Q8 contains beta_cb + vdup.8 d17, r5 @Q8 contains beta_cr + vadd.i16 q4, q4, q10 @ + vadd.i16 q5, q5, q3 @Q5,Q4 = [ (q0 - p0)<<2 ] + (p1 - q1) + vcge.u8 q12, q12, q8 @Q12= ( ABS(q1 - q0) >= Beta ) + vcgt.s16 d12, d12, #0 @Q6 = (us_Bs > 0) + vqrshrn.s16 d8, q4, #3 @ + vqrshrn.s16 d9, q5, #3 @Q4 = i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3 + vadd.i8 d14, d14, d30 @D14 = C = C0+1 for U + vcge.u8 q13, q13, q8 @Q13= ( ABS(p1 - p0) >= Beta ) + vorr q9, q9, q12 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) + vabs.s8 q3, q4 @Q4 = ABS (i_macro) + vadd.i8 d15, d15, d30 @D15 = C = C0+1 for V + vmov.i8 d13, d12 @ + vorr q9, q9, q13 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta ) + vmin.u8 q7, q3, q7 @Q7 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro) + vbic q6, q6, q9 @final condition + vcge.s8 q4, q4, #0 @Q4 = (i_macro >= 0) + vand q7, q7, q6 @Making delta zero in places where values shouldn be filterd + vqadd.u8 q8, q2, q7 @Q8 = p0 + delta + vqsub.u8 q2, q2, q7 @Q2 = p0 - delta + vqadd.u8 q9, q0, q7 @Q9 = q0 + delta + vqsub.u8 q0, q0, q7 @Q0 = q0 - delta + vbif q8, q2, q4 @Q8 = (i_macro >= 0 ) ? (p0+delta) : (p0-delta) + vbif q0, q9, q4 @Q0 = (i_macro >= 0 ) ? 
(q0-delta) : (q0+delta) + vst2.8 {d16, d17}, [r6], r1 @ + vst2.8 {d0, d1}, [r6] @ + vpop {d8 - d15} + ldmfd sp!, {r4-r9, pc} @ + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a chroma block vertical edge for cases where the +@* boundary strength is less than 4 in high profile +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.3 under the title +@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264. +@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 +@* +@* @param[in] r1 - src_strd +@* Source stride +@* +@* @param[in] r2 - alpha_cb +@* Alpha Value for the boundary in U +@* +@* @param[in] r3 - beta_cb +@* Beta Value for the boundary in U +@* +@* @param[in] sp(0) - alpha_cr +@* Alpha Value for the boundary in V +@* +@* @param[in] sp(4) - beta_cr +@* Beta Value for the boundary in V +@* +@* @param[in] sp(8) - u4_bs +@* Packed Boundary strength array +@* +@* @param[in] sp(12) - pu1_cliptab_cb +@* tc0_table for U +@* +@* @param[in] sp(16) - pu1_cliptab_cr +@* tc0_table for V +@* +@* @returns +@* None +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_chroma_vert_bslt4_a9 + +ih264_deblk_chroma_vert_bslt4_a9: + + stmfd sp!, {r4-r7, r10-r12, r14} + + sub r0, r0, #4 @point r0 to p1u of row0.
+ ldrd r4, r5, [sp, #32] @R4 = alpha_cr , R5 = beta_cr + add r2, r2, r4, lsl #8 + add r3, r3, r5, lsl #8 + ldr r6, [sp, #40] @R6 = u4_bs + ldrd r10, r11, [sp, #44] @R10 = pu1_cliptab_cb , R11 = pu1_cliptab_cr + vpush {d8 - d15} + mov r12, r0 @keep a back up of R0 for buffer write + + vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1 + vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1 + vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1 + vld4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1 + + vld4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1 + vld4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1 + vld4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1 + vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1 + + + vdup.16 q11, r2 @Q11 = alpha + vabd.u8 q4, q1, q2 @|p0-q0| + vdup.16 q12, r3 @Q12 = beta + vabd.u8 q5, q3, q2 @|q1-q0| + vabd.u8 q6, q0, q1 @|p1-p0| + vclt.u8 q4, q4, q11 @|p0-q0| < alpha ? + vsubl.u8 q7, d0, d6 + vclt.u8 q5, q5, q12 @|q1-q0| < beta ? + vsubl.u8 q8, d1, d7 @(p1 - q1) + vclt.u8 q6, q6, q12 @|p1-p0| < beta ? + vsubl.u8 q9, d4, d2 + vand.u8 q4, q4, q5 @|p0-q0| < alpha && |q1-q0| < beta + vsubl.u8 q10, d5, d3 @(q0 - p0) + vmov.u16 q14, #4 + vld1.32 {d24[0]}, [r10] @Load ClipTable for U + vld1.32 {d25[0]}, [r11] @Load ClipTable for V + rev r6, r6 @Blocking strengths + vand.u8 q4, q4, q6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta + + vmov.32 d10[0], r6 + + vmla.s16 q7, q9, q14 + vmla.s16 q8, q10, q14 @4*(q0 - p0) + (p1 - q1) + + vmovl.u8 q5, d10 + vsli.u16 d10, d10, #8 + vtbl.8 d12, {d24}, d10 @tC0 for U + vtbl.8 d13, {d25}, d10 @tC0 for V + vzip.8 d12, d13 + vmovl.u16 q5, d10 + vsli.u32 q5, q5, #16 + vmov.u8 q12, #1 + vadd.u8 q6, q6, q12 @tC0 + 1 + vcge.u8 q5, q5, q12 @u4_bS > 0 ? 
+ vand.u8 q4, q4, q5 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0 + + @ Q0 - Q3(inputs), + @ Q4 (|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0), + @ Q6 (tC) + + vrshr.s16 q7, q7, #3 + vrshr.s16 q8, q8, #3 @(((q0 - p0) << 2) + (p1 - q1) + 4) >> 3) + + vcgt.s16 q9, q7, #0 + vcgt.s16 q10, q8, #0 + vmovn.i16 d18, q9 + vmovn.i16 d19, q10 @Q9 = sign(delta) + vabs.s16 q7, q7 + vabs.s16 q8, q8 + vmovn.u16 d14, q7 + vmovn.u16 d15, q8 + vmin.u8 q7, q7, q6 @Q7 = |delta| + + vqadd.u8 q10, q1, q7 @p0+|delta| + vqadd.u8 q11, q2, q7 @q0+|delta| + vqsub.u8 q12, q1, q7 @p0-|delta| + vqsub.u8 q13, q2, q7 @q0-|delta| + + vbit q12, q10, q9 @p0 + delta + vbit q11, q13, q9 @q0 - delta + + vbit q1, q12, q4 + vbit q2, q11, q4 + + vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r12], r1 + vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r12], r1 + vst4.16 {d0[2], d2[2], d4[2], d6[2]}, [r12], r1 + vst4.16 {d0[3], d2[3], d4[3], d6[3]}, [r12], r1 + + vst4.16 {d1[0], d3[0], d5[0], d7[0]}, [r12], r1 + vst4.16 {d1[1], d3[1], d5[1], d7[1]}, [r12], r1 + vst4.16 {d1[2], d3[2], d5[2], d7[2]}, [r12], r1 + vst4.16 {d1[3], d3[3], d5[3], d7[3]}, [r12], r1 + vpop {d8 - d15} + ldmfd sp!, {r4-r7, r10-r12, pc} + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a chroma block vertical edge (interleaved U and V samples) when the +@* boundary strength is set to 4 on calling twice in high profile +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.4 under the title +@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. 
+@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 of the first row; each row is accessed from 4 bytes before it (p1 of U) +@* +@* @param[in] r1 - src_strd +@* Source stride in bytes, added to the row pointer after every row +@* +@* @param[in] r2 - alpha_cb +@* Alpha Value for the boundary in U +@* +@* @param[in] r3 - beta_cb +@* Beta Value for the boundary in U +@* +@* @param[in] sp(0) - alpha_cr +@* Alpha Value for the boundary in V +@* +@* @param[in] sp(4) - beta_cr +@* Beta Value for the boundary in V +@* +@* @returns +@* None +@* +@* @remarks +@* Processes 4 rows per call. Only p0 and q0 can change; p1 and q1 are stored back unmodified. +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_chroma_vert_bs4_mbaff_a9 + +ih264_deblk_chroma_vert_bs4_mbaff_a9: + + stmfd sp!, {r4, r5, r12, r14} + + sub r0, r0, #4 @point r0 to p1u of row0. + mov r12, r0 @keep a back up of r0 for buffer write + ldrd r4, r5, [sp, #16] @R4 = alpha_cr , R5 = beta_cr + add r2, r2, r4, lsl #8 + add r3, r3, r5, lsl #8 + vpush {d8 - d15} + vld4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0], r1 + vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r0], r1 + vld4.16 {d0[2], d1[2], d2[2], d3[2]}, [r0], r1 + vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0], r1 + + vdup.16 d11, r2 @D11 = alpha + vdup.16 d12, r3 @D12 = beta + vmov.i8 d31, #2 + + vabd.u8 d4, d1, d2 @|p0-q0| + vabd.u8 d5, d3, d2 @|q1-q0| + vabd.u8 d6, d0, d1 @|p1-p0| + vaddl.u8 q14, d1, d3 @(p0 + q1) + vclt.u8 d4, d4, d11 @|p0-q0| < alpha ? + vclt.u8 d5, d5, d12 @|q1-q0| < beta ? + vclt.u8 d6, d6, d12 @|p1-p0| < beta ? 
+ vmlal.u8 q14, d0, d31 @2*p1 + (p0 + q1) + vaddl.u8 q13, d0, d2 @(p1 + q0) + vand.u8 d4, d4, d5 @|p0-q0| < alpha && |q1-q0| < beta + vmlal.u8 q13, d3, d31 @2*q1 + (p1 + q0) + + vrshrn.i16 d7, q14, #2 @(2*p1 + (p0 + q1) + 2) >> 2 + vand.u8 d4, d4, d6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta + vrshrn.i16 d9, q13, #2 @(2*q1 + (p1 + q0) + 2) >> 2 + + vbit d1, d7, d4 + vbit d2, d9, d4 + + vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r12], r1 + vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r12], r1 + vst4.16 {d0[2], d1[2], d2[2], d3[2]}, [r12], r1 + vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r12], r1 + vpop {d8 - d15} + ldmfd sp!, {r4, r5, r12, pc} + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a chroma block vertical edge (interleaved U and V samples) for cases where the +@* boundary strength is less than 4 on calling twice in high profile +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.3 under the title +@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264. 
+@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 of the first row; each row is accessed from 4 bytes before it (p1 of U) +@* +@* @param[in] r1 - src_strd +@* Source stride in bytes, added to the row pointer after every row +@* +@* @param[in] r2 - alpha_cb +@* Alpha Value for the boundary in U +@* +@* @param[in] r3 - beta_cb +@* Beta Value for the boundary in U +@* +@* @param[in] sp(0) - alpha_cr +@* Alpha Value for the boundary in V +@* +@* @param[in] sp(4) - beta_cr +@* Beta Value for the boundary in V +@* +@* @param[in] sp(8) - u4_bs +@* Packed Boundary strength array: four bS bytes, most significant byte first, each applied to one row +@* +@* @param[in] sp(12) - pu1_cliptab_cb +@* tc0_table for U, indexed by bS (4 bytes are loaded) +@* +@* @param[in] sp(16) - pu1_cliptab_cr +@* tc0_table for V, indexed by bS (4 bytes are loaded) +@* +@* @returns +@* None +@* +@* @remarks +@* Processes 4 rows per call. Only p0 and q0 can change; p1 and q1 are stored back unmodified. +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_chroma_vert_bslt4_mbaff_a9 + +ih264_deblk_chroma_vert_bslt4_mbaff_a9: + + stmfd sp!, {r4-r6, r10-r12, r14} + + sub r0, r0, #4 @point r0 to p1u of row0. 
+ mov r12, r0 @keep a back up of r0 for buffer write + + ldrd r4, r5, [sp, #28] @R4 = alpha_cr , R5 = beta_cr + add r2, r2, r4, lsl #8 + add r3, r3, r5, lsl #8 + ldr r6, [sp, #36] @R6 = u4_bs + ldrd r10, r11, [sp, #40] @R10 = pu1_cliptab_cb , R11 = pu1_cliptab_cr + vpush {d8 - d15} + vld4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0], r1 + vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r0], r1 + vld4.16 {d0[2], d1[2], d2[2], d3[2]}, [r0], r1 + vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0], r1 + + vdup.16 d11, r2 @D11 = alpha + vabd.u8 d4, d1, d2 @|p0-q0| + vdup.16 d12, r3 @D12 = beta + vabd.u8 d5, d3, d2 @|q1-q0| + vabd.u8 d6, d0, d1 @|p1-p0| + vclt.u8 d4, d4, d11 @|p0-q0| < alpha ? + vclt.u8 d5, d5, d12 @|q1-q0| < beta ? + vsubl.u8 q14, d0, d3 @(p1 - q1) + vclt.u8 d6, d6, d12 @|p1-p0| < beta ? + vand.u8 d4, d4, d5 @|p0-q0| < alpha && |q1-q0| < beta + vsubl.u8 q12, d2, d1 @(q0 - p0) + vmov.u16 q10, #4 + + vld1.32 {d31[1]}, [r10] @Load ClipTable for U + vld1.32 {d31[0]}, [r11] @Load ClipTable for V + rev r6, r6 @Blocking strengths + vand.u8 d4, d4, d6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta + vmov.32 d22[0], r6 + vmla.s16 q14, q12, q10 @4*(q0 - p0) + (p1 - q1) + vmovl.u8 q11, d22 + vsli.u16 d22, d22, #8 + vmov.u16 d13, #4 + vadd.u8 d22, d22, d13 + vtbl.8 d6, {d31}, d22 @tC0 + vmov.u8 d12, #1 + vsub.u8 d22, d22, d13 + vadd.u8 d6, d6, d12 @tC0 + 1 + vcge.u8 d5, d22, d12 @u4_bS > 0 ? 
+ vand.u8 d4, d4, d5 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0 + + @ D0 - D3(inputs), + @ D4 (|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0), + @ D6 (tC) + + vrshr.s16 q14, q14, #3 @(((q0 - p0) << 2) + (p1 - q1) + 4) >> 3) + + vcgt.s16 q13, q14, #0 + vmovn.i16 d9, q13 @D9 = sign(delta) + vabs.s16 q14, q14 + vmovn.u16 d7, q14 + vmin.u8 d7, d7, d6 @D7 = |delta| + + vqadd.u8 d10, d1, d7 @p0+|delta| + vqadd.u8 d11, d2, d7 @q0+|delta| + vqsub.u8 d12, d1, d7 @p0-|delta| + vqsub.u8 d13, d2, d7 @q0-|delta| + + vbit d12, d10, d9 @p0 + delta + vbit d11, d13, d9 @q0 - delta + + vbit d1, d12, d4 + vbit d2, d11, d4 + + vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r12], r1 + vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r12], r1 + vst4.16 {d0[2], d1[2], d2[2], d3[2]}, [r12], r1 + vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r12], r1 + vpop {d8 - d15} + ldmfd sp!, {r4-r6, r10-r12, pc} + + + diff --git a/common/arm/ih264_deblk_luma_a9.s b/common/arm/ih264_deblk_luma_a9.s new file mode 100755 index 0000000..3e6a4d9 --- /dev/null +++ b/common/arm/ih264_deblk_luma_a9.s @@ -0,0 +1,1092 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +@*/ +@/*****************************************************************************/ +@/* */ +@/* File Name : ih264_deblk_luma_a9.s */ +@/* */ +@/* Description : Contains function definitions for deblocking luma */ +@/* edge. Functions are coded in NEON assembly and can */ +@/* be compiled using ARM RVDS. */ +@/* */ +@/* List of Functions : ih264_deblk_luma_vert_bs4_a9() */ +@/* ih264_deblk_luma_vert_bslt4_a9() */ +@/* ih264_deblk_luma_horz_bs4_a9() */ +@/* ih264_deblk_luma_horz_bslt4_a9() */ +@/* ih264_deblk_luma_vert_bs4_mbaff_a9() */ +@/* ih264_deblk_luma_vert_bslt4_mbaff_a9() */ +@/* */ +@/* Issues / Problems : None */ +@/* */ +@/* Revision History : */ +@/* */ +@/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +@/* 28 11 2013 Ittiam Draft */ +@/* 05 01 2015 Kaushik Added double-call functions for */ +@/* Senthoor vertical deblocking. */ +@/* */ +@/*****************************************************************************/ + + +.text +.p2align 2 + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a luma block horizontal edge for cases where the +@* boundary strength is less than 4 +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.3 under the title +@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264. 
+@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 +@* +@* @param[in] r1 - src_strd +@* Source stride +@* +@* @param[in] r2 - alpha +@* Alpha Value for the boundary +@* +@* @param[in] r3 - beta +@* Beta Value for the boundary +@* +@* @param[in] sp(0) - u4_bs +@* Packed Boundary strength array +@* +@* @param[in] sp(4) - pu1_cliptab +@* tc0_table +@* +@* @returns +@* None +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_luma_horz_bslt4_a9 + +ih264_deblk_luma_horz_bslt4_a9: + + stmfd sp!, {r4-r7, lr} + + ldrd r4, r5, [sp, #0x14] @r4 = ui_Bs , r5 = *puc_ClpTab + vpush {d8 - d15} + sub r0, r0, r1, lsl #1 @R1 = uc_Horizonpad + sub r0, r0, r1 @r0 pointer to p2 + rev r4, r4 @ + vld1.8 {q5}, [r0], r1 @p2 values are loaded into q5 + vmov.32 d12[0], r4 @d12[0] = ui_Bs + mov r6, r0 @keeping backup of pointer to p1 + vld1.8 {q4}, [r0], r1 @p1 values are loaded into q4 + mov r7, r0 @keeping backup of pointer to p0 + vld1.8 {q3}, [r0], r1 @p0 values are loaded into q3 + vmovl.u8 q6, d12 @q6 = uc_Bs in each 16 bt scalar + vld1.8 {q0}, [r0], r1 @q0 values are loaded into q0 + vabd.u8 q13, q4, q3 @Q13 = ABS(p1 - p0) + vld1.8 {q1}, [r0], r1 @q1 values are loaded into q1 + vabd.u8 q11, q3, q0 @Q11 = ABS(p0 - q0) + vld1.32 d16[0], [r5] @D16[0] contains cliptab + vabd.u8 q12, q1, q0 @Q12 = ABS(q1 - q0) + vld1.8 {q2}, [r0], r1 @q2 values are loaded into q2 + vtbl.8 d14, {d16}, d12 @ + vdup.8 q10, r2 @Q10 contains alpha + vdup.8 q8, r3 @Q8 contains beta + vmovl.u16 q6, d12 @ + vmovl.u16 q7, d14 @ + vabd.u8 q14, q5, q3 @Q14 = Ap = ABS(p2 - p0) + vabd.u8 q15, q2, q0 @Q15 = Aq = ABS(q2 - q0) + vcgt.s32 q6, q6, #0 @Q6 = (us_Bs > 0) + vsli.32 q7, q7, #8 @ + vcge.u8 q9, q11, q10 @Q9 = ( ABS(p0 - q0) >= Alpha ) + vcge.u8 q12, q12, q8 @Q12=( ABS(q1 - q0) >= Beta ) + vcge.u8 q13, q13, q8 @Q13=( ABS(p1 - p0) >= Beta ) + vcgt.u8 q10, q8, q14 @Q10=(Ap= Alpha ) | ( ABS(q1 - q0) >= Beta ) + 
vsubl.u8 q15, d1, d7 @ + vsubl.u8 q12, d0, d6 @Q15,Q12 = (q0 - p0) + vorr q9, q9, q13 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta ) + vsubl.u8 q14, d8, d2 @Q14 = (p1 - q1)L + vshl.i16 q13, q15, #2 @Q13 = (q0 - p0)<<2 + vshl.i16 q12, q12, #2 @Q12 = (q0 - p0)<<2 + vsubl.u8 q15, d9, d3 @Q15 = (p1 - q1)H + vbic q6, q6, q9 @final condition + vadd.i16 q12, q12, q14 @ + vadd.i16 q13, q13, q15 @Q13,Q12 = [ (q0 - p0)<<2 ] + (p1 - q1) + vsub.i8 q9, q7, q10 @Q9 = C0 + (Ap < Beta) + vrhadd.u8 q8, q3, q0 @Q8 = ((p0+q0+1) >> 1) + vqrshrn.s16 d24, q12, #3 @ + vqrshrn.s16 d25, q13, #3 @Q12 = i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3 + vsub.i8 q9, q9, q11 @Q9 = C0 + (Ap < Beta) + (Aq < Beta) + vand.i8 q10, q10, q6 @ + vand.i8 q11, q11, q6 @ + vabs.s8 q13, q12 @Q13 = ABS (i_macro) + vaddl.u8 q14, d17, d11 @ + vaddl.u8 q5, d16, d10 @Q14,Q5 = p2 + (p0+q0+1)>>1 + vaddl.u8 q15, d17, d5 @ + vmin.u8 q9, q13, q9 @Q9 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro) + vshll.u8 q13, d9, #1 @ + vaddl.u8 q2, d16, d4 @Q15,Q2 = q2 + (p0+q0+1)>>1 + vshll.u8 q8, d8, #1 @Q13,Q8 = (p1<<1) + vand q9, q9, q6 @Making delta zero in places where values shouldn be filterd + vsub.i16 q14, q14, q13 @Q14,Q5 = [p2 + (p0+q0+1)>>1] - (p1<<1) + vsub.i16 q5, q5, q8 @ + vshll.u8 q8, d2, #1 @ + vshll.u8 q13, d3, #1 @Q13,Q8 = (q1<<1) + vqshrn.s16 d29, q14, #1 @ + vqshrn.s16 d28, q5, #1 @Q14 = i_macro_p1 + vsub.i16 q2, q2, q8 @ + vsub.i16 q15, q15, q13 @Q15,Q2 = [q2 + (p0+q0+1)>>1] - (q1<<1) + vneg.s8 q13, q7 @Q13 = -C0 + vmin.s8 q14, q14, q7 @Q14 = min(C0,i_macro_p1) + vcge.s8 q12, q12, #0 @Q12 = (i_macro >= 0) + vqshrn.s16 d31, q15, #1 @ + vqshrn.s16 d30, q2, #1 @Q15 = i_macro_q1 + vmax.s8 q14, q14, q13 @Q14 = max( - C0 , min(C0, i_macro_p1) ) + vqadd.u8 q8, q3, q9 @Q8 = p0 + delta + vqsub.u8 q3, q3, q9 @Q3 = p0 - delta + vmin.s8 q15, q15, q7 @Q15 = min(C0,i_macro_q1) + vand.i8 q14, q10, q14 @condition check Ap= 0 ) ? 
(p0+delta) : (p0-delta) + vbif q0, q7, q12 @Q0 = (i_macro >= 0 ) ? (q0-delta) : (q0+delta) + vadd.i8 q14, q14, q4 @ + vand.i8 q15, q11, q15 @condition check Aq= Alpha + vcge.u8 q7, q7, q1 @ABS(q1 - q0) >= Beta + vcge.u8 q8, q8, q1 @ABS(p1 - p0) >= Beta + vmov.i8 q10, #2 + vorr q9, q9, q7 @ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta + vld1.8 {d14, d15}, [r0], r1 @load q2 to Q7, q0 = q0 + src_strd + vorr q9, q9, q8 @ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta + vsra.u8 q10, q0, #2 @((Alpha >> 2) + 2) + vabd.u8 q11, q7, q2 @Aq = ABS(q2 - q0) + vaddl.u8 q12, d4, d6 @p0+q0 L + vaddl.u8 q13, d5, d7 @p0+q0 H + vclt.u8 q11, q11, q1 @Aq < Beta + vclt.u8 q10, q6, q10 @(ABS(p0 - q0) <((Alpha >>2) + 2)) + + @ Deblock Filtering q0', q1', q2' + vaddw.u8 q14, q12, d8 @p0+q0+q1 L + vaddw.u8 q15, q13, d9 @p0+q0+q1 H + vand q11, q11, q10 @(Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) + @ q0' if (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) TRUE + vadd.i16 q8, q14, q14 @2*(p0+q0+q1)L + vadd.i16 q0, q15, q15 @2*(p0+q0+q1)H + vaddw.u8 q8, q8, d14 @2*(p0+q0+q1)+q2 L + vaddw.u8 q0, q0, d15 @2*(p0+q0+q1)+q2 H + vaddw.u8 q8, q8, d10 @2*(p0+q0+q1)+q2 +p1 L + vaddw.u8 q0, q0, d11 @2*(p0+q0+q1)+q2 +p1 H + vrshrn.u16 d12, q8, #3 @(2*(p0+q0+q1)+q2 +p1 +4)>> 3 L [q0'] + vrshrn.u16 d13, q0, #3 @(2*(p0+q0+q1)+q2 +p1 +4)>> 3 H [q0'] + @ q0" if (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) FALSE + vaddl.u8 q8, d8, d8 @2*q1 L + vaddl.u8 q0, d9, d9 @2*q1 H + vaddw.u8 q8, q8, d4 @2*q1+q0 L + vaddw.u8 q0, q0, d5 @2*q1+q0 H + vaddw.u8 q8, q8, d10 @2*q1+q0+p1 L + vaddw.u8 q0, q0, d11 @2*q1+q0+p1 H + vrshrn.u16 d16, q8, #2 @(2*q1+q0+p1+2)>>2 L [q0"] + vrshrn.u16 d17, q0, #2 @(2*q1+q0+p1+2)>>2 H [q0"] + @ q1' + vaddw.u8 q14, q14, d14 @p0+q0+q1+q2 L + vaddw.u8 q15, q15, d15 @p0+q0+q1+q2 H + vld1.8 {q0}, [r0], r1 @load q3 to Q0, q0 = q0 + src_strd + vbit q8, q6, q11 @choosing between q0' and q0" depending on condn + sub r0, r0, r1, lsl #2 @pointer to q0 + vbic q11, q11, q9 
@((ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta)) + @ && (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) + vrshrn.u16 d12, q14, #2 @(p0+q0+q1+q2+2)>>2 L [q1'] + vrshrn.u16 d13, q15, #2 @(p0+q0+q1+q2+2)>>2 H [q1'] + vbif q2, q8, q9 @choose q0 or filtered q0 + @ q2' + vaddl.u8 q8, d14, d0 @q2+q3,L + vaddl.u8 q0, d15, d1 @q2+q3,H + vadd.i16 q14, q14, q8 @p0+q0+q1+2*q2+q3 L + vst1.8 {d4, d5}, [r0], r1 @store q0 + vadd.i16 q15, q15, q0 @p0+q0+q1+2*q2+q3 H + vadd.i16 q14, q14, q8 @p0+q0+q1+3*q2+2*q3 L + vadd.i16 q15, q15, q0 @p0+q0+q1+3*q2+2*q3 H + vrshrn.u16 d0, q14, #3 @(p0+q0+q1+3*q2+2*q3+4)>>3 L [q2'] + vrshrn.u16 d1, q15, #3 @(p0+q0+q1+3*q2+2*q3+4)>>3 H [q2'] + vld1.8 {d30, d31}, [r3] @load p2 to Q15 + vbif q6, q4, q11 @choose q1 or filtered value of q1 + + vabd.u8 q8, q15, q3 @Ap,ABS(p2 - p0) + vaddw.u8 q12, q12, d10 @p0+q0+p1 L + vbif q0, q7, q11 @choose q2 or filtered q2 + vaddw.u8 q13, q13, d11 @p0+q0+p1 H + vst1.8 {d12, d13}, [r0], r1 @store q1 + vclt.u8 q8, q8, q1 @Ap < Beta + vadd.i16 q14, q12, q12 @2*(p0+q0+p1) L + vadd.i16 q2, q13, q13 @2*(p0+q0+p1) H + vst1.8 {d0, d1}, [r0], r1 @store q2 + vand q10, q10, q8 @((Ap < Beta) && (ABS(p0 - q0) <((Alpha >>2) + 2))) + vaddw.u8 q14, q14, d30 @2*(p0+q0+p1)+p2 l + vaddw.u8 q2, q2, d31 @2*(p0+q0+p1)+p2 H + vaddw.u8 q14, q14, d8 @2*(p0+q0+p1)+p2+q1 L + vaddw.u8 q2, q2, d9 @2*(p0+q0+p1)+p2+q1 H + vrshrn.u16 d28, q14, #3 @(2*(p0+q0+p1)+p2+q1+4)>>3 L,p0' + vrshrn.u16 d29, q2, #3 @(2*(p0+q0+p1)+p2+q1+4)>>3 H,p0' + vmov.i8 d0, #2 + vmov.i16 d1, #2 + vaddl.u8 q1, d6, d8 @p0+q1 L + vmlal.u8 q1, d10, d0 @2*p1+p0+q1 L + vaddl.u8 q8, d7, d9 @p0+q1 H + vmlal.u8 q8, d11, d0 @2*p1+p0+q1 H + vaddw.u8 q6, q12, d30 @(p0+q0+p1) +p2 L + vld1.8 {d24, d25}, [r2] @load p3,Q12 + vaddw.u8 q2, q13, d31 @(p0+q0+p1) +p2 H + vaddl.u8 q4, d30, d24 @p2+p3 L + vrshrn.u16 d26, q6, #2 @((p0+q0+p1)+p2 +2)>>2,p1' L + vrshrn.u16 d2, q1, #2 @(2*p1+p0+q1+2)>>2,p0"L + vrshrn.u16 d27, q2, #2 @((p0+q0+p1)+p2 +2)>>2,p1' H + vrshrn.u16 
d3, q8, #2 @(2*p1+p0+q1+2)>>2,p0" H + vaddl.u8 q8, d31, d25 @p2+p3 H + vmla.u16 q6, q4, d1[0] @(p0+q0+p1)+3*p2+2*p3 L + vmla.u16 q2, q8, d1[0] @(p0+q0+p1)+3*p2+2*p3 H + vbic q8, q10, q9 @((ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta)) + @&& (Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) + vbit q1, q14, q10 @choosing between po' and p0" + vrshrn.u16 d12, q6, #3 @((p0+q0+p1)+3*p2+2*p3+4)>>3 L p2' + vrshrn.u16 d13, q2, #3 @((p0+q0+p1)+3*p2+2*p3+4)>>3 H p2' + vbif q3, q1, q9 @choosing between p0 and filtered value of p0 + vbit q5, q13, q8 @choosing between p1 and p1' + vbit q15, q6, q8 @choosing between p2 and p2' + vst1.8 {d6, d7}, [r12] @store p0 + vst1.8 {d10, d11}, [r14] @store p1 + vst1.8 {d30, d31}, [r3] @store p2 + vpop {d8 - d15} + ldmfd sp!, {r12, pc} + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a luma block vertical edge for cases where the +@* boundary strength is less than 4 +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.3 under the title +@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264. 
+@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 of the first row; 8 bytes (p3 to q3) are accessed per row from 4 bytes before it +@* +@* @param[in] r1 - src_strd +@* Source stride in bytes, added to the row pointer after every row +@* +@* @param[in] r2 - alpha +@* Alpha Value for the boundary +@* +@* @param[in] r3 - beta +@* Beta Value for the boundary +@* +@* @param[in] sp(0) - u4_bs +@* Packed Boundary strength array: four bS bytes, most significant byte first, each applied to four consecutive rows +@* +@* @param[in] sp(4) - pu1_cliptab +@* tc0_table, indexed by bS (4 bytes are loaded) +@* +@* @returns +@* None +@* +@* @remarks +@* Processes 16 rows per call: the rows are transposed in registers, filtered and transposed back before all 8 bytes of each row are stored. +@* +@******************************************************************************* +@*/ + + .global ih264_deblk_luma_vert_bslt4_a9 + +ih264_deblk_luma_vert_bslt4_a9: + + stmfd sp!, {r12, lr} + + sub r0, r0, #4 @pointer uc_edgePixel-4 + ldr r12, [sp, #8] @r12 = ui_Bs + ldr r14, [sp, #12] @r14 = *puc_ClpTab + vpush {d8 - d15} + @loading p3:p2:p1:p0:q0:q1:q2:q3 for every row + vld1.8 {d0}, [r0], r1 @row1 + vld1.8 d2, [r0], r1 @row2 + vld1.8 d4, [r0], r1 @row3 + rev r12, r12 @reversing ui_bs + vld1.8 d6, [r0], r1 @row4 + vmov.32 d18[0], r12 @d12[0] = ui_Bs + vld1.32 d16[0], [r14] @D16[0] contains cliptab + vld1.8 d8, [r0], r1 @row5 + vmovl.u8 q9, d18 @q6 = uc_Bs in each 16 bt scalar + vld1.8 d10, [r0], r1 @row6 + vld1.8 d12, [r0], r1 @row7 + vtbl.8 d16, {d16}, d18 @puc_ClipTab[uc_Bs] + vld1.8 d14, [r0], r1 @row8 + vld1.8 d1, [r0], r1 @row9 + vmovl.u16 q8, d16 @ + vld1.8 d3, [r0], r1 @row10 + vld1.8 d5, [r0], r1 @row11 + vld1.8 d7, [r0], r1 @row12 + vsli.32 q8, q8, #8 @ + vld1.8 d9, [r0], r1 @row13 + vld1.8 d11, [r0], r1 @row14 + vld1.8 d13, [r0], r1 @row15 + vsli.32 q8, q8, #16 @Q8 = C0 + vld1.8 d15, [r0], r1 @row16 + + @taking two 8x8 transposes + @2X2 transposes + vtrn.8 d0, d2 @row1 &2 + 
vtrn.8 d4, d6 @row3&row4 + vtrn.8 d8, d10 @row5&6 + vtrn.8 d12, d14 @row7 & 8 + vtrn.8 d1, d3 @row9 &10 + vtrn.8 d5, d7 @row11 & 12 + vtrn.8 d9, d11 @row13 &14 + vtrn.8 d13, d15 @row15 & 16 + @4x4 transposes + vtrn.16 d2, d6 @row2 & row4 + vtrn.16 d10, d14 @row6 & row8 + vtrn.16 d3, d7 @row10 & 12 + vtrn.16 d11, d15 @row14 & row16 + vtrn.32 d6, d14 @row4 & 8 + vtrn.32 d7, 
d15 @row 12 & 16 + + @now Q3 ->p0 and Q7->q3 + vtrn.16 d0, d4 @row1 & 3 + vtrn.16 d8, d12 @row 5 & 7 + vtrn.16 d1, d5 @row9 & row11 + vtrn.16 d9, d13 @row13 & row15 + vtrn.32 d0, d8 @row1 & row5 + vtrn.32 d1, d9 @row9 & 13 + + @now Q0->p3 & Q4->q0 + @starting processing as p0 and q0 are now ready + vtrn.32 d2, d10 @row2 &6 + vrhadd.u8 q10, q3, q4 @((p0 + q0 + 1) >> 1) + vtrn.32 d3, d11 @row10&row14 + vmov.i8 d19, #2 + @now Q1->p2 & Q5->q1 + vtrn.32 d4, d12 @row3 & 7 + vabd.u8 q11, q3, q4 @ABS(p0 - q0) + vtrn.32 d5, d13 @row11 & row15 + vaddl.u8 q12, d20, d2 @(p2 + ((p0 + q0 + 1) >> 1) L + @now Q2->p1,Q6->q2 + vaddl.u8 q13, d21, d3 @(p2 + ((p0 + q0 + 1) >> 1) H + vmlsl.u8 q12, d4, d19 @(p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) L + vmlsl.u8 q13, d5, d19 @(p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) H + vdup.8 q14, r2 @alpha + vcle.u8 q11, q14, q11 @ABS(p0 - q0) >= Alpha(Alpha <=ABS(p0 - q0)) + vdup.i8 q14, r3 @beta + vabd.u8 q15, q5, q4 @ABS(q1 - q0) + vqshrn.s16 d24, q12, #1 @((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1) L + vqshrn.s16 d25 , q13, #1 @((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1) H + vcge.u8 q15, q15, q14 @ABS(q1 - q0) >= Beta + vabd.u8 q13, q2, q3 @ABS(p1 - p0) + vmin.s8 q12, q12, q8 @min(deltap1 ,C0) + vorr q11, q11, q15 @ABS(q1 - q0) >= Beta ||ABS(p0 - q0) >= Alpha + vneg.s8 q15, q8 @-C0 + vcge.u8 q13, q13, q14 @ABS(p1 - p0) >= Beta + vmax.s8 q12, q12, q15 @max(deltap1,-C0) + vorr q11, q11, q13 @ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta) + vmovl.u16 q13, d18 @ui_bs + vaddl.u8 q9, d20, d12 @q2 + ((p0 + q0 + 1) >> 1) L + vceq.u32 q13, q13, #0 @ui_bs == 0 + vsubw.u8 q9, q9, d10 @(q2 + ((p0 + q0 + 1) >> 1) - q1) L + vaddl.u8 q10, d21, d13 @q2 + ((p0 + q0 + 1) >> 1) H + vsubw.u8 q9, q9, d10 @(q2 + ((p0 + q0 + 1) >> 1) - 2*q1)L + vsubw.u8 q10, q10, d11 @(q2 + ((p0 + q0 + 1) >> 1) - q1) H + vorr q13, q13, q11 @(ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta)) &&(ui_bs) + vsubw.u8 q10, q10, d11 @(q2 + ((p0 + 
q0 + 1) >> 1) - 2*q1) H + vqshrn.s16 d18, q9, #1 @((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1) L + vabd.u8 q11, q1, q3 @Ap = ABS(p2 - p0) + vqshrn.s16 d19, q10, #1 @((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1) H + vabd.u8 q10, q6, q4 @Aq= ABS(q2 - q0) + vclt.u8 q11, q11, q14 @Ap < Beta + vmin.s8 q9, q9, q8 @min(delatq1,C0) + vclt.u8 q10, q10, q14 @Aq > 3); L + vrshrn.s16 d29, q15, #3 @delta = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3) H + vsub.u8 q8, q8, q10 @C0 + (Ap < Beta) + (Aq < Beta) + vbic q10, q10, q13 @final condition for q1 + vabs.s8 q15, q14 @abs(delta) + vand q12, q12, q11 @delatp1 + vand q9, q9, q10 @delta q1 + vmin.u8 q15, q15, q8 @min((abs(delta),C) + vadd.i8 q2, q2, q12 @p1+deltap1 + vadd.i8 q5, q5, q9 @q1+deltaq1 + vbic q15, q15, q13 @abs(delta) of pixels to be changed only + vcge.s8 q14, q14, #0 @sign(delta) + vqsub.u8 q11, q3, q15 @clip(p0-delta) + vtrn.8 d0, d2 @row1 &2 + vqadd.u8 q3, q3, q15 @clip(p0+delta) + vtrn.8 d1, d3 @row9 &10 + vqadd.u8 q12, q4, q15 @clip(q0+delta) + vtrn.8 d12, d14 @row7 & 8 + vqsub.u8 q4, q4, q15 @clip(q0-delta) + vtrn.8 d13, d15 @row15 & 16 + vbif q3, q11, q14 @p0 + vbif q4, q12, q14 @q0 + vtrn.8 d4, d6 @row3&row4 + vtrn.8 d8, d10 @row5&6 + vtrn.8 d5, d7 @row11 & 12 + vtrn.8 d9, d11 @row13 &14 + vtrn.16 d2, d6 @row2 & row4 + vtrn.16 d10, d14 @row6 & row8 + vtrn.16 d3, d7 @row10 & 12 + vtrn.16 d11, d15 @row14 & row16 + vtrn.32 d6, d14 @row4 & 8 + vtrn.32 d7, d15 @row 12 & 16 + @now Q3 ->p0 and Q7->q3 + vtrn.16 d0, d4 @row1 & 3 + vtrn.16 d8, d12 @row 5 & 7 + vtrn.16 d1, d5 @row9 & row11 + vtrn.16 d9, d13 @row13 & row15 + sub r0, r0, r1, lsl#4 @restore pointer + vtrn.32 d0, d8 @row1 & row5 + vtrn.32 d1, d9 @row9 & 13 + vtrn.32 d2, d10 @row2 &6 + vtrn.32 d3, d11 @row10&row14 + vtrn.32 d4, d12 @row3 & 7 + vtrn.32 d5, d13 @row11 & row15 + vst1.8 {d0}, [r0], r1 @row1 + vst1.8 d2, [r0], r1 @row2 + vst1.8 d4, [r0], r1 @row3 + vst1.8 d6, [r0], r1 @row4 + vst1.8 d8, [r0], r1 @row5 + vst1.8 d10, [r0], r1 @row6 + vst1.8 d12, 
[r0], r1 @row7 + vst1.8 d14, [r0], r1 @row8 + vst1.8 d1, [r0], r1 @row9 + vst1.8 d3, [r0], r1 @row10 + vst1.8 d5, [r0], r1 @row11 + vst1.8 d7, [r0], r1 @row12 + vst1.8 d9, [r0], r1 @row13 + vst1.8 d11, [r0], r1 @row14 + vst1.8 d13, [r0], r1 @row15 + vst1.8 d15, [r0], r1 @row16 + vpop {d8 - d15} + ldmfd sp!, {r12, pc} + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Performs filtering of a luma block vertical edge when the +@* boundary strength is set to 4 +@* +@* @par Description: +@* This operation is described in Sec. 8.7.2.4 under the title +@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. +@* +@* @param[in] r0 - pu1_src +@* Pointer to the src sample q0 of the first row; 8 bytes (p3 to q3) are accessed per row from 4 bytes before it +@* +@* @param[in] r1 - src_strd +@* Source stride in bytes, added to the row pointer after every row +@* +@* @param[in] r2 - alpha +@* Alpha Value for the boundary +@* +@* @param[in] r3 - beta +@* Beta Value for the boundary +@* +@* @returns +@* None +@* +@* @remarks +@* Processes 16 rows per call: the rows are transposed in registers, filtered and transposed back before all 8 bytes of each row are stored. The q3 column (Q7) is kept on the stack while Q7 is used as a temporary. 
+@* +@******************************************************************************* +@*/ + + .global ih264_deblk_luma_vert_bs4_a9 + +ih264_deblk_luma_vert_bs4_a9: + + stmfd sp!, {r12, lr} + vpush {d8 - d15} + sub r0, r0, #4 @pointer uc_edgePixel-4 + @loading p3:p2:p1:p0:q0:q1:q2:q3 for every row + vld1.8 d0, [r0], r1 @row1 + vld1.8 d2, [r0], r1 @row2 + vld1.8 d4, [r0], r1 @row3 + vld1.8 d6, [r0], r1 @row4 + vld1.8 d8, [r0], r1 @row5 + vld1.8 d10, [r0], r1 @row6 + vld1.8 d12, [r0], r1 @row7 + vld1.8 d14, [r0], r1 @row8 + vld1.8 d1, [r0], r1 @row9 + vld1.8 d3, [r0], r1 @row10 + vld1.8 d5, [r0], r1 @row11 + vld1.8 d7, [r0], r1 @row12 + vld1.8 d9, [r0], r1 @row13 + vld1.8 d11, [r0], r1 @row14 + vld1.8 d13, [r0], r1 @row15 + vld1.8 d15, [r0], r1 @row16 + @taking two 8x8 transposes + @2X2 transposes + vtrn.8 d0, d2 @row1 &2 + vtrn.8 d4, d6 @row3&row4 + vtrn.8 d8, d10 @row5&6 + vtrn.8 d12, d14 @row7 & 8 + vtrn.8 d1, d3 @row9 &10 + vtrn.8 d5, d7 @row11 & 12 + vtrn.8 d9, d11 @row13 &14 + vtrn.8 d13, d15 @row15 & 
16 + @4x4 transposes + vtrn.16 d2, d6 @row2 & row4 + vtrn.16 d10, d14 @row6 & row8 + vtrn.16 d3, d7 @row10 & 12 + vtrn.16 d11, d15 @row14 & row16 + vtrn.32 d6, d14 @row4 & 8 + vtrn.32 d7, d15 @row 12 & 16 + @now Q3 ->p0 and Q7->q3 + vtrn.16 d0, d4 @row1 & 3 + vtrn.16 d8, d12 @row 5 & 7 + vtrn.16 d1, d5 @row9 & row11 + vtrn.16 d9, d13 @row13 & row15 + vtrn.32 d0, d8 @row1 & row5 + vtrn.32 d1, d9 @row9 & 13 + @now Q0->p3 & Q4->q0 + @starting processing as p0 and q0 are now ready + @now Q1->p2 & Q5->q1 + vpush {q7} @saving in stack + vtrn.32 d4, d12 @row3 & 7 + vmov.i16 q14, #2 + vtrn.32 d5, d13 @row11 & row15 + vaddl.u8 q8, d6, d8 @p0+q0 L + vtrn.32 d2, d10 @row2 &6 + vaddl.u8 q9, d7, d9 @p0+q0 H + vtrn.32 d3, d11 @row10&row14 + vaddw.u8 q10, q8, d4 @p0+q0+p1 L + vaddw.u8 q11, q9, d5 @p0+q0+p1 H + vaddl.u8 q12, d2, d10 @p2+q1 L + vaddl.u8 q13, d3, d11 @p2+q1 H + vmla.u16 q12, q10, q14 @p2 + X2(p1) + X2(p0) + X2(q0) + q1 L + vmla.u16 q13, q11, q14 @p2 + X2(p1) + X2(p0) + X2(q0) + q1 H + vmov.i8 q14, #2 + vaddw.u8 q8, q10, d2 @p0+q0+p1+p2 L + vaddw.u8 q9, q11, d3 @p0+q0+p1+p2 H + vdup.i8 q15, r2 @duplicate alpha + vrshrn.u16 d20, q8, #2 @(p2 + p1 + p0 + q0 + 2) >> 2)L p1' + vrshrn.u16 d21, q9, #2 @(p2 + p1 + p0 + q0 + 2) >> 2)H p1' + vabd.u8 q11, q3, q4 @ABD(p0-q0) + vsra.u8 q14, q15, #2 @alpha >>2 +2 + vabd.u8 q15, q1, q3 @Ap = ABD(p2-p0) + vrshrn.u16 d24, q12, #3 @((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3) L p0' + vrshrn.u16 d25, q13, #3 @((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3) H p0' + vdup.i8 q13, r3 @beta + vcgt.u8 q14, q14, q11 @ABS(p0 - q0) <((Alpha >>2) + 2) + vaddl.u8 q11, d6, d10 @p0+q1 L + vcgt.u8 q7, q13, q15 @beta>Ap + vaddl.u8 q15, d7, d11 @p0+q1 H + vaddw.u8 q11, q11, d4 @p0+q1+p1 L + vaddw.u8 q15, q15, d5 @p0+q1+p1 H + vaddw.u8 q11, q11, d4 @p0+q1+2*p1 L + vaddw.u8 q15, q15, d5 @p0+q1+2*p1 H + vand q7, q7, q14 @(Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2) + vrshrn.u16 d22, q11, #2 @((X2(p1) + p0 + q1 + 2) >> 2) L p0" + vrshrn.u16 d23, q15, 
#2 @((X2(p1) + p0 + q1 + 2) >> 2) H p0" + vaddl.u8 q15, d2, d0 @p2+p3 L + vbif q12, q11, q7 @p0' or p0 " + vaddl.u8 q11, d3, d1 @p2+p3 H + vadd.u16 q15, q15, q15 @2*(p2+p3) L + vadd.u16 q11, q11, q11 @2*(p2+p3)H + vadd.u16 q8, q8, q15 @(X2(p3) + X3(p2) + p1 + p0 + q0) L + vadd.u16 q9, q9, q11 @(X2(p3) + X3(p2) + p1 + p0 + q0) H + vabd.u8 q15, q6, q4 @Aq = abs(q2-q0) + vabd.u8 q11, q5, q4 @ABS(Q1-Q0) + vrshrn.u16 d16, q8, #3 @((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3); L p2' + vrshrn.u16 d17, q9, #3 @((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3); H p2' + vabd.u8 q9, q2, q3 @ABS(p1-p0) + vcgt.u8 q15, q13, q15 @Aq < Beta + vcge.u8 q11, q11, q13 @ABS(q1 - q0) >= Beta + vcge.u8 q9, q9, q13 @ABS(p1 - p0) >= beta + vdup.i8 q13, r2 @duplicate alpha + vand q15, q15, q14 @(Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) + vabd.u8 q14, q3, q4 @abs(p0-q0) + vorr q11, q11, q9 @ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta + vaddl.u8 q9, d6, d8 @p0+q0 L + vcge.u8 q14, q14, q13 @ABS(p0 - q0) >= Alpha + vaddl.u8 q13, d7, d9 @p0+q0 H + vaddw.u8 q9, q9, d10 @p0+q0+q1 L + vorr q11, q11, q14 @ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta||ABS(p0 - q0) >= Alpha + vaddw.u8 q13, q13, d11 @p0+q0+q1 H + vbic q7, q7, q11 @final condn for p's + vmov.i8 q14, #2 + vbif q3, q12, q11 @final p0 + vbit q1, q8, q7 @final p2 + vbif q10, q2, q7 @final p1 + vaddl.u8 q12, d8, d4 @q0+p1 L + vmlal.u8 q12, d10, d28 @X2(q1) + q0 + p1 L + vaddl.u8 q8, d9, d5 @q0+p1 H + vmlal.u8 q8, d11, d28 @X2(q1) + q0 + p1 H + vmov.i16 q14, #2 + vaddl.u8 q7, d4, d12 @p1+q2 L + vmla.u16 q7, q9, q14 @p1 + X2(p0) + X2(q0) + X2(q1) + q2L + vaddl.u8 q2, d5, d13 @p1+q2H + vmla.u16 q2, q13, q14 @p1 + X2(p0) + X2(q0) + X2(q1) + q2H + vrshrn.u16 d24, q12, #2 @(X2(q1) + q0 + p1 + 2) >> 2; L q0' + vrshrn.u16 d25, q8, #2 @(X2(q1) + q0 + p1 + 2) >> 2; H q0' + vaddw.u8 q9, q9, d12 @p0 + q0 + q1 + q2 L + vaddw.u8 q13, q13, d13 @p0 + q0 + q1 + q2 H + vrshrn.u16 d16, q7, #3 @(p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3 L qo" + vpop {q7} + 
vrshrn.u16 d17, q2, #3 @(p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3 H qo" + vrshrn.u16 d4, q9, #2 @p0 + q0 + q1 + q2 + 2)>>2 L q1' + vrshrn.u16 d5, q13, #2 @p0 + q0 + q1 + q2 + 2)>>2 H q1' + vbit q12, q8, q15 @q0' or q0" + vbic q15, q15, q11 @final condn for q's + vtrn.8 d0, d2 @row1 &2 + vbit q5, q2, q15 @final q1 + vtrn.8 d1, d3 @row9 &10 + vaddl.u8 q8, d12, d14 @q2+q3 L + vtrn.8 d20, d6 @row3&row4 + vaddl.u8 q2, d13, d15 @q2+q3 H + vtrn.8 d21, d7 @row11 & 12 + vmla.u16 q9, q8, q14 @X2(q3) + X3(q2) + q1 + q0 + p0 L + vtrn.16 d2, d6 @row2 & row4 + vmla.u16 q13, q2, q14 @X2(q3) + X3(q2) + q1 + q0 + p0 H + vtrn.16 d3, d7 @row10 & 12 + vbif q4, q12, q11 @final q0 + vtrn.16 d0, d20 @row1 & 3 + vrshrn.u16 d18, q9, #3 @(X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3; L + vtrn.16 d1, d21 @row9 & row11 + vrshrn.u16 d19, q13, #3 @(X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3; H + vtrn.8 d8, d10 @row5&6 + vbit q6, q9, q15 @final q2 + vtrn.8 d9, d11 @row13 &14 + vtrn.8 d12, d14 @row7 & 8 + vtrn.8 d13, d15 @row15 & 16 + vtrn.16 d10, d14 @row6 & row8 + vtrn.16 d11, d15 @row14 & row16 + @now Q3 ->p0 and Q7->q3 + vtrn.16 d8, d12 @row 5 & 7 + vtrn.16 d9, d13 @row13 & row15 + sub r0, r0, r1, lsl#4 @restore pointer + vtrn.32 d6, d14 @row4 & 8 + vtrn.32 d7, d15 @row 12 & 16 + vtrn.32 d0, d8 @row1 & row5 + vtrn.32 d1, d9 @row9 & 13 + vtrn.32 d2, d10 @row2 &6 + vtrn.32 d3, d11 @row10&row14 + vtrn.32 d20, d12 @row3 & 7 + vtrn.32 d21, d13 @row11 & row15 + vst1.8 d0, [r0], r1 @row1 + vst1.8 d2, [r0], r1 @row2 + vst1.8 d20, [r0], r1 @row3 + vst1.8 d6, [r0], r1 @row4 + vst1.8 d8, [r0], r1 @row5 + vst1.8 d10, [r0], r1 @row6 + vst1.8 d12, [r0], r1 @row7 + vst1.8 d14, [r0], r1 @row8 + vst1.8 d1, [r0], r1 @row9 + vst1.8 d3, [r0], r1 @row10 + vst1.8 d21, [r0], r1 @row11 + vst1.8 d7, [r0], r1 @row12 + vst1.8 d9, [r0], r1 @row13 + vst1.8 d11, [r0], r1 @row14 + vst1.8 d13, [r0], r1 @row15 + vst1.8 d15, [r0], r1 @row16 + vpop {d8 - d15} + ldmfd sp!, {r12, pc} + + + +@/** 
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a luma block vertical edge when the
+@* boundary strength is set to 4 (8 rows per call, hence called twice)
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.4 under the title
+@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+    .global ih264_deblk_luma_vert_bs4_mbaff_a9
+
+ih264_deblk_luma_vert_bs4_mbaff_a9:
+
+    stmfd sp!, {lr}  @save the return address
+
+    sub r0, r0, #4  @r0 = pu1_src - 4, i.e. the p3 sample of the first row
+    vpush {d8 - d15}  @save the callee-saved NEON registers used below
+    @loading [p3:p2],[p1:p0]:[q0:q1]:[q2:q3] for every row (8 rows, 8 bytes per row)
+    vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1
+    vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
+    vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
+    vld4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
+    vld4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1
+    vld4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
+    vld4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
+    vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1
+
+    vuzp.8 d0, d1  @D0->p3, D1->p2
+    vuzp.8 d2, d3  @D2->p1, D3->p0
+    vuzp.8 d4, d5  @D4->q0, D5->q1
+    vuzp.8 d6, d7  @D6->q2, D7->q3
+
+    vmov.i16 q14, #2  @constant 2 in 16-bit lanes, used for the X2(...) terms
+    vaddl.u8 q4, d3, d4  @p0+q0
+    vaddw.u8 q5, q4, d2  @p0+q0+p1
+    vaddl.u8 q6, d1, d5  @p2+q1
+    vmla.u16 q6, q5, q14  @p2 + X2(p1) + X2(p0) + X2(q0) + q1
+
+    vmov.i8 d14, #2  @constant 2 in 8-bit lanes, base for (alpha >> 2) + 2
+    vaddw.u8 q4, q5, d1  @p0+q0+p1+p2
+    vdup.i8 d15, r2  @duplicate alpha
+    vrshrn.u16 d10, q4, #2  @((p2 + p1 + p0 + q0 + 2) >> 2) p1'
+    vabd.u8 d11, d3, d4  @ABD(p0-q0)
+    vsra.u8 d14, d15, #2  @(alpha >> 2) + 2 (accumulates onto the 2 already in D14)
+    vabd.u8 d15, d1, d3  @Ap = ABD(p2-p0)
+    vrshrn.u16 d12, q6, #3  @((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3) p0'
+    vdup.i8 d13, r3  @beta
+    vcgt.u8 d14, d14, d11  @ABS(p0 - q0) <((Alpha >>2) + 2)
+    vaddl.u8 q8, d3, d5  @p0+q1
+    vcgt.u8 d26, d13, d15  @beta>Ap
+    vaddw.u8 q8, q8, d2  @p0+q1+p1
+    vaddw.u8 q8, q8, d2  @p0+q1+2*p1
+    vand d26, d26, d14  @(Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
+    vrshrn.u16 d11, q8, #2  @((X2(p1) + p0 + q1 + 2) >> 2) p0"
+    vbif d12, d11, d26  @p0' or p0" (p0" where the strong-filter condition is false)
+    vaddl.u8 q9, d1, d0  @p2+p3
+    vadd.u16 q9, q9, q9  @2*(p2+p3)
+    vadd.u16 q4, q4, q9  @(X2(p3) + X3(p2) + p1 + p0 + q0)
+    vabd.u8 d15, d6, d4  @Aq = abs(q2-q0)
+    vabd.u8 d11, d5, d4  @ABS(q1-q0)
+    vrshrn.u16 d8, q4, #3  @((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3); p2'
+    vabd.u8 d9, d2, d3  @ABS(p1-p0)
+    vcgt.u8 d15, d13, d15  @Aq < Beta
+    vcge.u8 d11, d11, d13  @ABS(q1 - q0) >= Beta
+    vcge.u8 d9, d9, d13  @ABS(p1 - p0) >= beta
+    vdup.i8 d13, r2  @duplicate alpha
+    vand d15, d15, d14  @(Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
+    vabd.u8 d14, d3, d4  @abs(p0-q0)
+    vorr d11, d11, d9  @ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta
+    vcge.u8 d14, d14, d13  @ABS(p0 - q0) >= Alpha
+    vaddl.u8 q10, d3, d4  @p0+q0
+    vorr d11, d11, d14  @ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta||ABS(p0 - q0) >= Alpha
+    vaddw.u8 q10, q10, d5  @p0+q0+q1
+    vbic d26, d26, d11  @final condn for p's
+    vmov.i8 d14, #2  @constant 2 in 8-bit lanes, used for X2(q1)
+    vbif d3, d12, d11  @final p0
+    vbit d1, d8, d26  @final p2
+    vbif d10, d2, d26  @final p1
+    vaddl.u8 q6, d4, d2  @q0+p1
+    vmlal.u8 q6, d5, d14  @X2(q1) + q0 + p1
+
+    vaddl.u8 q11, d2, d6  @p1+q2
+    vmla.u16 q11, q10, q14  @p1 + X2(p0) + X2(q0) + X2(q1) + q2
+    vrshrn.u16 d12, q6, #2  @(X2(q1) + q0 + p1 + 2) >> 2; q0'
+    vaddw.u8 q10, q10, d6  @p0 + q0 + q1 + q2
+    vrshrn.u16 d8, q11, #3  @(p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3 q0"
+
+    vrshrn.u16 d2, q10, #2  @(p0 + q0 + q1 + q2 + 2) >> 2 q1'
+    vbit d12, d8, d15  @q0' or q0"
+    vbic d15, d15, d11  @final condn for q's
+    vbit d5, d2, d15  @final q1
+    vaddl.u8 q12, d6, d7  @q2+q3
+    vmla.u16 q10, q12, q14  @X2(q3) + X3(q2) + q1 + q0 + p0
+    vbif d4, d12, d11  @final q0
+    vrshrn.u16 d9, q10, #3  @(X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3; q2'
+    vbit d6, d9, d15  @final q2
+    vand d2, d10, d10  @D2 = final p1 (copy of D10); D0->p3, D1->p2, D2->p1, D3->p0, D4->q0, D5->q1, D6->q2, D7->q3
+
+    vzip.8 d0, d1  @D0,D1 -> [p3:p2]
+    vzip.8 d2, d3  @D2,D3 -> [p1:p0]
+    vzip.8 d4, d5  @D4,D5 -> [q0:q1]
+    vzip.8 d6, d7  @D6,D7 -> [q2:q3]
+
+    sub r0, r0, r1, lsl#3  @restore pointer (rewind the 8 rows advanced by the loads)
+
+    @storing [p3:p2],[p1:p0]:[q0:q1]:[q2:q3] in every row
+    vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1
+    vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
+    vst4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
+    vst4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
+    vst4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1
+    vst4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
+    vst4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
+    vst4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1
+    vpop {d8 - d15}  @restore the NEON registers saved on entry
+    ldmfd sp!, {pc}  @return (saved lr popped into pc)
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a luma block vertical edge for cases where the
+@* boundary strength is less than 4 (8 rows per call, hence called twice)
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.3 under the title
+@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @param[in] sp(0) - u4_bs
+@* Packed boundary strengths: 4 bytes, each applied to 2 consecutive rows
+@*
+@* @param[in] sp(4) - pu1_cliptab
+@* tc0_table (indexed by boundary strength; 4 bytes are read)
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+    .global ih264_deblk_luma_vert_bslt4_mbaff_a9
+
+ih264_deblk_luma_vert_bslt4_mbaff_a9:
+
+    stmfd sp!, {r12, lr}  @save r12 and the return address (8 bytes)
+
+    sub r0, r0, #4  @r0 = pu1_src - 4, i.e. the p3 sample of the first row
+    ldr r12, [sp, #8]  @r12 = ui_Bs (first stack argument, above the 8 bytes just pushed)
+    ldr r14, [sp, #12]  @r14 = pu1_ClipTab (second stack argument)
+    vpush {d8 - d15}  @save the callee-saved NEON registers used below
+    @loading [p3:p2],[p1:p0]:[q0:q1]:[q2:q3] for every row (8 rows, 8 bytes per row)
+    vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1
+    vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
+    vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
+    vld4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
+    vld4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1
+    vld4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
+    vld4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
+    vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1
+
+    vuzp.8 d0, d1  @D0->p3, D1->p2
+    vuzp.8 d2, d3  @D2->p1, D3->p0
+    vuzp.8 d4, d5  @D4->q0, D5->q1
+    vuzp.8 d6, d7  @D6->q2, D7->q3
+
+    rev r12, r12  @reversing the byte order of ui_bs
+    vmov.32 d8[0], r12  @D8[0] = ui_Bs
+    vld1.32 d9[0], [r14]  @D9[0] contains the first 4 bytes of cliptab
+    vmovl.u8 q15, d8  @D30 = ui_Bs bytes widened to one 16-bit lane each
+    vtbl.8 d8, {d9}, d30  @puc_ClipTab[ui_Bs]
+    vsli.16 d8, d8, #8  @D8 = C0, copied into both bytes of each 16-bit lane (one C0 per row)
+
+    vrhadd.u8 d10, d3, d4  @((p0 + q0 + 1) >> 1)
+    vmov.i8 d31, #2  @constant 2 in 8-bit lanes, used for (p1 << 1)
+    vabd.u8 d11, d3, d4  @ABS(p0 - q0)
+    vaddl.u8 q6, d10, d1  @(p2 + ((p0 + q0 + 1) >> 1))
+    vmlsl.u8 q6, d2, d31  @(p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1))
+    vdup.8 d14, r2  @alpha
+    vcle.u8 d11, d14, d11  @ABS(p0 - q0) >= Alpha(Alpha <=ABS(p0 - q0))
+    vdup.i8 d14, r3  @beta
+    vabd.u8 d15, d5, d4  @ABS(q1 - q0)
+    vqshrn.s16 d12, q6, #1  @((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1)
+    vcge.u8 d15, d15, d14  @ABS(q1 - q0) >= Beta
+    vabd.u8 d13, d2, d3  @ABS(p1 - p0)
+    vmin.s8 d12, d12, d8  @min(deltap1 ,C0)
+    vorr d11, d11, d15  @ABS(q1 - q0) >= Beta ||ABS(p0 - q0) >= Alpha
+    vneg.s8 d15, d8  @-C0
+    vcge.u8 d13, d13, d14  @ABS(p1 - p0) >= Beta
+    vmax.s8 d12, d12, d15  @max(deltap1,-C0)
+    vorr d11, d11, d13  @ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta)
+    vceq.u16 d13, d30, #0  @ui_bs == 0
+    vaddl.u8 q14, d10, d6  @q2 + ((p0 + q0 + 1) >> 1)
+    vsubw.u8 q14, q14, d5  @q2 + ((p0 + q0 + 1) >> 1) - q1
+    vsubw.u8 q14, q14, d5  @q2 + ((p0 + q0 + 1) >> 1) - 2*q1
+    vorr d13, d13, d11  @(ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta))
+    @|| (ui_bs == 0)  -> D13 = mask of pixels that must not be filtered
+    vqshrn.s16 d9, q14, #1  @(q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1
+    vabd.u8 d11, d1, d3  @Ap = ABS(p2 - p0)
+    vabd.u8 d10, d6, d4  @Aq= ABS(q2 - q0)
+    vclt.u8 d11, d11, d14  @Ap < Beta
+    vmin.s8 d9, d9, d8  @min(deltaq1,C0)
+    vclt.u8 d10, d10, d14  @Aq < Beta
+    vmax.s8 d9, d9, d15  @max(deltaq1,-C0)
+    vsubl.u8 q7, d4, d3  @q0 - p0
+    vshl.s16 q7, q7, #2  @(q0 - p0) << 2
+    vsub.u8 d8, d8, d11  @C0 + (Ap < Beta): a true mask is 0xFF (-1), so subtracting it adds 1
+    vaddw.u8 q7, q7, d2  @((q0 - p0) << 2) + p1
+    vsubw.u8 q7, q7, d5  @((q0 - p0) << 2) + (p1 - q1)
+    vbic d11, d11, d13  @final condition for p1
+    vrshr.s16 q15, q7, #3  @delta = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3
+    vsub.u8 d8, d8, d10  @C0 + (Ap < Beta) + (Aq < Beta)
+    vbic d10, d10, d13  @final condition for q1
+    vabs.s16 q14, q15  @abs(delta) in 16-bit lanes
+    vmovn.i16 d15, q14  @abs(delta)
+    vand d12, d12, d11  @deltap1, zeroed where p1 must not change
+    vand d9, d9, d10  @deltaq1, zeroed where q1 must not change
+    vmin.u8 d15, d15, d8  @min((abs(delta),C)
+    vadd.i8 d2, d2, d12  @p1+deltap1
+    vadd.i8 d5, d5, d9  @q1+deltaq1
+    vbic d15, d15, d13  @abs(delta) of pixels to be changed only
+    vcge.s16 q14, q15, #0  @mask of lanes where delta >= 0
+    vmovn.i16 d14, q14  @sign(delta)
+    vqsub.u8 d11, d3, d15  @clip(p0-abs(delta))
+    vqadd.u8 d3, d3, d15  @clip(p0+abs(delta))
+    vqadd.u8 d12, d4, d15  @clip(q0+abs(delta))
+    vqsub.u8 d4, d4, d15  @clip(q0-abs(delta))
+    vbif d3, d11, d14  @p0: keep p0+abs(delta) where delta >= 0, else take p0-abs(delta)
+    vbif d4, d12, d14  @q0: keep q0-abs(delta) where delta >= 0, else take q0+abs(delta)
+
+    sub r0, r0, r1, lsl#3  @restore pointer (rewind the 8 rows advanced by the loads)
+    @D0->p3, D1->p2, D2->p1, D3->p0, D4->q0, D5->q1, D6->q2, D7->q3
+    vzip.8 d0, d1  @D0,D1 -> [p3:p2]
+    vzip.8 d2, d3  @D2,D3 -> [p1:p0]
+    vzip.8 d4, d5  @D4,D5 -> [q0:q1]
+    vzip.8 d6, d7  @D6,D7 -> [q2:q3]
+
+    @storing [p3:p2],[p1:p0]:[q0:q1]:[q2:q3] in every row
+    vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1
+    vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
+    vst4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
+    vst4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
+    vst4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1
+    vst4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
+    vst4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
+    vst4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1
+    vpop {d8 - d15}  @restore the NEON registers saved on entry
+    ldmfd sp!, {r12, pc}  @restore r12 and return (saved lr popped into pc)
+
+
+
diff --git a/common/arm/ih264_default_weighted_pred_a9q.s b/common/arm/ih264_default_weighted_pred_a9q.s
new file mode 100755
index 0000000..94cda46
--- /dev/null
+++ b/common/arm/ih264_default_weighted_pred_a9q.s
@@ -0,0 +1,359 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_default_weighted_pred_a9q.s
+@*
+@* @brief
+@* Contains function definitions for default weighted prediction.
+@* Functions are coded using NEON instructions and can be compiled using ARM RVCT
+@*
+@* @author
+@* Kaushik Senthoor R
+@*
+@* @par List of Functions:
+@*
+@* - ih264_default_weighted_pred_luma_a9q()
+@* - ih264_default_weighted_pred_chroma_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@*******************************************************************************
+@* @function
+@* ih264_default_weighted_pred_luma_a9q()
+@*
+@* @brief
+@* This routine performs the default weighted prediction as described in sec
+@* 8.4.2.3.1 titled "Default weighted sample prediction process" for luma.
+@*
+@* @par Description:
+@* This function gets two ht x wd blocks, calculates their rounded-average and
+@* stores it in the destination block.
+@*
+@* @param[in] pu1_src1:
+@* UWORD8 Pointer to the buffer containing the first input block.
+@*
+@* @param[in] pu1_src2:
+@* UWORD8 Pointer to the buffer containing the second input block.
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination where the output block is stored.
+@*
+@* @param[in] src_strd1
+@* Stride of the first input buffer
+@*
+@* @param[in] src_strd2
+@* Stride of the second input buffer
+@*
+@* @param[in] dst_strd
+@* Stride of the destination buffer
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
+@*
+@*******************************************************************************
+@*/
+@void ih264_default_weighted_pred_luma_a9q(UWORD8 *pu1_src1,
+@ UWORD8 *pu1_src2,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd1,
+@ WORD32 src_strd2,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd)
+@
+@**************Variables Vs Registers*****************************************
+@ r0 => pu1_src1
+@ r1 => pu1_src2
+@ r2 => pu1_dst
+@ r3 => src_strd1
+@ [sp] => src_strd2 (r4)
+@ [sp+4] => dst_strd (r5)
+@ [sp+8] => ht (r6)
+@ [sp+12] => wd (r7)
+@
+.text
+.p2align 2
+
+    .global ih264_default_weighted_pred_luma_a9q
+
+ih264_default_weighted_pred_luma_a9q:
+
+    stmfd sp!, {r4-r7, r14}  @save r4-r7 and lr (20 bytes); stack arguments now start at [sp, #20]
+    ldr r7, [sp, #32]  @Load wd
+    ldr r4, [sp, #20]  @Load src_strd2
+    ldr r5, [sp, #24]  @Load dst_strd
+    cmp r7, #16
+    ldr r6, [sp, #28]  @Load ht
+    vpush {d8-d15}  @save callee-saved NEON registers (q4-q7 are used by loop_16)
+    beq loop_16  @branch if wd is 16
+    cmp r7, #8
+    beq loop_8  @branch if wd is 8
+
+loop_4:  @reached when wd is neither 16 nor 8 (4 bytes per row); each iteration processes four rows
+
+    vld1.32 d0[0], [r0], r3  @load row 1 in source 1
+    vld1.32 d0[1], [r0], r3  @load row 2 in source 1
+    vld1.32 d2[0], [r1], r4  @load row 1 in source 2
+    vld1.32 d2[1], [r1], r4  @load row 2 in source 2
+
+    vld1.32 d1[0], [r0], r3  @load row 3 in source 1
+    vld1.32 d1[1], [r0], r3  @load row 4 in source 1
+    vrhadd.u8 d0, d0, d2  @rounded average of rows 1 and 2
+    vld1.32 d3[0], [r1], r4  @load row 3 in source 2
+    vld1.32 d3[1], [r1], r4  @load row 4 in source 2
+
+    subs r6, r6, #4  @decrement ht by 4
+    vst1.32 d0[0], [r2], r5  @store row 1 in destination
+    vst1.32 d0[1], [r2], r5  @store row 2 in destination
+    vrhadd.u8 d1, d1, d3  @rounded average of rows 3 and 4
+    vst1.32 d1[0], [r2], r5  @store row 3 in destination
+    vst1.32 d1[1], [r2], r5  @store row 4 in destination
+
+    bgt loop_4  @if greater than 0 repeat the loop again
+
+    b end_loops
+
+loop_8:  @each iteration processes four rows
+
+    vld1.8 d0, [r0], r3  @load row 1 in source 1
+    vld1.8 d4, [r1], r4  @load row 1 in source 2
+    vld1.8 d1, [r0], r3  @load row 2 in source 1
+    vld1.8 d5, [r1], r4  @load row 2 in source 2
+    vld1.8 d2, [r0], r3  @load row 3 in source 1
+    vrhadd.u8 q0, q0, q2  @rounded average of rows 1 and 2 (d0:d1 with d4:d5)
+    vld1.8 d6, [r1], r4  @load row 3 in source 2
+    vld1.8 d3, [r0], r3  @load row 4 in source 1
+    vrhadd.u8 d2, d2, d6  @rounded average of row 3
+    vld1.8 d7, [r1], r4  @load row 4 in source 2
+
+    subs r6, r6, #4  @decrement ht by 4
+    vst1.8 d0, [r2], r5  @store row 1 in destination
+    vrhadd.u8 d3, d3, d7  @rounded average of row 4
+    vst1.8 d1, [r2], r5  @store row 2 in destination
+    vst1.8 d2, [r2], r5  @store row 3 in destination
+    vst1.8 d3, [r2], r5  @store row 4 in destination
+
+    bgt loop_8  @if greater than 0 repeat the loop again
+
+    b end_loops
+
+loop_16:  @each iteration processes eight rows
+
+    vld1.8 {q0}, [r0], r3  @load row 1 in source 1
+    vld1.8 {q8}, [r1], r4  @load row 1 in source 2
+    vld1.8 {q1}, [r0], r3  @load row 2 in source 1
+    vld1.8 {q9}, [r1], r4  @load row 2 in source 2
+    vrhadd.u8 q0, q0, q8  @rounded average of row 1
+    vld1.8 {q2}, [r0], r3  @load row 3 in source 1
+    vld1.8 {q10}, [r1], r4  @load row 3 in source 2
+    vrhadd.u8 q1, q1, q9  @rounded average of row 2
+    vld1.8 {q3}, [r0], r3  @load row 4 in source 1
+    vld1.8 {q11}, [r1], r4  @load row 4 in source 2
+    vrhadd.u8 q2, q2, q10  @rounded average of row 3
+    vld1.8 {q4}, [r0], r3  @load row 5 in source 1
+    vld1.8 {q12}, [r1], r4  @load row 5 in source 2
+    vrhadd.u8 q3, q3, q11  @rounded average of row 4
+    vld1.8 {q5}, [r0], r3  @load row 6 in source 1
+    vld1.8 {q13}, [r1], r4  @load row 6 in source 2
+    vrhadd.u8 q4, q4, q12  @rounded average of row 5
+    vld1.8 {q6}, [r0], r3  @load row 7 in source 1
+    vld1.8 {q14}, [r1], r4  @load row 7 in source 2
+    vrhadd.u8 q5, q5, q13  @rounded average of row 6
+    vld1.8 {q7}, [r0], r3  @load row 8 in source 1
+    vld1.8 {q15}, [r1], r4  @load row 8 in source 2
+
+    vrhadd.u8 q6, q6, q14  @rounded average of row 7
+    vst1.8 {q0}, [r2], r5  @store row 1 in destination
+    vst1.8 {q1}, [r2], r5  @store row 2 in destination
+    vrhadd.u8 q7, q7, q15  @rounded average of row 8
+    vst1.8 {q2}, [r2], r5  @store row 3 in destination
+    vst1.8 {q3}, [r2], r5  @store row 4 in destination
+    subs r6, r6, #8  @decrement ht by 8
+    vst1.8 {q4}, [r2], r5  @store row 5 in destination
+    vst1.8 {q5}, [r2], r5  @store row 6 in destination
+    vst1.8 {q6}, [r2], r5  @store row 7 in destination
+    vst1.8 {q7}, [r2], r5  @store row 8 in destination
+
+    bgt loop_16  @if greater than 0 repeat the loop again
+
+end_loops:
+
+    vpop {d8-d15}  @restore the NEON registers saved on entry
+    ldmfd sp!, {r4-r7, r15}  @Restore r4-r7 and return (saved lr popped into pc)
+
+
+@*******************************************************************************
+@* @function
+@* ih264_default_weighted_pred_chroma_a9q()
+@*
+@* @brief
+@* This routine performs the default weighted prediction as described in sec
+@* 8.4.2.3.1 titled "Default weighted sample prediction process" for chroma.
+@*
+@* @par Description:
+@* This function gets two ht x wd blocks, calculates their rounded-average and
+@* stores it in the destination block for U and V.
+@*
+@* @param[in] pu1_src1:
+@* UWORD8 Pointer to the buffer containing the first input block.
+@*
+@* @param[in] pu1_src2:
+@* UWORD8 Pointer to the buffer containing the second input block.
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination where the output block is stored.
+@*
+@* @param[in] src_strd1
+@* Stride of the first input buffer
+@*
+@* @param[in] src_strd2
+@* Stride of the second input buffer
+@*
+@* @param[in] dst_strd
+@* Stride of the destination buffer
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
+@*
+@*******************************************************************************
+@*/
+@void ih264_default_weighted_pred_chroma_a9q(UWORD8 *pu1_src1,
+@ UWORD8 *pu1_src2,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd1,
+@ WORD32 src_strd2,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd)
+@
+@**************Variables Vs Registers*****************************************
+@ r0 => pu1_src1
+@ r1 => pu1_src2
+@ r2 => pu1_dst
+@ r3 => src_strd1
+@ [sp] => src_strd2 (r4)
+@ [sp+4] => dst_strd (r5)
+@ [sp+8] => ht (r6)
+@ [sp+12] => wd (r7)
+@
+
+
+    .global ih264_default_weighted_pred_chroma_a9q
+
+ih264_default_weighted_pred_chroma_a9q:
+
+    stmfd sp!, {r4-r7, r14}  @save r4-r7 and lr (20 bytes); stack arguments now start at [sp, #20]
+    ldr r7, [sp, #32]  @Load wd
+    ldr r4, [sp, #20]  @Load src_strd2
+    ldr r5, [sp, #24]  @Load dst_strd
+    cmp r7, #8
+    ldr r6, [sp, #28]  @Load ht
+    vpush {d8-d15}  @save callee-saved NEON registers (q4-q7 are used by loop_8_uv)
+    beq loop_8_uv  @branch if wd is 8
+    cmp r7, #4
+    beq loop_4_uv  @branch if wd is 4
+
+loop_2_uv:  @reached when wd is neither 8 nor 4 (4 bytes per row); each iteration processes two rows
+
+    vld1.32 d0[0], [r0], r3  @load row 1 in source 1
+    vld1.32 d0[1], [r0], r3  @load row 2 in source 1
+
+    vld1.32 d1[0], [r1], r4  @load row 1 in source 2
+    vld1.32 d1[1], [r1], r4  @load row 2 in source 2
+
+    vrhadd.u8 d0, d0, d1  @rounded average of rows 1 and 2
+
+    subs r6, r6, #2  @decrement ht by 2
+    vst1.32 d0[0], [r2], r5  @store row 1 in destination
+    vst1.32 d0[1], [r2], r5  @store row 2 in destination
+
+    bgt loop_2_uv  @if greater than 0 repeat the loop again
+
+    b end_loops_uv
+
+loop_4_uv:  @wd is 4 (8 bytes per row); each iteration processes two rows
+
+    vld1.8 d0, [r0], r3  @load row 1 in source 1
+    vld1.8 d2, [r1], r4  @load row 1 in source 2
+    vld1.8 d1, [r0], r3  @load row 2 in source 1
+    vrhadd.u8 d0, d0, d2  @rounded average of row 1
+    vld1.8 d3, [r1], r4  @load row 2 in source 2
+
+    vrhadd.u8 d1, d1, d3  @rounded average of row 2
+    vst1.8 d0, [r2], r5  @store row 1 in destination
+    subs r6, r6, #2  @decrement ht by 2
+    vst1.8 d1, [r2], r5  @store row 2 in destination
+
+    bgt loop_4_uv  @if greater than 0 repeat the loop again
+
+    b end_loops_uv
+
+loop_8_uv:  @wd is 8 (16 bytes per row); each iteration processes four rows
+
+    vld1.8 {q0}, [r0], r3  @load row 1 in source 1
+    vld1.8 {q4}, [r1], r4  @load row 1 in source 2
+    vld1.8 {q1}, [r0], r3  @load row 2 in source 1
+    vrhadd.u8 q0, q0, q4  @rounded average of row 1
+    vld1.8 {q5}, [r1], r4  @load row 2 in source 2
+    vld1.8 {q2}, [r0], r3  @load row 3 in source 1
+    vrhadd.u8 q1, q1, q5  @rounded average of row 2
+    vld1.8 {q6}, [r1], r4  @load row 3 in source 2
+    vld1.8 {q3}, [r0], r3  @load row 4 in source 1
+    vrhadd.u8 q2, q2, q6  @rounded average of row 3
+    vld1.8 {q7}, [r1], r4  @load row 4 in source 2
+
+    vst1.8 {q0}, [r2], r5  @store row 1 in destination
+    vrhadd.u8 q3, q3, q7  @rounded average of row 4
+    vst1.8 {q1}, [r2], r5  @store row 2 in destination
+    subs r6, r6, #4  @decrement ht by 4
+    vst1.8 {q2}, [r2], r5  @store row 3 in destination
+    vst1.8 {q3}, [r2], r5  @store row 4 in destination
+
+    bgt loop_8_uv  @if greater than 0 repeat the loop again
+
+end_loops_uv:
+
+    vpop {d8-d15}  @restore the NEON registers saved on entry
+    ldmfd sp!, {r4-r7, r15}  @Restore r4-r7 and return (saved lr popped into pc)
+
+
diff --git a/common/arm/ih264_ihadamard_scaling_a9.s b/common/arm/ih264_ihadamard_scaling_a9.s
new file mode 100755
index 0000000..687099a
--- /dev/null
+++ b/common/arm/ih264_ihadamard_scaling_a9.s
@@ -0,0 +1,250 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore
+@*/
+@/**
+@ *******************************************************************************
+@ * @file
+@ * ih264_ihadamard_scaling_a9.s
+@ *
+@ * @brief
+@ * Contains function definitions for inverse hadamard transform on 4x4 DC outputs
+@ * of 16x16 intra-prediction
+@ *
+@ * @author
+@ * Mohit
+@ *
+@ * @par List of Functions:
+@ * - ih264_ihadamard_scaling_4x4_a9()
+@ * - ih264_ihadamard_scaling_2x2_uv_a9()
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@ */
+@ * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients
+@ * of a 16x16 intra prediction macroblock, and then performs scaling.
+@ * prediction buffer
+@ *
+@ * @par Description:
+@ * The DC coefficients pass through a 2-stage inverse hadamard transform.
+@ * This inverse transformed content is scaled based on Qp value.
+@ *
+@ * @param[in] pi2_src
+@ * input 4x4 block of DC coefficients
+@ *
+@ * @param[out] pi2_out
+@ * output 4x4 block
+@ *
+@ * @param[in] pu2_iscal_mat
+@ * pointer to scaling list (only element 0 is read)
+@ *
+@ * @param[in] pu2_weigh_mat
+@ * pointer to weight matrix (only element 0 is read)
+@ *
+@ * @param[in] u4_qp_div_6
+@ * Floor (qp/6)
+@ *
+@ * @param[in] pi4_tmp
+@ * temporary buffer of size 1*16 (never read or written by this assembly version)
+@ *
+@ * @returns none
+@ *
+@ * @remarks none
+@ *
+@ *******************************************************************************
+@ */
+@ *
+@ *******************************************************************************
+@ */
+@ void ih264_ihadamard_scaling_4x4(WORD16* pi2_src,
+@ WORD16* pi2_out,
+@ const UWORD16 *pu2_iscal_mat,
+@ const UWORD16 *pu2_weigh_mat,
+@ UWORD32 u4_qp_div_6,
+@ WORD32* pi4_tmp)
+@**************Variables Vs Registers*****************************************
+@r0 => *pi2_src
+@r1 => *pi2_out
+@r2 => *pu2_iscal_mat
+@r3 => *pu2_weigh_mat
+@r4 => u4_qp_div_6 (loaded from the stack)
+
+.text
+.p2align 2
+
+    .global ih264_ihadamard_scaling_4x4_a9
+
+ih264_ihadamard_scaling_4x4_a9:
+
+@VLD4.S16 is used because the pointer is incremented by SUB_BLK_WIDTH_4x4
+@If the macro value changes need to change the instruction according to it.
+@Only one shift is done in horizontal inverse because,
+@if u4_qp_div_6 is lesser than 4 then shift value will be negative and do negative left shift, in this case rnd_factor has value
+@if u4_qp_div_6 is greater than 4 then shift value will be positive and do left shift, here rnd_factor is 0
+
+    stmfd sp!, {r4-r12, r14}  @ save r4-r12 and lr (40 bytes); stack arguments now start at [sp, #40]
+    ldr r4, [sp, #40]  @ Loads u4_qp_div_6
+    vdup.s32 q10, r4  @ Populate the u4_qp_div_6 in Q10
+    ldrh r6, [r3]  @ load pu2_weight_mat[0] , H for unsigned halfword load
+    ldrh r7, [r2]  @ load pu2_iscal_mat[0] , H for unsigned halfword load
+    mul r6, r6, r7  @ pu2_iscal_mat[0]*pu2_weigh_mat[0]
+    vdup.s32 q9, r6  @ Populate pu2_iscal_mat[0]*pu2_weigh_mat[0] 32-bit in Q9
+    vpush {d8-d15}  @ save callee-saved NEON registers (q4 and q5 are used below)
+@=======================INVERSE HADAMARD TRANSFORM================================
+
+    vld4.s16 {d0, d1, d2, d3}, [r0]  @load x4,x5,x6,x7
+    vaddl.s16 q12, d0, d3  @x0 = x4 + x7
+    vaddl.s16 q13, d1, d2  @x1 = x5 + x6
+    vsubl.s16 q14, d1, d2  @x2 = x5 - x6
+    vsubl.s16 q15, d0, d3  @x3 = x4 - x7
+
+    vadd.s32 q2, q12, q13  @pi4_tmp_ptr[0] = x0 + x1
+    vadd.s32 q3, q15, q14  @pi4_tmp_ptr[1] = x3 + x2
+    vsub.s32 q4, q12, q13  @pi4_tmp_ptr[2] = x0 - x1
+    vsub.s32 q5, q15, q14  @pi4_tmp_ptr[3] = x3 - x2
+
+    vtrn.32 q2, q3  @Transpose the register for vertical transform
+    vtrn.32 q4, q5  @second half of the 4x4 transpose (completed by the vswp pair)
+
+    vswp d5, d8  @Q2 = x4, Q4 = x6
+    vswp d7, d10  @Q3 = x5, Q5 = x7
+
+
+    vadd.s32 q12, q2, q5  @x0 = x4+x7
+    vadd.s32 q13, q3, q4  @x1 = x5+x6
+    vsub.s32 q14, q3, q4  @x2 = x5-x6
+    vsub.s32 q15, q2, q5  @x3 = x4-x7
+
+    vadd.s32 q0, q12, q13  @pi4_tmp_ptr[0] = x0 + x1
+    vadd.s32 q1, q15, q14  @pi4_tmp_ptr[1] = x3 + x2
+    vsub.s32 q2, q12, q13  @pi4_tmp_ptr[2] = x0 - x1
+    vsub.s32 q3, q15, q14  @pi4_tmp_ptr[3] = x3 - x2
+
+
+    vmul.s32 q0, q0, q9  @ Q0 = p[i] = (x[i] * trns_coeff[i]) where i = 0..3
+    vmul.s32 q1, q1, q9  @ Q1 = p[i] = (x[i] * trns_coeff[i]) where i = 4..7
+    vmul.s32 q2, q2, q9  @ Q2 = p[i] = (x[i] * trns_coeff[i]) where i = 8..11
+    vmul.s32 q3, q3, q9  @ Q3 = p[i] = (x[i] * trns_coeff[i]) where i = 12..15
+
+    vshl.s32 q0, q0, q10  @ Q0 = q[i] = (p[i] << (qP/6)) where i = 0..3
+    vshl.s32 q1, q1, q10  @ Q1 = q[i] = (p[i] << (qP/6)) where i = 4..7
+    vshl.s32 q2, q2, q10  @ Q2 = q[i] = (p[i] << (qP/6)) where i = 8..11
+    vshl.s32 q3, q3, q10  @ Q3 = q[i] = (p[i] << (qP/6)) where i = 12..15
+
+    vqrshrn.s32 d0, q0, #0x6  @ D0 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 0..3
+    vqrshrn.s32 d1, q1, #0x6  @ D1 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 4..7
+    vqrshrn.s32 d2, q2, #0x6  @ D2 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 8..11
+    vqrshrn.s32 d3, q3, #0x6  @ D3 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 12..15
+
+    vst1.s16 {d0, d1, d2, d3}, [r1]  @store all 16 output values to pi2_out
+
+    vpop {d8-d15}  @restore the NEON registers saved on entry
+    ldmfd sp!, {r4-r12, r15}  @Restore r4-r12 and return (saved lr popped into pc)
+
+
+
+@ *******************************************************************************
+@ */
+@ * @brief This function performs a 2x2 inverse hadamard transform for chroma block
+@ *
+@ * @par Description:
+@ * The DC coefficients pass through a 2-stage inverse hadamard transform.
+@ * This inverse transformed content is scaled based on Qp value.
+@ * Both DC blocks of U and V blocks are processed
+@ *
+@ * @param[in] pi2_src
+@ * input 1x8 block of coeffs.
First 4 are from U and next from V
+@ *
+@ * @param[out] pi2_out
+@ * output 1x8 block
+@ *
+@ * @param[in] pu2_iscal_mat
+@ * pointer to scaling list (only element 0 is read)
+@ *
+@ * @param[in] pu2_weigh_mat
+@ * pointer to weight matrix (only element 0 is read)
+@ *
+@ * @param[in] u4_qp_div_6
+@ * Floor (qp/6), passed on the stack
+@ *
+@ * @returns none
+@ *
+@ * @remarks none
+@ *
+@ *******************************************************************************
+@ */
+@ *
+@ *******************************************************************************
+@ */
+@ void ih264_ihadamard_scaling_2x2_uv(WORD16* pi2_src,
+@ WORD16* pi2_out,
+@ const UWORD16 *pu2_iscal_mat,
+@ const UWORD16 *pu2_weigh_mat,
+@ UWORD32 u4_qp_div_6,
+
+    .global ih264_ihadamard_scaling_2x2_uv_a9
+ih264_ihadamard_scaling_2x2_uv_a9:
+
+@Registers used
+@ r0 : *pi2_src
+@ r1 : *pi2_out
+@ r2 : *pu2_iscal_mat
+@ r3 : *pu2_weigh_mat
+
+    vld1.u16 d26[0], [r2]  @pu2_iscal_mat[0] into lane 0
+    vld1.u16 d27[0], [r3]  @pu2_weigh_mat[0] into lane 0
+    vmull.u16 q15, d26, d27  @pu2_iscal_mat[0] * pu2_weigh_mat[0]
+    vdup.u32 q15, d30[0]  @broadcast the lane-0 product to all four 32-bit lanes
+
+    vld1.u16 d28[0], [sp]  @load qp/6 (low 16 bits of the first stack argument; read before vpush moves sp)
+
+    vpush {d8-d15}  @save callee-saved NEON registers (q5-q7 are used below)
+
+    vmov.u16 d29, #5  @constant 5 in 16-bit lanes
+    vsubl.u16 q14, d28, d29  @qp/6 - 5, widened to 32 bits
+    vdup.s32 q14, d28[0]  @broadcast (qp/6 - 5) to all lanes as the shift count
+
+    vld2.s16 {d0, d1}, [r0]  @load 8 dc coeffs
+    @i2_x4,i2_x6,i2_y4,i2_y6 -> d0
+    @i2_x5,i2_x7,i2_y5,i2_y7 -> d1
+
+    vaddl.s16 q1, d0, d1  @ i4_x0 = i4_x4 + i4_x5;...x2
+    vsubl.s16 q2, d0, d1  @ i4_x1 = i4_x4 - i4_x5;...x3
+
+    vtrn.s32 q1, q2  @i4_x0 i4_x1 -> q1
+
+    vadd.s32 q3, q1, q2  @i4_x4 = i4_x0+i4_x2;.. i4_x5
+    vsub.s32 q1, q1, q2  @i4_x6 = i4_x0-i4_x2;.. i4_x7
+
+    vmul.s32 q5, q3, q15  @scale by pu2_iscal_mat[0] * pu2_weigh_mat[0]
+    vmul.s32 q6, q1, q15  @scale by pu2_iscal_mat[0] * pu2_weigh_mat[0]
+
+    vshl.s32 q7, q5, q14  @shift left by (qp/6 - 5); a negative count shifts right
+    vshl.s32 q8, q6, q14  @shift left by (qp/6 - 5); a negative count shifts right
+
+    vmovn.s32 d18, q7  @i4_x4 i4_x5 i4_y4 i4_y5
+    vmovn.s32 d19, q8  @i4_x6 i4_x7 i4_y6 i4_y7
+
+    vst2.s32 {d18-d19}, [r1]  @interleave 32-bit pairs: output order x4 x5 x6 x7 y4 y5 y6 y7
+
+    vpop {d8-d15}  @restore the NEON registers saved above
+    bx lr  @return
+
+
diff --git a/common/arm/ih264_inter_pred_chroma_a9q.s b/common/arm/ih264_inter_pred_chroma_a9q.s
new file mode 100755
index 0000000..afd2860
--- /dev/null
+++ b/common/arm/ih264_inter_pred_chroma_a9q.s
@@ -0,0 +1,254 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_inter_pred_chroma_a9q.s
+@*
+@* @brief
+@* Contains function definitions for inter prediction interpolation.
+@* +@* @author +@* Ittiam +@* +@* @par List of Functions: +@* +@* - ih264_inter_pred_chroma_a9q() +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@/* All the functions here are replicated from ih264_inter_pred_filters.c +@ + +@/** +@/** +@/** +@ +@/** +@******************************************************************************* +@* +@* @brief +@* Interprediction chroma filter +@* +@* @par Description: +@* Applies filtering to chroma samples as mentioned in +@* sec 8.4.2.2.2 titled "chroma sample interpolation process" +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source containing alternate U and V samples +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] u1_dx +@* dx value where the sample is to be produced(refer sec 8.4.2.2.2 ) +@* +@* @param[in] u1_dy +@* dy value where the sample is to be produced(refer sec 8.4.2.2.2 ) +@* +@* @param[in] ht +@* integer height of the array +@* +@* @param[in] wd +@* integer width of the array +@* +@* @returns +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@void ih264_inter_pred_chroma(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ UWORD8 u1_dx, +@ UWORD8 u1_dy, +@ WORD32 ht, +@ WORD32 wd) +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => u1_dx +@ r5 => u1_dy +@ r6 => height +@ r7 => width +@ +.text +.p2align 2 + + .global ih264_inter_pred_chroma_a9q + +ih264_inter_pred_chroma_a9q: + + + + stmfd sp!, {r4-r12, r14} @store register values to stack (10 words = 40 bytes) + vstmdb sp!, {d8-d15} @push neon registers to stack (64 bytes) + ldr r4, [sp, #104] @Loads u1_dx (first stack argument, 40 + 64 bytes above sp) + ldr r5, [sp, #108] @Loads u1_dy + ldr r6, [sp, #112] @Loads ht + ldr r7, [sp, #116] @Loads wd + + rsb r8,
r4, #8 @8-u1_dx + rsb r9, r5, #8 @8-u1_dy + mul r10, r8, r9 @ (8-u1_dx)*(8-u1_dy) + mul r11, r4, r9 @ u1_dx*(8-u1_dy) + + vdup.u8 d28, r10 @ weight of the current sample + vdup.u8 d29, r11 @ weight of the sample 2 bytes to the right + + mul r10, r8, r5 @ (8-u1_dx)*u1_dy + mul r11, r4, r5 @ u1_dx*u1_dy + + vdup.u8 d30, r10 @ weight of the sample one row below + vdup.u8 d31, r11 @ weight of the sample one row below, 2 bytes to the right + + subs r12, r7, #2 @if wd=2 branch to loop_2 + beq loop_2 + subs r12, r7, #4 @if wd=4 branch to loop_4 + beq loop_4 + +loop_8: @ reached when wd is neither 2 nor 4 + sub r6, #1 @ the last row is produced after inner_loop_8 exits + vld1.8 {d0, d1, d2}, [r0], r2 @ Load row0 + vld1.8 {d5, d6, d7}, [r0], r2 @ Load row1 + vext.8 d3, d0, d1, #2 @ row0 shifted left by 2 bytes + vext.8 d8, d5, d6, #2 @ row1 shifted left by 2 bytes + + vmull.u8 q5, d0, d28 + vmlal.u8 q5, d5, d30 + vmlal.u8 q5, d3, d29 + vmlal.u8 q5, d8, d31 + vext.8 d9, d6, d7, #2 + vext.8 d4, d1, d2, #2 + +inner_loop_8: + vmull.u8 q6, d6, d30 + vmlal.u8 q6, d1, d28 + vmlal.u8 q6, d9, d31 + vmlal.u8 q6, d4, d29 + vmov d0, d5 @ lower row becomes the upper row of the next pass + vmov d3, d8 + + vqrshrun.s16 d14, q5, #6 @ (sum + 32) >> 6, saturated to 8 bits + vmov d1, d6 + vmov d4, d9 + + vld1.8 {d5, d6, d7}, [r0], r2 @ Load next row + vqrshrun.s16 d15, q6, #6 + + vext.8 d8, d5, d6, #2 + subs r6, #1 + vext.8 d9, d6, d7, #2 + vst1.8 {q7}, [r1], r3 @ Store dest row + + vmull.u8 q5, d0, d28 + vmlal.u8 q5, d5, d30 + vmlal.u8 q5, d3, d29 + vmlal.u8 q5, d8, d31 + bne inner_loop_8 + + vmull.u8 q6, d6, d30 + vmlal.u8 q6, d1, d28 + vmlal.u8 q6, d9, d31 + vmlal.u8 q6, d4, d29 + + vqrshrun.s16 d14, q5, #6 + vqrshrun.s16 d15, q6, #6 + + vst1.8 {q7}, [r1], r3 @ Store dest row + + b end_func + +loop_4: + sub r6, #1 @ the last row is produced after inner_loop_4 exits + vld1.8 {d0, d1}, [r0], r2 @ Load row0 + vld1.8 {d2, d3}, [r0], r2 @ Load row1 + vext.8 d1, d0, d1, #2 + vext.8 d3, d2, d3, #2 + + vmull.u8 q2, d2, d30 + vmlal.u8 q2, d0, d28 + vmlal.u8 q2, d3, d31 + vmlal.u8 q2, d1, d29 + +inner_loop_4: + subs r6, #1 + vmov d0, d2 @ lower row becomes the upper row of the next pass + vmov d1, d3 + + vld1.8 {d2, d3}, [r0], r2 @ Load next row + vqrshrun.s16 d6, q2, #6 + + vext.8 d3, d2, d3, #2 + vst1.8 {d6}, [r1], r3 @ Store dest row + + vmull.u8 q2, d0, d28 + vmlal.u8 q2, d2, d30 + vmlal.u8 q2, d1, d29 + vmlal.u8 q2, d3, d31 + bne inner_loop_4 + + vqrshrun.s16 d6, q2, #6 + vst1.8 {d6}, [r1], r3 @ Store dest row + + b end_func + +loop_2: + vld1.8 {d0}, [r0], r2 @ Load row0 +
vext.8 d1, d0, d0, #2 + vld1.8 {d2}, [r0], r2 @ Load row1 + vext.8 d3, d2, d2, #2 + vmull.u8 q2, d0, d28 + vmlal.u8 q2, d1, d29 + vmlal.u8 q2, d2, d30 + vmlal.u8 q2, d3, d31 + vld1.8 {d6}, [r0] @ Load row2 (no post-increment: r0 stays on this row for the next pass) + vqrshrun.s16 d4, q2, #6 + vext.8 d7, d6, d6, #2 + vst1.32 d4[0], [r1], r3 @ Store dest row0 + vmull.u8 q4, d2, d28 + vmlal.u8 q4, d3, d29 + vmlal.u8 q4, d6, d30 + vmlal.u8 q4, d7, d31 + subs r6, #2 @ 2 rows done, decrement by 2 + vqrshrun.s16 d8, q4, #6 + vst1.32 d8[0], [r1], r3 @ Store dest row1 + bne loop_2 @ repeat until the row count reaches 0 + +end_func: + vldmia sp!, {d8-d15} @ Restore neon registers that were saved + ldmfd sp!, {r4-r12, pc} @ Restoring registers from stack + + diff --git a/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s b/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s new file mode 100755 index 0000000..ea6bba0 --- /dev/null +++ b/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s @@ -0,0 +1,245 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +@*/ +@/** +@****************************************************************************** +@* @file +@* ih264_inter_pred_filters_luma_horz_a9q.s +@* +@* @brief +@* Contains function definitions for inter prediction interpolation. +@* +@* @author +@* Ittiam +@* +@* @par List of Functions: +@* +@* - ih264_inter_pred_luma_horz_a9q() +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@/* All the functions here are replicated from ih264_inter_pred_filters.c +@ + +@/** +@/** +@******************************************************************************* +@* +@* @brief +@* Interprediction luma filter for horizontal input +@* +@* @par Description: +@* Applies a 6 tap horizontal filter. The output is clipped to 8 bits +@* sec 8.4.2.2.1 titled "Luma sample interpolation process" +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ht +@* integer height of the array +@* +@* @param[in] wd +@* integer width of the array +@* +@* @returns +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@void ih264_inter_pred_luma_horz ( +@ UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ht, +@ WORD32 wd ) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r5 => ht +@ r6 => wd + +.text +.p2align 2 + + + .global ih264_inter_pred_luma_horz_a9q + +ih264_inter_pred_luma_horz_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + vstmdb sp!, {d8-d15} @push neon registers to stack + ldr r5, [sp, #104] @Loads ht + sub r0, r0, #2 @pu1_src-2 + ldr r6, [sp, #108] @Loads wd + vmov.i8 d0, #5 @filter coeff
+ subs r12, r6, #8 @if wd=8 branch to loop_8 + vmov.i8 d1, #20 @filter coeff + beq loop_8 + + subs r12, r6, #4 @if wd=4 branch to loop_4 + beq loop_4 + +loop_16: @when wd=16 + @// Processing row0 and row1 + vld1.8 {d2, d3, d4}, [r0], r2 @// Load row0 ;for checking loop + vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0) + vld1.8 {d5, d6, d7}, [r0], r2 @// Load row1 + vext.8 d30, d3, d4, #5 @//extract a[5] (column2,row0) + vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row0) + vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1) + vaddl.u8 q5, d30, d3 @// a0 + a5 (column2,row0) + vext.8 d27, d6, d7, #5 @//extract a[5] (column2,row1) + vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row1) + vext.8 d31, d2, d3, #2 @//extract a[2] (column1,row0) + vaddl.u8 q8, d27, d6 @// a0 + a5 (column2,row1) + vext.8 d30, d3, d4, #2 @//extract a[2] (column2,row0) + vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 (column1,row0) + vext.8 d28, d5, d6, #2 @//extract a[2] (column1,row1) + vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 (column2,row0) + vext.8 d27, d6, d7, #2 @//extract a[2] (column2,row1) + vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 (column1,row1) + vext.8 d31, d2, d3, #3 @//extract a[3] (column1,row0) + vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 (column2,row1) + vext.8 d30, d3, d4, #3 @//extract a[3] (column2,row0) + vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0) + vext.8 d28, d5, d6, #3 @//extract a[3] (column1,row1) + vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0) + vext.8 d27, d6, d7, #3 @//extract a[3] (column2,row1) + vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row1) + vext.8 d31, d2, d3, #1 @//extract a[1] (column1,row0) + vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row1) + vext.8 d30, d3, d4, #1 @//extract a[1] (column2,row0) + vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vext.8 d28, d5, d6, #1 @//extract a[1] (column1,row1) + vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + vext.8 d27, d6,
d7, #1 @//extract a[1] (column2,row1) + vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + vext.8 d31, d2, d3, #4 @//extract a[4] (column1,row0) + vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1) + vext.8 d30, d3, d4, #4 @//extract a[4] (column2,row0) + vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + vext.8 d28, d5, d6, #4 @//extract a[4] (column1,row1) + vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + vext.8 d27, d6, d7, #4 @//extract a[4] (column2,row1) + vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1) + vqrshrun.s16 d20, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + vqrshrun.s16 d21, q5, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row2) + vst1.8 {d20, d21}, [r1], r3 @//Store dest row0 + vqrshrun.s16 d23, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1) + vext.8 d30, d3, d4, #5 @//extract a[5] (column2,row2) + vqrshrun.s16 d24, q8, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1) + vst1.8 {d23, d24}, [r1], r3 @//Store dest row1 + subs r5, r5, #2 @ 2 rows done, decrement by 2 + + beq end_func + b loop_16 @ loop if height == 8 or 16 + +loop_8: +@// Processing row0 and row1 +@// NOTE: the row loaded first (d5,d6 -> q7 -> d23) is also stored first, so the +@// "row1" labels on the d5/d6/q7 lines below refer to the upper row of the pair + vld1.8 {d5, d6}, [r0], r2 @// Load row1 + vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1) + vld1.8 {d2, d3}, [r0], r2 @// Load row0 + vext.8 d25, d5, d6, #2 @//extract a[2] (column1,row1) + vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0) + vext.8 d24, d5, d6, #3 @//extract a[3] (column1,row1) + vext.8 d23, d5, d6, #1 @//extract a[1] (column1,row1) + vext.8 d22, d5, d6, #4 @//extract a[4] (column1,row1) + vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row1) + vext.8 d29, d2, d3, #3 @//extract a[3] (column1,row0) + vmlal.u8 q7, d25, d1 @// a0 + a5 + 20a2
(column1,row1) + vmlal.u8 q7, d24, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row1) + vmlsl.u8 q7, d23, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + vmlsl.u8 q7, d22, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + vext.8 d30, d2, d3, #2 @//extract a[2] (column1,row0) + vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row0) + vext.8 d27, d2, d3, #1 @//extract a[1] (column1,row0) + vext.8 d26, d2, d3, #4 @//extract a[4] (column1,row0) + vmlal.u8 q4, d29, d1 @// a0 + a5 + 20a3 (column1,row0) + vmlal.u8 q4, d30, d1 @// a0 + a5 + 20a3 + 20a2 (column1,row0) + vmlsl.u8 q4, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vmlsl.u8 q4, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + vqrshrun.s16 d23, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + vst1.8 {d23}, [r1], r3 @//Store dest row0 + vqrshrun.s16 d20, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1) + vst1.8 {d20}, [r1], r3 @//Store dest row1 + subs r5, r5, #2 @ 2 rows done, decrement by 2 + + beq end_func @ Exit once all rows are done + + b loop_8 @ otherwise process the next 2 rows + +loop_4: +@// NOTE: as in loop_8, the row loaded first (d5,d6 -> q7 -> d23) is stored first + vld1.8 {d5, d6}, [r0], r2 @// Load row1 + vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1) + vld1.8 {d2, d3}, [r0], r2 @// Load row0 + vext.8 d25, d5, d6, #2 @//extract a[2] (column1,row1) + vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0) + vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row1) + vext.8 d24, d5, d6, #3 @//extract a[3] (column1,row1) + vext.8 d23, d5, d6, #1 @//extract a[1] (column1,row1) + vext.8 d22, d5, d6, #4 @//extract a[4] (column1,row1) + vext.8 d29, d2, d3, #3 @//extract a[3] (column1,row0) + vmlal.u8 q7, d25, d1 @// a0 + a5 + 20a2 (column1,row1) + vmlal.u8 q7, d24, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row1) + vmlsl.u8 q7, d23, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + vmlsl.u8 q7, d22, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row0) + vext.8 d30, d2, d3, #2 @//extract
a[2] (column1,row0) + vext.8 d27, d2, d3, #1 @//extract a[1] (column1,row0) + vext.8 d26, d2, d3, #4 @//extract a[4] (column1,row0) + vmlal.u8 q4, d29, d1 @// a0 + a5 + 20a3 (column1,row0) + vmlal.u8 q4, d30, d1 @// a0 + a5 + 20a3 + 20a2 (column1,row0) + vmlsl.u8 q4, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vmlsl.u8 q4, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + vqrshrun.s16 d23, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + vst1.32 d23[0], [r1], r3 @//Store dest row0 + vqrshrun.s16 d20, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1) + vst1.32 d20[0], [r1], r3 @//Store dest row1 + subs r5, r5, #2 @ 2 rows done, decrement by 2 + beq end_func + + b loop_4 + +end_func: + vldmia sp!, {d8-d15} @ Restore neon registers that were saved + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + diff --git a/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s b/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s new file mode 100755 index 0000000..5b29e02 --- /dev/null +++ b/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s @@ -0,0 +1,301 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License.
+@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@****************************************************************************** +@* @file +@* ih264_inter_pred_filters_luma_vert_a9q.s +@* +@* @brief +@* Contains function definitions for inter prediction interpolation. +@* +@* @author +@* Ittiam +@* +@* @par List of Functions: +@* +@* - ih264_inter_pred_luma_vert_a9q() +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@/* All the functions here are replicated from ih264_inter_pred_filters.c +@ + +@/** +@/** +@/** +@ ******************************************************************************* +@ * +@ * @brief +@ * Interprediction luma filter for vertical input +@ * +@ * @par Description: +@ * Applies a 6 tap vertical filter. The output is clipped to 8 bits +@ * sec 8.4.2.2.1 titled "Luma sample interpolation process" +@ * +@ * @param[in] pu1_src +@ * UWORD8 pointer to the source +@ * +@ * @param[out] pu1_dst +@ * UWORD8 pointer to the destination +@ * +@ * @param[in] src_strd +@ * integer source stride +@ * +@ * @param[in] dst_strd +@ * integer destination stride +@ * +@ * @param[in] ht +@ * integer height of the array +@ * +@ * @param[in] wd +@ * integer width of the array +@ * +@ * @returns +@ * +@ * @remarks +@ * None +@ * +@ ******************************************************************************* + +@void ih264_inter_pred_luma_vert ( +@ UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ht, +@ WORD32 wd ) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r5 => ht +@ r6 => wd + +.text +.p2align 2 + + + .global ih264_inter_pred_luma_vert_a9q + +ih264_inter_pred_luma_vert_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to
stack + vstmdb sp!, {d8-d15} @push neon registers to stack + ldr r5, [sp, #104] @Loads ht + sub r0, r0, r2, lsl #1 @pu1_src-2*src_strd + ldr r6, [sp, #108] @Loads wd + vmov.u16 q11, #20 @ Filter coeff 0x14 into Q11 + + subs r12, r6, #8 @if wd=8 branch to loop_8 + vmov.u16 q12, #5 @ Filter coeff 0x5 into Q12 + beq loop_8 + + subs r12, r6, #4 @if wd=4 branch to loop_4 + beq loop_4 + +loop_16: @when wd=16 + + vld1.u32 {q0}, [r0], r2 @ Vector load from src[0_0] + vld1.u32 {q1}, [r0], r2 @ Vector load from src[1_0] + vld1.u32 {q2}, [r0], r2 @ Vector load from src[2_0] + vld1.u32 {q3}, [r0], r2 @ Vector load from src[3_0] + vld1.u32 {q4}, [r0], r2 @ Vector load from src[4_0] + vaddl.u8 q6, d4, d6 @ temp1 = src[2_0] + src[3_0] + vld1.u32 {q5}, [r0], r2 @ Vector load from src[5_0] + + vaddl.u8 q7, d0, d10 @ temp = src[0_0] + src[5_0] + vaddl.u8 q8, d2, d8 @ temp2 = src[1_0] + src[4_0] + vmla.u16 q7, q6, q11 @ temp += temp1 * 20 + vaddl.u8 q10, d1, d11 @ temp4 = src[0_8] + src[5_8] + vaddl.u8 q9, d5, d7 @ temp3 = src[2_8] + src[3_8] + vmla.u16 q10, q9, q11 @ temp4 += temp3 * 20 + vld1.u32 {q0}, [r0], r2 + vaddl.u8 q13, d3, d9 @ temp5 = src[1_8] + src[4_8] + vaddl.u8 q6, d6, d8 + vmls.u16 q7, q8, q12 @ temp -= temp2 * 5 + vaddl.u8 q8, d2, d0 + vaddl.u8 q9, d4, d10 + vmla.u16 q8, q6, q11 + vmls.u16 q10, q13, q12 @ temp4 -= temp5 * 5 + vaddl.u8 q13, d5, d11 + vaddl.u8 q6, d7, d9 + vqrshrun.s16 d30, q7, #5 @ dst[0_0] = CLIP_U8((temp +16) >> 5) + vaddl.u8 q7, d3, d1 + vld1.u32 {q1}, [r0], r2 + vmla.u16 q7, q6, q11 + vmls.u16 q8, q9, q12 + vqrshrun.s16 d31, q10, #5 @ dst[0_8] = CLIP_U8((temp4 +16) >> 5) + vaddl.u8 q9, d4, d2 + vaddl.u8 q6, d8, d10 + + vst1.u32 {q15}, [r1], r3 @ Vector store to dst[0_0] + vmla.u16 q9, q6, q11 + vaddl.u8 q10, d6, d0 + vmls.u16 q7, q13, q12 + vqrshrun.s16 d30, q8, #5 + vaddl.u8 q6, d9, d11 + vaddl.u8 q8, d5, d3 + vaddl.u8 q13, d7, d1 + vmla.u16 q8, q6, q11 + vmls.u16 q9, q10, q12 + vld1.u32 {q2}, [r0], r2 + + vqrshrun.s16 d31, q7, #5 + vaddl.u8 q6,
d10, d0 + vaddl.u8 q7, d6, d4 + vaddl.u8 q10, d8, d2 + vmla.u16 q7, q6, q11 + vmls.u16 q8, q13, q12 + vst1.u32 {q15}, [r1], r3 @store row 1 + vqrshrun.s16 d30, q9, #5 + vaddl.u8 q9, d7, d5 + vaddl.u8 q6, d11, d1 + vmla.u16 q9, q6, q11 + vaddl.u8 q13, d9, d3 + vmls.u16 q7, q10, q12 + + vqrshrun.s16 d31, q8, #5 + vmls.u16 q9, q13, q12 + vaddl.u8 q6, d0, d2 @ temp1 = src[2_0] + src[3_0] + vst1.u32 {q15}, [r1], r3 @store row 2 + vaddl.u8 q8, d10, d4 @ temp2 = src[1_0] + src[4_0] + vaddl.u8 q10, d9, d7 @ temp4 = src[0_8] + src[5_8] + vqrshrun.s16 d30, q7, #5 + vaddl.u8 q13, d5, d11 @ temp5 = src[1_8] + src[4_8] + vaddl.u8 q7, d8, d6 @ temp = src[0_0] + src[5_0] + vqrshrun.s16 d31, q9, #5 + vmla.u16 q7, q6, q11 @ temp += temp1 * 20 + vaddl.u8 q9, d1, d3 @ temp3 = src[2_8] + src[3_8] + vst1.u32 {q15}, [r1], r3 @store row 3 + subs r5, r5, #4 @ 4 rows processed, decrement by 4 + subne r0, r0 , r2, lsl #2 @ rewind src: 9 rows were loaded, step back 4 ... + subne r0, r0, r2 @ ... plus 1 more, for a net advance of 4 rows + beq end_func @ Exit once all rows are done + + b loop_16 @ looping if height = 8 or 16 + +loop_8: +@// Processing 4 rows per iteration + + vld1.u32 d0, [r0], r2 @ Vector load from src[0_0] + vld1.u32 d1, [r0], r2 @ Vector load from src[1_0] + vld1.u32 d2, [r0], r2 @ Vector load from src[2_0] + vld1.u32 d3, [r0], r2 @ Vector load from src[3_0] + vld1.u32 d4, [r0], r2 @ Vector load from src[4_0] + vld1.u32 d5, [r0], r2 @ Vector load from src[5_0] + + vaddl.u8 q3, d2, d3 @ temp1 = src[2_0] + src[3_0] + vaddl.u8 q4, d0, d5 @ temp = src[0_0] + src[5_0] + vaddl.u8 q5, d1, d4 @ temp2 = src[1_0] + src[4_0] + vmla.u16 q4, q3, q11 @ temp += temp1 * 20 + vld1.u32 d6, [r0], r2 + vaddl.u8 q7, d3, d4 + vaddl.u8 q8, d1, d6 + vaddl.u8 q9, d2, d5 + vmls.u16 q4, q5, q12 @ temp -= temp2 * 5 + vmla.u16 q8, q7, q11 + vld1.u32 d7, [r0], r2 + vaddl.u8 q10, d4, d5 + vaddl.u8 q6, d2, d7 + vaddl.u8 q5, d3, d6 + vmls.u16 q8, q9, q12 + vqrshrun.s16 d26, q4, #5 @ dst[0_0] = CLIP_U8( (temp + 16) >> 5) + vmla.u16 q6, q10, q11 + vld1.u32 d0, [r0], r2 + vaddl.u8 q7, d5, d6 + vqrshrun.s16 d27, q8, #5
+ vaddl.u8 q10, d3, d0 + vmls.u16 q6, q5, q12 + vst1.u32 d26, [r1], r3 @ Vector store to dst[0_0] + vaddl.u8 q9, d4, d7 + vmla.u16 q10, q7, q11 + vst1.u32 d27, [r1], r3 + vqrshrun.s16 d28, q6, #5 + vst1.u32 d28, [r1], r3 + vmls.u16 q10, q9, q12 + vqrshrun.s16 d29, q10, #5 + vst1.u32 d29, [r1], r3 @store row 3 + + subs r5, r5, #4 @ 4 rows processed, decrement by 4 + subne r0, r0 , r2, lsl #2 @ rewind src: 9 rows were loaded, step back 4 ... + subne r0, r0, r2 @ ... plus 1 more, for a net advance of 4 rows + beq end_func @ Exit once all rows are done + + b loop_8 @looping if height == 8 or 16 + + +loop_4: +@// Processing 4 rows per iteration (only lane 0, i.e. 4 bytes, of each row is loaded and stored) + + vld1.u32 d0[0], [r0], r2 @ Vector load from src[0_0] + vld1.u32 d1[0], [r0], r2 @ Vector load from src[1_0] + vld1.u32 d2[0], [r0], r2 @ Vector load from src[2_0] + vld1.u32 d3[0], [r0], r2 @ Vector load from src[3_0] + vld1.u32 d4[0], [r0], r2 @ Vector load from src[4_0] + vld1.u32 d5[0], [r0], r2 @ Vector load from src[5_0] + + vaddl.u8 q3, d2, d3 @ temp1 = src[2_0] + src[3_0] + vaddl.u8 q4, d0, d5 @ temp = src[0_0] + src[5_0] + vaddl.u8 q5, d1, d4 @ temp2 = src[1_0] + src[4_0] + vmla.u16 q4, q3, q11 @ temp += temp1 * 20 + vld1.u32 d6[0], [r0], r2 + vaddl.u8 q7, d3, d4 + vaddl.u8 q8, d1, d6 + vaddl.u8 q9, d2, d5 + vmls.u16 q4, q5, q12 @ temp -= temp2 * 5 + vld1.u32 d7[0], [r0], r2 + vmla.u16 q8, q7, q11 + vaddl.u8 q10, d4, d5 + vaddl.u8 q6, d2, d7 + vaddl.u8 q5, d3, d6 + vmls.u16 q8, q9, q12 + vqrshrun.s16 d26, q4, #5 @ dst[0_0] = CLIP_U8( (temp + 16) >> 5) + vmla.u16 q6, q10, q11 + vld1.u32 d0[0], [r0], r2 + vaddl.u8 q7, d5, d6 + vqrshrun.s16 d27, q8, #5 + vaddl.u8 q10, d3, d0 + vmls.u16 q6, q5, q12 + vst1.u32 d26[0], [r1], r3 @ Vector store to dst[0_0] + vaddl.u8 q9, d4, d7 + vmla.u16 q10, q7, q11 + vst1.u32 d27[0], [r1], r3 + vqrshrun.s16 d28, q6, #5 + vst1.u32 d28[0], [r1], r3 + vmls.u16 q10, q9, q12 + vqrshrun.s16 d29, q10, #5 + vst1.u32 d29[0], [r1], r3 @store row 3 + + subs r5, r5, #8 @ zero only on the first pass with ht == 8 + subeq r0, r0, r2, lsl #2 @ rewind src: 9 rows were loaded, step back 4 ... + subeq r0, r0, r2 @ ... plus 1 more, for a net advance of 4 rows + beq loop_4 @ Loop if height==8 +@ NOTE(review): this path repeats only when ht == 8; any other height stops after 4 rows + +end_func: + vldmia sp!, {d8-d15} @ Restore neon registers that
were saved + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + diff --git a/common/arm/ih264_inter_pred_luma_bilinear_a9q.s b/common/arm/ih264_inter_pred_luma_bilinear_a9q.s new file mode 100755 index 0000000..6a3c83d --- /dev/null +++ b/common/arm/ih264_inter_pred_luma_bilinear_a9q.s @@ -0,0 +1,398 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@****************************************************************************** +@* @file +@* ih264_inter_pred_luma_bilinear_a9q.s +@* +@* @brief +@* Contains function definitions for inter prediction interpolation. +@* +@* @author +@* Ittiam +@* +@* @par List of Functions: +@* +@* - ih264_inter_pred_luma_bilinear_a9q() +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@/* All the functions here are replicated from ih264_inter_pred_filters.c +@ + +@/** +@/** +@/** +@ ******************************************************************************* +@ * function:ih264_inter_pred_luma_bilinear +@ * +@* @brief +@* This routine applies the bilinear filter to the predictors .
+@* The filtering operation is described in
+@* sec 8.4.2.2.1 titled "Luma sample interpolation process"
+@*
+@* @par Description:
+@\note
+@* This function is called to obtain pixels lying at the following
+@* locations (1/4,1), (3/4,1),(1,1/4), (1,3/4) ,(1/4,1/2), (3/4,1/2),(1/2,1/4), (1/2,3/4),(3/4,1/4),(1/4,3/4),(3/4,3/4)&& (1/4,1/4) .
+@* The function outputs the rounded average (a + b + 1) >> 1 of the co-located samples of the two input arrays.
+@* The code is fully unrolled: wd 4 writes 4 or 8 rows, wd 8 writes 4, 8 or 16 rows, any other wd is treated as 16 and writes 8 or 16 rows.
+@*
+@* @param[in] pu1_src1:
+@* UWORD8 Pointer to the buffer containing the first input array.
+@*
+@* @param[in] pu1_src2:
+@* UWORD8 Pointer to the buffer containing the second input array.
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination where the output of bilinear filter is stored.
+@*
+@* @param[in] src_strd1
+@* Stride of the first input buffer
+@*
+@* @param[in] src_strd2
+@* Stride of the second input buffer
+@*
+@* @param[in] dst_strd
+@* integer destination stride of pu1_dst
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@*
+@* @remarks
+@* No source row beyond row (ht - 1) is read: every load is issued after the ht check that guards it.
+@*
+@*******************************************************************************
+@*/
+
+@void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1,
+@                                   UWORD8 *pu1_src2,
+@                                   UWORD8 *pu1_dst,
+@                                   WORD32 src_strd1,
+@                                   WORD32 src_strd2,
+@                                   WORD32 dst_strd,
+@                                   WORD32 height,
+@                                   WORD32 width)
+@
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src1
+@ r1 => *pu1_src2
+@ r2 => *pu1_dst
+@ r3 => src_strd1
+@ r4 => src_strd2
+@ r5 => dst_strd
+@ r6 => height
+@ r7 => width
+@
+.text
+.p2align 2
+
+    .global ih264_inter_pred_luma_bilinear_a9q
+
+ih264_inter_pred_luma_bilinear_a9q:
+
+
+
+    stmfd         sp!, {r4-r12, r14}        @store register values to stack (10 words = 40 bytes)
+    vstmdb        sp!, {d8-d15}             @push neon registers to stack (64 bytes), stack args now start at sp + 104
+    ldr           r4, [sp, #104]            @ r4 = src_strd2
+    ldr           r5, [sp, #108]            @ r5 = dst_strd
+    ldr           r6, [sp, #112]            @ r6 = height
+    ldr           r7, [sp, #116]            @ r7 = width
+
+    subs          r12, r7, #4               @if wd=4 branch to loop_4
+    beq           loop_4
+    subs          r12, r7, #8               @if wd=8 branch to loop_8
+    beq           loop_8
+
+loop_16:                                    @when wd=16
+
+    vld1.8        {q0}, [r0], r3            @// Load row0 ;src1
+    vld1.8        {q2}, [r1], r4            @// Load row0 ;src2
+    vld1.8        {q1}, [r0], r3            @// Load row1 ;src1
+    vaddl.u8      q10, d0, d4
+    vld1.8        {q3}, [r1], r4            @// Load row1 ;src2
+    vaddl.u8      q11, d1, d5
+    vld1.8        {q4}, [r0], r3            @// Load row2 ;src1
+    vaddl.u8      q12, d2, d6
+    vld1.8        {q5}, [r0], r3            @// Load row3 ;src1
+    vaddl.u8      q13, d3, d7
+    vld1.8        {q6}, [r1], r4            @// Load row2 ;src2
+    vaddl.u8      q8, d8, d12
+    vld1.8        {q7}, [r1], r4            @// Load row3 ;src2
+    vaddl.u8      q9, d9, d13
+    vqrshrun.s16  d28, q10, #1
+    vqrshrun.s16  d29, q11, #1
+    vaddl.u8      q10, d10, d14
+    vqrshrun.s16  d30, q12, #1
+    vqrshrun.s16  d31, q13, #1
+    vst1.8        {q14}, [r2], r5           @//Store dest row0
+    vaddl.u8      q11, d11, d15
+    vst1.8        {q15}, [r2], r5           @//Store dest row1
+    vqrshrun.s16  d28, q8, #1
+    vld1.8        {q0}, [r0], r3            @// Load row4 ;src1
+    vqrshrun.s16  d29, q9, #1
+    vld1.8        {q1}, [r0], r3            @// Load row5 ;src1
+    vqrshrun.s16  d30, q10, #1
+    vld1.8        {q2}, [r1], r4            @// Load row4 ;src2
+    vqrshrun.s16  d31, q11, #1
+    vld1.8        {q3}, [r1], r4            @// Load row5 ;src2
+    vaddl.u8      q10, d0, d4
+    vst1.8        {q14}, [r2], r5           @//Store dest row2
+    vaddl.u8      q13, d3, d7
+    vst1.8        {q15}, [r2], r5           @//Store dest row3
+    vaddl.u8      q11, d1, d5
+    vld1.8        {q4}, [r0], r3            @// Load row6 ;src1
+    vaddl.u8      q12, d2, d6
+    vld1.8        {q5}, [r0], r3            @// Load row7 ;src1
+    vqrshrun.s16  d28, q10, #1
+    vld1.8        {q6}, [r1], r4            @// Load row6 ;src2
+    vqrshrun.s16  d29, q11, #1
+    vld1.8        {q7}, [r1], r4            @// Load row7 ;src2
+    vaddl.u8      q8, d8, d12
+    vaddl.u8      q9, d9, d13
+    vaddl.u8      q10, d10, d14
+    vqrshrun.s16  d30, q12, #1
+    vqrshrun.s16  d31, q13, #1
+    vst1.8        {q14}, [r2], r5           @//Store dest row4
+    vaddl.u8      q11, d11, d15
+    vst1.8        {q15}, [r2], r5           @//Store dest row5
+    vqrshrun.s16  d28, q8, #1
+    vqrshrun.s16  d30, q10, #1
+    vqrshrun.s16  d29, q9, #1
+    vqrshrun.s16  d31, q11, #1
+    vst1.8        {q14}, [r2], r5           @//Store dest row6
+    subs          r12, r6, #8               @ NEON stores do not touch the flags, so Z survives until the beq
+    vst1.8        {q15}, [r2], r5           @//Store dest row7
+
+    beq           end_func                  @ end function if ht=8
+
+    vld1.8        {q2}, [r1], r4            @// Load row8 ;src2 (issued only after the ht check so row 8 is never read when ht=8)
+    vld1.8        {q0}, [r0], r3            @// Load row8 ;src1
+    vaddl.u8      q10, d0, d4
+    vld1.8        {q1}, [r0], r3            @// Load row9 ;src1
+    vaddl.u8      q11, d1, d5
+    vld1.8        {q3}, [r1], r4            @// Load row9 ;src2
+    vqrshrun.s16  d28, q10, #1
+    vld1.8        {q4}, [r0], r3            @// Load row10 ;src1
+    vqrshrun.s16  d29, q11, #1
+    vld1.8        {q5}, [r0], r3            @// Load row11 ;src1
+    vaddl.u8      q12, d2, d6
+    vld1.8        {q6}, [r1], r4            @// Load row10 ;src2
+    vaddl.u8      q13, d3, d7
+    vld1.8        {q7}, [r1], r4            @// Load row11 ;src2
+    vaddl.u8      q8, d8, d12
+    vaddl.u8      q9, d9, d13
+    vaddl.u8      q10, d10, d14
+    vqrshrun.s16  d30, q12, #1
+    vst1.8        {q14}, [r2], r5           @//Store dest row8
+    vqrshrun.s16  d31, q13, #1
+    vst1.8        {q15}, [r2], r5           @//Store dest row9
+    vqrshrun.s16  d28, q8, #1
+    vld1.8        {q0}, [r0], r3            @// Load row12 ;src1
+    vaddl.u8      q11, d11, d15
+    vld1.8        {q1}, [r0], r3            @// Load row13 ;src1
+    vqrshrun.s16  d29, q9, #1
+    vld1.8        {q2}, [r1], r4            @// Load row12 ;src2
+    vqrshrun.s16  d30, q10, #1
+    vld1.8        {q3}, [r1], r4            @// Load row13 ;src2
+    vqrshrun.s16  d31, q11, #1
+    vst1.8        {q14}, [r2], r5           @//Store dest row10
+    vaddl.u8      q10, d0, d4
+    vst1.8        {q15}, [r2], r5           @//Store dest row11
+    vaddl.u8      q11, d1, d5
+    vld1.8        {q4}, [r0], r3            @// Load row14 ;src1
+    vaddl.u8      q13, d3, d7
+    vld1.8        {q5}, [r0], r3            @// Load row15 ;src1
+    vaddl.u8      q12, d2, d6
+    vld1.8        {q6}, [r1], r4            @// Load row14 ;src2
+    vaddl.u8      q8, d8, d12
+    vld1.8        {q7}, [r1], r4            @// Load row15 ;src2
+    vaddl.u8      q9, d9, d13
+    vqrshrun.s16  d28, q10, #1
+    vqrshrun.s16  d29, q11, #1
+    vaddl.u8      q10, d10, d14
+    vst1.8        {q14}, [r2], r5           @//Store dest row12
+    vqrshrun.s16  d30, q12, #1
+    vqrshrun.s16  d31, q13, #1
+    vaddl.u8      q11, d11, d15
+    vst1.8        {q15}, [r2], r5           @//Store dest row13
+    vqrshrun.s16  d28, q8, #1
+    vqrshrun.s16  d29, q9, #1
+    vqrshrun.s16  d30, q10, #1
+    vst1.8        {q14}, [r2], r5           @//Store dest row14
+    vqrshrun.s16  d31, q11, #1
+    vst1.8        {q15}, [r2], r5           @//Store dest row15
+    b             end_func
+
+
+
+loop_8:                                     @wd=8;
+    vld1.8        {d0}, [r0], r3            @// Load row0 ;src1
+    vld1.8        {d4}, [r1], r4            @// Load row0 ;src2
+    vld1.8        {d1}, [r0], r3            @// Load row1 ;src1
+    vaddl.u8      q10, d0, d4
+    vld1.8        {d5}, [r1], r4            @// Load row1 ;src2
+    vld1.8        {d2}, [r0], r3            @// Load row2 ;src1
+    vqrshrun.s16  d28, q10, #1
+    vld1.8        {d6}, [r1], r4            @// Load row2 ;src2
+    vaddl.u8      q11, d1, d5
+    vld1.8        {d3}, [r0], r3            @// Load row3 ;src1
+    vaddl.u8      q12, d2, d6
+    vst1.8        {d28}, [r2], r5           @//Store dest row0
+    vqrshrun.s16  d29, q11, #1
+    vld1.8        {d7}, [r1], r4            @// Load row3 ;src2
+    vqrshrun.s16  d30, q12, #1
+    vst1.8        {d29}, [r2], r5           @//Store dest row1
+    vaddl.u8      q13, d3, d7
+    vst1.8        {d30}, [r2], r5           @//Store dest row2
+    vqrshrun.s16  d31, q13, #1
+    subs          r12, r6, #4
+    vst1.8        {d31}, [r2], r5           @//Store dest row3
+    beq           end_func                  @ end function if ht=4
+
+    vld1.8        {d12}, [r1], r4           @// Load row4 ;src2
+    vld1.8        {d8}, [r0], r3            @// Load row4 ;src1
+    vld1.8        {d9}, [r0], r3            @// Load row5 ;src1
+    vaddl.u8      q8, d8, d12
+    vld1.8        {d13}, [r1], r4           @// Load row5 ;src2
+    vld1.8        {d10}, [r0], r3           @// Load row6;src1
+    vaddl.u8      q9, d9, d13
+    vld1.8        {d14}, [r1], r4           @// Load row6 ;src2
+    vqrshrun.s16  d28, q8, #1
+    vld1.8        {d11}, [r0], r3           @// Load row7 ;src1
+    vqrshrun.s16  d29, q9, #1
+    vst1.8        {d28}, [r2], r5           @//Store dest row4
+    vaddl.u8      q10, d10, d14
+    vst1.8        {d29}, [r2], r5           @//Store dest row5
+    vqrshrun.s16  d30, q10, #1
+    vld1.8        {d15}, [r1], r4           @// Load row7 ;src2
+    vaddl.u8      q11, d11, d15
+    vst1.8        {d30}, [r2], r5           @//Store dest row6
+    vqrshrun.s16  d31, q11, #1
+    subs          r12, r6, #8
+    vst1.8        {d31}, [r2], r5           @//Store dest row7
+    beq           end_func                  @ end function if ht=8
+
+    vld1.8        {d0}, [r0], r3            @// Load row8 ;src1
+    vld1.8        {d4}, [r1], r4            @// Load row8 ;src2
+    vld1.8        {d1}, [r0], r3            @// Load row9 ;src1
+    vaddl.u8      q10, d0, d4
+    vld1.8        {d5}, [r1], r4            @// Load row9 ;src2
+    vld1.8        {d2}, [r0], r3            @// Load row10 ;src1
+    vaddl.u8      q11, d1, d5
+    vld1.8        {d6}, [r1], r4            @// Load row10 ;src2
+    vqrshrun.s16  d28, q10, #1
+    vld1.8        {d3}, [r0], r3            @// Load row11 ;src1
+    vaddl.u8      q12, d2, d6
+    vld1.8        {d7}, [r1], r4            @// Load row11 ;src2
+    vqrshrun.s16  d29, q11, #1
+    vld1.8        {d8}, [r0], r3            @// Load row12 ;src1
+    vaddl.u8      q13, d3, d7
+    vst1.8        {d28}, [r2], r5           @//Store dest row8
+    vqrshrun.s16  d30, q12, #1
+    vld1.8        {d12}, [r1], r4           @// Load row12 ;src2
+    vqrshrun.s16  d31, q13, #1
+    vst1.8        {d29}, [r2], r5           @//Store dest row9
+    vaddl.u8      q8, d8, d12
+    vld1.8        {d9}, [r0], r3            @// Load row13 ;src1
+    vqrshrun.s16  d28, q8, #1
+    vld1.8        {d13}, [r1], r4           @// Load row13 ;src2
+    vld1.8        {d10}, [r0], r3           @// Load row14;src1
+    vaddl.u8      q9, d9, d13
+    vld1.8        {d11}, [r0], r3           @// Load row15 ;src1
+    vld1.8        {d14}, [r1], r4           @// Load row14 ;src2
+    vqrshrun.s16  d29, q9, #1
+    vld1.8        {d15}, [r1], r4           @// Load row15 ;src2
+    vaddl.u8      q10, d10, d14
+    vst1.8        {d30}, [r2], r5           @//Store dest row10
+    vaddl.u8      q11, d11, d15
+    vst1.8        {d31}, [r2], r5           @//Store dest row11
+    vqrshrun.s16  d30, q10, #1
+    vst1.8        {d28}, [r2], r5           @//Store dest row12
+    vqrshrun.s16  d31, q11, #1
+    vst1.8        {d29}, [r2], r5           @//Store dest row13
+    vst1.8        {d30}, [r2], r5           @//Store dest row14
+    vst1.8        {d31}, [r2], r5           @//Store dest row15
+
+    b             end_func
+
+
+
+loop_4:
+    vld1.32       d0[0], [r0], r3           @// Load row0 ;src1
+    vld1.32       d4[0], [r1], r4           @// Load row0 ;src2
+    vld1.32       d1[0], [r0], r3           @// Load row1 ;src1
+    vaddl.u8      q10, d0, d4
+    vld1.32       d5[0], [r1], r4           @// Load row1 ;src2
+    vld1.32       d2[0], [r0], r3           @// Load row2 ;src1
+    vqrshrun.s16  d28, q10, #1
+    vld1.32       d6[0], [r1], r4           @// Load row2 ;src2
+    vaddl.u8      q11, d1, d5
+    vld1.32       d3[0], [r0], r3           @// Load row3 ;src1
+    vaddl.u8      q12, d2, d6
+    vst1.32       d28[0], [r2], r5          @//Store dest row0
+    vqrshrun.s16  d29, q11, #1
+    vld1.32       d7[0], [r1], r4           @// Load row3 ;src2
+    vqrshrun.s16  d30, q12, #1
+    vst1.32       d29[0], [r2], r5          @//Store dest row1
+    vaddl.u8      q13, d3, d7
+    vst1.32       d30[0], [r2], r5          @//Store dest row2
+    vqrshrun.s16  d31, q13, #1
+    subs          r12, r6, #4
+    vst1.32       d31[0], [r2], r5          @//Store dest row3
+    beq           end_func                  @ end function if ht=4
+
+    vld1.32       d12[0], [r1], r4          @// Load row4 ;src2
+    vld1.32       d8[0], [r0], r3           @// Load row4 ;src1
+    vld1.32       d9[0], [r0], r3           @// Load row5 ;src1
+    vaddl.u8      q8, d8, d12
+    vld1.32       d13[0], [r1], r4          @// Load row5 ;src2
+    vld1.32       d10[0], [r0], r3          @// Load row6;src1
+    vaddl.u8      q9, d9, d13
+    vld1.32       d14[0], [r1], r4          @// Load row6 ;src2
+    vqrshrun.s16  d28, q8, #1
+    vld1.32       d11[0], [r0], r3          @// Load row7 ;src1
+    vqrshrun.s16  d29, q9, #1
+    vst1.32       d28[0], [r2], r5          @//Store dest row4
+    vaddl.u8      q10, d10, d14
+    vst1.32       d29[0], [r2], r5          @//Store dest row5
+    vqrshrun.s16  d30, q10, #1
+    vld1.32       d15[0], [r1], r4          @// Load row7 ;src2
+    vaddl.u8      q11, d11, d15
+    vst1.32       d30[0], [r2], r5          @//Store dest row6
+    vqrshrun.s16  d31, q11, #1
+    vst1.32       d31[0], [r2], r5          @//Store dest row7
+
+end_func:
+
+    vldmia        sp!, {d8-d15}             @ Restore neon registers that were saved
+    ldmfd         sp!, {r4-r12, pc}         @Restoring registers from stack
+
+
diff --git a/common/arm/ih264_inter_pred_luma_copy_a9q.s b/common/arm/ih264_inter_pred_luma_copy_a9q.s
new file mode 100755
index 0000000..8ba2fbf
--- /dev/null
+++ b/common/arm/ih264_inter_pred_luma_copy_a9q.s
@@ -0,0 +1,253 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore
+@*/
+@/**
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Interprediction luma function for copy
+@*
+@* @par Description:
+@* Copies the array of width 'wd' and height 'ht' from the location pointed
+@* by 'src' to the location pointed by 'dst'
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@*
+@* @remarks
+@* Rows are copied four at a time; wd selects a 16-, 8- or 4-byte wide column path. Returns at once if ht <= 0.
+@*
+@*******************************************************************************
+@*/
+@void ih264_inter_pred_luma_copy (
+@                            UWORD8 *pu1_src,
+@                            UWORD8 *pu1_dst,
+@                            WORD32 src_strd,
+@                            WORD32 dst_strd,
+@                            WORD32 ht,
+@                            WORD32 wd   )
+
+@**************Variables Vs Registers*****************************************
+@   r0 => *pu1_src
+@   r1 => *pu1_dst
+@   r2 =>  src_strd
+@   r3 =>  dst_strd
+@   r7 =>  ht
+@   r12 => wd
+
+.text
+.p2align 2
+
+    .global ih264_inter_pred_luma_copy_a9q
+
+ih264_inter_pred_luma_copy_a9q:
+    stmfd       sp!, {r4-r12, r14}          @save the core registers (40 bytes)
+    vstmdb      sp!, {d8-d15}               @push neon registers to stack (64 bytes), stack args now start at sp + 104
+    ldr         r12, [sp, #108]             @Loads wd
+    ldr         r7, [sp, #104]              @Loads ht
+    cmp         r7, #0                      @checks ht <= 0
+    ble         end_loops
+    tst         r12, #15                    @is wd a multiple of 16?
+    beq         core_loop_wd_16
+    tst         r12, #7                     @is wd a multiple of 8?
+    beq         core_loop_wd_8
+    sub         r11, r12, #4                @r11 = wd - 4, used to rewind the pointers to column 0
+
+outer_loop_wd_4:
+    subs        r4, r12, #0                 @r4 = wd (column counter); skip the inner loop if wd <= 0
+    ble         end_inner_loop_wd_4
+
+inner_loop_wd_4:
+    vld1.32     {d0[0]}, [r0]               @row 0: load 4 bytes, r0 is not advanced here
+    add         r5, r0, r2                  @pu1_src_tmp = pu1_src + src_strd
+    add         r6, r1, r3                  @pu1_dst_tmp = pu1_dst + dst_strd
+    vst1.32     {d0[0]}, [r1]               @row 0: store 4 bytes
+    vld1.32     {d0[0]}, [r5], r2           @row 1: load, pu1_src_tmp += src_strd
+    add         r0, r0, #4                  @pu1_src += 4
+    vst1.32     {d0[0]}, [r6], r3           @row 1: store, pu1_dst_tmp += dst_strd
+    vld1.32     {d0[0]}, [r5], r2           @row 2: load
+    subs        r4, r4, #4                  @(wd -4), flags are consumed by the bgt below
+    vst1.32     {d0[0]}, [r6], r3           @row 2: store
+    vld1.32     {d0[0]}, [r5], r2           @row 3: load
+    add         r1, r1, #4                  @pu1_dst += 4
+    vst1.32     {d0[0]}, [r6], r3           @row 3: store
+
+    bgt         inner_loop_wd_4
+
+end_inner_loop_wd_4:
+    subs        r7, r7, #4                  @ht - 4
+    sub         r0, r5, r11                 @pu1_src = pu1_src_tmp - (wd - 4): column 0, four rows further down
+    sub         r1, r6, r11                 @pu1_dst = pu1_dst_tmp - (wd - 4): column 0, four rows further down
+    bgt         outer_loop_wd_4
+
+end_loops:
+    vldmia      sp!, {d8-d15}               @ Restore neon registers that were saved
+    ldmfd       sp!, {r4-r12, r15}          @Reload the registers from SP and return (pc is popped)
+
+
+
+core_loop_wd_8:
+    sub         r11, r12, #8                @r11 = wd - 8, used to rewind the pointers to column 0
+
+outer_loop_wd_8:
+    subs        r4, r12, #0                 @r4 = wd (column counter)
+    ble         end_inner_loop_wd_8
+
+inner_loop_wd_8:
+    add         r5, r0, r2                  @pu1_src_tmp = pu1_src + src_strd
+    vld1.8      {d0}, [r0]!                 @row 0: load 8 bytes, pu1_src += 8
+    add         r6, r1, r3                  @pu1_dst_tmp = pu1_dst + dst_strd
+    vst1.8      {d0}, [r1]!                 @row 0: store 8 bytes, pu1_dst += 8
+    vld1.8      {d1}, [r5], r2              @row 1: load
+    vst1.8      {d1}, [r6], r3              @row 1: store
+    subs        r4, r4, #8                  @wd - 8(Loop condition)
+    vld1.8      {d2}, [r5], r2              @row 2: load
+    vst1.8      {d2}, [r6], r3              @row 2: store
+    vld1.8      {d3}, [r5], r2              @row 3: load
+    vst1.8      {d3}, [r6], r3              @row 3: store
+    bgt         inner_loop_wd_8
+
+end_inner_loop_wd_8:
+    subs        r7, r7, #4                  @ht -= 4
+    sub         r0, r5, r11                 @pu1_src = pu1_src_tmp - (wd - 8)
+    sub         r1, r6, r11                 @pu1_dst = pu1_dst_tmp - (wd - 8)
+    bgt         outer_loop_wd_8
+
+    vldmia      sp!, {d8-d15}               @ Restore neon registers that were saved
+    ldmfd       sp!, {r4-r12, r15}          @Reload the registers from SP and return
+
+core_loop_wd_16:
+    sub         r11, r12, #16               @r11 = wd - 16, used to rewind the pointers to column 0
+
+outer_loop_wd_16:
+    subs        r4, r12, #0                 @r4 = wd (column counter)
+    ble         end_inner_loop_wd_16
+
+inner_loop_wd_16:
+    add         r5, r0, r2                  @pu1_src_tmp = pu1_src + src_strd
+    vld1.8      {q0}, [r0]!                 @row 0: load 16 bytes, pu1_src += 16
+    add         r6, r1, r3                  @pu1_dst_tmp = pu1_dst + dst_strd
+    vst1.8      {q0}, [r1]!                 @row 0: store 16 bytes, pu1_dst += 16
+    vld1.8      {q1}, [r5], r2              @row 1: load
+    vst1.8      {q1}, [r6], r3              @row 1: store
+    subs        r4, r4, #16                 @wd - 16(Loop condition)
+    vld1.8      {q2}, [r5], r2              @row 2: load
+    vst1.8      {q2}, [r6], r3              @row 2: store
+    vld1.8      {q3}, [r5], r2              @row 3: load
+    vst1.8      {q3}, [r6], r3              @row 3: store
+    bgt         inner_loop_wd_16
+
+end_inner_loop_wd_16:
+    subs        r7, r7, #4                  @ht -= 4
+    sub         r0, r5, r11                 @pu1_src = pu1_src_tmp - (wd - 16)
+    sub         r1, r6, r11                 @pu1_dst = pu1_dst_tmp - (wd - 16)
+    bgt         outer_loop_wd_16
+
+    vldmia      sp!, {d8-d15}               @ Restore neon registers that were saved
+    ldmfd       sp!, {r4-r12, r15}          @Reload the registers from SP and return
+
+
+@ /*
+@ ********************************************************************************
+@ *
+@ * @brief This function copies a 4x4 block to destination
+@ *
+@ * @par   Description:
+@ * Copies a 4x4 block to destination, where both src and dst are interleaved
+@ *
+@ * @param[in] pi2_src
+@ *  Source
+@ *
+@ * @param[in] pu1_out
+@ *  Output pointer
+@ *
+@ * @param[in] pred_strd,
+@ *  Prediction buffer stride
+@ *
+@ * @param[in] out_strd
+@ *  Output buffer stride
+@ *
+@ * @returns none
+@ *
+@ * @remarks none
+@ * Currently wd and height is not used, ie a 4x4 block is always copied
+@ *
+@ *******************************************************************************
+@ */
+@ void ih264_interleave_copy(WORD16 *pi2_src,
+@                            UWORD8 *pu1_out,
+@                            WORD32 pred_strd,
+@                            WORD32 out_strd,
+@                            WORD32 wd,
+@                            WORD32 ht)
+@ Register Usage
+@ r0 : pi2_src
+@ r1 : pu1_out
+@ r2 : src_strd
+@ r3 : out_strd
+@ Neon registers d2-d5, d18-d21 and q15 (d30-d31) are used; d8-d15 are not touched
+@ No need for pushing arm and neon registers
+
+    .global ih264_interleave_copy_a9
+ih264_interleave_copy_a9:
+
+    vld1.u8       d2, [r0], r2              @load 8 bytes of src row 0 => d2, r0 += src_strd
+    vld1.u8       d3, [r0], r2              @src row 1 => d3
+    vld1.u8       d4, [r0], r2              @src row 2 => d4
+    vld1.u8       d5, [r0], r2              @src row 3 => d5
+
+    mov           r0, r1                    @keep the start of the output block for the stores, r1 is advanced by the loads below
+
+    vld1.u8       d18, [r1], r3             @load 8 bytes of out row 0 => d18
+    vld1.u8       d19, [r1], r3             @out row 1 => d19
+    vmov.u16      q15, #0x00ff              @mask selecting the low byte of every 16-bit lane
+    vld1.u8       d20, [r1], r3             @out row 2 => d20
+    vld1.u8       d21, [r1], r3             @out row 3 => d21
+
+    vbit.u8       q9, q1, q15               @rows 0-1: masked (even) bytes taken from src, other bytes of out kept
+    vbit.u8       q10, q2, q15              @rows 2-3: same merge
+
+    vst1.u8       d18, [r0], r3             @store out
+    vst1.u8       d19, [r0], r3
+    vst1.u8       d20, [r0], r3
+    vst1.u8       d21, [r0], r3
+
+    bx            lr
+
+
+
diff --git a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
new file mode 100755
index 0000000..43321a8
--- /dev/null
+++ b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
@@ -0,0 +1,441 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@*  ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
+@*
+@* @brief
+@*  Contains function definitions for inter prediction  interpolation.
+@*
+@* @author
+@*  Mohit
+@*
+@* @par List of Functions:
+@*
+@*  - ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q()
+@*
+@* @remarks
+@*  None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@*   This function implements a two stage cascaded six tap filter. It
+@*    applies the six tap filter in the vertical direction on the
+@*    predictor values, followed by applying the same filter in the
+@*    horizontal direction on the output of the first stage. The six tap
+@*    filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+@*    interpolation process"
+@*
+@* @par Description:
+@*    This function is called to obtain pixels lying at the following
+@*    location (1/2,1/2). The function interpolates
+@*    the predictors first in the vertical direction and then in the
+@*    horizontal direction to output the (1/2,1/2).
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@*  UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@*  integer source stride
+@*
+@* @param[in] dst_strd
+@*  integer destination stride
+@*
+@* @param[in] ht
+@*  integer height of the array
+@*
+@* @param[in] wd
+@*  integer width of the array
+@*
+@* @param[in] pu1_tmp: temporary buffer (not accessed by this implementation)
+@*
+@* @param[in] dydx: x and y reference offset for qpel calculations: UNUSED in this function.
+@*
+@* @returns
+@*
+@* @remarks
+@*  Two output rows are produced per loop iteration; wd 4 and wd 8 have dedicated loops, any other wd uses the 16-wide loop.
+@*
+@*******************************************************************************
+@*/;
+
+@void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src,
+@                                               UWORD8 *pu1_dst,
+@                                               WORD32 src_strd,
+@                                               WORD32 dst_strd,
+@                                               WORD32 ht,
+@                                               WORD32 wd,
+@                                               UWORD8* pu1_tmp,
+@                                               UWORD32 dydx)
+
+@**************Variables Vs Registers*****************************************
+@   r0 => *pu1_src
+@   r1 => *pu1_dst
+@   r2 =>  src_strd
+@   r3 =>  dst_strd
+@   r8 =>  ht
+@   r9 =>  wd
+
+.text
+.p2align 2
+
+    .global ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q
+
+ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q:
+
+    stmfd         sp!, {r4-r12, r14}        @store register values to stack
+    vstmdb        sp!, {d8-d15}             @push neon registers to stack
+    ldr           r8, [sp, #104]            @ loads ht
+    sub           r0, r0, r2, lsl #1        @pu1_src-2*src_strd
+    sub           r0, r0, #2                @pu1_src-2
+    ldr           r9, [sp, #108]            @ loads wd
+
+    vmov.s16      d0, #20                   @ Filter coeff 20
+    vmov.s16      d1, #5                    @ Filter coeff 5
+    subs          r12, r9, #4               @if wd=4 branch to loop_4
+    beq           loop_4
+    subs          r12, r9, #8               @if wd=8 branch to loop_8
+    beq           loop_8
+
+    mov           r10, #8                   @ r10 = 8: dst step from column 1 to column 2 of a row
+    sub           r7, r3, r10               @ r7 = dst_strd - 8: dst step from column 2 to the next row
+    @when  wd=16
+
+loop_16:
+    vld1.u32      {d2, d3, d4}, [r0], r2    @ Vector load from src[0_0]
+    vld1.u32      {d5, d6, d7}, [r0], r2    @ Vector load from src[1_0]
+    vld1.u32      {d8, d9, d10}, [r0], r2   @ Vector load from src[2_0]
+    vld1.u32      {d11, d12, d13}, [r0], r2 @ Vector load from src[3_0]
+    vld1.u32      {d14, d15, d16}, [r0], r2 @ Vector load from src[4_0]
+    vld1.u32      {d17, d18, d19}, [r0], r2 @ Vector load from src[5_0]
+
+    @ vERTICAL FILTERING FOR ROW 0
+    vaddl.u8      q10, d8, d11              @ temp1 = src[2_0] + src[3_0]
+    vaddl.u8      q12, d2, d17              @ temp  = src[0_0] + src[5_0]
+    vaddl.u8      q11, d5, d14              @ temp2 = src[1_0] + src[4_0]
+    vaddl.u8      q13, d3, d18              @ temp  = src[0_0] + src[5_0]   (next 8 columns)
+    vmla.u16      q12, q10, d0[0]           @ temp += temp1 * 20
+    vmls.s16      q12, q11, d1[0]           @ temp -= temp2 * 5
+    vaddl.u8      q10, d6, d15              @ temp2 = src[1_0] + src[4_0]   (next 8 columns)
+    vaddl.u8      q11, d9, d12              @ temp1 = src[2_0] + src[3_0]   (next 8 columns)
+    vaddl.u8      q14, d4, d19              @ temp  = src[0_0] + src[5_0]   (last 8 columns)
+    vmla.u16      q13, q11, d0[0]           @ temp += temp1 * 20
+    vmls.s16      q13, q10, d1[0]           @ temp -= temp2 * 5
+    vaddl.u8      q11, d10, d13             @ temp1 = src[2_0] + src[3_0]   (last 8 columns)
+    vaddl.u8      q10, d7, d16              @ temp2 = src[1_0] + src[4_0]   (last 8 columns)
+    vmla.u16      q14, q11, d0[0]           @ temp += temp1 * 20
+    vmls.s16      q14, q10, d1[0]           @ temp -= temp2 * 5
+    vext.16       q10, q12, q13, #5         @//extract a[5]                         (column1)
+
+    @Q12,Q13,Q14 HAVE VERTICAL FILTERED VALUES
+    @CASCADED FILTERING FOR ROW 0
+    vext.16       q11, q12, q13, #2         @//extract a[2]                         (column1)
+    vaddl.s16     q1, d20, d24              @// a0 + a5                             (column1)
+    vaddl.s16     q15, d21, d25             @// a0 + a5                             (column1)
+    vmlal.s16     q1, d22, d0[0]            @// a0 + a5 + 20a2                      (column1)
+    vmlal.s16     q15, d23, d0[0]           @// a0 + a5 + 20a2                      (column1)
+    vext.16       q11, q12, q13, #1         @//extract a[1]                         (column1)
+    vext.16       q10, q12, q13, #3         @//extract a[3]                         (column1)
+    vmlsl.s16     q1, d22, d1[0]            @// a0 + a5 + 20a2 - 5a1                (column1)
+    vmlsl.s16     q15, d23, d1[0]           @// a0 + a5 + 20a2 - 5a1                (column1)
+    vmlal.s16     q1, d20, d0[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1)
+    vmlal.s16     q15, d21, d0[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1)
+    vext.16       q11, q12, q13, #4         @//extract a[4]                         (column1)
+    vext.16       q10, q13, q14, #5         @//extract a[5]                         (column2)
+    vmlsl.s16     q1, d22, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1)
+    vmlsl.s16     q15, d23, d1[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1)
+    vqrshrun.s32  d22, q1, #10              @ rounding shift right by 10, saturated to unsigned 16 bit
+    vqrshrun.s32  d23, q15, #10
+    vqshrun.s16   d22, q11, #0              @ saturate to unsigned 8 bit
+    vst1.u8       {d22}, [r1], r10          @//Store dest row0, column 1; (1/2,1/2)
+    vext.16       q11, q13, q14, #2         @//extract a[2]                         (column2)
+    vaddl.s16     q1, d20, d26              @// a0 + a5                             (column2)
+    vaddl.s16     q15, d21, d27             @// a0 + a5                             (column2)
+    vmlal.s16     q1, d22, d0[0]            @// a0 + a5 + 20a2                      (column2)
+    vmlal.s16     q15, d23, d0[0]           @// a0 + a5 + 20a2                      (column2)
+    vext.16       q10, q13, q14, #3         @//extract a[3]                         (column2)
+    vext.16       q11, q13, q14, #1         @//extract a[1]                         (column2)
+    vmlal.s16     q1, d20, d0[0]            @// a0 + a5 + 20a2 + 20a3               (column2)
+    vmlal.s16     q15, d21, d0[0]           @// a0 + a5 + 20a2 + 20a3               (column2)
+    vext.16       q10, q13, q14, #4         @//extract a[4]                         (column2)
+    vmlsl.s16     q1, d22, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2)
+    vmlsl.s16     q15, d23, d1[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2)
+    vmlsl.s16     q1, d20, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2)
+    vmlsl.s16     q15, d21, d1[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2)
+    vqrshrun.s32  d20, q1, #10
+    vqrshrun.s32  d21, q15, #10
+    vld1.u32      {d2, d3, d4}, [r0], r2    @ Vector load from src[6_0]
+    vqshrun.s16   d22, q10, #0
+    vst1.u8       {d22}, [r1], r7           @//Store dest row0 ,column 2; (1/2,1/2)
+
+    @ vERTICAL FILTERING FOR ROW 1
+    vaddl.u8      q10, d11, d14             @ temp1 = src[3_0] + src[4_0]
+    vaddl.u8      q12, d5, d2               @ temp  = src[1_0] + src[6_0]
+    vaddl.u8      q11, d8, d17              @ temp2 = src[2_0] + src[5_0]
+    vaddl.u8      q13, d6, d3               @ temp  = src[1_0] + src[6_0]   (next 8 columns)
+    vmla.u16      q12, q10, d0[0]           @ temp += temp1 * 20
+    vaddl.u8      q10, d9, d18              @ temp2 = src[2_0] + src[5_0]   (next 8 columns)
+    vmls.s16      q12, q11, d1[0]           @ temp -= temp2 * 5
+    vaddl.u8      q11, d12, d15             @ temp1 = src[3_0] + src[4_0]   (next 8 columns)
+    vaddl.u8      q14, d7, d4               @ temp  = src[1_0] + src[6_0]   (last 8 columns)
+    vmla.u16      q13, q11, d0[0]           @ temp += temp1 * 20
+    vaddl.u8      q11, d13, d16             @ temp1 = src[3_0] + src[4_0]   (last 8 columns)
+    vmls.s16      q13, q10, d1[0]           @ temp -= temp2 * 5
+    vmla.u16      q14, q11, d0[0]           @ temp += temp1 * 20
+    vaddl.u8      q10, d10, d19             @ temp2 = src[2_0] + src[5_0]   (last 8 columns)
+    vmls.s16      q14, q10, d1[0]           @ temp -= temp2 * 5
+    vext.16       q10, q12, q13, #5         @//extract a[5]                         (column1)
+
+    @Q12,Q13,Q14 HAVE VERTICAL FILTERED VALUES
+    @CASCADED FILTERING FOR ROW 1
+    vext.16       q11, q12, q13, #2         @//extract a[2]                         (column1)
+    vaddl.s16     q3, d20, d24              @// a0 + a5                             (column1)
+    vaddl.s16     q15, d21, d25             @// a0 + a5                             (column1)
+    vmlal.s16     q3, d22, d0[0]            @// a0 + a5 + 20a2                      (column1)
+    vmlal.s16     q15, d23, d0[0]           @// a0 + a5 + 20a2                      (column1)
+    vext.16       q11, q12, q13, #1         @//extract a[1]                         (column1)
+    vext.16       q10, q12, q13, #3         @//extract a[3]                         (column1)
+    vmlsl.s16     q3, d22, d1[0]            @// a0 + a5 + 20a2 - 5a1                (column1)
+    vmlsl.s16     q15, d23, d1[0]           @// a0 + a5 + 20a2 - 5a1                (column1)
+    vmlal.s16     q3, d20, d0[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1)
+    vmlal.s16     q15, d21, d0[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1)
+    vext.16       q11, q12, q13, #4         @//extract a[4]                         (column1)
+    vext.16       q10, q13, q14, #5         @//extract a[5]                         (column2)
+    vmlsl.s16     q3, d22, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1)
+    vmlsl.s16     q15, d23, d1[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1)
+    vqrshrun.s32  d22, q3, #10
+    vqrshrun.s32  d23, q15, #10
+    vqshrun.s16   d22, q11, #0
+    vst1.u8       {d22}, [r1], r10          @//Store dest row1, column 1; (1/2,1/2)
+    vext.16       q11, q13, q14, #2         @//extract a[2]                         (column2)
+    vaddl.s16     q3, d20, d26              @// a0 + a5                             (column2)
+    vaddl.s16     q15, d21, d27             @// a0 + a5                             (column2)
+    vmlal.s16     q3, d22, d0[0]            @// a0 + a5 + 20a2                      (column2)
+    vmlal.s16     q15, d23, d0[0]           @// a0 + a5 + 20a2                      (column2)
+    vext.16       q10, q13, q14, #3         @//extract a[3]                         (column2)
+    vext.16       q11, q13, q14, #1         @//extract a[1]                         (column2)
+    vmlal.s16     q3, d20, d0[0]            @// a0 + a5 + 20a2 + 20a3               (column2)
+    vmlal.s16     q15, d21, d0[0]           @// a0 + a5 + 20a2 + 20a3               (column2)
+    vext.16       q10, q13, q14, #4         @//extract a[4]                         (column2)
+    vmlsl.s16     q3, d22, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2)
+    vmlsl.s16     q15, d23, d1[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2)
+    vmlsl.s16     q3, d20, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2)
+    vmlsl.s16     q15, d21, d1[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2)
+    vqrshrun.s32  d20, q3, #10
+    vqrshrun.s32  d21, q15, #10
+    vqshrun.s16   d22, q10, #0
+    vst1.u8       {d22}, [r1], r7           @//Store dest row1 ,column 2; (1/2,1/2)
+
+    subs          r8, r8, #2                @ 2 rows processed, decrement by 2
+    subne         r0, r0 , r2, lsl #2       @ rewind src by 4 rows ...
+    subne         r0, r0, r2                @ ... plus 1: 7 rows were loaded, so src ends up 2 rows further down
+    beq           end_func                  @ exit once all rows have been written
+
+    b             loop_16                   @ otherwise process the next two rows
+
+loop_8:
+    vld1.u32      {d2, d3}, [r0], r2        @ Vector load from src[0_0]
+    vld1.u32      {d4, d5}, [r0], r2        @ Vector load from src[1_0]
+    vld1.u32      {d6, d7}, [r0], r2        @ Vector load from src[2_0]
+    vld1.u32      {d8, d9}, [r0], r2        @ Vector load from src[3_0]
+    vld1.u32      {d10, d11}, [r0], r2      @ Vector load from src[4_0]
+    vld1.u32      {d12, d13}, [r0], r2      @ Vector load from src[5_0]
+
+    @ vERTICAL FILTERING FOR ROW 0
+    vaddl.u8      q10, d6, d8               @ temp1 = src[2_0] + src[3_0]
+    vaddl.u8      q11, d4, d10              @ temp2 = src[1_0] + src[4_0]
+    vaddl.u8      q12, d2, d12              @ temp = src[0_0] + src[5_0]
+    vaddl.u8      q13, d3, d13              @ temp = src[0_0] + src[5_0]    (next 8 columns)
+    vaddl.u8      q14, d7, d9               @ temp1 = src[2_0] + src[3_0]   (next 8 columns)
+    vmla.u16      q12, q10, d0[0]           @ temp += temp1 * 20
+    vmls.s16      q12, q11, d1[0]           @ temp -= temp2 * 5
+    vaddl.u8      q15, d5, d11              @ temp2 = src[1_0] + src[4_0]   (next 8 columns)
+    vmla.u16      q13, q14, d0[0]           @ temp += temp1 * 20
+    vmls.s16      q13, q15, d1[0]           @ temp -= temp2 * 5
+    @Q12,Q13 HAVE VERTICAL FILTERED VALUES
+    @CASCADED FILTERING FOR ROW 0
+
+    vext.16       q10, q12, q13, #5         @//extract a[5]                         (column1)
+    vext.16       q11, q12, q13, #2         @//extract a[2]                         (column1)
+    vaddl.s16     q14, d20, d24             @// a0 + a5                             (column1)
+    vaddl.s16     q15, d21, d25             @// a0 + a5                             (column1)
+    vext.16       q9, q12, q13, #1          @//extract a[1]                         (column1)
+    vext.16       q10, q12, q13, #3         @//extract a[3]                         (column1)
+    vext.16       q1, q12, q13, #4          @//extract a[4]                         (column1)
+    vmlal.s16     q14, d22, d0[0]           @// a0 + a5 + 20a2                      (column1)
+    vmlsl.s16     q14, d18, d1[0]           @// a0 + a5 + 20a2 - 5a1                (column1)
+    vmlal.s16     q14, d20, d0[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1)
+    vmlsl.s16     q14, d2, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1)
+    vld1.u32      {d14, d15}, [r0], r2      @ Vector load from src[6_0]
+    vmlal.s16     q15, d23, d0[0]           @// a0 + a5 + 20a2                      (column1)
+    vmlal.s16     q15, d21, d0[0]           @// a0 + a5 + 20a2 + 20a3               (column1)
+    vmlsl.s16     q15, d19, d1[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1)
+    vmlsl.s16     q15, d3, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1)
+
+    vaddl.u8      q12, d4, d14              @ temp = src[1_0] + src[6_0]            (vertical filter for row 1 starts here)
+    vaddl.u8      q13, d5, d15              @ temp = src[1_0] + src[6_0]    (next 8 columns)
+    vqrshrun.s32  d18, q14, #10             @ row 0: rounding shift right by 10, saturated to unsigned 16 bit
+    vaddl.u8      q14, d9, d11              @ temp1 = src[3_0] + src[4_0]   (next 8 columns)
+    vaddl.u8      q10, d8, d10              @ temp1 = src[3_0] + src[4_0]
+    vaddl.u8      q11, d6, d12              @ temp2 = src[2_0] + src[5_0]
+    vqrshrun.s32  d19, q15, #10
+    vmla.u16      q12, q10, d0[0]           @ temp += temp1 * 20
+    vmls.s16      q12, q11, d1[0]           @ temp -= temp2 * 5
+    vaddl.u8      q15, d7, d13              @ temp2 = src[2_0] + src[5_0]   (next 8 columns)
+    vmla.u16      q13, q14, d0[0]           @ temp += temp1 * 20
+    vmls.s16      q13, q15, d1[0]           @ temp -= temp2 * 5
+    vqshrun.s16   d2, q9, #0                @ row 0: saturate to unsigned 8 bit
+    @ vERTICAL FILTERING FOR ROW 1 (interleaved with the end of row 0 above)
+
+    @Q12,Q13 HAVE VERTICAL FILTERED VALUES
+    @CASCADED FILTERING FOR ROW 1
+    vext.16       q10, q12, q13, #5         @//extract a[5]                         (column1)
+    vext.16       q11, q12, q13, #2         @//extract a[2]                         (column1)
+    vaddl.s16     q14, d20, d24             @// a0 + a5                             (column1)
+    vaddl.s16     q15, d21, d25             @// a0 + a5                             (column1)
+    vst1.u8       {d2}, [r1], r3            @//Store dest row0, column 1; (1/2,1/2)
+    vext.16       q9, q12, q13, #1          @//extract a[1]                         (column1)
+    vext.16       q10, q12, q13, #3         @//extract a[3]                         (column1)
+    vext.16       q2, q12, q13, #4          @//extract a[4]                         (column1)
+    vmlal.s16     q14, d22, d0[0]           @// a0 + a5 + 20a2                      (column1)
+    vmlsl.s16     q14, d18, d1[0]           @// a0 + a5 + 20a2 - 5a1                (column1)
+    vmlal.s16     q14, d20, d0[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1)
+    vmlsl.s16     q14, d4, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1)
+    vmlal.s16     q15, d23, d0[0]           @// a0 + a5 + 20a2                      (column1)
+    vmlal.s16     q15, d21, d0[0]           @// a0 + a5 + 20a2 + 20a3               (column1)
+    vmlsl.s16     q15, d19, d1[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1)
+    vmlsl.s16     q15, d5, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1)
+    vqrshrun.s32  d18, q14, #10
+    vqrshrun.s32  d19, q15, #10
+    vqshrun.s16   d3, q9, #0
+    vst1.u8       {d3}, [r1], r3            @//Store dest row1, column 1; (1/2,1/2)
+
+    subs          r8, r8, #2                @ 2 rows processed, decrement by 2
+    subne         r0, r0 , r2, lsl #2       @ rewind src by 4 rows ...
+    subne         r0, r0, r2                @ ... plus 1: 7 rows were loaded, so src ends up 2 rows further down
+    beq           end_func                  @ exit once all rows have been written
+
+    b             loop_8                    @ otherwise process the next two rows
+
+loop_4:
+    vld1.u32      {d2, d3}, [r0], r2        @ Vector load from src[0_0]
+    vld1.u32      {d4, d5}, [r0], r2        @ Vector load from src[1_0]
+    vld1.u32      {d6, d7}, [r0], r2        @ Vector load from src[2_0]
+    vld1.u32      {d8, d9}, [r0], r2        @ Vector load from src[3_0]
+    vld1.u32      {d10, d11}, [r0], r2      @ Vector load from src[4_0]
+    vld1.u32      {d12, d13}, [r0], r2      @ Vector load from src[5_0]
+
+    @ vERTICAL FILTERING FOR ROW 0
+    vaddl.u8      q10, d6, d8               @ temp1 = src[2_0] + src[3_0]
+    vaddl.u8      q11, d4, d10              @ temp2 = src[1_0] + src[4_0]
+    vaddl.u8      q12, d2, d12              @ temp = src[0_0] + src[5_0]
+    vaddl.u8      q13, d3, d13              @ temp = src[0_0] + src[5_0]    (next 8 columns)
+    vaddl.u8      q14, d7, d9               @ temp1 = src[2_0] + src[3_0]   (next 8 columns)
+    vmla.u16      q12, q10, d0[0]           @ temp += temp1 * 20
+    vmls.s16      q12, q11, d1[0]           @ temp -= temp2 * 5
+    vaddl.u8      q15, d5, d11              @ temp2 = src[1_0] + src[4_0]   (next 8 columns)
+    vmla.u16      q13, q14, d0[0]           @ temp += temp1 * 20
+    vmls.s16      q13, q15, d1[0]           @ temp -= temp2 * 5
+    @Q12,Q13 HAVE VERTICAL FILTERED VALUES
+    @CASCADED FILTERING FOR ROW 0
+
+    vext.16       q10, q12, q13, #5         @//extract a[5]                         (column1)
+    vext.16       q11, q12, q13, #2         @//extract a[2]                         (column1)
+    vaddl.s16     q14, d20, d24             @// a0 + a5                             (column1)
+    vaddl.s16     q15, d21, d25             @// a0 + a5                             (column1)
+
+    vext.16       q1, q12, q13, #4          @//extract a[4]                         (column1)
+    vext.16       q9, q12, q13, #1          @//extract a[1]                         (column1)
+    vext.16       q10, q12, q13, #3         @//extract a[3]                         (column1)
+
+    vmlal.s16     q14, d22, d0[0]           @// a0 + a5 + 20a2                      (column1)
+    vmlsl.s16     q14, d18, d1[0]           @// a0 + a5 + 20a2 - 5a1                (column1)
+    vmlal.s16     q14, d20, d0[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1)
+    vmlsl.s16     q14, d2, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1)
+    vld1.u32      {d14, d15}, [r0], r2      @ Vector load from src[6_0]
+    vmlal.s16     q15, d23, d0[0]           @// a0 + a5 + 20a2                      (column1)
+    vmlal.s16     q15, d21, d0[0]           @// a0 + a5 + 20a2 + 20a3               (column1)
+    vmlsl.s16     q15, d19, d1[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1)
+    vmlsl.s16     q15, d3, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1)
+    vaddl.u8      q12, d4, d14              @ temp = src[1_0] + src[6_0]            (vertical filter for row 1 starts here)
+    vaddl.u8      q13, d5, d15              @ temp = src[1_0] + src[6_0]    (next 8 columns)
+    vqrshrun.s32  d18, q14, #10             @ row 0: rounding shift right by 10, saturated to unsigned 16 bit
+    vaddl.u8      q14, d9, d11              @ temp1 = src[3_0] + src[4_0]   (next 8 columns)
+    vaddl.u8      q11, d6, d12              @ temp2 = src[2_0] + src[5_0]
+    vaddl.u8      q10, d8, d10              @ temp1 = src[3_0] + src[4_0]
+    vqrshrun.s32  d19, q15, #10
+    vmla.u16      q12, q10, d0[0]           @ temp += temp1 * 20
+    vmls.s16      q12, q11, d1[0]           @ temp -= temp2 * 5
+    vaddl.u8      q15, d7, d13              @ temp2 = src[2_0] + src[5_0]   (next 8 columns)
+    vqshrun.s16   d2, q9, #0                @ row 0: saturate to unsigned 8 bit
+    vmla.u16      q13, q14, d0[0]           @ temp += temp1 * 20
+    vmls.s16      q13, q15, d1[0]           @ temp -= temp2 * 5
+
+    @ vERTICAL FILTERING FOR ROW 1 (interleaved with the end of row 0 above)
+
+    @Q12,Q13 HAVE VERTICAL FILTERED VALUES
+    @CASCADED FILTERING FOR ROW 1
+    vext.16       q10, q12, q13, #5         @//extract a[5]                         (column1)
+    vext.16       q11, q12, q13, #2         @//extract a[2]                         (column1)
+    vst1.u32      {d2[0]}, [r1], r3         @//Store dest row0, column 1; (1/2,1/2) (only the first 4 bytes are written)
+    vaddl.s16     q14, d20, d24             @// a0 + a5                             (column1)
+    vaddl.s16     q15, d21, d25             @// a0 + a5                             (column1)
+    vext.16       q9, q12, q13, #1          @//extract a[1]                         (column1)
+    vext.16       q10, q12, q13, #3         @//extract a[3]                         (column1)
+    vext.16       q2, q12, q13, #4          @//extract a[4]                         (column1)
+    vmlal.s16     q14, d22, d0[0]           @// a0 + a5 + 20a2                      (column1)
+    vmlsl.s16     q14, d18, d1[0]           @// a0 + a5 + 20a2 - 5a1                (column1)
+    vmlal.s16     q14, d20, d0[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1)
+    vmlsl.s16     q14, d4, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1)
+    vmlal.s16     q15, d23, d0[0]           @// a0 + a5 + 20a2                      (column1)
+    vmlal.s16     q15, d21, d0[0]           @// a0 + a5 + 20a2 + 20a3               (column1)
+    vmlsl.s16     q15, d19, d1[0]           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1)
+    vmlsl.s16     q15, d5, d1[0]            @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1)
+    vqrshrun.s32  d18, q14, #10
+    vqrshrun.s32  d19, q15, #10
+    vqshrun.s16   d4, q9, #0
+    vst1.u32      {d4[0]}, [r1], r3         @//Store dest row1, column 1; (1/2,1/2) (only the first 4 bytes are written)
+
+    subs          r8, r8, #2                @ 2 rows processed, decrement by 2
+    subne         r0, r0 , r2, lsl #2       @ rewind src by 4 rows ...
+    subne         r0, r0, r2                @ ... plus 1: 7 rows were loaded, so src ends up 2 rows further down
+    beq           end_func                  @ exit once all rows have been written
+
+    b             loop_4                    @ otherwise process the next two rows
+
+end_func:
+    vldmia        sp!, {d8-d15}             @ Restore neon registers that were saved
+    ldmfd         sp!, {r4-r12, pc}         @Restoring registers from stack
+
+
diff --git a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
new file mode 100755
index 0000000..65a6de7
--- /dev/null
+++ b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
@@ -0,0 +1,1044 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@*  ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
+@*
+@* @brief
+@*  Contains function definitions for inter prediction  interpolation.
+@* +@* @author +@* Mohit +@* +@* @par List of Functions: +@* +@* - ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q() +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@/* All the functions here are replicated from ih264_inter_pred_filters.c +@ + +@/** +@/** +@/** +@******************************************************************************* +@* +@* @brief +@* This function implements a two stage cascaded six tap filter. It +@* applies the six tap filter in the horizontal direction on the +@* predictor values, followed by applying the same filter in the +@* vertical direction on the output of the first stage. It then averages +@* the output of the 1st stage and the output of the 2nd stage to obtain +@* the quarter pel values. The six tap filtering operation is described +@* in sec 8.4.2.2.1 titled "Luma sample interpolation process". +@* +@* @par Description: +@* This function is called to obtain pixels lying at the following +@* location (1/2,1/4) or (1/2,3/4). The function interpolates +@* the predictors first in the horizontal direction and then in the +@* vertical direction to output the (1/2,1/2). It then averages +@* the output of the 1st stage and the (1/2,1/2) value to obtain (1/2,1/4) +@* or (1/2,3/4) depending on the offset.
+@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ht +@* integer height of the array +@* +@* @param[in] wd +@* integer width of the array +@* +@* @param[in] pu1_tmp: temporary buffer; receives the 16-bit horizontal filter output, one row every 48 bytes +@* +@* @param[in] dydx: x and y reference offset for qpel calculations +@* +@* @returns +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/; + +@void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ht, +@ WORD32 wd, +@ UWORD8* pu1_tmp, +@ UWORD32 dydx) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ht +@ r5 => wd +@ r7 => dydx +@ r9 => *pu1_tmp + +.text +.p2align 2 + + .global ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q + +ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q: + + stmfd sp!, {r4-r12, r14} @ store register values to stack + vstmdb sp!, {d8-d15} @push neon registers to stack + ldr r4, [sp, #104] @ loads ht + sub r0, r0, r2, lsl #1 @ pu1_src-2*src_strd + sub r0, r0, #2 @ pu1_src-2 + ldr r5, [sp, #108] @ loads wd + ldr r7, [sp, #116] @ loads dydx + lsr r7, r7, #3 @ r7 = dydx >> 3: folds (dydx >> 2) & 0x3 followed by >> 1 into one shift to obtain the deciding bit (no mask is applied, so this relies on dydx < 16 - TODO confirm) + ldr r9, [sp, #112] @ loads pu1_tmp + add r7, r7, #2 @ deciding bit + 2 + mov r6, #48 @ r6 = 48, byte step applied to the temp buffer pointers after each row + mla r7, r7, r6, r9 @ r7 = pu1_tmp + (deciding bit + 2) * 48, the temp buffer row the averaging loads start from + + subs r12, r5, #4 @if wd=4 branch to loop_4_start + beq loop_4_start + + subs r12, r5, #8 @if wd=8 branch to loop_8_start + beq loop_8_start + + @when wd=16 + vmov.u16 q11, #20 @ Filter coeff 0x14 into Q11 + vmov.u16 q12, #5 @ Filter coeff 0x5 into Q12 + add r8, r0, #8 @ r8 = source pointer for the high half pass (8 bytes right of r0) + add r14, r1, #8 @ r14 = destination pointer for the high half pass + add r10, r9, #8 @ r10 = temp buffer store pointer for the high half pass + mov r12, r4 @ r12 = ht, row counter for the high half pass + add r11, r7, #8 @ r11 = temp buffer load pointer for the high half pass + +loop_16_lowhalf_start: + vld1.32 {q0}, [r0], r2 @ row -2 load for
horizontal filter + vext.8 d5, d0, d1, #5 + vaddl.u8 q3, d0, d5 + + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q4, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 q3, q4, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q4, d1, d4 + vld1.32 {q0}, [r0], r2 @ row -1 load for horizontal filter + vmls.u16 q3, q4, q12 + vext.8 d5, d0, d1, #5 + vaddl.u8 q4, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q5, d2, d3 + + vst1.32 {q3}, [r9], r6 @ store temp buffer 0 + + vext.8 d4, d0, d1, #4 + vmla.u16 q4, q5, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q5, d1, d4 + vld1.32 {q0}, [r0], r2 @ row 0 load for horizontal filter + vmls.u16 q4, q5, q12 + vext.8 d5, d0, d1, #5 + vaddl.u8 q5, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q6, d2, d3 + + vst1.32 {q4}, [r9], r6 @ store temp buffer 1 + + vext.8 d4, d0, d1, #4 + vmla.u16 q5, q6, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q6, d1, d4 + vld1.32 {q0}, [r0], r2 @ row 1 load for horizontal filter + vmls.u16 q5, q6, q12 + vext.8 d5, d0, d1, #5 + vaddl.u8 q6, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q7, d2, d3 + + vst1.32 {q5}, [r9], r6 @ store temp buffer 2 + + vext.8 d4, d0, d1, #4 + vmla.u16 q6, q7, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q7, d1, d4 + vld1.32 {q0}, [r0], r2 @ row 2 load for horizontal filter + vmls.u16 q6, q7, q12 + vext.8 d5, d0, d1, #5 + vaddl.u8 q7, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q8, d2, d3 + + vst1.32 {q6}, [r9], r6 @ store temp buffer 3 + + vext.8 d4, d0, d1, #4 + vmla.u16 q7, q8, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q8, d1, d4 + + vmls.u16 q7, q8, q12 +loop_16_lowhalf: + + vld1.32 {q0}, [r0], r2 @ row 3 load for horizontal filter + vext.8 d5, d0, d1, #5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q8, d0, d5 + + vst1.32 {q7}, [r9], r6 @ store temp buffer 4 + vaddl.u8 q9, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 q8, q9, q11 + vext.8 d1, d0, d1, #1 + vadd.s16 q14, q4, q7 + vaddl.u8 q9, d1, d4 + vadd.s16
q15, q5, q6 + vmls.u16 q8, q9, q12 + vld1.32 {q0}, [r0], r2 @ row 4 load for horizontal filter + vext.8 d5, d0, d1, #5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q10, d0, d5 + + vst1.32 {q8}, [r9], r6 @ store temp buffer 5 + + vaddl.s16 q9, d6, d16 + + vld1.32 {q13}, [r7], r6 @ load from temp buffer 0 + + vaddl.s16 q3, d7, d17 + + vqrshrun.s16 d26, q13, #5 + + vmlal.s16 q9, d30, d22 + vmlsl.s16 q9, d28, d24 + vmlal.s16 q3, d31, d22 + vmlsl.s16 q3, d29, d24 + vaddl.u8 q1, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 q10, q1, q11 + vqrshrun.s32 d18, q9, #10 + vext.8 d1, d0, d1, #1 + vqrshrun.s32 d19, q3, #10 + vadd.s16 q14, q5, q8 + vaddl.u8 q1, d1, d4 + vadd.s16 q15, q6, q7 + vmls.u16 q10, q1, q12 + vqmovn.u16 d18, q9 + vld1.32 {q0}, [r0], r2 @ row 5 load for horizontal filter + + vrhadd.u8 d26, d18, d26 + + vext.8 d5, d0, d1, #5 + vext.8 d2, d0, d1, #2 + + vst1.32 {q10}, [r9], r6 @ store temp buffer 6 + + vaddl.s16 q9, d8, d20 + + vaddl.s16 q3, d9, d21 + + vld1.32 {q4}, [r7], r6 @load from temp buffer 1 + + + vst1.32 d26, [r1], r3 @ store row 0 + + vmlal.s16 q9, d30, d22 + vmlsl.s16 q9, d28, d24 + + vqrshrun.s16 d28, q4, #5 + + vmlal.s16 q3, d31, d22 + vmlsl.s16 q3, d29, d24 + vext.8 d3, d0, d1, #3 + vaddl.u8 q4, d0, d5 + vaddl.u8 q1, d2, d3 + vqrshrun.s32 d18, q9, #10 + vext.8 d4, d0, d1, #4 + vqrshrun.s32 d19, q3, #10 + vmla.u16 q4, q1, q11 + vext.8 d1, d0, d1, #1 + vadd.s16 q13, q6, q10 + vaddl.u8 q1, d1, d4 + vqmovn.u16 d18, q9 + vadd.s16 q15, q7, q8 + vmls.u16 q4, q1, q12 + vld1.32 {q0}, [r0], r2 @ row 6 load for horizontal filter + + vrhadd.u8 d28, d28, d18 + + vext.8 d5, d0, d1, #5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + + vst1.32 d28, [r1], r3 @ store row 1 + + vaddl.u8 q14, d0, d5 + + vst1.32 {q4}, [r9], r6 @ store temp buffer 7 + + vaddl.s16 q9, d10, d8 + vaddl.s16 q3, d11, d9 + + vld1.32 {q5}, [r7], r6 @ load from temp buffer 2 + + vmlal.s16 q9, d30, d22 + vmlsl.s16 q9, d26, d24 + vmlal.s16 q3, d31, d22 + + vqrshrun.s16 d26,
q5, #5 + + vmlsl.s16 q3, d27, d24 + vaddl.u8 q1, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 q14, q1, q11 + vqrshrun.s32 d18, q9, #10 + vext.8 d1, d0, d1, #1 + vqrshrun.s32 d19, q3, #10 + vadd.s16 q5, q7, q4 + vaddl.u8 q1, d1, d4 + vadd.s16 q15, q8, q10 + vmls.u16 q14, q1, q12 + vqmovn.u16 d27, q9 + + vaddl.s16 q9, d12, d28 + vaddl.s16 q3, d13, d29 + + vrhadd.u8 d26, d26, d27 + + vmlal.s16 q9, d30, d22 + vmlsl.s16 q9, d10, d24 + vmlal.s16 q3, d31, d22 + vmlsl.s16 q3, d11, d24 + + vst1.32 d26, [r1], r3 @ store row 2 + + vst1.32 {q14}, [r9] @ store without post-increment; the same row is stored again at the top of the next pass + + + vqrshrun.s32 d18, q9, #10 + vmov q5, q10 + vld1.32 {q15}, [r7], r6 @ load from temp buffer 3 + + vqrshrun.s32 d19, q3, #10 + subs r4, r4, #4 + + vqrshrun.s16 d30, q15, #5 + + vqmovn.u16 d18, q9 + vmov q6, q4 + vmov q3, q7 + vrhadd.u8 d30, d18, d30 + vmov q4, q8 + vmov q7, q14 + vst1.32 d30, [r1], r3 @ store row 3 + + bgt loop_16_lowhalf @ loop again while rows remain (4 rows per pass) + + +loop_16_highhalf_start: + vld1.32 {q0}, [r8], r2 @ row -2 load for horizontal filter (high half) + vext.8 d5, d0, d1, #5 + vaddl.u8 q3, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q4, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 q3, q4, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q4, d1, d4 + vld1.32 {q0}, [r8], r2 @ row -1 load for horizontal filter (high half) + vmls.u16 q3, q4, q12 + vext.8 d5, d0, d1, #5 + vaddl.u8 q4, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q5, d2, d3 + + vst1.32 {q3}, [r10], r6 + + vext.8 d4, d0, d1, #4 + vmla.u16 q4, q5, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q5, d1, d4 + vld1.32 {q0}, [r8], r2 @ row 0 load for horizontal filter (high half) + vmls.u16 q4, q5, q12 + vext.8 d5, d0, d1, #5 + vaddl.u8 q5, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q6, d2, d3 + + vst1.32 {q4}, [r10], r6 + + vext.8 d4, d0, d1, #4 + vmla.u16 q5, q6, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q6, d1, d4 + vld1.32 {q0}, [r8], r2 @ row 1 load for horizontal filter (high half) + vmls.u16 q5, q6, q12 + vext.8 d5, d0, d1, #5 + vaddl.u8 q6, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q7, d2, d3 + + vst1.32 {q5}, [r10], r6 + + vext.8 d4, d0, d1, #4 + vmla.u16 q6, q7, q11 + vext.8
d1, d0, d1, #1 + vaddl.u8 q7, d1, d4 + vld1.32 {q0}, [r8], r2 @ row 2 load for horizontal filter (high half) + vmls.u16 q6, q7, q12 + vext.8 d5, d0, d1, #5 + vaddl.u8 q7, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q8, d2, d3 + + vst1.32 {q6}, [r10], r6 + + vext.8 d4, d0, d1, #4 + vmla.u16 q7, q8, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q8, d1, d4 + + vmls.u16 q7, q8, q12 + +loop_16_highhalf: + + vld1.32 {q0}, [r8], r2 @ row 3 load for horizontal filter (high half) + vext.8 d5, d0, d1, #5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q8, d0, d5 + + vst1.32 {q7}, [r10], r6 + + vaddl.u8 q9, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 q8, q9, q11 + vext.8 d1, d0, d1, #1 + vadd.s16 q14, q4, q7 + vaddl.u8 q9, d1, d4 + vadd.s16 q15, q5, q6 + vmls.u16 q8, q9, q12 + vld1.32 {q0}, [r8], r2 @ row 4 load for horizontal filter (high half) + vext.8 d5, d0, d1, #5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q10, d0, d5 + + vst1.32 {q8}, [r10], r6 + + vaddl.s16 q9, d6, d16 + + vld1.32 {q13}, [r11], r6 + + vaddl.s16 q3, d7, d17 + + vqrshrun.s16 d26, q13, #5 + + vmlal.s16 q9, d30, d22 + vmlsl.s16 q9, d28, d24 + vmlal.s16 q3, d31, d22 + vmlsl.s16 q3, d29, d24 + vaddl.u8 q1, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 q10, q1, q11 + vqrshrun.s32 d18, q9, #10 + vext.8 d1, d0, d1, #1 + vqrshrun.s32 d19, q3, #10 + vadd.s16 q14, q5, q8 + vaddl.u8 q1, d1, d4 + vadd.s16 q15, q6, q7 + vmls.u16 q10, q1, q12 + vqmovn.u16 d18, q9 + vld1.32 {q0}, [r8], r2 @ row 5 load for horizontal filter (high half) + + vrhadd.u8 d26, d18, d26 + + vext.8 d5, d0, d1, #5 + vext.8 d2, d0, d1, #2 + + vst1.32 {q10}, [r10], r6 + + vaddl.s16 q9, d8, d20 + vaddl.s16 q3, d9, d21 + + vld1.32 {q4}, [r11], r6 + + + vst1.32 d26, [r14], r3 @store row 0 + + vmlal.s16 q9, d30, d22 + vmlsl.s16 q9, d28, d24 + + vqrshrun.s16 d28, q4, #5 + + vmlal.s16 q3, d31, d22 + vmlsl.s16 q3, d29, d24 + vext.8 d3, d0, d1, #3 + vaddl.u8 q4, d0, d5 + vaddl.u8 q1, d2, d3 + vqrshrun.s32 d18, q9, #10 + vext.8 d4, d0, d1, #4 + vqrshrun.s32 d19, q3, #10 + vmla.u16 q4, q1, q11 + vext.8 d1, d0, d1, #1 + vadd.s16 q13, q6, q10 + vaddl.u8 q1, d1, d4 + vqmovn.u16 d18, q9 + vadd.s16 q15, q7, q8
+ vmls.u16 q4, q1, q12 + vld1.32 {q0}, [r8], r2 @ row 6 load for horizontal filter (high half) + + vrhadd.u8 d28, d28, d18 + + vext.8 d5, d0, d1, #5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + + vst1.32 d28, [r14], r3 @store row 1 + + vaddl.u8 q14, d0, d5 + + vst1.32 {q4}, [r10], r6 + + vaddl.s16 q9, d10, d8 + vaddl.s16 q3, d11, d9 + + vld1.32 {q5}, [r11], r6 + + vmlal.s16 q9, d30, d22 + vmlsl.s16 q9, d26, d24 + vmlal.s16 q3, d31, d22 + + vqrshrun.s16 d26, q5, #5 + + vmlsl.s16 q3, d27, d24 + vaddl.u8 q1, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 q14, q1, q11 + vqrshrun.s32 d18, q9, #10 + vext.8 d1, d0, d1, #1 + vqrshrun.s32 d19, q3, #10 + vadd.s16 q5, q7, q4 + vaddl.u8 q1, d1, d4 + vadd.s16 q15, q8, q10 + vmls.u16 q14, q1, q12 + vqmovn.u16 d27, q9 + + + vaddl.s16 q9, d12, d28 + vaddl.s16 q3, d13, d29 + + vrhadd.u8 d26, d26, d27 + + vmlal.s16 q9, d30, d22 + vmlsl.s16 q9, d10, d24 + vmlal.s16 q3, d31, d22 + vmlsl.s16 q3, d11, d24 + + vst1.32 d26, [r14], r3 @ store row 2 + + vst1.32 {q14}, [r10] @ store without post-increment; the same row is stored again at the top of the next pass + + vqrshrun.s32 d18, q9, #10 + vmov q5, q10 + vld1.32 {q15}, [r11], r6 + + vqrshrun.s32 d19, q3, #10 + subs r12, r12, #4 + + vqrshrun.s16 d30, q15, #5 + + vqmovn.u16 d18, q9 + vmov q6, q4 + vmov q3, q7 + vrhadd.u8 d30, d18, d30 + vmov q4, q8 + vmov q7, q14 + vst1.32 d30, [r14], r3 @ store row 3 + + bgt loop_16_highhalf @ looping if height = 8 or 16 + b end_func + +loop_8_start: + + vmov.u16 q11, #20 @ Filter coeff 20 into Q11 + vmov.u16 q12, #5 @ Filter coeff 5 into Q12 + vld1.32 {q0}, [r0], r2 @ row -2 load for horizontal filter + vext.8 d5, d0, d1, #5 + vaddl.u8 q3, d0, d5 + + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q4, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 q3, q4, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q4, d1, d4 + vld1.32 {q0}, [r0], r2 @ row -1 load for horizontal filter + vmls.u16 q3, q4, q12 + vext.8 d5, d0, d1, #5 + vaddl.u8 q4, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q5, d2, d3 + + vst1.32 {q3}, [r9], r6 @ store temp buffer 0 + + vext.8 d4, d0, d1, #4 +
vmla.u16 q4, q5, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q5, d1, d4 + vld1.32 {q0}, [r0], r2 @ row 0 load for horizontal filter + vmls.u16 q4, q5, q12 + vext.8 d5, d0, d1, #5 + vaddl.u8 q5, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q6, d2, d3 + + vst1.32 {q4}, [r9], r6 @ store temp buffer 1 + + vext.8 d4, d0, d1, #4 + vmla.u16 q5, q6, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q6, d1, d4 + vld1.32 {q0}, [r0], r2 @ row 1 load for horizontal filter + vmls.u16 q5, q6, q12 + vext.8 d5, d0, d1, #5 + vaddl.u8 q6, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q7, d2, d3 + + vst1.32 {q5}, [r9], r6 @ store temp buffer 2 + + vext.8 d4, d0, d1, #4 + vmla.u16 q6, q7, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q7, d1, d4 + vld1.32 {q0}, [r0], r2 @ row 2 load for horizontal filter + vmls.u16 q6, q7, q12 + vext.8 d5, d0, d1, #5 + vaddl.u8 q7, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q8, d2, d3 + + vst1.32 {q6}, [r9], r6 @ store temp buffer 3 + + vext.8 d4, d0, d1, #4 + vmla.u16 q7, q8, q11 + vext.8 d1, d0, d1, #1 + vaddl.u8 q8, d1, d4 + + vmls.u16 q7, q8, q12 +loop_8: + + vld1.32 {q0}, [r0], r2 @ row 3 load for horizontal filter + vext.8 d5, d0, d1, #5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q8, d0, d5 + + vst1.32 {q7}, [r9], r6 @ store temp buffer 4 + + vaddl.u8 q9, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 q8, q9, q11 + vext.8 d1, d0, d1, #1 + vadd.s16 q14, q4, q7 + vaddl.u8 q9, d1, d4 + vadd.s16 q15, q5, q6 + vmls.u16 q8, q9, q12 + vld1.32 {q0}, [r0], r2 @ row 4 load for horizontal filter + vext.8 d5, d0, d1, #5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q10, d0, d5 + + vst1.32 {q8}, [r9], r6 @ store temp buffer 5 + + vaddl.s16 q9, d6, d16 + + vld1.32 {q13}, [r7], r6 @ load from temp buffer 0 + + vaddl.s16 q3, d7, d17 + + vqrshrun.s16 d26, q13, #5 + + vmlal.s16 q9, d30, d22 + vmlsl.s16 q9, d28, d24 + vmlal.s16 q3, d31, d22 + vmlsl.s16 q3, d29, d24 + vaddl.u8 q1, d2, d3 + vext.8 d4,
d0, d1, #4 + vmla.u16 q10, q1, q11 + vqrshrun.s32 d18, q9, #10 + vext.8 d1, d0, d1, #1 + vqrshrun.s32 d19, q3, #10 + vadd.s16 q14, q5, q8 + vaddl.u8 q1, d1, d4 + vadd.s16 q15, q6, q7 + vmls.u16 q10, q1, q12 + vqmovn.u16 d18, q9 + vld1.32 {q0}, [r0], r2 @ row 5 load for horizontal filter + + vrhadd.u8 d26, d18, d26 + + vext.8 d5, d0, d1, #5 + vext.8 d2, d0, d1, #2 + + vst1.32 {q10}, [r9], r6 @ store temp buffer 6 + + vaddl.s16 q9, d8, d20 + + vaddl.s16 q3, d9, d21 + + vld1.32 {q4}, [r7], r6 @load from temp buffer 1 + + + vst1.32 d26, [r1], r3 @ store row 0 + + vmlal.s16 q9, d30, d22 + vmlsl.s16 q9, d28, d24 + + vqrshrun.s16 d28, q4, #5 + + vmlal.s16 q3, d31, d22 + vmlsl.s16 q3, d29, d24 + vext.8 d3, d0, d1, #3 + vaddl.u8 q4, d0, d5 + vaddl.u8 q1, d2, d3 + vqrshrun.s32 d18, q9, #10 + vext.8 d4, d0, d1, #4 + vqrshrun.s32 d19, q3, #10 + vmla.u16 q4, q1, q11 + vext.8 d1, d0, d1, #1 + vadd.s16 q13, q6, q10 + vaddl.u8 q1, d1, d4 + vqmovn.u16 d18, q9 + vadd.s16 q15, q7, q8 + vmls.u16 q4, q1, q12 + vld1.32 {q0}, [r0], r2 @ row 6 load for horizontal filter + + vrhadd.u8 d28, d28, d18 + + vext.8 d5, d0, d1, #5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + + vst1.32 d28, [r1], r3 @ store row 1 + + vaddl.u8 q14, d0, d5 + + vst1.32 {q4}, [r9], r6 @ store temp buffer 7 + + vaddl.s16 q9, d10, d8 + vaddl.s16 q3, d11, d9 + + vld1.32 {q5}, [r7], r6 @ load from temp buffer 2 + + vmlal.s16 q9, d30, d22 + vmlsl.s16 q9, d26, d24 + vmlal.s16 q3, d31, d22 + + vqrshrun.s16 d26, q5, #5 + + vmlsl.s16 q3, d27, d24 + vaddl.u8 q1, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 q14, q1, q11 + vqrshrun.s32 d18, q9, #10 + vext.8 d1, d0, d1, #1 + vqrshrun.s32 d19, q3, #10 + vadd.s16 q5, q7, q4 + vaddl.u8 q1, d1, d4 + vadd.s16 q15, q8, q10 + vmls.u16 q14, q1, q12 + vqmovn.u16 d27, q9 + + vaddl.s16 q9, d12, d28 + vaddl.s16 q3, d13, d29 + + vrhadd.u8 d26, d26, d27 + + vmlal.s16 q9, d30, d22 + vmlsl.s16 q9, d10, d24 + vmlal.s16 q3, d31, d22 + vmlsl.s16 q3, d11, d24 + + vst1.32 d26, [r1], r3 @ store row
2 + + vst1.32 {q14}, [r9] @ store without post-increment; the same row is stored again at the top of the next pass + + + vqrshrun.s32 d18, q9, #10 + vmov q5, q10 + vld1.32 {q15}, [r7], r6 @ load from temp buffer 3 + + vqrshrun.s32 d19, q3, #10 + subs r4, r4, #4 + + vqrshrun.s16 d30, q15, #5 + + vqmovn.u16 d18, q9 + vmov q6, q4 + vmov q3, q7 + vrhadd.u8 d30, d18, d30 + vmov q4, q8 + vmov q7, q14 + vst1.32 d30, [r1], r3 @ store row 3 + + bgt loop_8 @if height =8 or 16 loop + b end_func + +loop_4_start: + vmov.u16 d22, #20 @ Filter coeff 20 into D22 + vmov.u16 d23, #5 @ Filter coeff 5 into D23 + + vld1.32 {q0}, [r0], r2 @row -2 load + vext.8 d5, d0, d1, #5 + vaddl.u8 q3, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q4, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 d6, d8, d22 + vext.8 d1, d0, d1, #1 + vaddl.u8 q4, d1, d4 + vld1.32 {q0}, [r0], r2 @ row -1 load + vmls.u16 d6, d8, d23 + vext.8 d5, d0, d1, #5 + vaddl.u8 q4, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q5, d2, d3 + + vst1.32 d6, [r9], r6 @ store temp buffer 0 + + vext.8 d4, d0, d1, #4 + vmla.u16 d8, d10, d22 + vext.8 d1, d0, d1, #1 + vaddl.u8 q5, d1, d4 + vld1.32 {q0}, [r0], r2 @ row 0 load + vmls.u16 d8, d10, d23 + vext.8 d5, d0, d1, #5 + vaddl.u8 q5, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q6, d2, d3 + + vst1.32 d8, [r9], r6 @ store temp buffer 1 + + vext.8 d4, d0, d1, #4 + vmla.u16 d10, d12, d22 + vext.8 d1, d0, d1, #1 + vaddl.u8 q6, d1, d4 + vld1.32 {q0}, [r0], r2 @ row 1 load + vmls.u16 d10, d12, d23 + vext.8 d5, d0, d1, #5 + vaddl.u8 q6, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q7, d2, d3 + + vst1.32 d10, [r9], r6 @ store temp buffer 2 + + vext.8 d4, d0, d1, #4 + vmla.u16 d12, d14, d22 + vext.8 d1, d0, d1, #1 + vaddl.u8 q7, d1, d4 + vld1.32 {q0}, [r0], r2 @ row 2 load + vmls.u16 d12, d14, d23 + vext.8 d5, d0, d1, #5 + vaddl.u8 q7, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q8, d2, d3 + vext.8 d4, d0, d1, #4 + vmla.u16 d14, d16, d22 + vext.8 d1, d0, d1, #1 + vaddl.u8 q8, d1, d4
+ + vst1.32 d12, [r9], r6 @ store temp buffer 3 + + vmls.u16 d14, d16, d23 + +loop_4: + + vld1.32 {q0}, [r0], r2 @ row 3 load + vext.8 d5, d0, d1, #5 + vaddl.u8 q8, d0, d5 + vext.8 d2, d0, d1, #2 + vext.8 d3, d0, d1, #3 + vaddl.u8 q9, d2, d3 + vst1.32 d14, [r9], r6 @ store temp buffer 4 + vext.8 d4, d0, d1, #4 + vmla.u16 d16, d18, d22 + vext.8 d1, d0, d1, #1 + vaddl.u8 q9, d1, d4 + vadd.s16 d2, d10, d12 + vmls.u16 d16, d18, d23 + vadd.s16 d3, d8, d14 + vld1.32 {q9}, [r0], r2 @ row 4 load + vext.8 d25, d18, d19, #5 + vaddl.u8 q13, d18, d25 + vext.8 d20, d18, d19, #2 + + vst1.32 d16, [r9], r6 @ store temp buffer 5 + + vaddl.s16 q0, d6, d16 + vmlal.s16 q0, d2, d22 + vext.8 d21, d18, d19, #3 + vaddl.u8 q14, d20, d21 + vext.8 d24, d18, d19, #4 + vmlsl.s16 q0, d3, d23 + vmla.u16 d26, d28, d22 + vext.8 d19, d18, d19, #1 + vaddl.u8 q14, d19, d24 + vadd.s16 d2, d12, d14 + vmls.u16 d26, d28, d23 + vqrshrun.s32 d0, q0, #0xa + vadd.s16 d3, d10, d16 + vld1.32 {q9}, [r0], r2 @ row 5 load + vext.8 d25, d18, d19, #5 + vqmovn.u16 d11, q0 + vaddl.u8 q14, d18, d25 + + vst1.32 d26, [r9], r6 @ store temp buffer 6 + + @Q3 available here + vld1.32 d6, [r7], r6 @ load from temp buffer 0 + vld1.32 d7, [r7], r6 @ load from temp buffer 1 + vqrshrun.s16 d9, q3, #5 + + vext.8 d20, d18, d19, #2 + + vaddl.s16 q0, d8, d26 + vmlal.s16 q0, d2, d22 + vext.8 d21, d18, d19, #3 + vaddl.u8 q3, d20, d21 + vext.8 d24, d18, d19, #4 + vmlsl.s16 q0, d3, d23 + vmla.u16 d28, d6, d22 + vext.8 d19, d18, d19, #1 + vaddl.u8 q3, d19, d24 + vadd.s16 d2, d14, d16 + vmls.u16 d28, d6, d23 + vqrshrun.s32 d0, q0, #0xa + vadd.s16 d3, d12, d26 + vld1.32 {q9}, [r0], r2 @ row 6 load + vext.8 d25, d18, d19, #5 + vqmovn.u16 d13, q0 + + vtrn.32 d11, d13 + vaddl.s16 q0, d10, d28 + vrhadd.u8 d9, d9, d11 + + vst1.32 d28, [r9], r6 @ store temp buffer 7 + + vmlal.s16 q0, d2, d22 + vaddl.u8 q15, d18, d25 + + vst1.32 d9[0], [r1], r3 @ store row 0 + + vext.8 d20, d18, d19, #2 + + vst1.32 d9[1], [r1], r3 @ store row 1 + + vext.8 d21,
d18, d19, #3 + vmlsl.s16 q0, d3, d23 + vaddl.u8 q4, d20, d21 + vext.8 d24, d18, d19, #4 + vmla.u16 d30, d8, d22 + vext.8 d19, d18, d19, #1 + vaddl.u8 q4, d19, d24 + vqrshrun.s32 d0, q0, #0xa + vadd.s16 d2, d16, d26 + vmls.u16 d30, d8, d23 + vqmovn.u16 d4, q0 + + vadd.s16 d3, d14, d28 + + + vaddl.s16 q0, d12, d30 + + vst1.32 d30, [r9] @ store without post-increment; the same row is stored again at the top of the next pass + + vmlal.s16 q0, d2, d22 + + vld1.32 d8, [r7], r6 @ load from temp buffer 2 + vld1.32 d9, [r7], r6 @ load from temp buffer 3 + vmlsl.s16 q0, d3, d23 + subs r4, r4, #4 + vqrshrun.s16 d10, q4, #5 + + vmov d12, d28 + + vqrshrun.s32 d0, q0, #0xa + vmov d6, d14 + vmov d8, d16 + + vqmovn.u16 d5, q0 + + vtrn.32 d4, d5 + vrhadd.u8 d4, d4, d10 + vmov d10, d26 + vmov d14, d30 + + vst1.32 d4[0], [r1], r3 @ store row 2 + vst1.32 d4[1], [r1], r3 @ store row 3 + + bgt loop_4 @ loop again while rows remain (4 rows per pass) + +end_func: + vldmia sp!, {d8-d15} @ Restore neon registers that were saved + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s new file mode 100755 index 0000000..c39ae01 --- /dev/null +++ b/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s @@ -0,0 +1,266 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License.
+@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@****************************************************************************** +@* @file +@* ih264_inter_pred_luma_horz_qpel_a9q.s +@* +@* @brief +@* Contains function definitions for inter prediction horizontal quarter pel interpolation. +@* +@* @author +@* Mohit +@* +@* @par List of Functions: +@* +@* - ih264_inter_pred_luma_horz_qpel_a9q() +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@/* All the functions here are replicated from ih264_inter_pred_filters.c +@ + +@/** +@/** +@******************************************************************************* +@* +@* @brief +@* Quarter pel inter prediction luma filter for horizontal input +@* +@* @par Description: +@* Applies a 6 tap horizontal filter. The output is clipped to 8 bits +@* sec 8.4.2.2.1 titled "Luma sample interpolation process" +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ht +@* integer height of the array +@* +@* @param[in] wd +@* integer width of the array +@* +@* @param[in] pu1_tmp: temporary buffer: UNUSED in this function +@* +@* @param[in] dydx: x and y reference offset for qpel calculations.
+@* @returns +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@void ih264_inter_pred_luma_horz_qpel_a9q ( +@ UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ht, +@ WORD32 wd, +@ UWORD8* pu1_tmp, +@ UWORD32 dydx) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r5 => ht +@ r6 => wd +@ r7 => dydx, then pu1_src offset by (x_offset>>1), the sample row averaged with the filter output + +.text +.p2align 2 + + + .global ih264_inter_pred_luma_horz_qpel_a9q + +ih264_inter_pred_luma_horz_qpel_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + vstmdb sp!, {d8-d15} @push neon registers to stack + ldr r5, [sp, #104] @Loads ht (stack args sit above the 40 bytes of core regs and 64 bytes of d8-d15 just pushed) + ldr r6, [sp, #108] @Loads wd + ldr r7, [sp, #116] @Loads dydx (pu1_tmp at #112 is never read by this routine) + and r7, r7, #3 @Finds x-offset + add r7, r0, r7, lsr #1 @pu1_src + (x_offset>>1) + sub r0, r0, #2 @pu1_src-2 + vmov.i8 d0, #5 @filter coeff 5 + subs r12, r6, #8 @if wd=8 branch to loop_8 + vmov.i8 d1, #20 @filter coeff 20 + + beq loop_8 + + subs r12, r6, #4 @if wd=4 branch to loop_4 + beq loop_4 + +loop_16: @when wd=16 + @// Processing row0 and row1 + vld1.8 {d2, d3, d4}, [r0], r2 @// Load row0 + vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0) + vld1.8 {d5, d6, d7}, [r0], r2 @// Load row1 + vext.8 d30, d3, d4, #5 @//extract a[5] (column2,row0) + vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row0) + vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1) + vaddl.u8 q5, d30, d3 @// a0 + a5 (column2,row0) + vext.8 d27, d6, d7, #5 @//extract a[5] (column2,row1) + vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row1) + vext.8 d31, d2, d3, #2 @//extract a[2] (column1,row0) + vaddl.u8 q8, d27, d6 @// a0 + a5 (column2,row1) + vext.8 d30, d3, d4, #2 @//extract a[2] (column2,row0) + vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 (column1,row0) + vext.8 d28, d5, d6, #2 @//extract a[2] (column1,row1) + vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 (column2,row0) + vext.8 d27, d6, d7, #2
@//extract a[2] (column2,row1) + vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 (column1,row1) + vext.8 d31, d2, d3, #3 @//extract a[3] (column1,row0) + vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 (column2,row1) + vext.8 d30, d3, d4, #3 @//extract a[3] (column2,row0) + vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0) + vext.8 d28, d5, d6, #3 @//extract a[3] (column1,row1) + vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0) + vext.8 d27, d6, d7, #3 @//extract a[3] (column2,row1) + vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row1) + vext.8 d31, d2, d3, #1 @//extract a[1] (column1,row0) + vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row1) + vext.8 d30, d3, d4, #1 @//extract a[1] (column2,row0) + vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vext.8 d28, d5, d6, #1 @//extract a[1] (column1,row1) + vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + vext.8 d27, d6, d7, #1 @//extract a[1] (column2,row1) + vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + vext.8 d31, d2, d3, #4 @//extract a[4] (column1,row0) + vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1) + vext.8 d30, d3, d4, #4 @//extract a[4] (column2,row0) + vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + vext.8 d28, d5, d6, #4 @//extract a[4] (column1,row1) + vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + vext.8 d27, d6, d7, #4 @//extract a[4] (column2,row1) + vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1) + vld1.32 {d12, d13}, [r7], r2 @Load value for interpolation (column1,row0) + vqrshrun.s16 d20, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + vqrshrun.s16 d21, q5, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row2) + vrhadd.u8 q10, q6, q10
@Interpolation step for qpel calculation + vqrshrun.s16 d18, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1) + vst1.8 {d20, d21}, [r1], r3 @//Store dest row0 + vext.8 d30, d3, d4, #5 @//extract a[5] (column2,row2) + vqrshrun.s16 d19, q8, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1) + vld1.32 {d12, d13}, [r7], r2 @Load value for interpolation (column1,row1) + vrhadd.u8 q9, q6, q9 @Interpolation step for qpel calculation + vst1.8 {d18, d19}, [r1], r3 @//Store dest row1 + subs r5, r5, #2 @ 2 rows done, decrement by 2 + + beq end_func + b loop_16 + +loop_8: +@// Processing two rows per pass; d5/d6 (tagged row1 below) are loaded first and produce dest row0, d2/d3 (tagged row0) produce dest row1 + + vld1.8 {d5, d6}, [r0], r2 @// Load row1 + vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1) + vld1.8 {d2, d3}, [r0], r2 @// Load row0 + vext.8 d25, d5, d6, #2 @//extract a[2] (column1,row1) + vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0) + vext.8 d24, d5, d6, #3 @//extract a[3] (column1,row1) + vext.8 d23, d5, d6, #1 @//extract a[1] (column1,row1) + vext.8 d22, d5, d6, #4 @//extract a[4] (column1,row1) + vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row1) + vext.8 d29, d2, d3, #3 @//extract a[3] (column1,row0) + vmlal.u8 q7, d25, d1 @// a0 + a5 + 20a2 (column1,row1) + vmlal.u8 q7, d24, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row1) + vmlsl.u8 q7, d23, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + vmlsl.u8 q7, d22, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + vext.8 d30, d2, d3, #2 @//extract a[2] (column1,row0) + vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row0) + vext.8 d27, d2, d3, #1 @//extract a[1] (column1,row0) + vext.8 d26, d2, d3, #4 @//extract a[4] (column1,row0) + vmlal.u8 q4, d29, d1 @// a0 + a5 + 20a3 (column1,row0) + vmlal.u8 q4, d30, d1 @// a0 + a5 + 20a3 + 20a2 (column1,row0) + vmlsl.u8 q4, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vmlsl.u8 q4, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + vqrshrun.s16 d18, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5
(column1,row0) + vld1.32 d12, [r7], r2 @Load value for interpolation (column1,row0) + vld1.32 d13, [r7], r2 @Load value for interpolation (column1,row1) + vqrshrun.s16 d19, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1) + vrhadd.u8 q9, q6, q9 @Interpolation step for qpel calculation + vst1.8 {d18}, [r1], r3 @//Store dest row0 + vst1.8 {d19}, [r1], r3 @//Store dest row1 + subs r5, r5, #2 @ 2 rows done, decrement by 2 + + beq end_func @ Branch when all rows are done + b loop_8 @otherwise process the next two rows + +loop_4: @when wd=4; d5/d6 (tagged row1 below) are loaded first and produce dest row0 + vld1.8 {d5, d6}, [r0], r2 @// Load row1 + vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1) + vld1.8 {d2, d3}, [r0], r2 @// Load row0 + vext.8 d25, d5, d6, #2 @//extract a[2] (column1,row1) + vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0) + vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row1) + vext.8 d24, d5, d6, #3 @//extract a[3] (column1,row1) + vext.8 d23, d5, d6, #1 @//extract a[1] (column1,row1) + vext.8 d22, d5, d6, #4 @//extract a[4] (column1,row1) + vext.8 d29, d2, d3, #3 @//extract a[3] (column1,row0) + vmlal.u8 q7, d25, d1 @// a0 + a5 + 20a2 (column1,row1) + vmlal.u8 q7, d24, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row1) + vmlsl.u8 q7, d23, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + vmlsl.u8 q7, d22, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row0) + vext.8 d30, d2, d3, #2 @//extract a[2] (column1,row0) + vld1.32 d12, [r7], r2 @Load value for interpolation (column1,row0) + vld1.32 d13, [r7], r2 @Load value for interpolation (column1,row1) + vext.8 d27, d2, d3, #1 @//extract a[1] (column1,row0) + vext.8 d26, d2, d3, #4 @//extract a[4] (column1,row0) + vmlal.u8 q4, d29, d1 @// a0 + a5 + 20a3 (column1,row0) + vmlal.u8 q4, d30, d1 @// a0 + a5 + 20a3 + 20a2 (column1,row0) + vmlsl.u8 q4, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vmlsl.u8 q4, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + vqrshrun.s16 d18, q7, #5 @// (a0 + a5 + 20a2
+ 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + vqrshrun.s16 d19, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1) + vrhadd.u8 q9, q6, q9 @Interpolation step for qpel calculation + vst1.32 d18[0], [r1], r3 @//Store dest row0 + vst1.32 d19[0], [r1], r3 @//Store dest row1 + + subs r5, r5, #2 @ 2 rows done, decrement by 2 + beq end_func + + b loop_4 + +end_func: + vldmia sp!, {d8-d15} @ Restore neon registers that were saved + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack; loading pc with the saved r14 returns to the caller + + diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s new file mode 100755 index 0000000..565cc80 --- /dev/null +++ b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s @@ -0,0 +1,505 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@****************************************************************************** +@* @file +@* ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s +@* +@* @brief +@* Contains function definitions for inter prediction interpolation.
+@* +@* @author +@* Mohit +@* +@* @par List of Functions: +@* +@* - ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q() +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@/* All the functions here are replicated from ih264_inter_pred_filters.c +@ + +@/** +@/** +@/** +@******************************************************************************* +@* +@* @brief +@* This function implements a two stage cascaded six tap filter. It +@* applies the six tap filter in the vertical direction on the +@* predictor values, followed by applying the same filter in the +@* horizontal direction on the output of the first stage. It then averages +@* the output of the 1st stage and the final stage to obtain the quarter +@* pel values. The six tap filtering operation is described in sec 8.4.2.2.1 +@* titled "Luma sample interpolation process". +@* +@* @par Description: +@* This function is called to obtain pixels lying at the following +@* location (1/4,1/2) or (3/4,1/2). The function interpolates +@* the predictors first in the vertical direction and then in the +@* horizontal direction to output the (1/2,1/2). It then averages +@* the output of the 2nd stage and (1/2,1/2) value to obtain (1/4,1/2) +@* or (3/4,1/2) depending on the offset.
+@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ht +@* integer height of the array +@* +@* @param[in] wd +@* integer width of the array +@* +@* @param[in] pu1_tmp: temporary buffer +@* +@* @param[in] dydx: x and y reference offset for qpel calculations +@* +@* @returns +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/; + +@void ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ht, +@ WORD32 wd, +@ UWORD8* pu1_tmp, +@ UWORD32 dydx) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ht +@ r5 => wd +@ r6 => dydx +@ r9 => *pu1_tmp + +.text +.p2align 2 + + .global ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q + +ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + vstmdb sp!, {d8-d15} @push neon registers to stack + ldr r4, [sp, #104] @ loads ht (stack args sit above the 40 bytes of core regs and 64 bytes of d8-d15 just pushed) + sub r0, r0, r2, lsl #1 @pu1_src-2*src_strd + sub r0, r0, #2 @pu1_src-2 + ldr r5, [sp, #108] @ loads wd + ldr r6, [sp, #116] @ loads dydx + and r6, r6, #2 @ dydx & 0x3 followed by dydx>>1 and dydx<<1, i.e. (x_offset>>1) scaled to a byte offset of 16-bit entries + ldr r9, [sp, #112] @pu1_tmp + add r7, r9, #4 @ pu1_tmp advanced by 4 bytes (two 16-bit entries) + add r6, r7, r6 @ pi16_pred1_temp += (x_offset>>1) + + vmov.u16 q13, #0x14 @ Filter coeff 20 into Q13 + vmov.u16 q12, #0x5 @ Filter coeff 5 into Q12 + mov r7, #0x20 @ 32-byte post-increment used with the temp buffer pointers r9 and r6 + mov r8, #0x30 @ 48-byte post-increment used for the r6 temp buffer loads in loop_8 and loop_4 + subs r12, r5, #4 @if wd=4 branch to loop_4 + beq loop_4 + + subs r12, r5, #8 @if wd=8 branch to loop_8 + beq loop_8 + + @when wd=16 + vmov.u16 q14, #0x14 @ Filter coeff 20 into Q14 + vmov.u16 q15, #0x5 @ Filter coeff 5 into Q15 + add r14, r2, #0 @ keep the full src_strd in r14 for rewinding r0 + sub r2, r2, #16 @ src_strd - 16, since the q load with writeback already advances r0 by 16 + + +loop_16: + + vld1.u32
{q0}, [r0]! @ Vector load from src[0_0] + vld1.u32 d12, [r0], r2 @ Vector load from src[0_0] + vld1.u32 {q1}, [r0]! @ Vector load from src[1_0] + vld1.u32 d13, [r0], r2 @ Vector load from src[1_0] + vld1.u32 {q2}, [r0]! @ Vector load from src[2_0] + vld1.u32 d14, [r0], r2 @ Vector load from src[2_0] + vld1.u32 {q3}, [r0]! @ Vector load from src[3_0] + vld1.u32 d15, [r0], r2 @ Vector load from src[3_0] + vld1.u32 {q4}, [r0]! @ Vector load from src[4_0] + vld1.u32 d16, [r0], r2 @ Vector load from src[4_0] + + vld1.u32 {q5}, [r0]! @ Vector load from src[5_0] + vld1.u32 d17, [r0], r2 @ Vector load from src[5_0] + + vaddl.u8 q10, d4, d6 + vaddl.u8 q9, d0, d10 + vaddl.u8 q11, d2, d8 + vmla.u16 q9, q10, q14 + vaddl.u8 q12, d5, d7 + vaddl.u8 q10, d1, d11 + vaddl.u8 q13, d3, d9 + vmla.u16 q10, q12, q14 + vaddl.u8 q12, d14, d15 + vmls.u16 q9, q11, q15 + vaddl.u8 q11, d12, d17 + vmls.u16 q10, q13, q15 + vaddl.u8 q13, d13, d16 + vmla.u16 q11, q12, q14 + vmls.u16 q11, q13, q15 + vst1.32 {q9}, [r9]! + vst1.32 {q10}, [r9]! + vext.16 q12, q9, q10, #2 + vext.16 q13, q9, q10, #3 + vst1.32 {q11}, [r9] + vext.16 q11, q9, q10, #5 + vadd.s16 q0, q12, q13 + vext.16 q12, q9, q10, #1 + vext.16 q13, q9, q10, #4 + vadd.s16 q12, q12, q13 + + vaddl.s16 q13, d18, d22 + vmlal.s16 q13, d0, d28 + vmlsl.s16 q13, d24, d30 + + vaddl.s16 q11, d19, d23 + vmlal.s16 q11, d1, d28 + vmlsl.s16 q11, d25, d30 + + vqrshrun.s32 d18, q13, #10 + vqrshrun.s32 d19, q11, #10 + vld1.32 {q11}, [r9]! + vqmovn.u16 d18, q9 + + vext.16 q12, q10, q11, #2 + vext.16 q13, q10, q11, #3 + vext.16 q0, q10, q11, #5 + vst1.32 d18, [r1] + vadd.s16 q9, q12, q13 + vext.16 q12, q10, q11, #1 + vext.16 q13, q10, q11, #4 + vadd.s16 q12, q12, q13 + + vaddl.s16 q13, d0, d20 + vmlal.s16 q13, d18, d28 + vmlsl.s16 q13, d24, d30 + + vaddl.s16 q11, d1, d21 + vmlal.s16 q11, d19, d28 + vmlsl.s16 q11, d25, d30 + + vqrshrun.s32 d18, q13, #10 + vqrshrun.s32 d19, q11, #10 + + vaddl.u8 q12, d7, d9 + vld1.32 {q10}, [r6]!
+ vld1.32 {q11}, [r6], r7 + + vqmovn.u16 d19, q9 + + vld1.32 d18, [r1] + vqrshrun.s16 d20, q10, #5 + vqrshrun.s16 d21, q11, #5 + vaddl.u8 q11, d4, d10 + vld1.u32 {q0}, [r0]! @ Vector load from src[6_0] + vrhadd.u8 q9, q9, q10 + vld1.u32 d12, [r0], r2 @ Vector load from src[6_0] + vaddl.u8 q10, d6, d8 + vaddl.u8 q13, d5, d11 + vst1.32 {q9}, [r1], r3 @ store row 0 + +@ROW_2 + + vaddl.u8 q9, d2, d0 + + vmla.u16 q9, q10, q14 + + vaddl.u8 q10, d3, d1 + + vmla.u16 q10, q12, q14 + vaddl.u8 q12, d15, d16 + vmls.u16 q9, q11, q15 + vaddl.u8 q11, d13, d12 + vmls.u16 q10, q13, q15 + vaddl.u8 q13, d14, d17 + vmla.u16 q11, q12, q14 + vmls.u16 q11, q13, q15 + vst1.32 {q9}, [r9]! + vst1.32 {q10}, [r9]! + vext.16 q12, q9, q10, #2 + vext.16 q13, q9, q10, #3 + vst1.32 {q11}, [r9] + vext.16 q11, q9, q10, #5 + vadd.s16 q1, q12, q13 + vext.16 q12, q9, q10, #1 + vext.16 q13, q9, q10, #4 + vadd.s16 q12, q12, q13 + + vaddl.s16 q13, d18, d22 + vmlal.s16 q13, d2, d28 + vmlsl.s16 q13, d24, d30 + + vaddl.s16 q11, d19, d23 + vmlal.s16 q11, d3, d28 + vmlsl.s16 q11, d25, d30 + + vqrshrun.s32 d18, q13, #10 + vqrshrun.s32 d19, q11, #10 + vld1.32 {q11}, [r9]! + vqmovn.u16 d18, q9 + + vext.16 q12, q10, q11, #2 + vext.16 q13, q10, q11, #3 + vext.16 q1, q10, q11, #5 + vst1.32 d18, [r1] + vadd.s16 q9, q12, q13 + vext.16 q12, q10, q11, #1 + vext.16 q13, q10, q11, #4 + vadd.s16 q12, q12, q13 + + vaddl.s16 q13, d2, d20 + vmlal.s16 q13, d18, d28 + vmlsl.s16 q13, d24, d30 + + vaddl.s16 q11, d3, d21 + vmlal.s16 q11, d19, d28 + vmlsl.s16 q11, d25, d30 + + vqrshrun.s32 d18, q13, #10 + vqrshrun.s32 d19, q11, #10 + vaddl.u8 q12, d9, d11 + vld1.32 {q10}, [r6]!
+ vld1.32 {q11}, [r6], r7 + vqmovn.u16 d19, q9 + vld1.32 d18, [r1] + vqrshrun.s16 d20, q10, #5 + vqrshrun.s16 d21, q11, #5 + + vrhadd.u8 q9, q9, q10 + + vst1.32 {q9}, [r1], r3 @ store row 1 + + subs r4, r4, #2 + subne r0, r0 , r14, lsl #2 @ rewind src by 4 rows ... + subne r0, r0, r14 @ ... and one more: 7 rows were loaded, so the window moves down by 2 rows + + beq end_func @ Branch if height==4 + b loop_16 @ Loop if height==8 + +loop_8: + vld1.u32 {q0}, [r0], r2 @ Vector load from src[0_0] + vld1.u32 {q1}, [r0], r2 @ Vector load from src[1_0] + vld1.u32 {q2}, [r0], r2 @ Vector load from src[2_0] + vld1.u32 {q3}, [r0], r2 @ Vector load from src[3_0] + vld1.u32 {q4}, [r0], r2 @ Vector load from src[4_0] + + vld1.u32 {q5}, [r0], r2 @ Vector load from src[5_0] + vaddl.u8 q7, d4, d6 + vaddl.u8 q6, d0, d10 + vaddl.u8 q8, d2, d8 + vmla.u16 q6, q7, q13 + vaddl.u8 q9, d5, d7 + vaddl.u8 q7, d1, d11 + vaddl.u8 q11, d3, d9 + vmla.u16 q7, q9, q13 + vmls.u16 q6, q8, q12 + vld1.32 {q0}, [r0], r2 @ Vector load from src[6_0] + vaddl.u8 q8, d6, d8 + vmls.u16 q7, q11, q12 + vaddl.u8 q14, d2, d0 + vst1.32 {q6}, [r9]! @ store row 0 to temp buffer: col 0 + vext.16 q11, q6, q7, #5 + vaddl.u8 q9, d4, d10 + vmla.u16 q14, q8, q13 + vaddl.s16 q15, d12, d22 + vst1.32 {q7}, [r9], r7 @ store row 0 to temp buffer: col 1 + vaddl.s16 q11, d13, d23 + vext.16 q8, q6, q7, #2 + vmls.u16 q14, q9, q12 + vext.16 q9, q6, q7, #3 + vext.16 q10, q6, q7, #4 + vext.16 q7, q6, q7, #1 + vadd.s16 q8, q8, q9 + vadd.s16 q9, q7, q10 + vaddl.u8 q10, d7, d9 + vmlal.s16 q15, d16, d26 + vmlsl.s16 q15, d18, d24 + vmlal.s16 q11, d17, d26 + vmlsl.s16 q11, d19, d24 + vaddl.u8 q7, d3, d1 + vst1.32 {q14}, [r9]!
@ store row 1 to temp buffer: col 0 + vmla.u16 q7, q10, q13 + vqrshrun.s32 d12, q15, #10 + vaddl.u8 q8, d5, d11 + vqrshrun.s32 d13, q11, #10 + vmls.u16 q7, q8, q12 +@ vld1.32 {q1},[r0],r2 ; Vector load from src[7_0] + vqmovn.u16 d25, q6 + vaddl.u8 q8, d8, d10 + + + vext.16 q11, q14, q7, #5 + vaddl.u8 q10, d4, d2 + vaddl.s16 q15, d28, d22 + vmla.u16 q10, q8, q13 + vst1.32 {q7}, [r9], r7 @ store row 1 to temp buffer: col 1 + vaddl.s16 q11, d29, d23 + vext.16 q8, q14, q7, #2 + vext.16 q9, q14, q7, #3 + vext.16 q6, q14, q7, #4 + vext.16 q7, q14, q7, #1 + vadd.s16 q8, q8, q9 + vadd.s16 q9, q6, q7 + vld1.32 {q7}, [r6], r8 @ load row 0 from temp buffer + vmlal.s16 q15, d16, d26 + vmlsl.s16 q15, d18, d24 + vmlal.s16 q11, d17, d26 + vmlsl.s16 q11, d19, d24 + vqrshrun.s16 d14, q7, #0x5 + vld1.32 {q14}, [r6], r8 @ load row 1 from temp buffer + vaddl.u8 q9, d6, d0 + vqrshrun.s32 d16, q15, #10 + vqrshrun.s16 d15, q14, #0x5 + vqrshrun.s32 d17, q11, #10 + vmov d12, d25 + vmov d25, d24 @ restore filter coeff 5 in d25, which the vqmovn above overwrote + + vqmovn.u16 d13, q8 + vrhadd.u8 q6, q6, q7 + + vst1.32 d12, [r1], r3 @ store row 0 + vst1.32 d13, [r1], r3 @ store row 1 + + subs r4, r4, #2 + subne r0, r0 , r2, lsl #2 + subne r0, r0, r2 + + beq end_func @ Branch if height==4 + b loop_8 @ Loop if height==8 + +loop_4: + vld1.u32 {q0}, [r0], r2 @ Vector load from src[0_0] + vld1.u32 {q1}, [r0], r2 @ Vector load from src[1_0] + vld1.u32 {q2}, [r0], r2 @ Vector load from src[2_0] + vld1.u32 {q3}, [r0], r2 @ Vector load from src[3_0] + vld1.u32 {q4}, [r0], r2 @ Vector load from src[4_0] + vld1.u32 {q5}, [r0], r2 @ Vector load from src[5_0] + + vaddl.u8 q7, d4, d6 @ temp1 = src[2_0] + src[3_0] + vaddl.u8 q6, d0, d10 @ temp = src[0_0] + src[5_0] + vaddl.u8 q8, d2, d8 @ temp2 = src[1_0] + src[4_0] + vmla.u16 q6, q7, q13 @ temp += temp1 * 20 + vaddl.u8 q9, d5, d7 @ temp1 = src[2_0] + src[3_0] + vaddl.u8 q7, d1, d11 @ temp = src[0_0] + src[5_0] + vaddl.u8 q11, d3, d9 @ temp2 = src[1_0] + src[4_0] + vmla.u16 q7, q9, q13 @ temp += temp1 * 20 + vmls.u16 q6,
q8, q12 @ temp -= temp2 * 5 + vld1.32 {q0}, [r0], r2 @ Vector load from src[6_0] + vaddl.u8 q8, d6, d8 + vmls.u16 q7, q11, q12 @ temp -= temp2 * 5 + @Q6 and Q7 have filtered values + vaddl.u8 q14, d2, d0 + vst1.32 {q6}, [r9]! @ store row 0 to temp buffer: col 0 + vext.16 q11, q6, q7, #5 + vaddl.u8 q9, d4, d10 + vmla.u16 q14, q8, q13 + vaddl.s16 q15, d12, d22 + vst1.32 {q7}, [r9], r7 @ store row 0 to temp buffer: col 1 + vaddl.s16 q11, d13, d23 + vext.16 q8, q6, q7, #2 + vmls.u16 q14, q9, q12 + vext.16 q9, q6, q7, #3 + vext.16 q10, q6, q7, #4 + vext.16 q7, q6, q7, #1 + vadd.s16 q8, q8, q9 + vadd.s16 q9, q7, q10 + vaddl.u8 q10, d7, d9 + vmlal.s16 q15, d16, d26 + vmlsl.s16 q15, d18, d24 + vmlal.s16 q11, d17, d26 + vmlsl.s16 q11, d19, d24 + vaddl.u8 q7, d3, d1 + vst1.32 {q14}, [r9]! @ store row 1 to temp buffer: col 0 + vmla.u16 q7, q10, q13 + vqrshrun.s32 d12, q15, #10 + vaddl.u8 q8, d5, d11 + vqrshrun.s32 d13, q11, #10 + vmls.u16 q7, q8, q12 + vqmovn.u16 d25, q6 + vaddl.u8 q8, d8, d10 + + vext.16 q11, q14, q7, #5 + vaddl.u8 q10, d4, d2 + vaddl.s16 q15, d28, d22 + vmla.u16 q10, q8, q13 + vst1.32 {q7}, [r9], r7 @ store row 1 to temp buffer: col 1 + vaddl.s16 q11, d29, d23 + vext.16 q8, q14, q7, #2 + vext.16 q9, q14, q7, #3 + vext.16 q6, q14, q7, #4 + vext.16 q7, q14, q7, #1 + vadd.s16 q8, q8, q9 + vadd.s16 q9, q6, q7 + vld1.32 d14, [r6], r8 @load row 0 from temp buffer + vmlal.s16 q15, d16, d26 + vmlsl.s16 q15, d18, d24 + vmlal.s16 q11, d17, d26 + vmlsl.s16 q11, d19, d24 + vqrshrun.s16 d14, q7, #0x5 + vld1.32 d28, [r6], r8 @load row 1 from temp buffer + vaddl.u8 q9, d6, d0 + vqrshrun.s32 d16, q15, #10 + vqrshrun.s16 d15, q14, #0x5 + vqrshrun.s32 d17, q11, #10 + vmov d12, d25 + vmov d25, d24 @ restore filter coeff 5 in d25, which the vqmovn above overwrote + + vqmovn.u16 d13, q8 + vrhadd.u8 q6, q6, q7 + vst1.32 d12[0], [r1], r3 @ store row 0 + vst1.32 d13[0], [r1], r3 @store row 1 + + subs r4, r4, #2 + subne r0, r0 , r2, lsl #2 + subne r0, r0, r2 + + beq end_func @ Branch if height==4 + b loop_4 @ Loop if height==8 + +end_func: +
vldmia sp!, {d8-d15} @ Restore neon registers that were saved + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s new file mode 100755 index 0000000..3c8b60a --- /dev/null +++ b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s @@ -0,0 +1,355 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@****************************************************************************** +@* @file +@* ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s +@* +@* @brief +@* Contains function definitions for inter prediction interpolation.
+@* +@* @author +@* Mohit +@* +@* @par List of Functions: +@* +@* - ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q() +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@/* All the functions here are replicated from ih264_inter_pred_filters.c +@ + +@/** +@/** +@/** +@******************************************************************************* +@* +@* @brief +@* This function implements two six tap filters. It +@* applies the six tap filter in the horizontal direction on the +@* predictor values, then applies the same filter in the +@* vertical direction on the predictor values. It then averages these +@* two outputs to obtain quarter pel values in horizontal and vertical direction. +@* The six tap filtering operation is described in sec 8.4.2.2.1 titled +@* "Luma sample interpolation process" +@* +@* @par Description: +@* This function is called to obtain pixels lying at the following +@* location (1/4,1/4) or (3/4,1/4) or (1/4,3/4) or (3/4,3/4). +@* The function interpolates the predictors first in the horizontal direction +@* and then in the vertical direction, and then averages these two +@* values. 
+@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ht +@* integer height of the array +@* +@* @param[in] wd +@* integer width of the array +@* +@* @param[in] pu1_tmp: temporary buffer +@* +@* @param[in] dydx: x and y reference offset for qpel calculations +@* +@* @returns +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/; + +@void ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ht, +@ WORD32 wd, +@ UWORD8* pu1_tmp, +@ UWORD32 dydx) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ht +@ r5 => wd +@ r6 => dydx + +.text +.p2align 2 + + .global ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q + +ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + vstmdb sp!, {d8-d15} @push neon registers to stack + ldr r4, [sp, #104] @ loads ht (stack args sit above the 40 bytes of core regs and 64 bytes of d8-d15 just pushed) + ldr r5, [sp, #108] @ loads wd + ldr r6, [sp, #116] @dydx (pu1_tmp at #112 is never read by this routine) + and r7, r6, #3 @Finds x-offset + add r7, r0, r7, lsr #1 @pu1_pred_vert = pu1_src + (x_offset>>1) + + and r6, r6, #12 @Finds y-offset + lsr r6, r6, #3 @dydx>>3 + mul r6, r2, r6 @(y_offset>>1)*src_strd + add r6, r0, r6 @pu1_pred_horz = pu1_src + (y_offset>>1)*src_strd + sub r7, r7, r2, lsl #1 @pu1_pred_vert-2*src_strd + sub r6, r6, #2 @pu1_pred_horz-2 + vmov.u8 d30, #20 @ Filter coeff 20 + vmov.u8 d31, #5 @ Filter coeff 5 + + subs r12, r5, #4 @if wd=4 branch to loop_4 + beq loop_4 + subs r12, r5, #8 @if wd=8 branch to loop_8 + beq loop_8 + +loop_16: + vld1.32 {q0}, [r7], r2 @ Vector load from src[0_0] + vld1.32 {q1}, [r7], r2 @ Vector load from src[1_0] + vld1.32 {q2}, [r7], r2 @ Vector load from src[2_0] +
vld1.32 {q3}, [r7], r2 @ Vector load from src[3_0] + vld1.32 {q4}, [r7], r2 @ Vector load from src[4_0] + add r11, r6, #8 + vld1.32 {q5}, [r7], r2 @ Vector load from src[5_0] + vld1.32 {q9}, [r6], r2 @ horz row0, col 0 + vaddl.u8 q12, d0, d10 + vmlal.u8 q12, d4, d30 + vmlal.u8 q12, d6, d30 + vmlsl.u8 q12, d2, d31 + vmlsl.u8 q12, d8, d31 + vext.8 d23, d18, d19, #5 + vext.8 d20, d18, d19, #2 + vext.8 d21, d18, d19, #3 + vext.8 d22, d18, d19, #4 + vext.8 d19, d18, d19, #1 + vqrshrun.s16 d26, q12, #5 + vaddl.u8 q14, d18, d23 + vmlal.u8 q14, d20, d30 + vmlal.u8 q14, d21, d30 + vmlsl.u8 q14, d19, d31 + vmlsl.u8 q14, d22, d31 + vld1.32 {q9}, [r11], r2 @ horz row 0, col 1 + vaddl.u8 q12, d1, d11 + vmlal.u8 q12, d5, d30 + vmlal.u8 q12, d7, d30 + vmlsl.u8 q12, d3, d31 + vmlsl.u8 q12, d9, d31 + vqrshrun.s16 d28, q14, #5 + vext.8 d23, d18, d19, #5 + vext.8 d20, d18, d19, #2 + vext.8 d21, d18, d19, #3 + vext.8 d22, d18, d19, #4 + vext.8 d19, d18, d19, #1 + vqrshrun.s16 d27, q12, #5 + vld1.32 {q6}, [r7], r2 @ src[6_0] + + vaddl.u8 q12, d18, d23 + vmlal.u8 q12, d20, d30 + vmlal.u8 q12, d21, d30 + vmlsl.u8 q12, d19, d31 + vmlsl.u8 q12, d22, d31 + + vaddl.u8 q8, d2, d12 + vmlal.u8 q8, d6, d30 + vmlal.u8 q8, d8, d30 + vmlsl.u8 q8, d4, d31 + vmlsl.u8 q8, d10, d31 + vqrshrun.s16 d29, q12, #5 + vld1.32 {q9}, [r6], r2 @ horz row 1, col 0 + + vaddl.u8 q12, d3, d13 + vmlal.u8 q12, d7, d30 + vmlal.u8 q12, d9, d30 + vmlsl.u8 q12, d5, d31 + vmlsl.u8 q12, d11, d31 + vrhadd.u8 q14, q14, q13 + vqrshrun.s16 d26, q8, #5 + vext.8 d23, d18, d19, #5 + vext.8 d20, d18, d19, #2 + vext.8 d21, d18, d19, #3 + vext.8 d22, d18, d19, #4 + vst1.32 {q14}, [r1], r3 @ store row 0 + vext.8 d19, d18, d19, #1 + vqrshrun.s16 d27, q12, #5 + + vaddl.u8 q14, d18, d23 + vmlal.u8 q14, d20, d30 + vmlal.u8 q14, d21, d30 + vmlsl.u8 q14, d19, d31 + vmlsl.u8 q14, d22, d31 + + vld1.32 {q9}, [r11], r2 @ horz row 1, col 1 + + vext.8 d23, d18, d19, #5 + vext.8 d20, d18, d19, #2 + vext.8 d21, d18, d19, #3 + vext.8 d22, d18, d19,
#4 + vext.8 d19, d18, d19, #1 + + vqrshrun.s16 d28, q14, #5 + vaddl.u8 q12, d18, d23 + vmlal.u8 q12, d20, d30 + vmlal.u8 q12, d21, d30 + vmlsl.u8 q12, d19, d31 + vmlsl.u8 q12, d22, d31 + + vqrshrun.s16 d29, q12, #5 + vrhadd.u8 q14, q14, q13 + vst1.32 {q14}, [r1], r3 @ store row 1 + + subs r4, r4, #2 @ 2 rows processed, decrement by 2 + subne r7, r7 , r2, lsl #2 @ rewind the vertical source pointer by 4 rows ... + subne r7, r7, r2 @ ... and one more: 7 rows were loaded, so the window moves down by 2 rows + beq end_func @ Branch if height==4 + + b loop_16 @ looping if height = 8 or 16 + + +loop_8: + vld1.32 d0, [r7], r2 @ Vector load from src[0_0] + vld1.32 d1, [r7], r2 @ Vector load from src[1_0] + vld1.32 d2, [r7], r2 @ Vector load from src[2_0] + vld1.32 d3, [r7], r2 @ Vector load from src[3_0] + vld1.32 d4, [r7], r2 @ Vector load from src[4_0] + vld1.32 d5, [r7], r2 @ Vector load from src[5_0] + vaddl.u8 q5, d0, d5 + vmlal.u8 q5, d2, d30 + vmlal.u8 q5, d3, d30 + vmlsl.u8 q5, d1, d31 + vmlsl.u8 q5, d4, d31 + vld1.32 {q6}, [r6], r2 @horz row 0 + vext.8 d17, d12, d13, #5 + vext.8 d14, d12, d13, #2 + vext.8 d15, d12, d13, #3 + vext.8 d16, d12, d13, #4 + vext.8 d13, d12, d13, #1 + vqrshrun.s16 d26, q5, #5 + vld1.32 d6, [r7], r2 @ src[6_0] + vaddl.u8 q5, d12, d17 + vmlal.u8 q5, d14, d30 + vmlal.u8 q5, d15, d30 + vmlsl.u8 q5, d13, d31 + vmlsl.u8 q5, d16, d31 + vld1.32 {q6}, [r6], r2 @ horz row 1 + vaddl.u8 q9, d1, d6 + vmlal.u8 q9, d3, d30 + vmlal.u8 q9, d4, d30 + vmlsl.u8 q9, d2, d31 + vmlsl.u8 q9, d5, d31 + vqrshrun.s16 d28, q5, #5 + vext.8 d17, d12, d13, #5 + vext.8 d14, d12, d13, #2 + vext.8 d15, d12, d13, #3 + vext.8 d16, d12, d13, #4 + vext.8 d13, d12, d13, #1 + vqrshrun.s16 d27, q9, #5 + vaddl.u8 q5, d12, d17 + vmlal.u8 q5, d14, d30 + vmlal.u8 q5, d15, d30 + vmlsl.u8 q5, d13, d31 + vmlsl.u8 q5, d16, d31 + vqrshrun.s16 d29, q5, #5 + vrhadd.u8 q13, q13, q14 + vst1.32 d26, [r1], r3 @ store row 0 + vst1.32 d27, [r1], r3 @ store row 1 + + subs r4, r4, #2 @ 2 rows processed, decrement by 2 + subne r7, r7 , r2, lsl #2 @ rewind the vertical source pointer by 4 rows ... + subne r7, r7, r2 @ ... and one more: 7 rows were loaded, so the window moves down by 2 rows + beq end_func @ Branch if height==4 + b loop_8 @looping if height == 8 or 16 +
+loop_4: + vld1.32 d0[0], [r7], r2 @ Vector load from src[0_0] + vld1.32 d1[0], [r7], r2 @ Vector load from src[1_0] + vld1.32 d2[0], [r7], r2 @ Vector load from src[2_0] + vld1.32 d3[0], [r7], r2 @ Vector load from src[3_0] + vld1.32 d4[0], [r7], r2 @ Vector load from src[4_0] + vld1.32 d5[0], [r7], r2 @ Vector load from src[5_0] + vaddl.u8 q5, d0, d5 + vmlal.u8 q5, d2, d30 + vmlal.u8 q5, d3, d30 + vmlsl.u8 q5, d1, d31 + vmlsl.u8 q5, d4, d31 + vld1.32 {q6}, [r6], r2 @load for horz filter row 0 + vext.8 d17, d12, d13, #5 + vext.8 d14, d12, d13, #2 + vext.8 d15, d12, d13, #3 + vext.8 d16, d12, d13, #4 + vext.8 d13, d12, d13, #1 + vqrshrun.s16 d26, q5, #5 + vld1.32 d6[0], [r7], r2 @ Vector load from src[6_0] + vaddl.u8 q5, d12, d17 + vmlal.u8 q5, d14, d30 + vmlal.u8 q5, d15, d30 + vmlsl.u8 q5, d13, d31 + vmlsl.u8 q5, d16, d31 + vld1.32 {q6}, [r6], r2 @horz row 1 + vaddl.u8 q9, d1, d6 + vmlal.u8 q9, d3, d30 + vmlal.u8 q9, d4, d30 + vmlsl.u8 q9, d2, d31 + vmlsl.u8 q9, d5, d31 + vqrshrun.s16 d28, q5, #5 + vext.8 d17, d12, d13, #5 + vext.8 d14, d12, d13, #2 + vext.8 d15, d12, d13, #3 + vext.8 d16, d12, d13, #4 + vext.8 d13, d12, d13, #1 + vqrshrun.s16 d27, q9, #5 + vaddl.u8 q5, d12, d17 + vmlal.u8 q5, d14, d30 + vmlal.u8 q5, d15, d30 + vmlsl.u8 q5, d13, d31 + vmlsl.u8 q5, d16, d31 + vqrshrun.s16 d29, q5, #5 + vrhadd.u8 q13, q13, q14 + vst1.32 d26[0], [r1], r3 @ store row 0 (4 bytes) + vst1.32 d27[0], [r1], r3 @ store row 1 (4 bytes) + + subs r4, r4, #2 @ 2 rows processed, decrement by 2 + subne r7, r7 , r2, lsl #2 @ rewind the vertical source pointer by 4 rows ... + subne r7, r7, r2 @ ... and one more: 7 rows were loaded, so the window moves down by 2 rows + beq end_func @ Branch if height==4 + b loop_4 @ Loop if height==8 +end_func: + vldmia sp!, {d8-d15} @ Restore neon registers that were saved + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + diff --git a/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s new file mode 100755 index 0000000..d45055e --- /dev/null +++ b/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s @@ -0,0 +1,330 @@
+@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@****************************************************************************** +@* @file +@* ih264_inter_pred_luma_vert_qpel_a9q.s +@* +@* @brief +@* Contains function definitions for inter prediction vertical quarter pel interpolation. 
+@* +@* @author +@* Mohit +@* +@* @par List of Functions: +@* +@* - ih264_inter_pred_luma_vert_qpel_a9q() +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@/* All the functions here are replicated from ih264_inter_pred_filters.c +@ + +@/** +@/** +@******************************************************************************* +@* +@* @brief +@* Quarter pel interprediction luma filter for vertical input +@* +@* @par Description: +@* Applies a 6 tap vertical filter. The output is clipped to 8 bits +@* sec 8.4.2.2.1 titled "Luma sample interpolation process" +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ht +@* integer height of the array +@* +@* @param[in] wd +@* integer width of the array +@* +@* @param[in] pu1_tmp: temporary buffer: UNUSED in this function +@* +@* @param[in] dydx: x and y reference offset for qpel calculations.
+@* @returns
+@*
+@* @remarks
+@* Only wd = 4 and wd = 8 are dispatched explicitly; any other width runs the 16 wide loop
+@*
+@*******************************************************************************
+@*/
+
+@void ih264_inter_pred_luma_vert_qpel_a9q (
+@ UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd,
+@ UWORD8* pu1_tmp,
+@ UWORD32 dydx)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src (rewound to pu1_src - 2*src_strd before the loops)
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r5 => ht (rows still to be produced)
+@ r6 => wd
+@ r7 => dydx, then pu1_src + (y_offset>>1)*src_strd (rows averaged with the filter output)
+
+.text
+.p2align 2
+
+    .global ih264_inter_pred_luma_vert_qpel_a9q
+
+ih264_inter_pred_luma_vert_qpel_a9q:
+
+    stmfd sp!, {r4-r12, r14}    @store register values to stack
+    vstmdb sp!, {d8-d15}    @push neon registers to stack
+    ldr r5, [sp, #104]    @Loads ht (stack args sit above 40 bytes of core regs + 64 bytes of d8-d15)
+
+    ldr r6, [sp, #108]    @Loads wd
+    ldr r7, [sp, #116]    @Loads dydx
+    and r7, r7, #12    @Finds y-offset
+    lsr r7, r7, #3    @dydx>>3
+    mul r7, r2, r7    @ (y_offset>>1) * src_strd
+    add r7, r0, r7    @pu1_src + (y_offset>>1)*src_strd
+    vmov.u16 q11, #20    @ Filter coeff 0x14 into Q11
+    sub r0, r0, r2, lsl #1    @pu1_src-2*src_strd
+    subs r12, r6, #8    @if wd=8 branch to loop_8
+    vmov.u16 q12, #5    @ Filter coeff 0x5 into Q12
+    beq loop_8
+
+    subs r12, r6, #4    @if wd=4 branch to loop_4
+    beq loop_4
+
+loop_16:    @when wd=16
+
+    vld1.u32 {q0}, [r0], r2    @ Vector load from src[0_0]
+    vld1.u32 {q1}, [r0], r2    @ Vector load from src[1_0]
+    vld1.u32 {q2}, [r0], r2    @ Vector load from src[2_0]
+    vld1.u32 {q3}, [r0], r2    @ Vector load from src[3_0]
+    vld1.u32 {q4}, [r0], r2    @ Vector load from src[4_0]
+    vaddl.u8 q6, d4, d6    @ temp1 = src[2_0] + src[3_0]
+    vld1.u32 {q5}, [r0], r2    @ Vector load from src[5_0]
+    vaddl.u8 q7, d0, d10    @ temp = src[0_0] + src[5_0]
+    vaddl.u8 q8, d2, d8    @ temp2 = src[1_0] + src[4_0]
+    vmla.u16 q7, q6, q11    @ temp += temp1 * 20
+    vaddl.u8 q10, d1, d11    @ temp4 = src[0_8] + src[5_8]
+    vaddl.u8 q9, d5, d7    @ temp3 = src[2_8] + src[3_8]
+    vmla.u16 q10, q9, q11    @ temp4 += temp3 * 20
+    vld1.u32 {q0}, [r0], r2    @ 7th source row, reuses q0
+    vaddl.u8 q13, d3, d9    @ temp5 = src[1_8] + src[4_8]
+    vaddl.u8 q6, d6, d8
+    vmls.u16 q7, q8, q12    @ temp -= temp2 * 5
+    vaddl.u8 q8, d2, d0
+    vaddl.u8 q9, d4, d10
+    vmla.u16 q8, q6, q11
+    vmls.u16 q10, q13, q12    @ temp4 -= temp5 * 5
+    vaddl.u8 q13, d5, d11
+    vaddl.u8 q6, d7, d9
+    vqrshrun.s16 d30, q7, #5    @ dst[0_0] = CLIP_U8((temp +16) >> 5)
+    vaddl.u8 q7, d3, d1
+    vld1.u32 {q1}, [r0], r2    @ 8th source row, reuses q1
+    vmla.u16 q7, q6, q11
+    vmls.u16 q8, q9, q12
+    vqrshrun.s16 d31, q10, #5    @ dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+    vld1.u32 {q10}, [r7], r2    @ Load for interpolation row 0
+    vrhadd.u8 q15, q10, q15    @ Interpolation to obtain qpel value
+    vaddl.u8 q9, d4, d2
+    vaddl.u8 q6, d8, d10
+
+    vst1.u32 {q15}, [r1], r3    @ Vector store to dst[0_0]
+    vmla.u16 q9, q6, q11
+    vaddl.u8 q10, d6, d0
+    vmls.u16 q7, q13, q12
+    vqrshrun.s16 d30, q8, #5
+    vaddl.u8 q6, d9, d11
+    vaddl.u8 q8, d5, d3
+    vaddl.u8 q13, d7, d1
+    vmla.u16 q8, q6, q11
+    vmls.u16 q9, q10, q12
+    vld1.u32 {q2}, [r0], r2    @ 9th source row, reuses q2
+
+    vqrshrun.s16 d31, q7, #5
+    vld1.u32 {q7}, [r7], r2    @ Load for interpolation row 1
+    vaddl.u8 q6, d10, d0
+    vrhadd.u8 q15, q7, q15    @ Interpolation to obtain qpel value
+    vaddl.u8 q7, d6, d4
+    vaddl.u8 q10, d8, d2
+    vmla.u16 q7, q6, q11
+    vmls.u16 q8, q13, q12
+    vst1.u32 {q15}, [r1], r3    @store row 1
+    vqrshrun.s16 d30, q9, #5
+    vaddl.u8 q9, d7, d5
+    vaddl.u8 q6, d11, d1
+    vmla.u16 q9, q6, q11
+    vaddl.u8 q13, d9, d3
+    vmls.u16 q7, q10, q12
+    vqrshrun.s16 d31, q8, #5
+    vld1.u32 {q8}, [r7], r2    @ Load for interpolation row 2
+    vmls.u16 q9, q13, q12
+    vrhadd.u8 q15, q8, q15    @ Interpolation to obtain qpel value
+    vaddl.u8 q6, d0, d2    @ temp1 = src[2_0] + src[3_0]
+    vst1.u32 {q15}, [r1], r3    @store row 2
+    vaddl.u8 q8, d10, d4    @ temp2 = src[1_0] + src[4_0]
+    vaddl.u8 q10, d9, d7    @ temp4 = src[0_8] + src[5_8]
+    vqrshrun.s16 d30, q7, #5
+    vaddl.u8 q13, d5, d11    @ temp5 = src[1_8] + src[4_8]
+    vaddl.u8 q7, d8, d6    @ temp = src[0_0] + src[5_0]
+    vqrshrun.s16 d31, q9, #5
+    vld1.u32 {q9}, [r7], r2    @ Load for interpolation row 3
+    vmla.u16 q7, q6, q11    @ temp += temp1 * 20
+    vrhadd.u8 q15, q9, q15    @ Interpolation to obtain qpel value
+    vaddl.u8 q9, d1, d3    @ temp3 = src[2_8] + src[3_8]
+    vst1.u32 {q15}, [r1], r3    @store row 3
+    subs r5, r5, #4    @ 4 rows processed, decrement by 4
+    subne r0, r0 , r2, lsl #2    @ 9 source rows were read but only 4 consumed:
+    subne r0, r0, r2    @ rewind r0 by 5 rows before the next pass
+    beq end_func    @ Branch if height==4
+
+    b loop_16    @ looping if height = 8 or 16
+
+
+loop_8:
+
+    @// Processing row0 and row1
+    vld1.u32 d0, [r0], r2    @ Vector load from src[0_0]
+    vld1.u32 d1, [r0], r2    @ Vector load from src[1_0]
+    vld1.u32 d2, [r0], r2    @ Vector load from src[2_0]
+    vld1.u32 d3, [r0], r2    @ Vector load from src[3_0]
+    vld1.u32 d4, [r0], r2    @ Vector load from src[4_0]
+    vld1.u32 d5, [r0], r2    @ Vector load from src[5_0]
+
+    vaddl.u8 q3, d2, d3    @ temp1 = src[2_0] + src[3_0]
+    vaddl.u8 q4, d0, d5    @ temp = src[0_0] + src[5_0]
+    vaddl.u8 q5, d1, d4    @ temp2 = src[1_0] + src[4_0]
+    vmla.u16 q4, q3, q11    @ temp += temp1 * 20
+    vld1.u32 d6, [r0], r2    @ 7th source row (q3/temp1 is already consumed)
+    vaddl.u8 q7, d3, d4
+    vaddl.u8 q8, d1, d6
+    vaddl.u8 q9, d2, d5
+    vmls.u16 q4, q5, q12    @ temp -= temp2 * 5
+    vmla.u16 q8, q7, q11
+    vld1.u32 d7, [r0], r2    @ 8th source row
+    vaddl.u8 q10, d4, d5
+    vaddl.u8 q6, d2, d7
+    vaddl.u8 q5, d3, d6
+    vmls.u16 q8, q9, q12
+    vqrshrun.s16 d26, q4, #5    @ dst[0_0] = CLIP_U8( (temp + 16) >> 5)
+    vmla.u16 q6, q10, q11
+    vld1.32 d8, [r7], r2    @Load value for interpolation (row0)
+    vld1.32 d9, [r7], r2    @Load value for interpolation (row1)
+    vld1.u32 d0, [r0], r2    @ 9th source row
+    vaddl.u8 q7, d5, d6
+    vqrshrun.s16 d27, q8, #5
+    vrhadd.u8 q13, q4, q13    @ Interpolation step for qpel calculation
+    vaddl.u8 q10, d3, d0
+    vmls.u16 q6, q5, q12
+    vst1.u32 d26, [r1], r3    @ Vector store to dst[0_0]
+    vaddl.u8 q9, d4, d7
+    vmla.u16 q10, q7, q11
+    vst1.u32 d27, [r1], r3    @ Vector store to dst[1_0]
+    vqrshrun.s16 d28, q6, #5
+    vmls.u16 q10, q9, q12
+    vld1.32 d12, [r7], r2    @Load value for interpolation (row2)
+    vld1.32 d13, [r7], r2    @Load value for interpolation (row3)
+    vqrshrun.s16 d29, q10, #5
+    subs r9, r5, #4    @ NOTE(review): r9 and these flags are never consumed; the subs on r5 below sets the flags that are used
+    vrhadd.u8 q14, q6, q14
+    vst1.u32 d28, [r1], r3    @store row 2
+    vst1.u32 d29, [r1], r3    @store row 3
+
+    subs r5, r5, #4    @ 4 rows processed, decrement by 4
+    subne r0, r0 , r2, lsl #2    @ rewind r0 by 5 rows (9 read, 4 consumed)
+    subne r0, r0, r2
+    beq end_func    @ Branch if height==4
+    b loop_8    @looping if height == 8 or 16
+
+loop_4:
+@// Processing row0 and row1
+
+    vld1.u32 d0[0], [r0], r2    @ Vector load from src[0_0]
+    vld1.u32 d1[0], [r0], r2    @ Vector load from src[1_0]
+    vld1.u32 d2[0], [r0], r2    @ Vector load from src[2_0]
+    vld1.u32 d3[0], [r0], r2    @ Vector load from src[3_0]
+    vld1.u32 d4[0], [r0], r2    @ Vector load from src[4_0]
+    vld1.u32 d5[0], [r0], r2    @ Vector load from src[5_0]
+
+    vaddl.u8 q3, d2, d3    @ temp1 = src[2_0] + src[3_0]
+    vaddl.u8 q4, d0, d5    @ temp = src[0_0] + src[5_0]
+    vaddl.u8 q5, d1, d4    @ temp2 = src[1_0] + src[4_0]
+    vmla.u16 q4, q3, q11    @ temp += temp1 * 20
+    vld1.u32 d6[0], [r0], r2    @ 7th source row: 4-byte lane load like every other row here (only lane 0 of each result is stored)
+    vaddl.u8 q7, d3, d4
+    vaddl.u8 q8, d1, d6
+    vaddl.u8 q9, d2, d5
+    vmls.u16 q4, q5, q12    @ temp -= temp2 * 5
+    vld1.u32 d7[0], [r0], r2    @ 8th source row
+    vmla.u16 q8, q7, q11
+    vaddl.u8 q10, d4, d5
+    vaddl.u8 q6, d2, d7
+    vaddl.u8 q5, d3, d6
+    vmls.u16 q8, q9, q12
+    vqrshrun.s16 d26, q4, #5    @ dst[0_0] = CLIP_U8( (temp + 16) >> 5)
+    vld1.u32 d8[0], [r7], r2    @Load value for interpolation - row 0
+    vld1.u32 d9[0], [r7], r2    @Load value for interpolation - row 1
+    vmla.u16 q6, q10, q11
+    vld1.u32 d0[0], [r0], r2    @ 9th source row
+    vaddl.u8 q7, d5, d6
+    vqrshrun.s16 d27, q8, #5
+    vaddl.u8 q10, d3, d0
+    vrhadd.u8 q13, q13, q4    @Interpolation step for qpel calculation
+    vmls.u16 q6, q5, q12
+    vst1.u32 d26[0], [r1], r3    @ Vector store to dst[0_0]
+    vaddl.u8 q9, d4, d7
+    vmla.u16 q10, q7, q11
+    vst1.u32 d27[0], [r1], r3    @ store row 1
+    vqrshrun.s16 d28, q6, #5
+    vld1.u32 d12[0], [r7], r2    @Load value for interpolation - row 2
+    vld1.u32 d13[0], [r7], r2    @Load value for interpolation - row 3
+
+    vmls.u16 q10, q9, q12
+    vqrshrun.s16 d29, q10, #5
+    vrhadd.u8 q14, q6, q14    @Interpolation step for qpel calculation
+    vst1.u32 d28[0], [r1], r3    @store row 2
+    vst1.u32 d29[0], [r1], r3    @store row 3
+
+    subs r5, r5, #8    @ zero only when ht was 8 on the first pass; negative otherwise
+    subeq r0, r0, r2, lsl #2    @ rewind r0 by 5 rows (9 read, 4 consumed)
+    subeq r0, r0, r2
+    beq loop_4    @ Loop if height==8
+
+end_func:
+    vldmia sp!, {d8-d15}    @ Restore neon registers that were saved
+    ldmfd sp!, {r4-r12, pc}    @Restoring registers from stack
+
+
diff --git a/common/arm/ih264_intra_pred_chroma_a9q.s b/common/arm/ih264_intra_pred_chroma_a9q.s
new file mode 100755
index 0000000..d03fc55
--- /dev/null
+++ b/common/arm/ih264_intra_pred_chroma_a9q.s
@@ -0,0 +1,551 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_intra_pred_chroma_a9q.s
+@*
+@* @brief
+@* Contains function definitions for intra chroma prediction .
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@*
+@* - ih264_intra_pred_chroma_8x8_mode_dc_a9q()
+@* - ih264_intra_pred_chroma_8x8_mode_horz_a9q()
+@* - ih264_intra_pred_chroma_8x8_mode_vert_a9q()
+@* - ih264_intra_pred_chroma_8x8_mode_plane_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_chroma_intra_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@
+.text
+.p2align 2
+
+    .extern ih264_gai1_intrapred_chroma_plane_coeffs1
+.hidden ih264_gai1_intrapred_chroma_plane_coeffs1
+    .extern ih264_gai1_intrapred_chroma_plane_coeffs2
+.hidden ih264_gai1_intrapred_chroma_plane_coeffs2
+scratch_chroma_intrapred_addr1:
+    .long ih264_gai1_intrapred_chroma_plane_coeffs1 - scrlblc1 - 8    @ pc-relative offset, resolved at scrlblc1
+
+scratch_intrapred_chroma_plane_addr1:
+    .long ih264_gai1_intrapred_chroma_plane_coeffs2 - scrlblc2 - 8    @ pc-relative offset, resolved at scrlblc2
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_chroma_8x8_mode_dc
+@*
+@* @brief
+@* Perform Intra prediction for chroma_8x8 mode:DC
+@*
+@* @par Description:
+@* Perform Intra prediction for chroma_8x8 mode:DC ,described in sec 8.3.4.1
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination with alternate U and V samples
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels (bit 0x01 = left, bit 0x04 = top)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd (not read; r2 is reused as scratch for the availability tests)
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+
+    .global ih264_intra_pred_chroma_8x8_mode_dc_a9q
+
+ih264_intra_pred_chroma_8x8_mode_dc_a9q:
+
+    stmfd sp!, {r4, r14}    @store register values to stack
+    ldr r4, [sp, #8]    @r4 => ui_neighboravailability (first stack argument, above the 8 bytes just pushed)
+    vpush {d8-d15}    @ q4-q7 are written below
+
+    ands r2, r4, #0x01    @CHECKING IF LEFT_AVAILABLE ELSE BRANCHING TO ONLY TOP AVAILABLE
+    beq top_available
+    ands r2, r4, #0x04    @CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
+    beq left_available
+
+    vld1.u8 {q0}, [r0]    @BOTH LEFT AND TOP AVAILABLE
+    add r0, r0, #18    @ r0 -> pu1_src + 18, where the top neighbours are read from
+    vld1.u8 {q1}, [r0]    @ q1 = 16 top neighbour bytes
+    vaddl.u8 q2, d1, d2    @ left[8..15] + top[0..7], feeds the first half of rows 0-3
+    vaddl.u8 q3, d0, d3    @ left[0..7] + top[8..15], feeds the second half of rows 4-7
+    vmovl.u8 q1, d3    @ top[8..15] alone, feeds the second half of rows 0-3
+    vmovl.u8 q0, d0    @ left[0..7] alone, feeds the first half of rows 4-7
+
+    vadd.u16 d12, d4, d5    @ fold each 8-lane sum down to 4 lanes (U,V,U,V)
+    vadd.u16 d13, d2, d3
+    vadd.u16 d15, d6, d7
+    vadd.u16 d14, d0, d1
+
+    vpadd.u32 d12, d12, d15    @ add the two (U,V) pairs of each vector as packed 32-bit values
+    vpadd.u32 d14, d13, d14
+    vqrshrun.s16 d12, q6, #3    @ sums of 8 samples: rounded divide by 8
+    vqrshrun.s16 d14, q7, #2    @ sums of 4 samples: rounded divide by 4
+    vdup.u16 d8, d12[0]    @ q4 (d8:d9) is the pattern for rows 0-3
+    vdup.u16 d9, d14[0]
+    vdup.u16 d10, d14[1]    @ q5 (d10:d11) is the pattern for rows 4-7
+    vdup.u16 d11, d12[1]
+    b str_pred
+
+top_available:    @ONLY TOP AVAILABLE
+    ands r2, r4, #0x04    @CHECKING TOP AVAILABILITY OR ELSE BRANCH TO NONE AVAILABLE
+    beq none_available
+
+    add r0, r0, #18    @ r0 -> pu1_src + 18 (top neighbours)
+    vld1.u8 {q0}, [r0]
+    vmovl.u8 q1, d0
+    vmovl.u8 q2, d1
+    vadd.u16 d0, d2, d3    @ 4-lane (U,V,U,V) sum of top[0..7]
+    vadd.u16 d1, d4, d5    @ 4-lane (U,V,U,V) sum of top[8..15]
+    vpaddl.u32 q0, q0    @ final (U,V) sums land in the low 32 bits of d0 and d1
+    vqrshrun.s16 d0, q0, #2    @ rounded divide by 4
+    vdup.u16 d8, d0[0]    @ first half of every row from top[0..7]
+    vdup.u16 d9, d0[2]    @ second half of every row from top[8..15]
+    vmov q5, q4    @ rows 4-7 use the same pattern as rows 0-3
+    b str_pred
+
+left_available:    @ONLY LEFT AVAILABLE
+    vld1.u8 {q0}, [r0]
+    vmovl.u8 q1, d0
+    vmovl.u8 q2, d1
+    vadd.u16 d0, d2, d3    @ 4-lane (U,V,U,V) sum of left[0..7]
+    vadd.u16 d1, d4, d5    @ 4-lane (U,V,U,V) sum of left[8..15]
+    vpaddl.u32 q0, q0
+    vqrshrun.s16 d0, q0, #2    @ rounded divide by 4
+    vdup.u16 q5, d0[0]    @ rows 4-7 from left[0..7]
+    vdup.u16 q4, d0[2]    @ rows 0-3 from left[8..15]
+    b str_pred
+
+none_available:    @NONE AVAILABLE
+    vmov.u8 q4, #128
+    vmov.u8 q5, #128
+
+str_pred:
+    vst1.8 {q4}, [r1], r3    @ rows 0-3
+    vst1.8 {q4}, [r1], r3
+    vst1.8 {q4}, [r1], r3
+    vst1.8 {q4}, [r1], r3
+    vst1.8 {q5}, [r1], r3    @ rows 4-7
+    vst1.8 {q5}, [r1], r3
+    vst1.8 {q5}, [r1], r3
+    vst1.8 {q5}, [r1], r3
+
+    vpop {d8-d15}
+    ldmfd sp!, {r4, pc}    @Restoring registers from stack
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_chroma_8x8_mode_horz
+@*
+@* @brief
+@* Perform Intra prediction for chroma_8x8 mode:Horizontal
+@*
+@* @par Description:
+@* Perform Intra prediction for chroma_8x8 mode:Horizontal ,described in sec 8.3.4.2
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination with alternate U and V samples
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* The first output row is built from the last (U,V) pair of the 16 bytes at pu1_src
+@*
+@*******************************************************************************
+@*/
+@void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd (not read; r2 is reused as the loop counter)
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability (never loaded)
+
+
+    .global ih264_intra_pred_chroma_8x8_mode_horz_a9q
+
+ih264_intra_pred_chroma_8x8_mode_horz_a9q:
+
+    stmfd sp!, {r14}    @store register values to stack
+
+    vld1.u8 {q0}, [r0]    @ q0 = 16 bytes at pu1_src (8 U,V pairs)
+    mov r2, #6    @ loop counter: 3 passes of 2 rows each
+
+    vdup.u16 q1, d1[3]    @ pair 7 replicated across the row
+    vdup.u16 q2, d1[2]    @ pair 6 replicated across the row
+    vst1.8 {q1}, [r1], r3    @ row 0
+
+loop_8x8_horz:
+    vext.8 q0, q0, q0, #12    @ rotate so the next two pairs land in d1[3] and d1[2]
+    vst1.8 {q2}, [r1], r3
+    vdup.u16 q1, d1[3]
+    subs r2, #2
+    vdup.u16 q2, d1[2]
+    vst1.8 {q1}, [r1], r3
+    bne loop_8x8_horz
+
+    vext.8 q0, q0, q0, #12    @ NOTE(review): q0 is not read after this rotate
+    vst1.8 {q2}, [r1], r3    @ row 7
+
+    ldmfd sp!, {pc}    @restoring registers from stack
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_chroma_8x8_mode_vert
+@*
+@* @brief
+@* Perform 
Intra prediction for chroma_8x8 mode:vertical
+@*
+@* @par Description:
+@*Perform Intra prediction for chroma_8x8 mode:vertical ,described in sec 8.3.4.3
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination with alternate U and V samples
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* Copies the 16 bytes at pu1_src + 18 into each of the 8 destination rows
+@*
+@*******************************************************************************
+@void ih264_intra_pred_chroma_8x8_mode_vert(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd (not read)
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability (never loaded)
+
+
+    .global ih264_intra_pred_chroma_8x8_mode_vert_a9q
+
+ih264_intra_pred_chroma_8x8_mode_vert_a9q:
+
+    stmfd sp!, {r4-r12, r14}    @store register values to stack (only r0, r1 and r3 are touched below)
+
+    add r0, r0, #18    @ r0 -> pu1_src + 18 (top neighbours)
+    vld1.8 {q0}, [r0]    @ q0 = 16 top neighbour bytes
+
+    vst1.8 {q0}, [r1], r3    @ same 16 bytes written to all 8 rows
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+
+    ldmfd sp!, {r4-r12, pc}    @Restoring registers from stack
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_chroma_8x8_mode_plane
+@*
+@* @brief
+@* Perform Intra prediction for chroma_8x8 mode:PLANE
+@*
+@* @par Description:
+@* Perform Intra prediction for chroma_8x8 mode:PLANE ,described in sec 8.3.4.4
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V 
samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination with alternate U and V samples
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* ui_neighboravailability is never loaded by this implementation
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd (not read)
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability (never loaded)
+
+    .global ih264_intra_pred_chroma_8x8_mode_plane_a9q
+ih264_intra_pred_chroma_8x8_mode_plane_a9q:
+
+    stmfd sp!, {r4-r10, r12, lr}
+    vpush {d8-d15}
+
+
+    vld1.32 d0, [r0]    @ d0 = pu1_src[0..7]
+    add r10, r0, #10
+    vld1.32 d1, [r10]    @ d1 = pu1_src[10..17]
+    add r10, r10, #6    @ r10 = pu1_src + 16
+    vrev64.16 d5, d0    @ reverse the order of the four (U,V) pairs
+    vld1.32 d2, [r10]!    @ d2 = pu1_src[16..23]; writeback advances r10 by 8
+    add r10, r10, #2    @ r10 = pu1_src + 26
+    vrev64.16 d7, d2    @ reverse the order of the four (U,V) pairs
+    vld1.32 d3, [r10]    @ d3 = pu1_src[26..33]
+    sub r5, r3, #8    @ NOTE(review): r5 is not read again in this function
+    ldr r12, scratch_chroma_intrapred_addr1    @ offset of the coeffs1 table relative to scrlblc1 + 8
+scrlblc1:
+    add r12, r12, pc    @ r12 -> ih264_gai1_intrapred_chroma_plane_coeffs1
+    vsubl.u8 q5, d5, d1    @ differences over the pu1_src[0..17] side
+    vld1.64 {q4}, [r12]    @ Load multiplication factors from the coeffs1 table into q4
+    vsubl.u8 q6, d3, d7    @ differences over the pu1_src[16..33] side
+    vmul.s16 q7, q5, q4
+    vmul.s16 q8, q6, q4
+    vuzp.16 q7, q8    @ separate even (U) lanes from odd (V) lanes
+
+    vpadd.s16 d14, d14    @ two pairwise-add rounds reduce each d register to a single sum
+    vpadd.s16 d15, d15
+    vpadd.s16 d16, d16
+    vpadd.s16 d17, d17
+    vpadd.s16 d14, d14
+    vpadd.s16 d15, d15
+    vpadd.s16 d16, d16
+    vpadd.s16 d17, d17
+
+    mov r6, #34
+    vdup.16 q9, r6    @ gradient scale factor 34
+
+    vmull.s16 q11, d14, d18
+    vmull.s16 q12, d15, d18
+    vmull.s16 q13, d16, d18
+    vmull.s16 q14, d17, d18
+
+    vrshrn.s32 d10, q11, #6    @ (34 * sum + 32) >> 6
+    vrshrn.s32 d12, q12, #6
+    vrshrn.s32 d13, q13, #6
+    vrshrn.s32 d14, q14, #6
+
+
+    ldrb r6, [r0], #1    @ r6 = pu1_src[0]
+    add r10, r0, #31    @ r10 = pu1_src + 32 (r0 was already post-incremented)
+    ldrb r8, [r0], #1    @ r8 = pu1_src[1]
+    ldrb r7, [r10], #1    @ r7 = pu1_src[32]
+    ldrb r9, [r10], #1    @ r9 = pu1_src[33]
+
+    add r6, r6, r7
+    add r8, r8, r9
+    lsl r6, r6, #4    @ 16 * (pu1_src[0] + pu1_src[32])
+    lsl r8, r8, #4    @ 16 * (pu1_src[1] + pu1_src[33])
+
+    vdup.16 q0, r6
+    vdup.16 q1, r8
+    vdup.16 q2, d12[0]
+    vdup.16 q3, d10[0]
+
+    vdup.16 q12, d14[0]
+    vdup.16 q13, d13[0]
+    vzip.16 q2, q12    @ interleave the two per-component values into U,V,U,V order
+    vzip.16 q3, q13
+    vzip.16 q0, q1
+
+    ldr r12, scratch_intrapred_chroma_plane_addr1    @ offset of the coeffs2 table relative to scrlblc2 + 8
+scrlblc2:
+    add r12, r12, pc    @ r12 -> ih264_gai1_intrapred_chroma_plane_coeffs2
+    vld1.64 {q4}, [r12]
+    vmov.16 q5, q4
+    vmov q11, q4    @ keep an unzipped copy; its lanes supply the per-row factors below
+    vzip.16 q4, q5    @ duplicate each factor so U and V share it
+
+    vmul.s16 q6, q2, q4
+    vmul.s16 q8, q2, q5
+    vadd.s16 q6, q0, q6    @ q6 = row-independent term, first 8 output bytes
+    vadd.s16 q8, q0, q8    @ q8 = row-independent term, last 8 output bytes
+
+
+    vdup.16 q10, d22[0]    @ rows alternate between q10 (even rows) and q15 (odd rows) as the factor register
+    vmul.s16 q2, q3, q10
+    vdup.16 q15, d22[1]
+    vmul.s16 q9, q3, q10
+    vmul.s16 q7, q3, q15
+    vmul.s16 q4, q3, q15
+    vadd.s16 q12, q6, q2
+    vadd.s16 q0, q8, q9
+    vadd.s16 q1, q6, q7
+    vqrshrun.s16 d28, q12, #5
+    vadd.s16 q13, q8, q4
+    vqrshrun.s16 d29, q0, #5
+    vdup.16 q10, d22[2]
+    vst1.8 {q14}, [r1], r3    @ row 0
+    vqrshrun.s16 d28, q1, #5
+    vqrshrun.s16 d29, q13, #5
+    vmul.s16 q2, q3, q10
+    vmul.s16 q9, q3, q10
+    vst1.8 {q14}, [r1], r3    @ row 1
+    vadd.s16 q12, q6, q2
+    vadd.s16 q0, q8, q9
+    vdup.16 q15, d22[3]
+    vqrshrun.s16 d28, q12, #5
+    vqrshrun.s16 d29, q0, #5
+    vmul.s16 q7, q3, q15
+    vmul.s16 q4, q3, q15
+    vst1.8 {q14}, [r1], r3    @ row 2
+    vadd.s16 q1, q6, q7
+    vadd.s16 q13, q8, q4
+    vdup.16 q10, d23[0]
+    vqrshrun.s16 d28, q1, #5
+    vqrshrun.s16 d29, q13, #5
+    vmul.s16 q2, q3, q10
+    vmul.s16 q9, q3, q10
+    vst1.8 {q14}, [r1], r3    @ row 3
+    vadd.s16 q12, q6, q2
+    vadd.s16 q0, q8, q9
+    vdup.16 q15, d23[1]
+    vqrshrun.s16 d28, q12, #5
+    vqrshrun.s16 d29, q0, #5
+    vmul.s16 q7, q3, q15
+    vmul.s16 q4, q3, q15
+    vst1.8 {q14}, [r1], r3    @ row 4
+    vadd.s16 q1, q6, q7
+    vadd.s16 q13, q8, q4
+    vdup.16 q10, d23[2]
+    vqrshrun.s16 d28, q1, #5
+    vqrshrun.s16 d29, q13, #5
+    vmul.s16 q2, q3, q10
+    vmul.s16 q9, q3, q10
+    vst1.8 {q14}, [r1], r3    @ row 5
+    vadd.s16 q12, q6, q2
+    vadd.s16 q0, q8, q9
+    vdup.16 q15, d23[3]
+    vqrshrun.s16 d28, q12, #5
+    vqrshrun.s16 d29, q0, #5
+    vmul.s16 q7, q3, q15
+    vmul.s16 q4, q3, q15
+    vst1.8 {q14}, [r1], r3    @ row 6
+    vadd.s16 q1, q6, q7
+    vadd.s16 q13, q8, q4
+    vqrshrun.s16 d28, q1, #5
+    vqrshrun.s16 d29, q13, #5
+    vst1.8 {q14}, [r1], r3    @ row 7
+
+
+
+end_func_plane:
+
+
+    vpop {d8-d15}
+    ldmfd sp!, {r4-r10, r12, pc}
+
+
+
+
diff --git a/common/arm/ih264_intra_pred_luma_16x16_a9q.s b/common/arm/ih264_intra_pred_luma_16x16_a9q.s
new file mode 100755
index 0000000..e38e203
--- /dev/null
+++ b/common/arm/ih264_intra_pred_luma_16x16_a9q.s
@@ -0,0 +1,520 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_intra_pred_luma_16x16_a9q.s
+@*
+@* @brief
+@* Contains function definitions for intra 16x16 Luma prediction .
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@*
+@* - ih264_intra_pred_luma_16x16_mode_vert_a9q()
+@* - ih264_intra_pred_luma_16x16_mode_horz_a9q()
+@* - ih264_intra_pred_luma_16x16_mode_dc_a9q()
+@* - ih264_intra_pred_luma_16x16_mode_plane_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_intra_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@
+
+.text
+.p2align 2
+
+
+    .extern ih264_gai1_intrapred_luma_plane_coeffs
+.hidden ih264_gai1_intrapred_luma_plane_coeffs
+scratch_intrapred_addr1:
+    .long ih264_gai1_intrapred_luma_plane_coeffs - scrlbl1 - 8    @ pc-relative offset, resolved at scrlbl1
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_16x16_mode_vert_a9q
+@*
+@* @brief
+@* Perform Intra prediction for luma_16x16 mode:vertical
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_16x16 mode:Vertical ,described in sec 8.3.3.1
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* Copies the 16 bytes at pu1_src + 17 into each of the 16 destination rows
+@*
+@*******************************************************************************
+@void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd (not read)
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability (never loaded)
+
+
+    .global ih264_intra_pred_luma_16x16_mode_vert_a9q
+
+ih264_intra_pred_luma_16x16_mode_vert_a9q:
+
+    stmfd sp!, {r4-r12, r14}    @store register values to stack (only r0, r1 and r3 are touched below)
+
+    add r0, r0, #17    @ r0 -> pu1_src + 17 (top neighbours)
+    vld1.8 {q0}, [r0]    @ q0 = 16 top neighbour bytes
+
+    vst1.8 {q0}, [r1], r3    @ same 16 bytes written to all 16 rows
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+
+    ldmfd sp!, {r4-r12, pc}    @Restoring registers from stack
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_16x16_mode_horz_a9q
+@*
+@* @brief
+@* Perform Intra prediction for luma_16x16 mode:horizontal
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_16x16 mode:horizontal ,described in sec 8.3.3.2
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+@**************Variables Vs 
Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd (not read; r2 is reused as the loop counter)
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability (never loaded)
+
+    .global ih264_intra_pred_luma_16x16_mode_horz_a9q
+
+ih264_intra_pred_luma_16x16_mode_horz_a9q:
+
+    stmfd sp!, {r14}    @store register values to stack
+
+    vld1.u8 {q0}, [r0]    @ q0 = 16 bytes at pu1_src
+    mov r2, #14    @ loop counter: 7 passes of 2 rows each
+
+    vdup.u8 q1, d1[7]    @ byte 15 replicated across the row
+    vdup.u8 q2, d1[6]    @ byte 14 replicated across the row
+    vst1.8 {q1}, [r1], r3    @ row 0
+
+loop_16x16_horz:
+    vext.8 q0, q0, q0, #14    @ rotate so the next two bytes land in d1[7] and d1[6]
+    vst1.8 {q2}, [r1], r3
+    vdup.u8 q1, d1[7]
+    subs r2, #2
+    vdup.u8 q2, d1[6]
+    vst1.8 {q1}, [r1], r3
+    bne loop_16x16_horz
+
+    vext.8 q0, q0, q0, #14    @ NOTE(review): q0 is not read after this rotate
+    vst1.8 {q2}, [r1], r3    @ row 15
+
+    ldmfd sp!, {pc}    @Restoring registers from stack
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_16x16_mode_dc_a9q
+@*
+@* @brief
+@* Perform Intra prediction for luma_16x16 mode:DC
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_16x16 mode:DC ,described in sec 8.3.3.3
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels (bit 0x01 = left, bit 0x04 = top)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd (not read; r2 is reused as scratch for the availability tests)
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+    .global ih264_intra_pred_luma_16x16_mode_dc_a9q
+
+ih264_intra_pred_luma_16x16_mode_dc_a9q:
+
+    stmfd sp!, {r4, r14}    @store register values to stack
+    ldr r4, [sp, #8]    @r4 => ui_neighboravailability (first stack argument, above the 8 bytes just pushed)
+
+    ands r2, r4, #0x01    @CHECKING IF LEFT_AVAILABLE ELSE BRANCHING TO ONLY TOP AVAILABLE
+    beq top_available
+    ands r2, r4, #0x04    @CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
+    beq left_available
+
+    vld1.u8 {q0}, [r0]    @BOTH LEFT AND TOP AVAILABLE
+    add r0, r0, #17    @ r0 -> pu1_src + 17 (top neighbours)
+    vpaddl.u8 q0, q0    @ pairwise sums of the 16 left bytes
+    vld1.u8 {q1}, [r0]
+    vpaddl.u8 q1, q1    @ pairwise sums of the 16 top bytes
+    vadd.u16 q0, q0, q1
+    vadd.u16 d0, d0, d1
+    vpaddl.u16 d0, d0
+    vpaddl.u32 d0, d0    @ d0 = sum of all 32 neighbours
+    vqrshrun.s16 d0, q0, #5    @ rounded divide by 32
+    vdup.u8 q0, d0[0]
+    b str_pred
+
+top_available:    @ONLY TOP AVAILABLE
+    ands r2, r4, #0x04    @CHECKING TOP AVAILABILITY OR ELSE BRANCH TO NONE AVAILABLE
+    beq none_available
+
+    add r0, r0, #17    @ r0 -> pu1_src + 17 (top neighbours)
+    vld1.u8 {q0}, [r0]
+    vpaddl.u8 q0, q0
+    vadd.u16 d0, d0, d1
+    vpaddl.u16 d0, d0
+    vpaddl.u32 d0, d0    @ d0 = sum of the 16 top bytes
+    vqrshrun.s16 d0, q0, #4    @ rounded divide by 16
+    vdup.u8 q0, d0[0]
+    b str_pred
+
+left_available:    @ONLY LEFT AVAILABLE
+    vld1.u8 {q0}, [r0]
+    vpaddl.u8 q0, q0
+    vadd.u16 d0, d0, d1
+    vpaddl.u16 d0, d0
+    vpaddl.u32 d0, d0    @ d0 = sum of the 16 left bytes
+    vqrshrun.s16 d0, q0, #4    @ rounded divide by 16
+    vdup.u8 q0, d0[0]
+    b str_pred
+
+none_available:    @NONE AVAILABLE
+    vmov.u8 q0, #128
+
+str_pred:
+    vst1.8 {q0}, [r1], r3    @ the DC byte fills all 16 rows
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+    vst1.8 {q0}, [r1], r3
+
+    ldmfd sp!, {r4, pc}    @Restoring registers from stack
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_16x16_mode_plane_a9q
+@*
+@* @brief
+@* Perform Intra prediction for luma_16x16 mode:PLANE
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_16x16 mode:PLANE ,described in sec 
8.3.3.4
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* ui_neighboravailability is never loaded by this implementation
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst (moved to r2 on entry)
+@ r2 => src_strd (not read; overwritten with pu1_dst)
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability (never loaded; r4 holds the constant -1)
+
+    .global ih264_intra_pred_luma_16x16_mode_plane_a9q
+ih264_intra_pred_luma_16x16_mode_plane_a9q:
+
+    stmfd sp!, {r4-r10, r12, lr}
+
+    mov r2, r1    @ r2 = pu1_dst from here on
+    add r1, r0, #17
+    add r0, r0, #15    @ r0 = pu1_src + 15
+
+    mov r8, #9
+    sub r1, r1, #1    @ r1 = pu1_src + 16
+    mov r10, r1    @top_left
+    mov r4, #-1    @ step used for the downward walk below
+    vld1.32 d2, [r1], r8    @ d2 = pu1_src[16..23]; r1 advances to pu1_src + 25
+    ldr r7, scratch_intrapred_addr1    @ offset of the coeff table relative to scrlbl1 + 8
+scrlbl1:
+    add r7, r7, pc    @ r7 -> ih264_gai1_intrapred_luma_plane_coeffs
+
+    vld1.32 d0, [r1]    @ d0 = pu1_src[25..32]
+    vrev64.8 d2, d2    @ reverse so that d0[i] pairs with pu1_src[23 - i]
+    vld1.32 {q3}, [r7]    @ 16 coefficient bytes
+    vsubl.u8 q0, d0, d2
+    vmovl.u8 q8, d6    @ first 8 coefficients widened to 16 bit
+    vmul.s16 q0, q0, q8    @ weighted differences along the top row
+    vmovl.u8 q9, d7    @ last 8 coefficients widened to 16 bit
+
+    add r7, r0, r4, lsl #3    @ r7 = pu1_src + 7, walks down with step r4 = -1
+    sub r0, r7, r4, lsl #1    @ r0 = pu1_src + 9, walks up with step lr = +1
+    rsb lr, r4, #0x0    @ lr = +1
+
+    vpadd.s16 d0, d0, d1
+
+    ldrb r8, [r7], r4
+    ldrb r9, [r0], lr
+
+    vpaddl.s16 d0, d0
+    sub r12, r8, r9    @ 1 * (src[7] - src[9])
+
+    ldrb r8, [r7], r4
+
+    vpaddl.s32 d0, d0    @ d0 = sum of the weighted top-row differences
+    ldrb r9, [r0], lr
+    sub r8, r8, r9
+    vshl.s32 d2, d0, #2
+    add r12, r12, r8, lsl #1    @ + 2 * (src[6] - src[10])
+
+    vadd.s32 d0, d0, d2    @ 5 * sum
+    ldrb r8, [r7], r4
+    ldrb r9, [r0], lr
+    vrshr.s32 d0, d0, #6    @ i_b = D0[0]
+    sub r8, r8, r9
+    ldrb r5, [r7], r4
+    add r8, r8, r8, lsl #1    @ 3 * (src[5] - src[11])
+
+    vdup.16 q2, d0[0]    @ q2 = i_b in every lane
+    add r12, r12, r8
+    ldrb r9, [r0], lr
+    vmul.s16 q0, q2, q8    @ i_b * first 8 coefficients
+    sub r5, r5, r9
+    vmul.s16 q1, q2, q9    @ i_b * last 8 coefficients
+    add r12, r12, r5, lsl #2    @ + 4 * (src[4] - src[12])
+
+    ldrb r8, [r7], r4
+    ldrb r9, [r0], lr
+    sub r8, r8, r9
+    ldrb r5, [r7], r4
+    add r8, r8, r8, lsl #2    @ 5 * (src[3] - src[13])
+    ldrb r6, [r0], lr
+    add r12, r12, r8
+    ldrb r8, [r7], r4
+    ldrb r9, [r0], lr
+
+    sub r5, r5, r6
+    sub r8, r8, r9
+    add r5, r5, r5, lsl #1
+    rsb r8, r8, r8, lsl #3    @ 7 * (src[1] - src[15])
+    add r12, r12, r5, lsl #1    @ + 6 * (src[2] - src[14])
+    ldrb r5, [r7], r4    @ r5 = pu1_src[0]
+    ldrb r6, [r10]    @top_left
+    add r12, r12, r8
+    sub r9, r5, r6
+    ldrb r6, [r1, #7]    @ r6 = pu1_src[32]
+    add r12, r12, r9, lsl #3    @ i_c = r12
+    add r8, r5, r6
+
+    add r12, r12, r12, lsl #2    @ 5 * sum
+    lsl r8, r8, #4    @ i_a = r8
+
+    add r12, r12, #0x20
+    lsr r12, r12, #6    @ logical shift is enough: only the low 16 bits are used, by vdup.16 below
+
+    vshl.s16 q14, q2, #3    @ 8 * i_b
+    vdup.16 q3, r12    @ q3 = i_c in every lane
+
+    vdup.16 q15, r8
+    vshl.s16 q13, q3, #3    @ 8 * i_c
+    vsub.s16 q15, q15, q14
+    vsub.s16 q15, q15, q13    @ i_a - 8*i_b - 8*i_c
+    vadd.s16 q14, q15, q3    @ + i_c
+
+    mov r0, #14    @ loop counter: 7 passes of 2 rows each
+    vadd.s16 q13, q14, q0    @ row 0, first 8 columns
+    vadd.s16 q14, q14, q1    @ row 0, last 8 columns
+    vqrshrun.s16 d20, q13, #5
+    vqrshrun.s16 d21, q14, #5
+
+loop_16x16_plane:
+
+    vadd.s16 q13, q13, q3    @ each further row adds i_c
+    vadd.s16 q14, q14, q3
+    vqrshrun.s16 d22, q13, #5
+    vst1.32 {q10}, [r2], r3
+    vqrshrun.s16 d23, q14, #5
+
+    vadd.s16 q13, q13, q3
+    subs r0, #2
+    vadd.s16 q14, q14, q3
+    vqrshrun.s16 d20, q13, #5
+    vst1.32 {q11}, [r2], r3
+    vqrshrun.s16 d21, q14, #5
+    bne loop_16x16_plane
+
+    vadd.s16 q13, q13, q3
+    vadd.s16 q14, q14, q3
+    vqrshrun.s16 d22, q13, #5
+    vst1.32 {q10}, [r2], r3    @ row 14
+    vqrshrun.s16 d23, q14, #5
+    vst1.32 {q11}, [r2], r3    @ row 15
+
+    ldmfd sp!, {r4-r10, r12, pc}
+
+
+
diff --git a/common/arm/ih264_intra_pred_luma_4x4_a9q.s b/common/arm/ih264_intra_pred_luma_4x4_a9q.s
new file mode 100755
index 0000000..cb386ea
--- /dev/null
+++ b/common/arm/ih264_intra_pred_luma_4x4_a9q.s
@@ -0,0 +1,842 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@****************************************************************************** +@* @file +@* ih264_intra_pred_luma_4x4_a9q.s +@* +@* @brief +@* Contains function definitions for intra 4x4 Luma prediction . +@* +@* @author +@* Ittiam +@* +@* @par List of Functions: +@* +@* -ih264_intra_pred_luma_4x4_mode_vert_a9q +@* -ih264_intra_pred_luma_4x4_mode_horz_a9q +@* -ih264_intra_pred_luma_4x4_mode_dc_a9q +@* -ih264_intra_pred_luma_4x4_mode_diag_dl_a9q +@* -ih264_intra_pred_luma_4x4_mode_diag_dr_a9q +@* -ih264_intra_pred_luma_4x4_mode_vert_r_a9q +@* -ih264_intra_pred_luma_4x4_mode_horz_d_a9q +@* -ih264_intra_pred_luma_4x4_mode_vert_l_a9q +@* -ih264_intra_pred_luma_4x4_mode_horz_u_a9q +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@/* All the functions here are replicated from ih264_intra_pred_filters.c +@ + +@/** +@/** +@/** +@ + +.text +.p2align 2 + + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_4x4_mode_vert +@* +@* @brief +@* Perform Intra prediction for luma_4x4 mode:vertical +@* +@* @par Description: +@* Perform Intra prediction for luma_4x4 mode:vertical ,described in sec 8.3.1.2.1 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* 
@param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd (never read by this routine)
+@ r3 => dst_strd
+@ stack => ui_neighboravailability (never loaded by this routine)
+
+    .global ih264_intra_pred_luma_4x4_mode_vert_a9q
+
+ih264_intra_pred_luma_4x4_mode_vert_a9q:
+@ Copies the 4 bytes at pu1_src[5..8] into each of the 4 destination rows.
+@ Leaf routine: only r0, r1, r3 and d0 are touched, all caller-saved under the
+@ AAPCS, so nothing is pushed and the routine returns with a plain "bx lr".
+
+
+    add         r0, r0, #5              @ r0 = &pu1_src[5]
+
+    vld1.32     d0[0], [r0]             @ d0[0] = pu1_src[5..8] (one 32-bit lane)
+
+    vst1.32     d0[0], [r1], r3         @ row 0; pu1_dst += dst_strd
+    vst1.32     d0[0], [r1], r3         @ row 1
+    vst1.32     d0[0], [r1], r3         @ row 2
+    vst1.32     d0[0], [r1], r3         @ row 3
+
+
+
+    bx          lr                      @ return; no registers to restore
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_4x4_mode_horz
+@*
+@* @brief
+@* Perform Intra prediction for luma_4x4 mode:horizontal
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void
ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd (incoming value is never read; r2 is reused as the -1 step)
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability (never loaded by this routine)
+
+
+
+    .global ih264_intra_pred_luma_4x4_mode_horz_a9q
+
+ih264_intra_pred_luma_4x4_mode_horz_a9q:
+@ Row k of the 4x4 output is the single byte pu1_src[3 - k] repeated 4 times.
+@ Each byte load is issued ahead of the previous row's store, so the scalar
+@ loads and the NEON stores are interleaved rather than grouped per row.
+    stmfd sp!, {r4-r12, r14}            @ save r4-r12 and the return address
+    add r0, r0, #3                      @ r0 = &pu1_src[3], the byte used for row 0
+    mov r2 , #-1                        @ r2 = -1: post-index step, walks r0 backwards
+
+    ldrb r5, [r0], r2                   @ r5 = pu1_src[3]
+    vdup.u8 d0, r5                      @ d0 = r5 replicated in every byte lane
+    ldrb r6, [r0], r2                   @ r6 = pu1_src[2]
+    vst1.32 d0[0], [r1], r3             @ row 0 = 4 x pu1_src[3]; pu1_dst += dst_strd
+    vdup.u8 d1, r6                      @ d1 = r6 replicated
+    ldrb r7, [r0], r2                   @ r7 = pu1_src[1]
+    vst1.32 d1[0], [r1], r3             @ row 1 = 4 x pu1_src[2]
+    vdup.u8 d2, r7                      @ d2 = r7 replicated
+    ldrb r8, [r0], r2                   @ r8 = pu1_src[0]
+    vst1.32 d2[0], [r1], r3             @ row 2 = 4 x pu1_src[1]
+    vdup.u8 d3, r8                      @ d3 = r8 replicated
+    vst1.32 d3[0], [r1], r3             @ row 3 = 4 x pu1_src[0]
+
+
+    ldmfd sp!, {r4-r12, pc}             @ restore r4-r12 and return
+
+
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_4x4_mode_dc
+@*
+@* @brief
+@* Perform Intra prediction for luma_4x4 mode:DC
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_4x4 mode:DC ,described in sec 8.3.1.2.3
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 =>
dst_strd
+@ r4 => ui_neighboravailability (5th argument; loaded from the stack below)
+
+
+
+    .global ih264_intra_pred_luma_4x4_mode_dc_a9q
+
+ih264_intra_pred_luma_4x4_mode_dc_a9q:
+@ Fills the 4x4 block with one DC byte selected by ui_neighboravailability
+@ (bit 0x01 = left, bit 0x04 = top): both -> (sum of 8 + 4) >> 3,
+@ one side -> (sum of 4 + 2) >> 2, neither -> 128. Left = pu1_src[3..0], top = pu1_src[5..8].
+    stmfd sp!, {r4-r12, r14}            @ save 10 registers (40 bytes)
+    ldr r4, [sp, #40]                   @ r4 => ui_neighboravailability (first stack argument, above the 40 saved bytes)
+
+    ands r5, r4, #0x01                  @ test the LEFT flag
+    beq top_available                   @ LEFT NOT AVAILABLE
+
+    add r10, r0, #3                     @ r10 = &pu1_src[3], first left neighbour read
+    mov r2, #-1                         @ post-index step of -1 (incoming src_strd is not needed)
+    ldrb r5, [r10], r2                  @ r5 = pu1_src[3]
+    ldrb r6, [r10], r2                  @ r6 = pu1_src[2]
+    ldrb r7, [r10], r2                  @ r7 = pu1_src[1]
+    add r5, r5, r6
+    ldrb r8, [r10], r2                  @ r8 = pu1_src[0]
+    add r5, r5, r7
+    ands r11, r4, #0x04                 @ CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
+    add r5, r5, r8                      @ r5 = sum of the 4 left neighbours (plain add: flags from the ands survive)
+    beq left_available
+    add r10, r0, #5                     @ r10 = &pu1_src[5], first top neighbour
+                                        @ BOTH LEFT AND TOP AVAILABLE
+    ldrb r6, [r10], #1                  @ r6 = pu1_src[5]
+    ldrb r7, [r10], #1                  @ r7 = pu1_src[6]
+    add r5, r5, r6
+    ldrb r8, [r10], #1                  @ r8 = pu1_src[7]
+    add r5, r5, r7
+    ldrb r9, [r10], #1                  @ r9 = pu1_src[8]
+    add r5, r5, r8
+    add r5, r5, r9                      @ r5 = sum of all 8 neighbours
+    add r5, r5, #4                      @ rounding term for the divide by 8
+    lsr r5, r5, #3                      @ r5 = (sum + 4) >> 3
+    vdup.u8 d0, r5                      @ replicate the DC byte
+    vst1.32 d0[0], [r1], r3             @ row 0
+    vst1.32 d0[0], [r1], r3             @ row 1
+    vst1.32 d0[0], [r1], r3             @ row 2
+    vst1.32 d0[0], [r1], r3             @ row 3
+    b end_func
+
+top_available:                          @ ONLY TOP AVAILABLE
+    ands r11, r4, #0x04                 @ CHECKING TOP AVAILABILITY OR ELSE BRANCH TO NONE AVAILABLE
+    beq none_available
+
+    add r10, r0, #5                     @ r10 = &pu1_src[5], first top neighbour
+    ldrb r6, [r10], #1                  @ r6 = pu1_src[5]
+    ldrb r7, [r10], #1                  @ r7 = pu1_src[6]
+    ldrb r8, [r10], #1                  @ r8 = pu1_src[7]
+    add r5, r6, r7
+    ldrb r9, [r10], #1                  @ r9 = pu1_src[8]
+    add r5, r5, r8
+    add r5, r5, r9                      @ r5 = sum of the 4 top neighbours
+    add r5, r5, #2                      @ rounding term for the divide by 4
+    lsr r5, r5, #2                      @ r5 = (sum + 2) >> 2
+    vdup.u8 d0, r5                      @ replicate the DC byte
+    vst1.32 d0[0], [r1], r3             @ row 0
+    vst1.32 d0[0], [r1], r3             @ row 1
+    vst1.32 d0[0], [r1], r3             @ row 2
+    vst1.32 d0[0], [r1], r3             @ row 3
+    b end_func
+
+left_available:                         @ ONLY LEFT AVAILABLE (r5 already holds the left sum)
+    add r5, r5, #2                      @ rounding term for the divide by 4
+    lsr r5, r5, #2                      @ r5 = (sum + 2) >> 2
+    vdup.u8 d0, r5                      @ replicate the DC byte
+    vst1.32 d0[0], [r1], r3             @ row 0
+    vst1.32 d0[0], [r1], r3             @ row 1
+    vst1.32 d0[0], [r1], r3             @ row 2
+    vst1.32 d0[0], [r1], r3             @ row 3
+    b end_func
+
+none_available:                         @ NONE AVAILABLE
+    mov r5, #128                        @ fixed DC value when no neighbour can be used
+    vdup.u8 d0, r5                      @ replicate the DC byte
+    vst1.32 d0[0], [r1], r3             @ row 0
+    vst1.32 d0[0], [r1], r3             @ row 1
+    vst1.32 d0[0], [r1], r3             @ row 2
+    vst1.32 d0[0], [r1], r3             @ row 3
+    b end_func                          @ redundant: end_func is the next instruction
+
+
+end_func:
+    ldmfd sp!, {r4-r12, pc}             @ restore r4-r12 and return
+
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@* +@*ih264_intra_pred_luma_4x4_mode_diag_dl +@* +@* @brief +@* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left +@* +@* @par Description: +@* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_4x4_mode_diag_dl_a9q + +ih264_intra_pred_luma_4x4_mode_diag_dl_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + + add r0, r0, #5 + sub r5, r3, #2 + add r6, r0, #7 + vld1.8 {d0}, [r0] + vext.8 d1, d0, d0, #1 + vext.8 d2, d0, d0, #2 + vld1.8 {d2[6]}, [r6] + vaddl.u8 q10, d0, d1 + vaddl.u8 q11, d1, d2 + vadd.u16 q12, q10, q11 + vqrshrun.s16 d3, q12, #2 + vst1.32 {d3[0]}, [r1], r3 + vext.8 d4, d3, d3, #1 + vst1.32 {d4[0]}, [r1], r3 + vst1.16 {d3[1]}, [r1]! + vst1.16 {d3[2]}, [r1], r5 + vst1.16 {d4[1]}, [r1]! 
+ vst1.16 {d4[2]}, [r1] + +end_func_diag_dl: + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + + + + + + + + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_4x4_mode_diag_dr +@* +@* @brief +@* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right +@* +@* @par Description: +@* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_4x4_mode_diag_dr_a9q + +ih264_intra_pred_luma_4x4_mode_diag_dr_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + + + vld1.u8 {d0}, [r0] + add r0, r0, #1 + vld1.u8 {d1}, [r0] + vext.8 d2, d1, d1, #1 + vaddl.u8 q10, d0, d1 + vaddl.u8 q11, d1, d2 + vadd.u16 q12, q10, q11 + vqrshrun.s16 d3, q12, #2 + + vext.8 d4, d3, d3, #1 + sub r5, r3, #2 + vst1.16 {d4[1]}, [r1]! + vst1.16 {d4[2]}, [r1], r5 + vst1.16 {d3[1]}, [r1]! 
+ vst1.16 {d3[2]}, [r1], r5 + vst1.32 {d4[0]}, [r1], r3 + vst1.32 {d3[0]}, [r1], r3 + +end_func_diag_dr: + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + + + + + + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_4x4_mode_vert_r +@* +@* @brief +@* Perform Intra prediction for luma_4x4 mode:Vertical_Right +@* +@* @par Description: +@* Perform Intra prediction for luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_4x4_mode_vert_r_a9q + +ih264_intra_pred_luma_4x4_mode_vert_r_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + + + vld1.u8 {d0}, [r0] + add r0, r0, #1 + vld1.u8 {d1}, [r0] + vext.8 d2, d1, d1, #1 + vaddl.u8 q10, d0, d1 + vaddl.u8 q11, d1, d2 + vadd.u16 q12, q10, q11 + vqrshrun.s16 d4, q10, #1 + vqrshrun.s16 d3, q12, #2 + sub r5, r3, #2 + vext.8 d5, d3, d3, #3 + vst1.32 {d4[1]}, [r1], r3 + vst1.32 {d5[0]}, [r1], r3 + sub r8, r3, #3 + vst1.u8 {d3[2]}, [r1]! + vst1.16 {d4[2]}, [r1]! + vst1.u8 {d4[6]}, [r1], r8 + vst1.u8 {d3[1]}, [r1]! + vst1.16 {d5[0]}, [r1]! 
+ vst1.u8 {d5[2]}, [r1] + + +end_func_vert_r: + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + + + + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_4x4_mode_horz_d +@* +@* @brief +@* Perform Intra prediction for luma_4x4 mode:Horizontal_Down +@* +@* @par Description: +@* Perform Intra prediction for luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_4x4_mode_horz_d_a9q + +ih264_intra_pred_luma_4x4_mode_horz_d_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + + vld1.u8 {d0}, [r0] + add r0, r0, #1 + vld1.u8 {d1}, [r0] + vext.8 d2, d1, d0, #1 + vaddl.u8 q10, d0, d1 + vaddl.u8 q11, d1, d2 + vadd.u16 q12, q10, q11 + vqrshrun.s16 d4, q10, #1 + vqrshrun.s16 d5, q12, #2 + sub r5, r3, #2 + vmov.8 d6, d5 + vtrn.8 d4, d5 @ + vst1.u16 {d5[1]}, [r1]! + vst1.16 {d6[2]}, [r1], r5 + vst1.u16 {d4[1]}, [r1]! + vst1.16 {d5[1]}, [r1], r5 + vst1.u16 {d5[0]}, [r1]! + vst1.16 {d4[1]}, [r1], r5 + vst1.u16 {d4[0]}, [r1]! 
+ vst1.16 {d5[0]}, [r1], r5 + +end_func_horz_d: + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + + + + + + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_4x4_mode_vert_l +@* +@* @brief +@* Perform Intra prediction for luma_4x4 mode:Vertical_Left +@* +@* @par Description: +@* Perform Intra prediction for luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_4x4_mode_vert_l_a9q + +ih264_intra_pred_luma_4x4_mode_vert_l_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + add r0, r0, #4 + vld1.u8 {d0}, [r0] + add r0, r0, #1 + vld1.u8 {d1}, [r0] + vext.8 d2, d1, d0, #1 + vaddl.u8 q10, d0, d1 + vaddl.u8 q11, d1, d2 + vadd.u16 q12, q10, q11 + vqrshrun.s16 d4, q10, #1 + vqrshrun.s16 d5, q12, #2 + vext.8 d6, d4, d4, #1 + vext.8 d7, d5, d5, #1 + vst1.32 {d6[0]}, [r1], r3 + vext.8 d16, d4, d4, #2 + vext.8 d17, d5, d5, #2 + vst1.32 {d7[0]}, [r1], r3 + vst1.32 {d16[0]}, [r1], r3 + vst1.32 {d17[0]}, [r1], r3 + + + +end_func_vert_l: + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + + + + + + +@/** 
+@******************************************************************************* +@* +@*ih264_intra_pred_luma_4x4_mode_horz_u +@* +@* @brief +@* Perform Intra prediction for luma_4x4 mode:Horizontal_Up +@* +@* @par Description: +@* Perform Intra prediction for luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_4x4_mode_horz_u_a9q + +ih264_intra_pred_luma_4x4_mode_horz_u_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + mov r10, r0 + vld1.u8 {d0}, [r0] + ldrb r9, [r0], #1 + vext.8 d1, d0, d0, #1 + vld1.u8 {d0[7]}, [r10] + vext.8 d2, d1, d1, #1 + vaddl.u8 q10, d0, d1 + vaddl.u8 q11, d1, d2 + vadd.u16 q12, q10, q11 + vqrshrun.s16 d4, q10, #1 + vqrshrun.s16 d5, q12, #2 + vmov d6, d4 + vext.8 d6, d5, d4, #1 + vst1.8 {d4[2]}, [r1]! + vst1.8 {d6[0]}, [r1]! + vtrn.8 d6, d5 @ + sub r5, r3, #2 + vtrn.8 d4, d6 @ + vdup.8 d7, r9 + vst1.16 {d6[0]}, [r1], r5 + vst1.16 {d6[0]}, [r1]! + vst1.16 {d5[3]}, [r1], r5 + vst1.16 {d5[3]}, [r1]! 
+ vst1.16 {d7[3]}, [r1], r5 + vst1.32 {d7[0]}, [r1], r3 + +end_func_horz_u: + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + diff --git a/common/arm/ih264_intra_pred_luma_8x8_a9q.s b/common/arm/ih264_intra_pred_luma_8x8_a9q.s new file mode 100755 index 0000000..6da1c95 --- /dev/null +++ b/common/arm/ih264_intra_pred_luma_8x8_a9q.s @@ -0,0 +1,1037 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@****************************************************************************** +@* @file +@* ih264_intra_pred_luma_8x8_a9q.s +@* +@* @brief +@* Contains function definitions for intra 8x8 Luma prediction . 
+@* +@* @author +@* Ittiam +@* +@* @par List of Functions: +@* +@* -ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q +@* -ih264_intra_pred_luma_8x8_mode_vert_a9q +@* -ih264_intra_pred_luma_8x8_mode_horz_a9q +@* -ih264_intra_pred_luma_8x8_mode_dc_a9q +@* -ih264_intra_pred_luma_8x8_mode_diag_dl_a9q +@* -ih264_intra_pred_luma_8x8_mode_diag_dr_a9q +@* -ih264_intra_pred_luma_8x8_mode_vert_r_a9q +@* -ih264_intra_pred_luma_8x8_mode_horz_d_a9q +@* -ih264_intra_pred_luma_8x8_mode_vert_l_a9q +@* -ih264_intra_pred_luma_8x8_mode_horz_u_a9q +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ + +@/* All the functions here are replicated from ih264_intra_pred_filters.c +@ + +@/** +@/** +@/** +@ + + +.text +.p2align 2 + + .extern ih264_gai1_intrapred_luma_8x8_horz_u +.hidden ih264_gai1_intrapred_luma_8x8_horz_u +scratch_intrapred_addr_8x8: + .long ih264_gai1_intrapred_luma_8x8_horz_u - scrlb8x8l2 - 8 + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_8x8_mode_ref_filtering +@* +@* @brief +@* Reference sample filtering process for Intra_8x8 sample prediction +@* +@* @par Description: +@* Perform Reference sample filtering process for Intra_8x8 sample prediction ,described in sec 8.3.2.2.1 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride [Not used] +@* +@* @param[in] dst_strd +@* integer destination stride[Not used] +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels[Not used] +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ 
r1 => *pu1_dst
+@ Output: 25 bytes at pu1_dst, each a rounded (1,2,1)/4 filter of pu1_src[0..24];
+@ the two end samples use the (3,1)/4 forms quoted in the comments below.
+    .global ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q
+
+ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q:
+@ NOTE(review): the two 16-byte loads read pu1_src[0..31] although only [0..24] are used - confirm the caller's buffer is padded.
+    stmfd sp!, {r4-r12, r14}            @ save r4-r12 and the return address
+    vpush {d8-d15}                      @ q4-q7 are used below and are callee-saved
+
+
+    vld1.u8 {q0}, [r0]!                 @ q0 = src[0..15]; r0 = src + 16
+    vld1.u8 {q1}, [r0]                  @ q1 = src[16..31]
+    add r0, r0, #8                      @ r0 = &src[24]
+    vext.8 q2, q0, q1, #1               @ q2 = src[1..16]
+    vext.8 q3, q1, q1, #1               @ q3 = q1 rotated by one byte; d6 = src[17..24]
+    vext.8 q4, q2, q3, #1               @ q4 = src[2..17]
+    vext.8 q5, q3, q3, #1               @ q5 = q3 rotated by one byte; d10 = src[18..25]
+    vld1.8 {d10[7]}, [r0]               @ LOADING SRC[24] AGAIN TO THE END FOR p'[ 15, -1 ] = ( p[ 14, -1 ] + 3 * p[ 15, -1 ] + 2 ) >> 2
+    vaddl.u8 q10, d0, d4                @ q10[i] = src[i] + src[i+1], i = 0..7
+    vaddl.u8 q7, d0, d0                 @ SPECIAL CASE FOR p'[ -1 ,7 ] = ( p[ -1, 6 ] + 3 * p[ -1, 7 ] + 2 ) >> 2
+    vadd.u16 q7, q10, q7                @ q7[0] = 3*src[0] + src[1] (only lane 0 is used)
+    vaddl.u8 q11, d1, d5                @ q11[i] = src[8+i] + src[9+i]
+    vqrshrun.s16 d14, q7, #2            @ d14[0] = (3*src[0] + src[1] + 2) >> 2
+    vaddl.u8 q12, d4, d8                @ q12[i] = src[i+1] + src[i+2], i = 0..7
+    vaddl.u8 q13, d5, d9                @ q13[i] = src[9+i] + src[10+i]
+    vst1.8 {d14[0]}, [r1]!              @ dst[0]; r1 = dst + 1
+    vadd.u16 q12, q10, q12              @ src[i] + 2*src[i+1] + src[i+2], i = 0..7
+    vadd.u16 q13, q11, q13              @ same sum for i = 8..15
+    vaddl.u8 q9, d2, d6                 @ q9[i] = src[16+i] + src[17+i]
+    vaddl.u8 q8, d6, d10                @ q8[i] = src[17+i] + src[18+i]; last lane is src[24] + src[24]
+    vqrshrun.s16 d4, q12, #2            @ d4 = filtered samples 1..8
+    vqrshrun.s16 d5, q13, #2            @ d5 = filtered samples 9..16
+    vadd.u16 q6, q8, q9                 @ src[16+i] + 2*src[17+i] + src[18+i]
+    vst1.8 {q2}, [r1]!                  @ dst[1..16]; r1 = dst + 17
+    vqrshrun.s16 d6, q6, #2             @ d6 = filtered samples 17..24
+    vst1.8 {d6}, [r1]                   @ dst[17..24]
+
+
+end_func_ref_filt:
+    vpop {d8-d15}                       @ restore the callee-saved NEON registers
+    ldmfd sp!, {r4-r12, pc}             @ restore r4-r12 and return
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_vert
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:vertical
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@
WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd (never read by this routine)
+@ r3 => dst_strd
+@ stack => ui_neighboravailability (never loaded by this routine)
+
+
+    .global ih264_intra_pred_luma_8x8_mode_vert_a9q
+
+ih264_intra_pred_luma_8x8_mode_vert_a9q:
+@ Copies the 8 bytes at pu1_src[9..16] into each of the 8 destination rows.
+@ Leaf routine using only caller-saved r0, r1, r3, d0: no stack frame, returns with "bx lr".
+
+    add         r0, r0, #9              @ r0 = &pu1_src[9]
+    vld1.8      d0, [r0]                @ d0 = pu1_src[9..16]
+
+    vst1.8      d0, [r1], r3            @ row 0; pu1_dst += dst_strd
+    vst1.8      d0, [r1], r3            @ row 1
+    vst1.8      d0, [r1], r3            @ row 2
+    vst1.8      d0, [r1], r3            @ row 3
+    vst1.8      d0, [r1], r3            @ row 4
+    vst1.8      d0, [r1], r3            @ row 5
+    vst1.8      d0, [r1], r3            @ row 6
+    vst1.8      d0, [r1], r3            @ row 7
+
+    bx          lr                      @ return; no registers to restore
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_horz
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:horizontal
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:horizontal ,described in sec 8.3.2.2.2
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+    .global ih264_intra_pred_luma_8x8_mode_horz_a9q
+
+ih264_intra_pred_luma_8x8_mode_horz_a9q: + + stmfd sp!, {r14} @store register values to stack + + vld1.u8 {d0}, [r0] + mov r2, #6 + + vdup.u8 d1, d0[7] + vdup.u8 d2, d0[6] + vst1.8 {d1}, [r1], r3 + +loop_8x8_horz: + vext.8 d0, d0, d0, #6 + vst1.8 {d2}, [r1], r3 + vdup.u8 d1, d0[7] + subs r2, #2 + vdup.u8 d2, d0[6] + vst1.8 {d1}, [r1], r3 + bne loop_8x8_horz + + vext.8 d0, d0, d0, #6 + vst1.8 {d2}, [r1], r3 + + ldmfd sp!, {pc} @restoring registers from stack + + + + + +@/****************************************************************************** + + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_8x8_mode_dc +@* +@* @brief +@* Perform Intra prediction for luma_8x8 mode:DC +@* +@* @par Description: +@* Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.3 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_8x8_mode_dc_a9q + +ih264_intra_pred_luma_8x8_mode_dc_a9q: + + stmfd sp!, {r4, r14} @store register values to stack + ldr r4, [sp, #8] @r4 => ui_neighboravailability + + ands r2, r4, #0x01 @CHECKING IF LEFT_AVAILABLE ELSE BRANCHING TO ONLY TOP AVAILABLE + beq top_available + ands r2, r4, #0x04 @CHECKING IF 
TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE + beq left_available + + vld1.u8 {d0}, [r0] @BOTH LEFT AND TOP AVAILABLE + add r0, r0, #9 + vld1.u8 {d1}, [r0] + vpaddl.u8 q0, q0 + vadd.u16 d0, d0, d1 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + vqrshrun.s16 d0, q0, #4 + vdup.u8 d0, d0[0] + b str_pred + +top_available: @ONLY TOP AVAILABLE + ands r2, r4, #0x04 @CHECKING TOP AVAILABILTY OR ELSE BRANCH TO NONE AVAILABLE + beq none_available + + add r0, r0, #9 + vld1.u8 {d0}, [r0] + vpaddl.u8 d0, d0 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + vqrshrun.s16 d0, q0, #3 + vdup.u8 d0, d0[0] + b str_pred + +left_available: @ONLY LEFT AVAILABLE + vld1.u8 {d0}, [r0] + vpaddl.u8 d0, d0 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + vqrshrun.s16 d0, q0, #3 + vdup.u8 d0, d0[0] + b str_pred + +none_available: @NONE AVAILABLE + vmov.u8 q0, #128 + +str_pred: + vst1.8 {d0}, [r1], r3 + vst1.8 {d0}, [r1], r3 + vst1.8 {d0}, [r1], r3 + vst1.8 {d0}, [r1], r3 + vst1.8 {d0}, [r1], r3 + vst1.8 {d0}, [r1], r3 + vst1.8 {d0}, [r1], r3 + vst1.8 {d0}, [r1], r3 + + ldmfd sp!, {r4, pc} @Restoring registers from stack + + + + + + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_8x8_mode_diag_dl +@* +@* @brief +@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left +@* +@* @par Description: +@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.4 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ 
WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + .global ih264_intra_pred_luma_8x8_mode_diag_dl_a9q + +ih264_intra_pred_luma_8x8_mode_diag_dl_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + + add r0, r0, #9 + sub r5, r3, #4 + add r6, r0, #15 + vld1.8 {q0}, [r0] + vext.8 q2, q0, q0, #2 + vext.8 q1, q0, q0, #1 + vld1.8 {d5[6]}, [r6] + @ q1 = q0 shifted to left once + @ q2 = q1 shifted to left once + vaddl.u8 q10, d0, d2 @Adding for FILT121 + vaddl.u8 q11, d1, d3 + vaddl.u8 q12, d2, d4 + vaddl.u8 q13, d3, d5 + vadd.u16 q12, q10, q12 + vadd.u16 q13, q11, q13 + + vqrshrun.s16 d4, q12, #2 + vqrshrun.s16 d5, q13, #2 + @Q2 has all FILT121 values + vst1.8 {d4}, [r1], r3 + vext.8 q9, q2, q2, #1 + vext.8 q8, q9, q9, #1 + vst1.8 {d18}, [r1], r3 + vext.8 q15, q8, q8, #1 + vst1.8 {d16}, [r1], r3 + vst1.8 {d30}, [r1], r3 + vst1.32 {d4[1]}, [r1]! + vst1.32 {d5[0]}, [r1], r5 + vst1.32 {d18[1]}, [r1]! + vst1.32 {d19[0]}, [r1], r5 + vst1.32 {d16[1]}, [r1]! + vst1.32 {d17[0]}, [r1], r5 + vst1.32 {d30[1]}, [r1]! 
+ vst1.32 {d31[0]}, [r1], r5 + + +end_func_diag_dl: + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + + + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_8x8_mode_diag_dr +@* +@* @brief +@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right +@* +@* @par Description: +@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.5 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_8x8_mode_diag_dr_a9q + +ih264_intra_pred_luma_8x8_mode_diag_dr_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + + + vld1.u8 {q0}, [r0] + add r0, r0, #1 + vld1.u8 {q1}, [r0] + vext.8 q2, q1, q1, #1 + @ q1 = q0 shifted to left once + @ q2 = q1 shifted to left once + vaddl.u8 q10, d0, d2 @Adding for FILT121 + vaddl.u8 q11, d1, d3 + vaddl.u8 q12, d2, d4 + vaddl.u8 q13, d3, d5 + vadd.u16 q12, q10, q12 + vadd.u16 q13, q11, q13 + vqrshrun.s16 d4, q12, #2 + vqrshrun.s16 d5, q13, #2 + @Q2 has all FILT121 values + sub r5, r3, #4 + vext.8 q9, q2, q2, #15 + vst1.8 {d19}, [r1], r3 + vext.8 q8, q9, q9, #15 + vst1.8 {d17}, [r1], r3 + vext.8 q15, q8, q8, #15 + vst1.8 {d31}, [r1], r3 + vst1.32 
{d4[1]}, [r1]! + vst1.32 {d5[0]}, [r1], r5 + vst1.32 {d18[1]}, [r1]! + vst1.32 {d19[0]}, [r1], r5 + vst1.32 {d16[1]}, [r1]! + vst1.32 {d17[0]}, [r1], r5 + vst1.32 {d30[1]}, [r1]! + vst1.32 {d31[0]}, [r1], r5 + vst1.8 {d4}, [r1], r3 + +end_func_diag_dr: + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + + + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_8x8_mode_vert_r +@* +@* @brief +@* Perform Intra prediction for luma_8x8 mode:Vertical_Right +@* +@* @par Description: +@* Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.6 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_8x8_mode_vert_r_a9q + +ih264_intra_pred_luma_8x8_mode_vert_r_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + + vld1.u8 {q0}, [r0] + add r0, r0, #1 + vld1.u8 {q1}, [r0] + vext.8 q2, q1, q1, #1 + @ q1 = q0 shifted to left once + @ q2 = q1 shifted to left once + vaddl.u8 q10, d0, d2 + vaddl.u8 q11, d1, d3 + vaddl.u8 q12, d2, d4 + vaddl.u8 q13, d3, d5 + vadd.u16 q12, q10, q12 + vadd.u16 q13, q11, q13 + + vqrshrun.s16 d4, q10, #1 + vqrshrun.s16 d5, q11, #1 + vqrshrun.s16 d6, q12, #2 + vqrshrun.s16 d7, 
q13, #2 + @Q2 has all FILT11 values + @Q3 has all FILT121 values + sub r5, r3, #6 + sub r6, r3, #4 + vst1.8 {d5}, [r1], r3 @ row 0 + vext.8 q9, q3, q3, #15 + vmov.8 q11, q9 + vext.8 q8, q2, q2, #1 + vst1.8 {d19}, [r1], r3 @row 1 + + vmov.8 q15, q8 + vext.8 q10, q2, q2, #15 + vuzp.8 q8, q9 + @row 2 + vext.8 q14, q8, q8, #1 + vst1.8 {d21}, [r1] + vst1.8 {d6[6]}, [r1], r3 + @row 3 + + vst1.16 {d29[1]}, [r1]! + vst1.32 {d7[0]}, [r1]! + vst1.16 {d7[2]}, [r1], r5 +@row 4 + vst1.16 {d19[1]}, [r1]! + vst1.32 {d5[0]}, [r1]! + vst1.16 {d5[2]}, [r1], r5 + +@row 5 + vext.8 q13, q9, q9, #1 + vst1.16 {d17[1]}, [r1]! + vst1.32 {d23[0]}, [r1]! + vst1.16 {d23[2]}, [r1], r5 + + +@row 6 + vst1.16 {d27[0]}, [r1]! + vst1.8 {d27[2]}, [r1]! + vst1.8 {d5[0]}, [r1]! + vst1.32 {d31[0]}, [r1], r6 +@row 7 + vst1.32 {d29[0]}, [r1]! + vst1.32 {d7[0]}, [r1]! + + + +end_func_vert_r: + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + + + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_8x8_mode_horz_d +@* +@* @brief +@* Perform Intra prediction for luma_8x8 mode:Horizontal_Down +@* +@* @par Description: +@* Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.7 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => 
src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + .global ih264_intra_pred_luma_8x8_mode_horz_d_a9q + +ih264_intra_pred_luma_8x8_mode_horz_d_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + vpush {d8-d15} + + vld1.u8 {q0}, [r0] + add r0, r0, #1 + vld1.u8 {q1}, [r0] + vext.8 q2, q1, q1, #1 + @ q1 = q0 shifted to left once + @ q2 = q1 shifted to left once + vaddl.u8 q10, d0, d2 + vaddl.u8 q11, d1, d3 + vaddl.u8 q12, d2, d4 + vaddl.u8 q13, d3, d5 + vadd.u16 q12, q10, q12 + vadd.u16 q13, q11, q13 + + vqrshrun.s16 d4, q10, #1 + vqrshrun.s16 d5, q11, #1 + vqrshrun.s16 d6, q12, #2 + vqrshrun.s16 d7, q13, #2 + @Q2 has all FILT11 values + @Q3 has all FILT121 values + vmov.8 q4, q2 + vmov.8 q5, q3 + sub r6, r3, #6 + vtrn.8 q4, q5 @ + vmov.8 q6, q4 + vmov.8 q7, q5 + sub r5, r3, #4 + vtrn.16 q6, q7 + vext.8 q8, q3, q3, #14 + @ROW 0 + vst1.8 {d17}, [r1] + vst1.16 {d10[3]}, [r1], r3 + + @ROW 1 + vst1.32 {d14[1]}, [r1]! + vst1.32 {d7[0]}, [r1], r5 + @ROW 2 + vst1.16 {d10[2]}, [r1]! + vst1.32 {d14[1]}, [r1]! + vst1.16 {d7[0]}, [r1], r6 + @ROW 3 + vst1.32 {d12[1]}, [r1]! + vst1.32 {d14[1]}, [r1], r5 + @ROW 4 + vst1.16 {d14[1]}, [r1]! + vst1.32 {d12[1]}, [r1]! + vst1.16 {d14[2]}, [r1], r6 + @ROW 5 + vst1.32 {d14[0]}, [r1]! + vst1.32 {d12[1]}, [r1], r5 + @ROW 6 + vst1.16 {d10[0]}, [r1]! + vst1.16 {d8[1]}, [r1]! + vst1.16 {d14[1]}, [r1]! + vst1.16 {d12[2]}, [r1], r6 + @ROW 7 + vst1.32 {d12[0]}, [r1]! 
+ vst1.32 {d14[0]}, [r1], r5 + +end_func_horz_d: + vpop {d8-d15} + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + + + + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_8x8_mode_vert_l +@* +@* @brief +@* Perform Intra prediction for luma_8x8 mode:Vertical_Left +@* +@* @par Description: +@* Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.8 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_8x8_mode_vert_l_a9q + +ih264_intra_pred_luma_8x8_mode_vert_l_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + vpush {d8-d15} + add r0, r0, #9 + vld1.u8 {q0}, [r0] + add r0, r0, #1 + vld1.u8 {q1}, [r0] + vext.8 q2, q1, q1, #1 + vaddl.u8 q10, d0, d2 + vaddl.u8 q11, d1, d3 + vaddl.u8 q12, d2, d4 + vaddl.u8 q13, d3, d5 + vadd.u16 q12, q10, q12 + vadd.u16 q13, q11, q13 + + vqrshrun.s16 d4, q10, #1 + vqrshrun.s16 d5, q11, #1 + vqrshrun.s16 d6, q12, #2 + vext.8 q4, q2, q2, #1 + vqrshrun.s16 d7, q13, #2 + @Q2 has all FILT11 values + @Q3 has all FILT121 values + + vext.8 q5, q3, q3, #1 + @ROW 0,1 + vst1.8 {d4}, [r1], r3 + vst1.8 {d6}, [r1], r3 + + vext.8 q6, q4, q4, #1 + vext.8 q7, q5, q5, 
#1 + @ROW 2,3 + vst1.8 {d8}, [r1], r3 + vst1.8 {d10}, [r1], r3 + + vext.8 q8, q6, q6, #1 + vext.8 q9, q7, q7, #1 + @ROW 4,5 + vst1.8 {d12}, [r1], r3 + vst1.8 {d14}, [r1], r3 + @ROW 6,7 + vst1.8 {d16}, [r1], r3 + vst1.8 {d18}, [r1], r3 + +end_func_vert_l: + vpop {d8-d15} + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + + + + +@/** +@******************************************************************************* +@* +@*ih264_intra_pred_luma_8x8_mode_horz_u +@* +@* @brief +@* Perform Intra prediction for luma_8x8 mode:Horizontal_Up +@* +@* @par Description: +@* Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.9 +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] ui_neighboravailability +@* availability of neighbouring pixels +@* +@* @returns +@* +@* @remarks +@* None +@* +@*******************************************************************************/ +@void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 ui_neighboravailability) + +@**************Variables Vs Registers***************************************** +@ r0 => *pu1_src +@ r1 => *pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ r4 => ui_neighboravailability + + .global ih264_intra_pred_luma_8x8_mode_horz_u_a9q + +ih264_intra_pred_luma_8x8_mode_horz_u_a9q: + + stmfd sp!, {r4-r12, r14} @store register values to stack + vpush {d8-d15} + + vld1.u8 {q0}, [r0] + vld1.u8 {d1[7]}, [r0] + vext.8 q1, q0, q0, #1 + vext.8 q2, q1, q1, #1 + @ LOADING V TABLE + ldr r12, scratch_intrapred_addr_8x8 +scrlb8x8l2: + add r12, r12, pc + vaddl.u8 q10, d0, d2 + vaddl.u8 q11, d1, d3 + vaddl.u8 q12, d2, d4 + vaddl.u8 q13, d3, d5 + vadd.u16 q12, q10, q12 + vadd.u16 q13, q11, q13 + vld1.u8 {q5}, [r12] + 
vqrshrun.s16 d4, q10, #1 + vqrshrun.s16 d5, q11, #1 + vqrshrun.s16 d6, q12, #2 + vqrshrun.s16 d7, q13, #2 + @Q2 has all FILT11 values + @Q3 has all FILT121 values + vtbl.u8 d12, {q2, q3}, d10 + vdup.u8 q7, d5[7] @ + vtbl.u8 d13, {q2, q3}, d11 + vext.8 q8, q6, q7, #2 + vext.8 q9, q8, q7, #2 + vst1.8 {d12}, [r1], r3 + vext.8 q10, q9, q7, #2 + vst1.8 {d16}, [r1], r3 + vst1.8 {d18}, [r1], r3 + vst1.8 {d20}, [r1], r3 + vst1.8 {d13}, [r1], r3 + vst1.8 {d17}, [r1], r3 + vst1.8 {d19}, [r1], r3 + vst1.8 {d21}, [r1], r3 + + +end_func_horz_u: + vpop {d8-d15} + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + + + + + + + diff --git a/common/arm/ih264_iquant_itrans_recon_a9.s b/common/arm/ih264_iquant_itrans_recon_a9.s new file mode 100755 index 0000000..f71ca69 --- /dev/null +++ b/common/arm/ih264_iquant_itrans_recon_a9.s @@ -0,0 +1,871 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +@*/ +@/** +@ ******************************************************************************* +@ * @file +@ * ih264_iquant_itrans_recon_a9.s +@ * +@ * @brief +@ * Contains function definitions for single stage inverse transform +@ * +@ * @author +@ * Mohit +@ * Harinarayanaan +@ * +@ * @par List of Functions: +@ * - ih264_iquant_itrans_recon_4x4_a9() +@ * - ih264_iquant_itrans_recon_8x8_a9() +@ * - ih264_iquant_itrans_recon_chroma_4x4_a9() +@ * +@ * @remarks +@ * None +@ * +@ ******************************************************************************* +@*/ +@/** +@ ******************************************************************************* +@ * +@ * @brief +@ * This function performs inverse quant and Inverse transform type Ci4 for 4*4 block +@ * +@ * @par Description: +@ * Performs inverse transform Ci4 and adds the residue to get the +@ * reconstructed block +@ * +@ * @param[in] pi2_src +@ * Input 4x4 coefficients +@ * +@ * @param[in] pu1_pred +@ * Prediction 4x4 block +@ * +@ * @param[out] pu1_out +@ * Output 4x4 block +@ * +@ * @param[in] u4_qp_div_6 +@ * QP +@ * +@ * @param[in] pu2_weigh_mat +@ * Pointer to weight matrix +@ * +@ * @param[in] pred_strd, +@ * Prediction stride +@ * +@ * @param[in] out_strd +@ * Output Stride +@ * +@ *@param[in] pi2_tmp +@ * temporary buffer of size 1*16 +@ * +@ * @param[in] pu2_iscal_mat +@ * Pointer to the inverse quantization matrix +@ * +@ * @returns Void +@ * +@ * @remarks +@ * None +@ * +@ ******************************************************************************* +@ */ +@void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src, +@ UWORD8 *pu1_pred, +@ UWORD8 *pu1_out, +@ WORD32 pred_strd, +@ WORD32 out_strd, +@ const UWORD16 *pu2_iscal_mat, +@ const UWORD16 *pu2_weigh_mat, +@ UWORD32 u4_qp_div_6, +@ WORD32 *pi4_tmp, +@ WORD32 iq_start_idx +@ WORD16 *pi2_dc_ld_addr) +@**************Variables Vs Registers***************************************** +@r0 => *pi2_src +@r1 => *pu1_pred +@r2 => *pu1_out +@r3 => 
pred_strd +@r4 => out_strd +@r5 => *pu2_iscal_mat +@r6 => *pu2_weigh_mat +@r7 => u4_qp_div_6 +@r8 => iq_start_idx +@r10=> pi2_dc_ld_addr +.text +.p2align 2 + + .global ih264_iquant_itrans_recon_4x4_a9 + +ih264_iquant_itrans_recon_4x4_a9: + +@VLD4.S16 is used because the pointer is incremented by SUB_BLK_WIDTH_4x4 +@If the macro value changes need to change the instruction according to it. +@Only one shift is done in horizontal inverse because, +@if u4_qp_div_6 is less than 4 then shift value will be negative and do negative left shift, in this case rnd_factor has value +@if u4_qp_div_6 is greater than 4 then shift value will be positive and do left shift, here rnd_factor is 0 + + stmfd sp!, {r4-r12, r14} @stack stores the values of the arguments + ldr r7, [sp, #52] @Loads u4_qp_div_6 + ldr r4, [sp, #40] @Loads out_strd + vdup.s32 q15, r7 @Populate the u4_qp_div_6 in Q15 + ldr r5, [sp, #44] @Loads *pu2_iscal_mat + + ldr r6, [sp, #48] @Loads *pu2_weigh_mat + + ldr r8, [sp, #60] @Loads iq_start_idx + + ldr r10, [sp, #64] @Load alternate dc address + + vpush {d8-d15} +@=======================DEQUANT FROM HERE=================================== + + vld4.s16 {d20, d21, d22, d23}, [r5] @Load pu2_iscal_mat[i], i =0..15 + vld4.s16 {d26, d27, d28, d29}, [r6] @pu2_weigh_mat[i], i =0..15 + vmul.s16 q10, q10, q13 @x[i]=(scale[i] * dequant[i]) where i = 0..7 + vld4.s16 {d16, d17, d18, d19}, [r0] @pi2_src_tmp[i], i =0..15 + + vmul.s16 q11, q11, q14 @x[i]=(scale[i] * dequant[i]) where i = 8..15 + + subs r8, r8, #1 @ if r8 == 1 => intra case , so result of subtraction is zero and Z flag is set + ldreqsh r9, [r10] @ Loads signed halfword pi2_dc_ld_addr[0], if r8==1 + + vmull.s16 q0, d16, d20 @ Q0 = p[i] = (x[i] * trns_coeff[i]) where i = 0..3 + vmull.s16 q1, d17, d21 @ Q1 = p[i] = (x[i] * trns_coeff[i]) where i = 4..7 + vmull.s16 q2, d18, d22 @ Q2 = p[i] = (x[i] * trns_coeff[i]) where i = 8..11 + vmull.s16 q3, d19, d23 @ Q3 = p[i] = (x[i] * trns_coeff[i]) where i = 12..15 + + 
vshl.s32 q0, q0, q15 @ Q0 = q[i] = (p[i] << (qP/6)) where i = 0..3 + vshl.s32 q1, q1, q15 @ Q1 = q[i] = (p[i] << (qP/6)) where i = 4..7 + vshl.s32 q2, q2, q15 @ Q2 = q[i] = (p[i] << (qP/6)) where i = 8..11 + vshl.s32 q3, q3, q15 @ Q3 = q[i] = (p[i] << (qP/6)) where i = 12..15 + + vqrshrn.s32 d0, q0, #0x4 @ D0 = c[i] = ((q[i] + 8) >> 4) where i = 0..3 + vqrshrn.s32 d1, q1, #0x4 @ D1 = c[i] = ((q[i] + 8) >> 4) where i = 4..7 + vqrshrn.s32 d2, q2, #0x4 @ D2 = c[i] = ((q[i] + 8) >> 4) where i = 8..11 + vqrshrn.s32 d3, q3, #0x4 @ D3 = c[i] = ((q[i] + 8) >> 4) where i = 12..15 + + vmoveq.16 d0[0], r9 @ Restore dc value in case of intra, i.e. r8 == 1 + +@========= PROCESS IDCT FROM HERE ======= +@Steps for Stage 1: +@------------------ + vld1.32 d30[0], [r1], r3 @I row Load pu1_pred buffer + vadd.s16 d4, d0, d2 @x0 = q0 + q1; + + vsub.s16 d5, d0, d2 @x1 = q0 - q1; + + vshr.s16 d8, d1, #1 @q0>>1 + vshr.s16 d9, d3, #1 @q1>>1 + + vsub.s16 d6, d8, d3 @x2 = (q0 >> 1) - q1; + vadd.s16 d7, d1, d9 @x3 = q0+ (q1 >> 1); + vld1.32 d30[1], [r1], r3 @II row Load pu1_pred buffer + + vswp d6, d7 @Reverse positions of x2 and x3 + + vsub.s16 q6, q2, q3 @x0-x3 and x1-x2 combined + vadd.s16 q5, q2, q3 @x0 + x3 and x1+x2 combined + + vld1.32 d31[0], [r1], r3 @III row Load pu1_pred buf + + vswp d12, d13 +@Steps for Stage 2: +@------------------ + vtrn.16 d10, d11 + vtrn.16 d12, d13 + vtrn.32 d10, d12 + vtrn.32 d11, d13 + vadd.s16 d14, d10, d12 @x0 = q0 + q1; + + vsub.s16 d15, d10, d12 @x1 = q0 - q1; + + vshr.s16 d18, d11, #1 @q0>>1 + vshr.s16 d19, d13, #1 @q1>>1 + + vsub.s16 d16, d18, d13 @x2 = (q0 >> 1) - q1; + vadd.s16 d17, d11, d19 @x3 = q0+ (q1 >> 1); + + vld1.32 d31[1], [r1], r3 @IV row Load pu1_pred buffer + vswp d16, d17 @Reverse positions of x2 and x3 + + vsub.s16 q11, q7, q8 @x0-x3 and x1-x2 combined + vadd.s16 q10, q7, q8 @x0 + x3 and x1+x2 combined + + vswp d22, d23 + + vrshr.s16 q10, q10, #6 @ + vrshr.s16 q11, q11, #6 + + vaddw.u8 q10, q10, d30 + vaddw.u8 q11, q11, d31 + + 
vqmovun.s16 d0, q10 + vqmovun.s16 d1, q11 + + vst1.32 d0[0], [r2], r4 @I row store the value + vst1.32 d0[1], [r2], r4 @II row store the value + vst1.32 d1[0], [r2], r4 @III row store the value + vst1.32 d1[1], [r2] @IV row store the value + + vpop {d8-d15} + ldmfd sp!, {r4-r12, r15} @Reload the registers from SP + + + @/** +@ ******************************************************************************* +@ * +@ * @brief +@ * This function performs inverse quant and Inverse transform type Ci4 for 4*4 block +@ * +@ * @par Description: +@ * Performs inverse transform Ci4 and adds the residue to get the +@ * reconstructed block +@ * +@ * @param[in] pi2_src +@ * Input 4x4 coefficients +@ * +@ * @param[in] pu1_pred +@ * Prediction 4x4 block +@ * +@ * @param[out] pu1_out +@ * Output 4x4 block +@ * +@ * @param[in] u4_qp_div_6 +@ * QP +@ * +@ * @param[in] pu2_weigh_mat +@ * Pointer to weight matrix +@ * +@ * @param[in] pred_strd, +@ * Prediction stride +@ * +@ * @param[in] out_strd +@ * Output Stride +@ * +@ *@param[in] pi2_tmp +@ * temporary buffer of size 1*16 +@ * +@ * @param[in] pu2_iscal_mat +@ * Pointer to the inverse quantization matrix +@ * +@ * @returns Void +@ * +@ * @remarks +@ * None +@ * +@ ******************************************************************************* +@ */ +@void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src, +@ UWORD8 *pu1_pred, +@ UWORD8 *pu1_out, +@ WORD32 pred_strd, +@ WORD32 out_strd, +@ const UWORD16 *pu2_iscal_mat, +@ const UWORD16 *pu2_weigh_mat, +@ UWORD32 u4_qp_div_6, +@ WORD32 *pi4_tmp +@ WORD16 *pi2_dc_src) +@**************Variables Vs Registers***************************************** +@r0 => *pi2_src +@r1 => *pu1_pred +@r2 => *pu1_out +@r3 => pred_strd +@r4 => out_strd +@r5 => *pu2_iscal_mat +@r6 => *pu2_weigh_mat +@r7 => u4_qp_div_6 + + .global ih264_iquant_itrans_recon_chroma_4x4_a9 +ih264_iquant_itrans_recon_chroma_4x4_a9: + +@VLD4.S16 is used because the pointer is incremented by SUB_BLK_WIDTH_4x4 +@If the macro 
value changes need to change the instruction according to it. +@Only one shift is done in horizontal inverse because, +@if u4_qp_div_6 is less than 4 then shift value will be negative and do negative left shift, in this case rnd_factor has value +@if u4_qp_div_6 is greater than 4 then shift value will be positive and do left shift, here rnd_factor is 0 + + stmfd sp!, {r4-r12, r14} @stack stores the values of the arguments + ldr r7, [sp, #52] @Loads u4_qp_div_6 + ldr r4, [sp, #40] @Loads out_strd + vdup.s32 q15, r7 @Populate the u4_qp_div_6 in Q15 + ldr r5, [sp, #44] @Loads *pu2_iscal_mat + ldr r6, [sp, #48] @Loads *pu2_weigh_mat + ldr r8, [sp, #60] @loads *pi2_dc_src + + vpush {d8-d15} +@=======================DEQUANT FROM HERE=================================== + + vld4.s16 {d20, d21, d22, d23}, [r5] @Load pu2_iscal_mat[i], i =0..15 + vld4.s16 {d26, d27, d28, d29}, [r6] @pu2_weigh_mat[i], i =0..15 + vmul.s16 q10, q10, q13 @x[i]=(scale[i] * dequant[i]) where i = 0..7 + vld4.s16 {d16, d17, d18, d19}, [r0] @pi2_src_tmp[i], i =0..15 + + vmul.s16 q11, q11, q14 @x[i]=(scale[i] * dequant[i]) where i = 8..15 + + vmull.s16 q0, d16, d20 @ Q0 = p[i] = (x[i] * trns_coeff[i]) where i = 0..3 + vmull.s16 q1, d17, d21 @ Q1 = p[i] = (x[i] * trns_coeff[i]) where i = 4..7 + vmull.s16 q2, d18, d22 @ Q2 = p[i] = (x[i] * trns_coeff[i]) where i = 8..11 + vmull.s16 q3, d19, d23 @ Q3 = p[i] = (x[i] * trns_coeff[i]) where i = 12..15 + + vshl.s32 q0, q0, q15 @ Q0 = q[i] = (p[i] << (qP/6)) where i = 0..3 + vshl.s32 q1, q1, q15 @ Q1 = q[i] = (p[i] << (qP/6)) where i = 4..7 + vshl.s32 q2, q2, q15 @ Q2 = q[i] = (p[i] << (qP/6)) where i = 8..11 + vshl.s32 q3, q3, q15 @ Q3 = q[i] = (p[i] << (qP/6)) where i = 12..15 + + vqrshrn.s32 d0, q0, #0x4 @ D0 = c[i] = ((q[i] + 8) >> 4) where i = 0..3 + vqrshrn.s32 d1, q1, #0x4 @ D1 = c[i] = ((q[i] + 8) >> 4) where i = 4..7 + vqrshrn.s32 d2, q2, #0x4 @ D2 = c[i] = ((q[i] + 8) >> 4) where i = 8..11 + vqrshrn.s32 d3, q3, #0x4 @ D3 = c[i] = ((q[i] + 8) 
>> 4) where i = 12..15 + + ldrsh r9, [r8] @ Loads signed halfword pi2_dc_src[0] + vmov.16 d0[0], r9 @ Restore dc value since its chroma iq-it + +@========= PROCESS IDCT FROM HERE ======= +@Steps for Stage 1: +@------------------ + vld2.8 {d28, d29}, [r1], r3 @I row Load pu1_pred buffer + vadd.s16 d4, d0, d2 @x0 = q0 + q1; + + vsub.s16 d5, d0, d2 @x1 = q0 - q1; + + vshr.s16 d8, d1, #1 @q0>>1 + vshr.s16 d9, d3, #1 @q1>>1 + + vsub.s16 d6, d8, d3 @x2 = (q0 >> 1) - q1; + vadd.s16 d7, d1, d9 @x3 = q0+ (q1 >> 1); + vld2.8 {d29, d30}, [r1], r3 @II row Load pu1_pred buffer + + vswp d6, d7 @Reverse positions of x2 and x3 + + vsub.s16 q6, q2, q3 @x0-x3 and x1-x2 combined + vtrn.32 d28, d29 @ D28 -- row I and II of pu1_pred_buffer + vadd.s16 q5, q2, q3 @x0 + x3 and x1+x2 combined + + vld2.8 {d29, d30}, [r1], r3 @III row Load pu1_pred buf + + vswp d12, d13 +@Steps for Stage 2: +@------------------ + vtrn.16 d10, d11 + vtrn.16 d12, d13 + vtrn.32 d10, d12 + vtrn.32 d11, d13 + vadd.s16 d14, d10, d12 @x0 = q0 + q1; + + vsub.s16 d15, d10, d12 @x1 = q0 - q1; + + vshr.s16 d18, d11, #1 @q0>>1 + vshr.s16 d19, d13, #1 @q1>>1 + + vsub.s16 d16, d18, d13 @x2 = (q0 >> 1) - q1; + vadd.s16 d17, d11, d19 @x3 = q0+ (q1 >> 1); + + vld2.8 {d30, d31}, [r1], r3 @IV row Load pu1_pred buffer + vswp d16, d17 @Reverse positions of x2 and x3 + + vsub.s16 q11, q7, q8 @x0-x3 and x1-x2 combined + vtrn.32 d29, d30 @ D29 -- row III and IV of pu1_pred_buf + vadd.s16 q10, q7, q8 @x0 + x3 and x1+x2 combined + + vswp d22, d23 + + vrshr.s16 q10, q10, #6 @ + vrshr.s16 q11, q11, #6 + + vaddw.u8 q10, q10, d28 + vaddw.u8 q11, q11, d29 + + vld1.u8 d0, [r2], r4 @Loading out buffer 16 coeffs + vld1.u8 d1, [r2], r4 + vld1.u8 d2, [r2], r4 + vld1.u8 d3, [r2], r4 + + sub r2, r2, r4, lsl #2 + + vqmovun.s16 d20, q10 @Getting quantized coeffs + vqmovun.s16 d22, q11 + + vmovl.u8 q10, d20 @Move the coeffs into 16 bit + vmovl.u8 q11, d22 @so that we can use vbit to copy + + vmov.u16 q14, #0x00ff @Copy lsb from quantized (long) coeffs 
+ + vbit.u8 q0, q10, q14 + vbit.u8 q1, q11, q14 + + vst1.u8 d0, [r2], r4 + vst1.u8 d1, [r2], r4 + vst1.u8 d2, [r2], r4 + vst1.u8 d3, [r2] + + vpop {d8-d15} + ldmfd sp!, {r4-r12, r15} @Reload the registers from SP + + +@/* +@ ******************************************************************************* +@ * +@ * @brief +@ * This function performs inverse quant and Inverse transform type Ci8 for 8*8 block +@ * +@ * @par Description: +@ * Performs inverse transform Ci8 and adds the residue to get the +@ * reconstructed block +@ * +@ * @param[in] pi2_src +@ * Input 8x8 coefficients +@ * +@ * @param[in] pu1_pred +@ * Prediction 8x8 block +@ * +@ * @param[out] pu1_out +@ * Output 8x8 block +@ * +@ * @param[in] u4_qp_div_6 +@ * QP +@ * +@ * @param[in] pu2_weigh_mat +@ * Pointer to weight matrix +@ * +@ * @param[in] pred_strd, +@ * Prediction stride +@ * +@ * @param[in] out_strd +@ * Output Stride +@ * +@ *@param[in] pi2_tmp +@ * temporary buffer of size 1*64 +@ * +@ * @param[in] pu2_iscal_mat +@ * Pointer to the inverse quantization matrix +@ * +@ * @returns Void +@ * +@ * @remarks +@ * None +@ * +@ ******************************************************************************* +@ */ +@void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src, +@ UWORD8 *pu1_pred, +@ UWORD8 *pu1_out, +@ WORD32 pred_strd, +@ WORD32 out_strd, +@ const UWORD16 *pu2_iscal_mat, +@ const UWORD16 *pu2_weigh_mat, +@ UWORD32 u4_qp_div_6, +@ WORD32 *pi4_tmp, +@ WORD32 iq_start_idx) +@**************Variables Vs Registers***************************************** +@r0 => *pi2_src +@r1 => *pu1_pred +@r2 => *pu1_out +@r3 => pred_strd +@r4 => out_strd +@r5 => *pu2_iscal_mat +@r6 => *pu2_weigh_mat +@r7 => u4_qp_div_6 + + + .global ih264_iquant_itrans_recon_8x8_a9 +ih264_iquant_itrans_recon_8x8_a9: + + stmfd sp!, {r4-r12, r14} @stack stores the values of the arguments + ldr r7, [sp, #52] @Loads u4_qp_div_6 + ldr r4, [sp, #40] @Loads out_strd + + ldr r5, [sp, #44] @Loads *pu2_iscal_mat + ldr r6, [sp, #48] @Loads 
*pu2_weigh_mat + vdup.s32 q15, r7 @Populate the u4_qp_div_6 in Q15 + vpush {d8-d15} + +idct_8x8_begin: + +@========= DEQUANT FROM HERE =========== + + vld1.32 {q13}, [r5]! @ Q13 = dequant values row 0 + vld1.32 {q10}, [r6]! @ Q10 = scaling factors row 0 + vld1.32 {q14}, [r5]! @ Q14 = dequant values row 1 + vmul.s16 q10, q10, q13 @ Q10 = x[i] = (scale[i] * dequant[i]) where i = 0..7 + vld1.32 {q11}, [r6]! @ Q11 = scaling factors row 1 + vld1.32 {q8}, [r0]! @ Q8 = Source row 0 + vmul.s16 q11, q11, q14 @ Q11 = x[i] = (scale[i] * dequant[i]) where i = 8..15 + vmull.s16 q0, d16, d20 @ Q0 = p[i] = (x[i] * trns_coeff[i]) where i = 0..3 + vld1.32 {q9}, [r0]! @ Q9 = Source row 1 + vmull.s16 q1, d17, d21 @ Q1 = p[i] = (x[i] * trns_coeff[i]) where i = 4..7 + vmull.s16 q2, d18, d22 @ Q2 = p[i] = (x[i] * trns_coeff[i]) where i = 8..11 + vld1.32 {q13}, [r6]! @ Scaling factors row 2 + vmull.s16 q3, d19, d23 @ Q3 = p[i] = (x[i] * trns_coeff[i]) where i = 12..15 + vld1.32 {q14}, [r6]! @ Scaling factors row 3 + vshl.s32 q0, q0, q15 @ Q0 = q[i] = (p[i] << (qP/6)) where i = 0..3 + vld1.32 {q10}, [r5]! @ Q10 = Dequant values row 2 + vshl.s32 q1, q1, q15 @ Q1 = q[i] = (p[i] << (qP/6)) where i = 4..7 + vld1.32 {q8}, [r0]! @ Source Row 2 + vshl.s32 q2, q2, q15 @ Q2 = q[i] = (p[i] << (qP/6)) where i = 8..11 + vld1.32 {q11}, [r5]! @ Q11 = Dequant values row 3 + vshl.s32 q3, q3, q15 @ Q3 = q[i] = (p[i] << (qP/6)) where i = 12..15 + vld1.32 {q9}, [r0]! @ Source Row 3 + vmul.s16 q10, q10, q13 @ Dequant row2*scale matrix row 2 + vmul.s16 q11, q11, q14 @ Dequant row 3*scale matrix row 3 + vld1.32 {q4}, [r6]! @ Scaling factors row 4 + vqrshrn.s32 d0, q0, #0x6 @ D0 = c[i] = ((q[i] + 32) >> 6) where i = 0..3 + vqrshrn.s32 d1, q1, #0x6 @ D1 = c[i] = ((q[i] + 32) >> 6) where i = 4..7 + vld1.32 {q5}, [r6]! 
@ Scaling factors row 5 + vqrshrn.s32 d2, q2, #0x6 @ D2 = c[i] = ((q[i] + 32) >> 6) where i = 8..11 + vqrshrn.s32 d3, q3, #0x6 @ D3 = c[i] = ((q[i] + 32) >> 6) where i = 12..15 + vld1.32 {q13}, [r5]! @ Q13 = Dequant values row 4 + vmull.s16 q2, d16, d20 @ p[i] = (x[i] * trns_coeff[i]) where i=16..19 + vmull.s16 q3, d17, d21 @ p[i] = (x[i] * trns_coeff[i]) where i=20..23 + vld1.32 {q12}, [r5]! @ Q12 = Dequant values row 5 + vmull.s16 q6, d18, d22 @ p[i] = (x[i] * trns_coeff[i]) where i=24..27 + vmull.s16 q7, d19, d23 @ p[i] = (x[i] * trns_coeff[i]) where i=28..31 + + vld1.32 {q14}, [r0]! @ Source row 4 + vmul.s16 q10, q4, q13 @ Dequant row4*scale matrix row 4 + vmul.s16 q11, q5, q12 @ Dequant row5*scale matrix row 5 + vld1.32 {q9}, [r0]! @ Source row 5 + vshl.s32 q2, q2, q15 @ + vshl.s32 q3, q3, q15 @ + vld1.32 {q13}, [r6]! @ Scaling factors row 6 + vshl.s32 q6, q6, q15 @ + vshl.s32 q7, q7, q15 @ + vmull.s16 q4, d28, d20 @ i = 32..35 + vqrshrn.s32 d4, q2, #0x6 @ D4 = c[i] = ((q[i] + 32) >> 6) where i = 16..19 + vqrshrn.s32 d5, q3, #0x6 @ D5 = c[i] = ((q[i] + 32) >> 6) where i = 20..23 + vmull.s16 q5, d29, d21 @ i =36..39 + vld1.32 {q10}, [r5]! @ Dequant values row 6 + vqrshrn.s32 d6, q6, #0x6 @ D6 = c[i] = ((q[i] + 32) >> 6) where i = 24..27 + vqrshrn.s32 d7, q7, #0x6 @ D7 = c[i] = ((q[i] + 32) >> 6) where i = 28..31 + vld1.32 {q14}, [r6]! @ Scaling factors row 7 + vmull.s16 q6, d18, d22 @ + vld1.32 {q8}, [r0]! @ Source row 6 + vmull.s16 q7, d19, d23 @ + vld1.32 {q11}, [r5]! @ Dequant values row 7 + vshl.s32 q4, q4, q15 @ + vld1.32 {q9}, [r0]! 
@ Source row 7 + vshl.s32 q5, q5, q15 @ + + vshl.s32 q6, q6, q15 @ + vshl.s32 q7, q7, q15 @ + vmul.s16 q10, q10, q13 @ Dequant*scaling row 6 + vmul.s16 q11, q11, q14 @ Dequant*scaling row 7 + vqrshrn.s32 d8, q4, #0x6 @ D8 = c[i] = ((q[i] + 32) >> 6) where i = 32..35 + vqrshrn.s32 d9, q5, #0x6 @ D9 = c[i] = ((q[i] + 32) >> 6) where i = 36..39 + vqrshrn.s32 d10, q6, #0x6 @ D10 = c[i] = ((q[i] + 32) >> 6) where i = 40..43 + vqrshrn.s32 d11, q7, #0x6 @ D11 = c[i] = ((q[i] + 32) >> 6) where i = 44..47 + vmull.s16 q6, d16, d20 @ i= 48..51 + vmull.s16 q7, d17, d21 @ i= 52..55 + vmull.s16 q8, d18, d22 @ i=56..59 + vmull.s16 q9, d19, d23 @ i=60..63 + vshl.s32 q6, q6, q15 @ + vzip.s16 q0, q1 @Transpose + vshl.s32 q7, q7, q15 @ + vshl.s32 q8, q8, q15 @ + vzip.s16 q2, q3 @ + vshl.s32 q9, q9, q15 @ + vqrshrn.s32 d12, q6, #0x6 @ D12 = c[i] = ((q[i] + 32) >> 6) where i = 48..51 + vzip.s16 q4, q5 @Transpose + vqrshrn.s32 d13, q7, #0x6 @ D13 = c[i] = ((q[i] + 32) >> 6) where i = 52..55 + vqrshrn.s32 d14, q8, #0x6 @ D14 = c[i] = ((q[i] + 32) >> 6) where i = 56..59 + vzip.s32 q0, q2 @Transpose + vqrshrn.s32 d15, q9, #0x6 @ D15 = c[i] = ((q[i] + 32) >> 6) where i = 60..63 + +@========= PROCESS IDCT FROM HERE ======= + +@Steps for Stage 2: +@------------------ + +@ TRANSPOSE 8x8 coeffs to actual order + + vzip.s16 q6, q7 @ + + vzip.s32 q1, q3 @ + vzip.s32 q4, q6 @ + vzip.s32 q5, q7 @ + + vswp d1, d8 @ Q0/Q1 = Row order x0/x1 + vswp d3, d10 @ Q2/Q3 = Row order x2/x3 + vswp d5, d12 @ Q4/Q5 = Row order x4/x5 + vswp d7, d14 @ Q6/Q7 = Row order x6/x7 + + vswp q1, q4 @ + vshr.s16 q10, q2, #0x1 @ + vswp q3, q6 @ + +@Steps for Stage 1: +@------------------ + + vadd.s16 q8, q0, q4 @ Q8 = y0 + vsub.s16 q9, q0, q4 @ Q9 = y2 + + vsra.s16 q2, q6, #0x1 @ Q2 = y6 + vsub.s16 q6, q10, q6 @ Q6 = y4 + + vaddl.s16 q12, d14, d2 @ y3 (0-3) 1+7 + vaddl.s16 q13, d15, d3 @ y3 (4-7) 1+7 + + vsubl.s16 q10, d14, d2 @ y5 (0-3) 7-1 + vsubl.s16 q11, d15, d3 @ y5 (4-7) 7-1 + + vadd.s16 q0, q8, q2 @ Q0 = z0 + vsub.s16 
q4, q8, q2 @ Q4 = z6 + + vadd.s16 q8, q9, q6 @ Q8 = z2 + vsub.s16 q2, q9, q6 @ Q2 = z4 + + vsubw.s16 q12, q12, d6 @ y3 (0-3) 1+7-3 + vsubw.s16 q13, q13, d7 @ y3 (0-7) 1+7-3 + + vshr.s16 q6, q3, #0x1 @ + + vaddw.s16 q10, q10, d10 @ + vaddw.s16 q11, q11, d11 @ + + vshr.s16 q9, q5, #0x1 @ + + vsubw.s16 q12, q12, d12 @ + vsubw.s16 q13, q13, d13 @ + + vaddw.s16 q10, q10, d18 @ + vaddw.s16 q11, q11, d19 @ + + vqmovn.s32 d12, q12 @ + vaddl.s16 q12, d10, d6 @ + vqmovn.s32 d13, q13 @ Q6 = y3 + vaddl.s16 q13, d11, d7 @ + vqmovn.s32 d18, q10 @ + vsubl.s16 q10, d10, d6 @ + vqmovn.s32 d19, q11 @ Q9 = y5 + vsubl.s16 q11, d11, d7 @ + + vshr.s16 q3, q6, #0x2 @ + + vsra.s16 q6, q9, #0x2 @ Q6 = z3 + + vaddw.s16 q12, q12, d2 @ + vaddw.s16 q13, q13, d3 @ + + vshr.s16 q1, #0x1 @ + + vsub.s16 q5, q3, q9 @ Q5 = z5 + + vsubw.s16 q10, q10, d14 @ + vsubw.s16 q11, q11, d15 @ + + vshr.s16 q7, #0x1 @ + + vaddw.s16 q12, q12, d2 @ + vaddw.s16 q13, q13, d3 @ + + vsubw.s16 q10, q10, d14 @ + vsubw.s16 q11, q11, d15 @ + + + vqmovn.s32 d14, q12 @ + vadd.s16 q1, q8, q5 @ Q1 = x1 + vqmovn.s32 d15, q13 @ Q7 = y7 + vsub.s16 q3, q8, q5 @ Q3 = x6 + vqmovn.s32 d18, q10 @ + vsub.s16 q5, q2, q6 @ Q5 = x5 + vqmovn.s32 d19, q11 @ Q9 = y1 + vadd.s16 q2, q2, q6 @ Q2 = x2 + + vshr.s16 q12, q9, #0x2 @ + vsra.s16 q9, q7, #0x2 @ Q9 = z1 + + vsub.s16 q11, q7, q12 @ Q11 = z7 + + vadd.s16 q6, q4, q9 @ Q6 = x3 + vsub.s16 q4, q4, q9 @ Q4 = x4 + + vsub.s16 q7, q0, q11 @ Q7 = x7 + vadd.s16 q0, q0, q11 @ Q0 = x0 + + vswp.s16 q3, q6 @ Q3 = x3, Q6 = x6 + + +@Steps for Stage 2: +@------------------ + +@ TRANSPOSE 8x8 coeffs to actual order + + vzip.s16 q0, q1 @ + vzip.s16 q2, q3 @ + vzip.s16 q4, q5 @ + vzip.s16 q6, q7 @ + + vzip.s32 q0, q2 @ + vzip.s32 q1, q3 @ + vzip.s32 q4, q6 @ + vzip.s32 q5, q7 @ + + vswp d1, d8 @ Q0/Q1 = Row order x0/x1 + vswp d3, d10 @ Q2/Q3 = Row order x2/x3 + vswp d5, d12 @ Q4/Q5 = Row order x4/x5 + vswp d7, d14 @ Q6/Q7 = Row order x6/x7 + + vswp q1, q4 @ + vshr.s16 q10, q2, #0x1 @ + vswp q3, q6 @ + 
+@Steps for Stage 3: +@------------------ + +@Repeat stage 1 again for vertical transform + + vadd.s16 q8, q0, q4 @ Q8 = y0 + vld1.32 d28, [r1], r3 @ Q12 = 0x070605....0x070605.... + vsub.s16 q9, q0, q4 @ Q9 = y2 + + vsra.s16 q2, q6, #0x1 @ Q2 = y6 + vsub.s16 q6, q10, q6 @ Q6 = y4 + + vaddl.s16 q12, d14, d2 @ + vld1.32 d29, [r1], r3 @ Q12 = 0x070605....0x070605.... + vaddl.s16 q13, d15, d3 @ + + vsubl.s16 q10, d14, d2 @ + vld1.32 d30, [r1], r3 @ Q12 = 0x070605....0x070605.... + vsubl.s16 q11, d15, d3 @ + + vadd.s16 q0, q8, q2 @ Q0 = z0 + vld1.32 d31, [r1], r3 @ Q12 = 0x070605....0x070605.... + vsub.s16 q4, q8, q2 @ Q4 = z6 + + vadd.s16 q8, q9, q6 @ Q8 = z2 + vsub.s16 q2, q9, q6 @ Q2 = z4 + + vsubw.s16 q12, q12, d6 @ + vsubw.s16 q13, q13, d7 @ + + vshr.s16 q6, q3, #0x1 @ + + vaddw.s16 q10, q10, d10 @ + vaddw.s16 q11, q11, d11 @ + + vshr.s16 q9, q5, #0x1 @ + + vsubw.s16 q12, q12, d12 @ + vsubw.s16 q13, q13, d13 @ + + vaddw.s16 q10, q10, d18 @ + vaddw.s16 q11, q11, d19 @ + + vqmovn.s32 d12, q12 @ + vaddl.s16 q12, d10, d6 @ + vqmovn.s32 d13, q13 @ Q6 = y3 + vaddl.s16 q13, d11, d7 @ + vqmovn.s32 d18, q10 @ + vsubl.s16 q10, d10, d6 @ + vqmovn.s32 d19, q11 @ Q9 = y5 + vsubl.s16 q11, d11, d7 @ + + vshr.s16 q3, q6, #0x2 @ + + vsra.s16 q6, q9, #0x2 @ Q6 = z3 + + vaddw.s16 q12, q12, d2 @ + vaddw.s16 q13, q13, d3 @ + + vshr.s16 q1, #0x1 @ + + vsub.s16 q5, q3, q9 @ Q5 = z5 + + vsubw.s16 q10, q10, d14 @ + vsubw.s16 q11, q11, d15 @ + + vshr.s16 q7, #0x1 @ + + vaddw.s16 q12, q12, d2 @ + vaddw.s16 q13, q13, d3 @ + + vsubw.s16 q10, q10, d14 @ + vsubw.s16 q11, q11, d15 @ + + vqmovn.s32 d14, q12 @ + vadd.s16 q1, q8, q5 @ Q1 = x1 + vqmovn.s32 d15, q13 @ Q7 = y7 + vsub.s16 q3, q8, q5 @ Q3 = x6 + vqmovn.s32 d18, q10 @ + vsub.s16 q5, q2, q6 @ Q5 = x5 + vqmovn.s32 d19, q11 @ Q9 = y1 + vadd.s16 q2, q2, q6 @ Q2 = x2 + + vshr.s16 q12, q9, #0x2 @ + vsra.s16 q9, q7, #0x2 @ Q9 = z1 + + vsub.s16 q11, q7, q12 @ Q11 = z7 + + vadd.s16 q6, q4, q9 @ Q6 = x3 + vsub.s16 q4, q4, q9 @ Q4 = x4 + + vsub.s16 
q7, q0, q11 @ Q7 = x7 + vadd.s16 q0, q0, q11 @ Q0 = x0 + + vswp.s16 q3, q6 @ Q3 <-> Q6 + + vrshr.s16 q1, q1, #6 @ + vld1.32 d16, [r1], r3 @ Q12 = 0x070605....0x070605.... + vrshr.s16 q2, q2, #6 @ + vrshr.s16 q4, q4, #6 @ + vld1.32 d17, [r1], r3 @ Q12 = 0x070605....0x070605.... + vrshr.s16 q5, q5, #6 @ + vrshr.s16 q7, q7, #6 @ + vld1.32 d18, [r1], r3 @ Q12 = 0x070605....0x070605.... + vrshr.s16 q0, q0, #6 @ + vrshr.s16 q3, q3, #6 @ + vld1.32 d19, [r1], r3 @ Q12 = 0x070605....0x070605.... + vrshr.s16 q6, q6, #6 @ + +@ Code Added to pack sign and magnitudes + + vaddw.u8 q0, q0, d28 + vaddw.u8 q1, q1, d29 + vaddw.u8 q2, q2, d30 + vaddw.u8 q3, q3, d31 + vqmovun.s16 d0, q0 + vaddw.u8 q4, q4, d16 + vqmovun.s16 d1, q1 + vaddw.u8 q5, q5, d17 + vqmovun.s16 d2, q2 + vaddw.u8 q6, q6, d18 + vqmovun.s16 d3, q3 + vaddw.u8 q7, q7, d19 + + vqmovun.s16 d4, q4 + vst1.32 d0, [r2], r4 @ Magnitudes of 1st 4x4 block coeffs + vqmovun.s16 d5, q5 + vst1.32 d1, [r2], r4 @ Magnitudes of 1st 4x4 block coeffs + vqmovun.s16 d6, q6 + vst1.32 d2, [r2], r4 @ Magnitudes of 1st 4x4 block coeffs + vqmovun.s16 d7, q7 + vst1.32 d3, [r2], r4 @ Magnitudes of 1st 4x4 block coeffs + vst1.32 d4, [r2], r4 @ Magnitudes of 1st 4x4 block coeffs + + vst1.32 d5, [r2], r4 @ Magnitudes of 1st 4x4 block coeffs + + + vst1.32 d6, [r2], r4 @ Magnitudes of 1st 4x4 block coeffs + + + vst1.32 d7, [r2], r4 @ Magnitudes of 1st 4x4 block coeffs + +idct_8x8_end: + + vpop {d8-d15} + ldmfd sp!, {r4-r12, r15} + diff --git a/common/arm/ih264_iquant_itrans_recon_dc_a9.s b/common/arm/ih264_iquant_itrans_recon_dc_a9.s new file mode 100755 index 0000000..8d71bdb --- /dev/null +++ b/common/arm/ih264_iquant_itrans_recon_dc_a9.s @@ -0,0 +1,399 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. 
+@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@ ******************************************************************************* +@ * @file +@ * ih264_iquant_itrans_recon_dc_a9.s +@ * +@ * @brief +@ * Contains function definitions for single stage inverse transform +@ * +@ * @author +@ * Mohit +@ * +@ * @par List of Functions: +@ * - ih264_iquant_itrans_recon_4x4_dc_a9() +@ * - ih264_iquant_itrans_recon_8x8_dc_a9() +@ * - ih264_iquant_itrans_recon_chroma_4x4_dc_a9() +@ * +@ * @remarks +@ * None +@ * +@ ******************************************************************************* +@*/ +@/** +@ ******************************************************************************* +@ * +@ * @brief +@ * This function performs inverse quant and Inverse transform type Ci4 for 4*4 block +@ * for dc input pattern only, i.e. only the (0,0) element of the input 4x4 block is +@ * non-zero. 
For complete function, refer ih264_iquant_itrans_recon_a9.s +@ * +@ * @par Description: +@ * Performs inverse transform Ci4 and adds the residue to get the +@ * reconstructed block +@ * +@ * @param[in] pi2_src +@ * Input 4x4 coefficients +@ * +@ * @param[in] pu1_pred +@ * Prediction 4x4 block +@ * +@ * @param[out] pu1_out +@ * Output 4x4 block +@ * +@ * @param[in] u4_qp_div_6 +@ * QP / 6, applied as a left shift during dequantization +@ * +@ * @param[in] pu2_weigh_mat +@ * Pointer to weight matrix +@ * +@ * @param[in] pred_strd, +@ * Prediction stride +@ * +@ * @param[in] out_strd +@ * Output Stride +@ * +@ * @param[in] pi4_tmp +@ * temporary buffer of size 1*16 (not read by this routine) +@ * +@ * @param[in] pu2_iscal_mat +@ * Pointer to the inverse quantization matrix +@ * +@ * @returns Void +@ * +@ * @remarks +@ * None +@ * +@ ******************************************************************************* +@ */ +@void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src, +@ UWORD8 *pu1_pred, +@ UWORD8 *pu1_out, +@ WORD32 pred_strd, +@ WORD32 out_strd, +@ const UWORD16 *pu2_iscal_mat, +@ const UWORD16 *pu2_weigh_mat, +@ UWORD32 u4_qp_div_6, +@ WORD32 *pi4_tmp, +@ WORD32 iq_start_idx, +@ WORD16 *pi2_dc_ld_addr) +@**************Variables Vs Registers***************************************** +@r0 => *pi2_src +@r1 => *pu1_pred +@r2 => *pu1_out +@r3 => pred_strd +@r4 => out_strd +@r5 => *pu2_iscal_mat +@r6 => *pu2_weigh_mat +@r7 => u4_qp_div_6 +@r9 => iq_start_idx +@unused => pi2_dc_ld_addr + +.text +.p2align 2 + + .global ih264_iquant_itrans_recon_4x4_dc_a9 + +ih264_iquant_itrans_recon_4x4_dc_a9: + +@Only one shift is done in horizontal inverse because, +@if u4_qp_div_6 is lesser than 4 then shift value will be negative and do negative left shift, in this case rnd_factor has value +@if u4_qp_div_6 is greater than 4 then shift value will be positive and do left shift, here rnd_factor is 0 + + stmfd sp!, {r4-r10, r14} @save r4-r10 and the return address; stack arguments now start at sp + 32 + ldr r5, [sp, #36] @Loads *pu2_iscal_mat + ldr r6, [sp, #40] @Loads *pu2_weigh_mat + ldrsh
r8, [r0] @load pi2_src[0], SH for signed halfword load + ldrh r6, [r6] @load pu2_weight_mat[0] , H for unsigned halfword load + ldrh r5, [r5] @load pu2_iscal_mat[0] , H for unsigned halfword load +@=======================DEQUANT FROM HERE=================================== + mul r6, r6, r5 @pu2_iscal_mat[0]*pu2_weigh_mat[0] + ldr r7, [sp, #44] @Loads u4_qp_div_6 + mul r6, r6, r8 @pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0] + ldr r4, [sp, #32] @Loads out_strd + ldr r9, [sp, #52] @Loads iq_start_idx +@ NOTE(review): the text between the lsl and the ldreqsh below was lost in extraction; the add/asr rounding shift and the flag-setting subs are reconstructed - confirm against the upstream file + lsl r6, r6, r7 @(pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0]) << u4_qp_div_6 + add r6, r6, #8 @ + rnd_fact for the 4 bit dequant shift + asr r6, r6, #4 @q0 = dequantized dc, i.e. a net shift of (u4_qp_div_6 - 4) + subs r9, r9, #1 @ if r9 == 1 => intra case , so result of subtraction is zero and Z flag is set + ldreqsh r10, [r0] @ Loads signed halfword pi2_src[0], if r9==1 + moveq r6, r10 @ Restore dc value in case of intra, i.e. r9 == 1 + + add r6, r6, #32 @i_macro = q0 + 32 + asr r6, r6, #6 @i_macro >>6 = DC output of 2-stage transform + vdup.s16 q0, r6 @copy transform output to Q0 + + vld1.32 d30[0], [r1], r3 @I row Load pu1_pred buffer + + vld1.32 d30[1], [r1], r3 @II row Load pu1_pred buffer + + vld1.32 d31[0], [r1], r3 @III row Load pu1_pred buf + + vld1.32 d31[1], [r1], r3 @IV row Load pu1_pred buffer + vaddw.u8 q10, q0, d30 @I and II row pred + dc + + vaddw.u8 q11, q0, d31 @III and IV row pred + dc + + vqmovun.s16 d0, q10 @I and II row CLIP_U8 + + vst1.32 d0[0], [r2], r4 @I row store the value + vqmovun.s16 d1, q11 @III and IV row CLIP_U8 + vst1.32 d0[1], [r2], r4 @II row store the value + vst1.32 d1[0], [r2], r4 @III row store the value + vst1.32 d1[1], [r2] @IV row store the value + + ldmfd sp!, {r4-r10, r15} @Reload the registers from SP and return + + + + +@/* +@ ******************************************************************************* +@ * +@ * @brief +@ * This function performs inverse quant and Inverse transform type Ci8 for 8*8 block +@ * for dc input pattern only, i.e. only the (0,0) element of the input 8x8 block is +@ * non-zero.
For complete function, refer ih264_iquant_itrans_recon_a9.s +@ * +@ * @par Description: +@ * Performs inverse transform Ci8 and adds the residue to get the +@ * reconstructed block +@ * +@ * @param[in] pi2_src +@ * Input 8x8 coefficients +@ * +@ * @param[in] pu1_pred +@ * Prediction 8x8 block +@ * +@ * @param[out] pu1_out +@ * Output 8x8 block +@ * +@ * @param[in] u4_qp_div_6 +@ * QP / 6, applied as a left shift during dequantization +@ * +@ * @param[in] pu2_weigh_mat +@ * Pointer to weight matrix +@ * +@ * @param[in] pred_strd, +@ * Prediction stride +@ * +@ * @param[in] out_strd +@ * Output Stride +@ * +@ * @param[in] pi4_tmp +@ * temporary buffer of size 1*64 (not read by this routine) +@ * +@ * @param[in] pu2_iscal_mat +@ * Pointer to the inverse quantization matrix +@ * +@ * @returns Void +@ * +@ * @remarks +@ * None +@ * +@ ******************************************************************************* +@ */ +@void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src, +@ UWORD8 *pu1_pred, +@ UWORD8 *pu1_out, +@ WORD32 pred_strd, +@ WORD32 out_strd, +@ const UWORD16 *pu2_iscal_mat, +@ const UWORD16 *pu2_weigh_mat, +@ UWORD32 u4_qp_div_6, +@ WORD32 *pi4_tmp, +@ WORD32 iq_start_idx) +@**************Variables Vs Registers***************************************** +@r0 => *pi2_src +@r1 => *pu1_pred +@r2 => *pu1_out +@r3 => pred_strd +@r4 => out_strd +@r5 => *pu2_iscal_mat +@r6 => *pu2_weigh_mat +@r7 => u4_qp_div_6 + + + .global ih264_iquant_itrans_recon_8x8_dc_a9 +ih264_iquant_itrans_recon_8x8_dc_a9: + + stmfd sp!, {r4-r8, r14} @save r4-r8 and the return address; stack arguments now start at sp + 24 + ldr r5, [sp, #28] @Loads *pu2_iscal_mat + ldr r6, [sp, #32] @Loads *pu2_weigh_mat + ldrsh r8, [r0] @load pi2_src[0], SH for signed halfword load + ldrh r6, [r6] @load pu2_weight_mat[0] , H for unsigned halfword load + ldrh r5, [r5] @load pu2_iscal_mat[0] , H for unsigned halfword load +@=======================DEQUANT FROM HERE=================================== + mul r6, r6, r5 @pu2_iscal_mat[0]*pu2_weigh_mat[0] + ldr r7, [sp, #36] @Loads u4_qp_div_6 + mul r6, r6, r8
@pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0] + ldr r4, [sp, #24] @Loads out_strd +@ NOTE(review): the text between the lsl and the vdup below was lost in extraction; the two round-and-shift steps are reconstructed to mirror the vqrshrn #6 / vrshr #6 pair of the full 8x8 routine - confirm against the upstream file + vpush {d8-d15} @preserve d8-d15, q4-q7 are overwritten below + lsl r6, r6, r7 @(pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0]) << u4_qp_div_6 + add r6, r6, #32 @ + rnd_fact for the 6 bit dequant shift + asr r6, r6, #6 @q0 = dequantized dc + add r6, r6, #32 @i_macro = q0 + 32 + asr r6, r6, #6 @i_macro >>6 = DC output of 2-stage transform + vdup.s16 q8, r6 @copy transform output to Q8 + + vld1.32 d24, [r1], r3 @ pred row 0 + + vld1.32 d25, [r1], r3 @ pred row 1 + + vld1.32 d26, [r1], r3 @ pred row 2 + vaddw.u8 q0, q8, d24 + vld1.32 d27, [r1], r3 @ pred row 3 + vaddw.u8 q1, q8, d25 + vld1.32 d28, [r1], r3 @ pred row 4 + vaddw.u8 q2, q8, d26 + vld1.32 d29, [r1], r3 @ pred row 5 + vaddw.u8 q3, q8, d27 + vld1.32 d30, [r1], r3 @ pred row 6 + vaddw.u8 q4, q8, d28 + vld1.32 d31, [r1], r3 @ pred row 7 + +@ Saturate (pred + dc) to 8 bit and store the 8 output rows + + + vqmovun.s16 d0, q0 + vaddw.u8 q5, q8, d29 + vqmovun.s16 d1, q1 + vaddw.u8 q6, q8, d30 + vqmovun.s16 d2, q2 + vqmovun.s16 d3, q3 + vaddw.u8 q7, q8, d31 + vqmovun.s16 d4, q4 + vqmovun.s16 d5, q5 + vst1.32 d0, [r2], r4 @ store output row 0 + vqmovun.s16 d6, q6 + vst1.32 d1, [r2], r4 @ store output row 1 + vqmovun.s16 d7, q7 + vst1.32 d2, [r2], r4 @ store output row 2 + vst1.32 d3, [r2], r4 @ store output row 3 + vst1.32 d4, [r2], r4 @ store output row 4 + vst1.32 d5, [r2], r4 @ store output row 5 + vst1.32 d6, [r2], r4 @ store output row 6 + vst1.32 d7, [r2], r4 @ store output row 7 + + vpop {d8-d15} + ldmfd sp!, {r4-r8, r15} + + +@ /* +@ ******************************************************************************** +@ * +@ * @brief This function reconstructs a 4x4 sub block from quantized residue and +@ * prediction buffer if only dc value is present for residue +@ * +@ * @par Description: +@ * The quantized residue is first inverse quantized, +@ * This inverse quantized content
is added to the prediction buffer to recon- +@ * struct the end output +@ * +@ * @param[in] pi2_src +@ * quantized dc coefficient +@ * +@ * @param[in] pu1_pred +@ * prediction 4x4 block in interleaved format +@ * +@ * @param[in] pred_strd, +@ * Prediction buffer stride in interleaved format +@ * +@ * @param[in] out_strd +@ * recon buffer Stride +@ * +@ * @returns none +@ * +@ * @remarks none +@ * +@ ******************************************************************************* +@ */ +@ void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src, +@ UWORD8 *pu1_pred, +@ UWORD8 *pu1_out, +@ WORD32 pred_strd, +@ WORD32 out_strd, +@ const UWORD16 *pu2_iscal_mat, +@ const UWORD16 *pu2_weigh_mat, +@ UWORD32 u4_qp_div_6, +@ WORD16 *pi2_tmp, +@ WORD16 *pi2_dc_src) +@ Register Usage +@ r0 : pi2_src (overwritten: first pi2_dc_src, then out_strd) +@ r1 : pu1_pred (later reused as the store pointer into pu1_out) +@ r2 : pu1_out +@ r3 : pred_strd +@ Neon registers d0-d6, d18-d21 and d30-d31 are used +@ No need for pushing arm and neon registers + .global ih264_iquant_itrans_recon_chroma_4x4_dc_a9 +ih264_iquant_itrans_recon_chroma_4x4_dc_a9: + + ldr r0, [sp, #20] @address held in the pi2_dc_src argument + vld1.s16 d0, [r0] @load pi2_dc_src (4 halfwords are read, only lane 0 is used) + + ldr r0, [sp] @load out_strd + + vld2.s8 {d2, d3}, [r1], r3 @load pred plane 1 => d2 &pred plane 2 => d3 + vld2.s8 {d3, d4}, [r1], r3 @row 1: plane 1 => d3 (overwrites row 0 plane 2), plane 2 => d4 + vrshr.s16 d0, d0, #6 @i_macro = ((q0 + 32) >> 6); + vld2.s8 {d4, d5}, [r1], r3 @row 2: plane 1 => d4, plane 2 => d5 + vld2.s8 {d5, d6}, [r1], r3 @row 3: plane 1 => d5, plane 2 => d6 (not used) + + vdup.s16 q0, d0[0] @duplicate the rounded dc value into all 8 lanes + mov r1, r2 @backup pu1_out + + vtrn.32 d2, d3 @d2 = first 4 plane 1 pixels of row 0 and of row 1 + vtrn.32 d4, d5 @d4 = first 4 plane 1 pixels of row 2 and of row 3 + + vmov.u16 q15, #0x00ff @mask selecting the low byte of every 16 bit lane + + vld1.u8 d18, [r2], r0 @load out [8 bit size) -8 coeffs + vaddw.u8 q1, q0, d2 @Add pred + vld1.u8 d19, [r2], r0 + vaddw.u8 q2, q0, d4 @Add pred, rows 2 and 3 + vld1.u8 d20, [r2], r0 + vld1.u8 d21, [r2], r0 + + vqmovun.s16 d2, q1 @saturate rows 0 and 1 to unsigned 8 bit + vqmovun.s16 d4, q2 @saturate rows 2 and 3 to unsigned 8 bit + + vmovl.u8 q1, d2 @widen again: result in the low byte, zero in the high byte + vmovl.u8 q2, d4 + + vbit.u8 q9, q1, q15 @insert the result into the low byte of each 16 bit lane of the out rows, the high bytes keep the loaded out data + vbit.u8 q10, q2, q15 + + vst1.u8 d18, [r1], r0 @store out + vst1.u8 d19, [r1], r0 + vst1.u8 d20, [r1], r0 + vst1.u8 d21, [r1], r0 + + bx lr + + + + + + + diff --git
a/common/arm/ih264_itrans_recon_a9.s b/common/arm/ih264_itrans_recon_a9.s new file mode 100755 index 0000000..1d74da5 --- /dev/null +++ b/common/arm/ih264_itrans_recon_a9.s @@ -0,0 +1,216 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +@*/ +@/** +@ ******************************************************************************* +@ * @file +@ * ih264_itrans_recon_a9.s +@ * +@ * @brief +@ * Contains function definitions for single stage inverse transform +@ * +@ * +@ * @par List of Functions: +@ * - ih264_itrans_recon_4x4_a9() +@ * +@ * @remarks +@ * None +@ * +@ ******************************************************************************* +@*/ +@/** +@ ******************************************************************************* +@ * +@ * @brief +@ * This function performs Inverse transform type Ci4 for 4*4 block +@ * +@ * @par Description: +@ * Performs inverse transform Ci4 and adds the residue to get the +@ * reconstructed block +@ * +@ * @param[in] pi2_src +@ * Input 4x4 coefficients +@ * +@ * @param[in] pu1_pred +@ * Prediction 4x4 block +@ * +@ * @param[out] pu1_recon +@ * Output 4x4 block +@ * +@ * @param[in] src_strd +@ * Input stride, in coefficients (doubled below to get bytes) +@ * +@ * @param[in] pred_strd +@ * Prediction stride +@ * +@ * @param[in] dst_strd +@ * Output Stride +@ * +@ * @param[in] q_lev, pi4_tmp +@ * Quantizer level and temporary buffer (neither takes part in the computation) +@ * +@ * @returns Void +@ * +@ * @remarks +@ * None +@ * +@ * +@ ******************************************************************************* +@ */ +@void ih264_itrans_recon_4x4( +@ WORD16 *pi2_src, +@ UWORD8 *pu1_pred, +@ UWORD8 *pu1_recon, +@ WORD32 src_strd, +@ WORD32 pred_strd, +@ WORD32 dst_strd, +@ UWORD32 q_lev, //quantizer level +@ WORD32 *pi4_tmp) +@**************Variables Vs Registers***************************************** +@r0 => *pi2_src +@r1 => *pu1_pred +@r2 => *pu1_recon +@r3 => src_strd +@r4 => pred_strd +@r5 => dst_strd +@r6 => q_lev +@r7 => *pi4_tmp + +.text +.p2align 2 + + + .global ih264_itrans_recon_4x4_a9 + +ih264_itrans_recon_4x4_a9: + stmfd sp!, {r4-r12, r14} @save r4-r12 and the return address; stack arguments now start at sp + 40 + lsl r3, r3, #1 @src_strd in bytes (2 bytes per coefficient) + + vld1.16 d0, [r0], r3 @row 0 of pi2_src + ldr r4, [sp, #40] @Loads pred_strd + + vld1.16 d1, [r0],
r3 @row 1 of pi2_src + ldr r5, [sp, #44] @Loads dst_strd + + vld1.16 d2, [r0], r3 @row 2 of pi2_src + + vld1.16 d3, [r0] @row 3 of pi2_src + ldr r7, [sp, #52] @Loads *pi4_tmp (r7 is not used afterwards) + + vpush {d8-d15} @preserve d8-d15, q4-q7 are overwritten below + + vtrn.16 d0, d1 @Transpose to get all the 0th element in the single D register + vtrn.16 d2, d3 + vtrn.32 d0, d2 + vtrn.32 d1, d3 @D0 --> pi2_src_tmp[0], D1 --> pi2_src_tmp[1] + @D2 --> pi2_src_tmp[2], D3 --> pi2_src_tmp[3] + + vaddl.s16 q3, d0, d2 @x0 = (pi2_src_tmp[0] + pi2_src_tmp[2]) + vsubl.s16 q4, d0, d2 @x1 = (pi2_src_tmp[0] - pi2_src_tmp[2]) + vshr.s16 d4, d1, #1 @pi2_src_tmp[1] >> 1 + vshr.s16 d5, d3, #1 @pi2_src_tmp[3] >> 1 + + vsubl.s16 q5, d4, d3 @x2 = D_SHIFT(pi2_src_tmp[1],1,shft) - pi2_src_tmp[3] + + vaddl.s16 q6, d1, d5 @x3 = pi2_src_tmp[1] + D_SHIFT(pi2_src_tmp[3],1,shft) + + vadd.s32 q8, q4, q5 @x1 + x2 + vsub.s32 q9, q4, q5 @x1 - x2 + + vadd.s32 q7, q3, q6 @x0 + x3 + vsub.s32 q10, q3, q6 @x0 - x3 + + vtrn.32 q7, q8 @Transpose the register to have the adjacent values + + vtrn.32 q9, q10 + vadd.s32 d6, d14, d15 @x0(0,1) = (pi4_tblk[0,1] + pi4_tblk[8,9]) + + vsub.s32 d7, d14, d15 @x1(0,1) = (pi4_tblk[0,1] - pi4_tblk[8,9]) + + vshr.s32 d4, d16, #1 @pi4_tblk[4,5] >> 1 + vshr.s32 d5, d17, #1 @pi4_tblk[12,13] >> 1 + + vsub.s32 d8, d4, d17 @x2(0,1) = D_SHIFT(pi4_tblk[4,5],1,shft) - pi4_tblk[12,13] + vadd.s32 d9, d16, d5 @x3(0,1) = pi4_tblk[4,5] + D_SHIFT(pi4_tblk[12,13],1,shft) + + vadd.s32 d10, d18, d19 @x0(2,3) = (pi4_tblk[2,3] + pi4_tblk[10,11]) + vsub.s32 d11, d18, d19 @x1(2,3) = (pi4_tblk[2,3] - pi4_tblk[10,11]) + vshr.s32 d4, d20, #1 @pi4_tblk[6,7] >> 1 + vshr.s32 d5, d21, #1 @pi4_tblk[14,15] >> 1 + + vld1.32 d30[0], [r1], r4 @I row Load pu1_pred buffer + vsub.s32 d12, d4, d21 @x2(2,3) = D_SHIFT(pi4_tblk[6,7],1,shft) - pi4_tblk[14,15] + + vmovl.u8 q15, d30 @I row Convert 8 bit pred buffer to 16 bit + vadd.s32 d13, d20, d5 @x3(2,3) = pi4_tblk[6,7] + D_SHIFT(pi4_tblk[14,15],1,shft) + + vadd.s32 d16, d6, d9 @I row i_macro(0,1) = x0(0,1) +
x3(0,1) + + vld1.32 d28[0], [r1], r4 @II row Load pu1_pred buffer + vadd.s32 d17, d10, d13 @I row i_macro(2,3) = x0(2,3) + x3(2,3) + + vqrshrn.s32 d16, q8, #6 @I row i_macro = D_SHIFT(i_macro,6,shft) + + vmovl.u8 q14, d28 @II row Convert 8 bit pred buffer to 16 bit + vadd.u16 d16, d16, d30 @I row i_macro += *pu1_pred_tmp + + vqmovun.s16 d16, q8 @I row CLIP_U8(i_macro), only the low 4 bytes of d16 are stored + vadd.s32 d18, d7, d8 @II row i_macro(0,1) = x1(0,1) + x2(0,1) + + vld1.32 d26[0], [r1], r4 @III row Load pu1_pred buffer + vadd.s32 d19, d11, d12 @II row i_macro(2,3) = x1(2,3) + x2(2,3) + + vqrshrn.s32 d18, q9, #6 @II row i_macro = D_SHIFT(i_macro,6,shft) + + vmovl.u8 q13, d26 @III row Convert 8 bit pred buffer to 16 bit + vadd.u16 d18, d18, d28 @II row i_macro += *pu1_pred_tmp + + vst1.32 d16[0], [r2], r5 @I row store the value + vsub.s32 d20, d7, d8 @III row i_macro(0,1) = x1(0,1) - x2(0,1) + + vqmovun.s16 d18, q9 @II row CLIP_U8(i_macro) + vsub.s32 d21, d11, d12 @III row i_macro(2,3) = x1(2,3) - x2(2,3) + + vld1.32 d24[0], [r1], r4 @IV row Load pu1_pred buffer + vqrshrn.s32 d20, q10, #6 @III row i_macro = D_SHIFT(i_macro,6,shft) + + vmovl.u8 q12, d24 @IV row Convert 8 bit pred buffer to 16 bit + vadd.u16 d20, d20, d26 @III row i_macro += *pu1_pred_tmp + + vqmovun.s16 d20, q10 @III row CLIP_U8(i_macro) + vsub.s32 d22, d6, d9 @IV row i_macro(0,1) = x0(0,1) - x3(0,1) + + vst1.32 d18[0], [r2], r5 @II row store the value + vsub.s32 d23, d10, d13 @IV row i_macro(2,3) = x0(2,3) - x3(2,3) + + vqrshrn.s32 d22, q11, #6 @IV row i_macro = D_SHIFT(i_macro,6,shft) + + vst1.32 d20[0], [r2], r5 @III row store the value + vadd.u16 d22, d22, d24 @IV row i_macro += *pu1_pred_tmp + + vqmovun.s16 d22, q11 @IV row CLIP_U8(i_macro) + vst1.32 d22[0], [r2], r5 @IV row store the value + + + vpop {d8-d15} + ldmfd sp!, {r4-r12, r15} @Reload the registers from SP and return + + + + diff --git a/common/arm/ih264_mem_fns_neon.s b/common/arm/ih264_mem_fns_neon.s new file mode 100755 index 0000000..2808897 --- /dev/null +++
b/common/arm/ih264_mem_fns_neon.s @@ -0,0 +1,268 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@ ******************************************************************************* +@ * @file +@ * ih264_mem_fns_neon.s +@ * +@ * @brief +@ * Contains function definitions for memory manipulation +@ * +@ * @author +@ * Naveen SR +@ * +@ * @par List of Functions: +@ * - ih264_memcpy_mul_8_a9q() +@ * - ih264_memcpy_a9q() +@ * - ih264_memset_mul_8_a9q() +@ * - ih264_memset_a9q() +@ * - ih264_memset_16bit_mul_8_a9q() +@ * - ih264_memset_a9q() +@ * +@ * @remarks +@ * None +@ * +@ ******************************************************************************* +@*/ + +@/** +@******************************************************************************* +@* +@* @brief +@* memcpy of a 1d array +@* +@* @par Description: +@* Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes +@* +@* @param[in] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[in] num_bytes +@* number of bytes to copy +@* @returns +@* +@* @remarks +@* None +@* 
+@******************************************************************************* +@*/ +@void ih264_memcpy_mul_8(UWORD8 *pu1_dst, +@ UWORD8 *pu1_src, +@ UWORD8 num_bytes) +@**************Variables Vs Registers************************* +@ r0 => *pu1_dst +@ r1 => *pu1_src +@ r2 => num_bytes + +.text +.p2align 2 + + + .global ih264_memcpy_mul_8_a9q + +ih264_memcpy_mul_8_a9q: + +loop_neon_memcpy_mul_8: + @ Memcpy 8 bytes + vld1.8 d0, [r1]! + vst1.8 d0, [r0]! + + subs r2, r2, #8 @ loop ends only when the count reaches exactly 0: num_bytes must be a non-zero multiple of 8 + bne loop_neon_memcpy_mul_8 + bx lr + + + +@******************************************************************************* +@*/ +@void ih264_memcpy(UWORD8 *pu1_dst, +@ UWORD8 *pu1_src, +@ UWORD8 num_bytes) +@**************Variables Vs Registers************************* +@ r0 => *pu1_dst +@ r1 => *pu1_src +@ r2 => num_bytes + + + + .global ih264_memcpy_a9q + +ih264_memcpy_a9q: + subs r2, #8 @ fewer than 8 bytes: go straight to the byte loop + blt memcpy +loop_neon_memcpy: + @ Memcpy 8 bytes + vld1.8 d0, [r1]! + vst1.8 d0, [r0]! + + subs r2, #8 + bge loop_neon_memcpy + cmp r2, #-8 @ count was a multiple of 8: nothing left to copy + bxeq lr + +memcpy: + adds r2, #8 @ restore the residual byte count (0..7), Z is set when it is 0 + bxeq lr @ num_bytes == 0: return instead of entering the byte loop with a zero counter +loop_memcpy: + ldrb r3, [r1], #1 + strb r3, [r0], #1 + subs r2, #1 + bne loop_memcpy + bx lr + + + + +@void ih264_memset_mul_8(UWORD8 *pu1_dst, +@ UWORD8 value, +@ UWORD8 num_bytes) +@**************Variables Vs Registers************************* +@ r0 => *pu1_dst +@ r1 => value +@ r2 => num_bytes + + + + .global ih264_memset_mul_8_a9q + +ih264_memset_mul_8_a9q: + +@ Assumptions: numbytes is either 8, 16 or 32 + vdup.8 d0, r1 +loop_memset_mul_8: + @ Memset 8 bytes + vst1.8 d0, [r0]! + + subs r2, r2, #8 + bne loop_memset_mul_8 + + bx lr + + + + +@void ih264_memset(UWORD8 *pu1_dst, +@ UWORD8 value, +@ UWORD8 num_bytes) +@**************Variables Vs Registers************************* +@ r0 => *pu1_dst +@ r1 => value +@ r2 => num_bytes + + + + .global ih264_memset_a9q + +ih264_memset_a9q: + subs r2, #8 @ fewer than 8 bytes: go straight to the byte loop + blt memset + vdup.8 d0, r1 +loop_neon_memset: + @ Memset 8 bytes + vst1.8 d0, [r0]!
+ + subs r2, #8 + bge loop_neon_memset + cmp r2, #-8 @ count was a multiple of 8: nothing left to set + bxeq lr + +memset: + adds r2, #8 @ restore the residual byte count (0..7), Z is set when it is 0 + bxeq lr @ num_bytes == 0: return instead of entering the byte loop with a zero counter +loop_memset: + strb r1, [r0], #1 + subs r2, #1 + bne loop_memset + bx lr + + + + +@void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst, +@ UWORD16 value, +@ UWORD8 num_words) +@**************Variables Vs Registers************************* +@ r0 => *pu2_dst +@ r1 => value +@ r2 => num_words + + + + .global ih264_memset_16bit_mul_8_a9q + +ih264_memset_16bit_mul_8_a9q: + +@ Assumptions: num_words is either 8, 16 or 32 + + @ Memset 8 words + vdup.16 d0, r1 +loop_memset_16bit_mul_8: + vst1.16 d0, [r0]! + vst1.16 d0, [r0]! + + subs r2, r2, #8 + bne loop_memset_16bit_mul_8 + + bx lr + + + + +@void ih264_memset_16bit(UWORD16 *pu2_dst, +@ UWORD16 value, +@ UWORD8 num_words) +@**************Variables Vs Registers************************* +@ r0 => *pu2_dst +@ r1 => value +@ r2 => num_words + + + + .global ih264_memset_16bit_a9q + +ih264_memset_16bit_a9q: + subs r2, #8 @ fewer than 8 words: go straight to the halfword loop + blt memset_16bit + vdup.16 d0, r1 +loop_neon_memset_16bit: + @ Memset 8 words + vst1.16 d0, [r0]! + vst1.16 d0, [r0]! + + subs r2, #8 + bge loop_neon_memset_16bit + cmp r2, #-8 @ count was a multiple of 8: nothing left to set + bxeq lr + +memset_16bit: + adds r2, #8 @ restore the residual word count (0..7), Z is set when it is 0 + bxeq lr @ num_words == 0: return instead of entering the halfword loop with a zero counter +loop_memset_16bit: + strh r1, [r0], #2 + subs r2, #1 + bne loop_memset_16bit + bx lr + + + + diff --git a/common/arm/ih264_padding_neon.s b/common/arm/ih264_padding_neon.s new file mode 100755 index 0000000..9bab268 --- /dev/null +++ b/common/arm/ih264_padding_neon.s @@ -0,0 +1,646 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@ ******************************************************************************* +@ * @file +@ * ih264_padding_neon.s +@ * +@ * @brief +@ * Contains function definitions padding +@ * +@ * @author +@ * Ittiam +@ * +@ * @par List of Functions: +@ * - ih264_pad_top_a9q() +@ * - ih264_pad_left_luma_a9q() +@ * - ih264_pad_left_chroma_a9q() +@ * - ih264_pad_right_luma_a9q() +@ * - ih264_pad_right_chroma_a9q() +@ * +@ * @remarks +@ * None +@ * +@ ******************************************************************************* +@*/ + + +@/** +@******************************************************************************* +@* +@* @brief pad at the top of a 2d array +@* +@* @par Description: +@* The top row of a 2d array is replicated for pad_size times at the top +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] wd +@* integer width of the array +@* +@* @param[in] pad_size +@* integer -padding size of the array +@* +@* @returns none +@* +@* @remarks none +@* +@******************************************************************************* +@*/ +@void ih264_pad_top(UWORD8 *pu1_src, +@ WORD32 src_strd, +@ WORD32 wd, +@ WORD32 pad_size) +@**************Variables Vs Registers************************* +@ r0 => *pu1_src +@ r1 => src_strd +@ r2 => wd +@ r3 => pad_size + +.text +.p2align 2 + + 
.global ih264_pad_top_a9q
+
+@ Replicates the row at pu1_src into the pad_size rows directly above it,
+@ one 16-byte column chunk at a time.
+@ Both loops count down with subs/bne and run their body before testing,
+@ so wd must be a non-zero multiple of 16 and pad_size must be >= 1.
+ih264_pad_top_a9q:
+
+    stmfd sp!, {r4-r11, lr} @ save callee-saved registers (only r4-r7 are used below)
+
+    sub r5, r0, r1          @ r5 = first destination row (one stride above the source row)
+    rsb r6, r1, #0          @ r6 = -src_strd, so each store steps one row upwards
+
+loop_neon_memcpy_mul_16:
+    @ Load 16 bytes
+    vld1.8 {d0, d1}, [r0]!
+    mov r4, r5              @ r4 = running destination for this 16-byte chunk
+    mov r7, r3              @ r7 = rows left to write for this chunk (pad_size)
+    add r5, r5, #16         @ next chunk starts 16 bytes to the right
+
+loop_neon_pad_top:
+    vst1.8 {d0, d1}, [r4], r6
+    subs r7, r7, #1
+    bne loop_neon_pad_top
+
+    subs r2, r2, #16
+    bne loop_neon_memcpy_mul_16
+
+    ldmfd sp!, {r4-r11, pc} @Reload the registers from SP
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Padding (luma block) at the left of a 2d array
+@*
+@* @par Description:
+@* The left column of a 2d array is replicated for pad_size times at the left
+@*
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @param[in] pad_size
+@* integer -padding size of the array
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@#if PAD_LEFT_LUMA == C
+@void ih264_pad_left_luma(UWORD8 *pu1_src,
+@                         WORD32 src_strd,
+@                         WORD32 ht,
+@                         WORD32 pad_size)
+@**************Variables Vs Registers*************************
+@   r0 => *pu1_src
+@   r1 => src_strd
+@   r2 => ht
+@   r3 => pad_size
+
+
+    .global ih264_pad_left_luma_a9q
+
+ih264_pad_left_luma_a9q:
+
+    stmfd sp!, {r4-r11, lr} @ save callee-saved registers
+
+
+    sub r4, r0, r3          @ r4 = destination: pad_size bytes to the left of the source
+    sub r6, r1, #16         @ r6 = stride minus 16, used after the first of two 16-byte stores
+    subs r5, r3, #16        @ pad_size == 16 -> loop_16, anything else -> loop_32
+    bne loop_32
+loop_16: @ /*hard coded for width=16 ,height =8,16*/
+    ldrb r8, [r0], r1
+    ldrb r9, [r0], r1
+    vdup.u8 q0, r8
+    ldrb r10, [r0], r1
+    vst1.8 {q0}, [r4], r1 @ 16 bytes store
+    vdup.u8 q1, r9
+    vst1.8 {q1}, [r4], r1 @ 16 bytes store
+    ldrb r11, [r0], r1
+    vdup.u8 q2,
r10 + vdup.u8 q3, r11 + vst1.8 {q2}, [r4], r1 @ 16 bytes store + ldrb r8, [r0], r1 + vst1.8 {q3}, [r4], r1 @ 16 bytes store + ldrb r9, [r0], r1 + vdup.u8 q0, r8 + ldrb r10, [r0], r1 + vst1.8 {q0}, [r4], r1 @ 16 bytes store + vdup.u8 q1, r9 + ldrb r11, [r0], r1 + vst1.8 {q1}, [r4], r1 @ 16 bytes store + vdup.u8 q2, r10 + vdup.u8 q3, r11 + subs r2, r2, #8 + vst1.8 {q2}, [r4], r1 @ 16 bytes store + vst1.8 {q3}, [r4], r1 @ 16 bytes store + bne loop_16 + b end_func + +loop_32: @ /*hard coded for width=32 ,height =8,16*/ + ldrb r8, [r0], r1 + ldrb r9, [r0], r1 + vdup.u8 q0, r8 + ldrb r10, [r0], r1 + vst1.8 {q0}, [r4]! @ 16 bytes store + vdup.u8 q1, r9 + vst1.8 {q0}, [r4], r6 + vst1.8 {q1}, [r4]! @ 16 bytes store + vdup.u8 q2, r10 + vst1.8 {q1}, [r4], r6 @ 16 bytes store + ldrb r11, [r0], r1 + vst1.8 {q2}, [r4]! @ 16 bytes store + vdup.u8 q3, r11 + vst1.8 {q2}, [r4], r6 @ 16 bytes store + ldrb r8, [r0], r1 + vst1.8 {q3}, [r4]! @ 16 bytes store + vdup.u8 q0, r8 + ldrb r9, [r0], r1 + vst1.8 {q3}, [r4], r6 @ 16 bytes store + ldrb r10, [r0], r1 + vst1.8 {q0}, [r4]! @ 16 bytes store + vdup.u8 q1, r9 + vst1.8 {q0}, [r4], r6 @ 16 bytes store + ldrb r11, [r0], r1 + vst1.8 {q1}, [r4]! @ 16 bytes store + vdup.u8 q2, r10 + vst1.8 {q1}, [r4], r6 @ 16 bytes store + vst1.8 {q2}, [r4]! @ 16 bytes store + vdup.u8 q3, r11 + vst1.8 {q2}, [r4], r6 @ 16 bytes store + subs r2, r2, #8 + vst1.8 {q3}, [r4]! 
@ 16 bytes store + vst1.8 {q3}, [r4], r6 @ 16 bytes store + bne loop_32 + + + +end_func: + ldmfd sp!, {r4-r11, pc} @Reload the registers from SP + + + + + +@/** +@******************************************************************************* +@* +@* @brief +@* Padding (chroma block) at the left of a 2d array +@* +@* @par Description: +@* The left column of a 2d array is replicated for pad_size times at the left +@* +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] ht +@* integer height of the array +@* +@* @param[in] wd +@* integer width of the array (each colour component) +@* +@* @param[in] pad_size +@* integer -padding size of the array +@* +@* @param[in] ht +@* integer height of the array +@* +@* @param[in] wd +@* integer width of the array +@* +@* @returns +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ +@#if PAD_LEFT_CHROMA == C +@void ih264_pad_left_chroma(UWORD8 *pu1_src, +@ WORD32 src_strd, +@ WORD32 ht, +@ WORD32 pad_size) +@{ +@ r0 => *pu1_src +@ r1 => src_strd +@ r2 => ht +@ r3 => pad_size + + + + .global ih264_pad_left_chroma_a9q + +ih264_pad_left_chroma_a9q: + + stmfd sp!, {r4-r11, lr} @stack stores the values of the arguments + + sub r4, r0, r3 + sub r6, r1, #16 + + +loop_32_l_c: @ /*hard coded for width=32 ,height =4,8,12*/ + ldrh r8, [r0], r1 + ldrh r9, [r0], r1 + vdup.u16 q0, r8 + ldrh r10, [r0], r1 + vst1.8 {q0}, [r4]! @ 16 bytes store + vdup.u16 q1, r9 + vst1.8 {q0}, [r4], r6 @ 16 bytes store + ldrh r11, [r0], r1 + vst1.8 {q1}, [r4]! @ 16 bytes store + vdup.u16 q2, r10 + vst1.8 {q1}, [r4], r6 @ 16 bytes store + vdup.u16 q3, r11 + vst1.8 {q2}, [r4]! @ 16 bytes store + vst1.8 {q2}, [r4], r6 @ 16 bytes store + subs r2, r2, #4 + vst1.8 {q3}, [r4]! 
@ 16 bytes store
+    vst1.8 {q3}, [r4], r6 @ 16 bytes store
+
+
+    @ Z still reflects the "subs r2, r2, #4" issued before the last two stores
+    beq end_func_l_c @/* Branching when ht=4*/
+
+    @ second group of 4 rows: one interleaved 16-bit sample pair per row is
+    @ broadcast and written as two 16-byte stores (32 bytes per row)
+    ldrh r8, [r0], r1
+    ldrh r9, [r0], r1
+    vdup.u16 q0, r8
+    ldrh r10, [r0], r1
+    vst1.8 {q0}, [r4]! @ 16 bytes store
+    vdup.u16 q1, r9
+    vst1.8 {q0}, [r4], r6
+    ldrh r11, [r0], r1
+    vst1.8 {q1}, [r4]! @ 16 bytes store
+    vdup.u16 q2, r10
+    vst1.8 {q1}, [r4], r6 @ 16 bytes store
+    vdup.u16 q3, r11
+    vst1.8 {q2}, [r4]! @ 16 bytes store
+    vst1.8 {q2}, [r4], r6 @ 16 bytes store
+    subs r2, r2, #4
+    vst1.8 {q3}, [r4]! @ 16 bytes store
+    vst1.8 {q3}, [r4], r6 @ 16 bytes store
+
+    beq end_func_l_c @/* Branching when ht=8*/
+    bne loop_32_l_c  @ taken whenever the beq above is not: larger heights loop back to the top
+
+    @ NOTE(review): unreachable. One of the two branches above is always
+    @ taken, so this third unrolled copy (which continues up to
+    @ end_func_l_c) never executes.
+    ldrh r8, [r0], r1
+    ldrh r9, [r0], r1
+    vdup.u16 q0, r8
+    ldrh r10, [r0], r1
+    vst1.8 {q0}, [r4]! @ 16 bytes store
+    vdup.u16 q1, r9
+    vst1.8 {q0}, [r4], r6
+    ldrh r11, [r0], r1
+    vst1.8 {q1}, [r4]! @ 16 bytes store
+    vdup.u16 q2, r10
+    vst1.8 {q1}, [r4], r6 @ 16 bytes store
+    vdup.u16 q3, r11
+    vst1.8 {q2}, [r4]! @ 16 bytes store
+    vst1.8 {q2}, [r4], r6 @ 16 bytes store
+    vst1.8 {q3}, [r4]!
@ 16 bytes store
+    vst1.8 {q3}, [r4], r6 @ 16 bytes store
+
+end_func_l_c:
+    ldmfd sp!, {r4-r11, pc} @Reload the registers from SP
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Padding (luma block) at the right of a 2d array
+@*
+@* @par Description:
+@* For every row, the byte just before pu1_src (the last pixel of the row)
+@* is replicated pad_size times starting at pu1_src.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the first byte to be padded (one past the row's last pixel)
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] ht
+@* integer height of the array; rows are processed 8 at a time and the
+@* loops end only when the count reaches zero, so ht must be a non-zero
+@* multiple of 8
+@*
+@* @param[in] pad_size
+@* integer padding size; 16 selects the 16-byte path, any other value
+@* takes the path that writes 32 bytes per row
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@#if PAD_RIGHT_LUMA == C
+@void ih264_pad_right_luma(UWORD8 *pu1_src,
+@                          WORD32 src_strd,
+@                          WORD32 ht,
+@                          WORD32 pad_size)
+@{
+@    WORD32 row;
+@
+@    for(row = 0; row < ht; row++)
+@    {
+@        memset(pu1_src, *(pu1_src -1), pad_size);
+@
+@        pu1_src += src_strd;
+@    }
+@}
+@
+@   r0 => *pu1_src
+@   r1 => src_strd
+@   r2 => ht
+@   r3 => pad_size
+
+
+
+    .global ih264_pad_right_luma_a9q
+
+ih264_pad_right_luma_a9q:
+
+    stmfd sp!, {r4-r11, lr} @ save callee-saved registers
+
+    mov r4, r0              @ r4 = destination (start of the padding area)
+    sub r6, r1, #16         @ r6 = stride minus 16, used after the first of two 16-byte stores
+    sub r0, r0, #1          @ r0 = last real pixel of the row, the value to replicate
+    subs r5, r3, #16        @ pad_size == 16 -> loop_16_r, anything else -> loop_32_r
+    bne loop_32_r           @ was "loop_32": that label belongs to ih264_pad_left_luma_a9q
+                            @ and only worked because both loops and epilogues match
+loop_16_r: @ /*hard coded for width=16 ,height =8,16*/
+    ldrb r8, [r0], r1
+    ldrb r9, [r0], r1
+    vdup.u8 q0, r8
+    ldrb r10, [r0], r1
+    vst1.8 {q0}, [r4], r1 @ 16 bytes store
+    vdup.u8 q1, r9
+    vst1.8 {q1}, [r4], r1 @ 16 bytes store
+    ldrb r11, [r0], r1
+    vdup.u8 q2, r10
+    vdup.u8 q3, r11
+    vst1.8 {q2}, [r4], r1 @ 16 bytes store
+    ldrb r8, [r0], r1
+    vst1.8 {q3}, [r4], r1 @ 16 bytes store
+    ldrb r9, [r0], r1
+    vdup.u8 q0, r8
+    ldrb r10, [r0], r1
+    vst1.8 {q0}, [r4], r1 @ 16 bytes store
+    vdup.u8 q1,
r9 + ldrb r11, [r0], r1 + vst1.8 {q1}, [r4], r1 @ 16 bytes store + vdup.u8 q2, r10 + vdup.u8 q3, r11 + subs r2, r2, #8 + vst1.8 {q2}, [r4], r1 @ 16 bytes store + vst1.8 {q3}, [r4], r1 @ 16 bytes store + bne loop_16_r + b end_func_r + +loop_32_r: @ /*hard coded for width=32 ,height =8,16*/ + ldrb r8, [r0], r1 + ldrb r9, [r0], r1 + vdup.u8 q0, r8 + ldrb r10, [r0], r1 + vst1.8 {q0}, [r4]! @ 16 bytes store + vdup.u8 q1, r9 + vst1.8 {q0}, [r4], r6 + vst1.8 {q1}, [r4]! @ 16 bytes store + vdup.u8 q2, r10 + vst1.8 {q1}, [r4], r6 @ 16 bytes store + ldrb r11, [r0], r1 + vst1.8 {q2}, [r4]! @ 16 bytes store + vdup.u8 q3, r11 + vst1.8 {q2}, [r4], r6 @ 16 bytes store + ldrb r8, [r0], r1 + vst1.8 {q3}, [r4]! @ 16 bytes store + ldrb r9, [r0], r1 + vdup.u8 q0, r8 + vst1.8 {q3}, [r4], r6 @ 16 bytes store + ldrb r10, [r0], r1 + vst1.8 {q0}, [r4]! @ 16 bytes store + vdup.u8 q1, r9 + vst1.8 {q0}, [r4], r6 @ 16 bytes store + ldrb r11, [r0], r1 + vst1.8 {q1}, [r4]! @ 16 bytes store + vdup.u8 q2, r10 + vst1.8 {q1}, [r4], r6 @ 16 bytes store + vst1.8 {q2}, [r4]! @ 16 bytes store + vdup.u8 q3, r11 + vst1.8 {q2}, [r4], r6 @ 16 bytes store + subs r2, r2, #8 + vst1.8 {q3}, [r4]! 
@ 16 bytes store + vst1.8 {q3}, [r4], r6 @ 16 bytes store + bne loop_32_r + + + +end_func_r: + ldmfd sp!, {r4-r11, pc} @Reload the registers from SP + + + + + +@/** +@******************************************************************************* +@* +@* @brief +@;* Padding (chroma block) at the right of a 2d array +@* +@* @par Description: +@* The right column of a 2d array is replicated for pad_size times at the right +@* +@* +@* @param[in] pu1_src +@;* UWORD8 pointer to the source +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] ht +@;* integer height of the array +@* +@* @param[in] wd +@* integer width of the array (each colour component) +@* +@* @param[in] pad_size +@* integer -padding size of the array +@* +@* @param[in] ht +@;* integer height of the array +@* +@* @param[in] wd +@* integer width of the array +@* +@* @returns +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ +@#if PAD_RIGHT_CHROMA == C +@void ih264_pad_right_chroma(UWORD8 *pu1_src, +@ WORD32 src_strd, +@ WORD32 ht, +@ WORD32 pad_size) +@ r0 => *pu1_src +@ r1 => src_strd +@ r2 => ht +@ r3 => pad_size + + + + .global ih264_pad_right_chroma_a9q + +ih264_pad_right_chroma_a9q: + + stmfd sp!, {r4-r11, lr} @stack stores the values of the arguments + + mov r4, r0 + sub r6, r1, #16 + sub r0, r0, #2 +loop_32_r_c: @ /*hard coded for width=32 ,height =8,4*/ + ldrh r8, [r0], r1 + ldrh r9, [r0], r1 + vdup.u16 q0, r8 + ldrh r10, [r0], r1 + vst1.8 {q0}, [r4]! @ 16 bytes store + vdup.u16 q1, r9 + vst1.8 {q0}, [r4], r6 + vst1.8 {q1}, [r4]! @ 16 bytes store + vdup.u16 q2, r10 + vst1.8 {q1}, [r4], r6 @ 16 bytes store + subs r2, r2, #4 + ldrh r11, [r0], r1 + vst1.8 {q2}, [r4]! @ 16 bytes store + vdup.u16 q3, r11 + vst1.8 {q2}, [r4], r6 @ 16 bytes store + vst1.8 {q3}, [r4]! 
@ 16 bytes store
+    vst1.8 {q3}, [r4], r6 @ 16 bytes store
+
+    @ Z still reflects the "subs r2, r2, #4" issued before the stores above
+    beq end_func_r_c @/* Branching when ht=4*/
+
+    @ second group of 4 rows: one interleaved 16-bit sample pair per row is
+    @ broadcast and written as two 16-byte stores (32 bytes per row)
+    ldrh r8, [r0], r1
+    vdup.u16 q0, r8
+    ldrh r9, [r0], r1
+    ldrh r10, [r0], r1
+    vst1.8 {q0}, [r4]! @ 16 bytes store
+    vdup.u16 q1, r9
+    vst1.8 {q0}, [r4], r6 @ 16 bytes store
+    ldrh r11, [r0], r1
+    vst1.8 {q1}, [r4]! @ 16 bytes store
+    vdup.u16 q2, r10
+    vst1.8 {q1}, [r4], r6 @ 16 bytes store
+    vst1.8 {q2}, [r4]! @ 16 bytes store
+    vdup.u16 q3, r11
+    vst1.8 {q2}, [r4], r6 @ 16 bytes store
+    subs r2, r2, #4
+    vst1.8 {q3}, [r4]! @ 16 bytes store
+    vst1.8 {q3}, [r4], r6 @ 16 bytes store
+
+    @ Rows remaining -> go round again; ht == 8 falls through to the epilogue.
+    @ (The former "beq end / bne loop" pair made a third unrolled copy that
+    @ followed it unreachable; that copy has been dropped.)
+    bne loop_32_r_c
+
+end_func_r_c:
+    ldmfd sp!, {r4-r11, pc} @Reload the registers from SP
+
+
+
+
+
diff --git a/common/arm/ih264_platform_macros.h b/common/arm/ih264_platform_macros.h
new file mode 100755
index 0000000..1f67403
--- /dev/null
+++ b/common/arm/ih264_platform_macros.h
@@ -0,0 +1,152 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_platform_macros.h +* +* @brief +* Platform specific Macro definitions used in the codec +* +* @author +* Ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _IHEVC_PLATFORM_MACROS_H_ +#define _IHEVC_PLATFORM_MACROS_H_ + +#ifndef ARMV8 +void ih264_arm_dsb(void); + +#define DATA_SYNC() ih264_arm_dsb() +static __inline WORD32 CLIP_U8(WORD32 x) +{ + asm("usat %0, #8, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_S8(WORD32 x) +{ + asm("ssat %0, #8, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_U10(WORD32 x) +{ + asm("usat %0, #10, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_S10(WORD32 x) +{ + asm("ssat %0, #10, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_U12(WORD32 x) +{ + asm("usat %0, #12, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_S12(WORD32 x) +{ + asm("ssat %0, #12, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_U16(WORD32 x) +{ + asm("usat %0, #16, %1" : "=r"(x) : "r"(x)); + return x; +} +static __inline WORD32 CLIP_S16(WORD32 x) +{ + asm("ssat %0, #16, %1" : "=r"(x) : "r"(x)); + return x; +} + + +static __inline UWORD32 ITT_BIG_ENDIAN(UWORD32 x) +{ + 
asm("rev %0, %1" : "=r"(x) : "r"(x)); + return x; +} +#else +#define DATA_SYNC() ; + +#define CLIP_U8(x) CLIP3(0, 255, (x)) +#define CLIP_S8(x) CLIP3(-128, 127, (x)) + +#define CLIP_U10(x) CLIP3(0, 1023, (x)) +#define CLIP_S10(x) CLIP3(-512, 511, (x)) + +#define CLIP_U12(x) CLIP3(0, 4095, (x)) +#define CLIP_S12(x) CLIP3(-2048, 2047, (x)) + +#define CLIP_U16(x) CLIP3(0, 65535, (x)) +#define CLIP_S16(x) CLIP3(-32768, 32767, (x)) + +#define ITT_BIG_ENDIAN(x) ((x & 0x000000ff) << 24) | \ + ((x & 0x0000ff00) << 8) | \ + ((x & 0x00ff0000) >> 8) | \ + ((UWORD32)x >> 24); +#endif + +#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0) +#define SHR(x,y) (((y) < 32) ? ((x) >> (y)) : 0) + +#define SHR_NEG(val,shift) ((shift>0)?(val>>shift):(val<<(-shift))) +#define SHL_NEG(val,shift) ((shift<0)?(val>>(-shift)):(val< d0[1:31] d0[32:64] + @ a b c d # # # # + vld1.u8 d30, [r0], r3 @load 4 pixels of row1 current buffer + vld1.u8 d31, [r1], r4 @load 4 pixels of row1 pred buffer + @ data is like 1a -> q4[1:63] q4[64:148] + @ d8[1:63] d9[1:63] + @ a b c d # # # # + + vld1.u8 d28, [r0], r3 @load row 2 of src to d28[0] + vld1.u8 d29, [r1], r4 @load row2 of pred to d29[0] + + vld1.u8 d26, [r0], r3 @load row 3 of src to d26[0] + vsubl.u8 q0, d30, d31 @curr - pred for row one + + vld1.u8 d27, [r1], r4 @load row 3of pred t0 d27[0] + vsubl.u8 q1, d28, d29 @find row 2 of src -pred to d0 + + vld1.u8 d24, [r0], r3 @load row 4 of src to d24[0] + + vld1.u8 d25, [r1], r4 @load row 4 of src tp d25[0] + vsubl.u8 q2, d26, d27 @load src-pred row 3 to d[2] + + lsl r5, r5, #2 @ multiply dst stride by since we are storing 32 bit values + ldr r6, g_scal_coff_h264_4x4_addr +4x4lbl: + add r6, r6, pc @ load the address of global array + + vsubl.u8 q3, d24, d25 @load row 4 of src - pred to q6 + + @after this + @D0 -> 1a + @D2 -> 2a + @D4 -> 3a + @D6 -> 4a + + @transpose the matrix so that we can do the horizontal transform first + @#1 #2 #3 #4 + @a b c d ---- D0 + @e f g h -----D2 + @i j k l -----D4 + @m n o p 
-----D6 + @transpose the inner 2x2 blocks + vtrn.16 d0, d2 + vld1.s16 {q10}, [r6]! @ load the scaling values 0-7; + vtrn.16 d4, d6 + @a e c g + @b f d h + @i m k o + @j n l p + vtrn.32 d0, d4 + vtrn.32 d2, d6 + @a e i m #1 -- D0 --- x4 + @b f j n #2 -- D2 --- x5 + @c g k o #3 -- D4 ----x6 + @d h l p #4 -- D6 ----x7 + + @we have loaded the residuals into the registers , now we need to add and subtract them + @let us do the horiz transform first + + vsub.s16 d5, d2, d4 @x2 = x5-x6 + vsub.s16 d7, d0, d6 @x3 = x4-x7; + + vadd.s16 d3, d2, d4 @x1 = x5+x6 + vadd.s16 d1, d0, d6 @x0 = x4+x7 + + + vshl.s16 d31, d7, #1 @ + vshl.s16 d30, d5, #1 @ + + vadd.s16 d0, d1, d3 @x0 + x1; + vsub.s16 d4, d1, d3 @x0 - x1; + + vadd.s16 d2, d31, d5 @U_SHIFT(x3,1,shft) + x2; + vsub.s16 d6, d7, d30 @x3 - U_SHIFT(x2,1,shft); + + @taking transform again so as to make do vert transform + vtrn.16 d0, d2 + vtrn.16 d4, d6 + + vtrn.32 d0, d4 + vtrn.32 d2, d6 + + @let us do vertical transform + @same code as horiz + + vadd.s16 d1, d0, d6 @x0 = x4+x7 + vadd.s16 d3, d2, d4 @x1 = x5+x6 + vsub.s16 d7, d0, d6 @x3 = x4-x7; + vsub.s16 d5, d2, d4 @x2 = x5-x6 + + +@Since we are going to do scal / quant or whatever, we are going to divide by +@a 32 bit number. 
So we have to expand the values + + @VADDL.S16 Q12,D1,D3;x0 + x1 + @VSUBL.S16 Q14,D1,D3;x0 - x1 + + @VSHL.S16 D8,D5,#1; + @VSHL.S16 D9,D7,#1; + + @VADDL.S16 Q13,D9,D5 ; + x2 + @VSUBL.S16 Q15,D7,D8 ;x3 - U_SHIFT(x2,1,shft) + +@scaling follows + +@now we need to do the scaling,so load the scaling matrix +@mutliplying by the scaling coeffient; store the results from q5-q8 ; + + vadd.s16 d24, d3, d1 @x4 = x0 + x1 + vsub.s16 d28, d1, d3 @x6 = x0 - x1 + + vshl.s16 d0, d7, #1 @ U_SHIFT(x3,1,shft) + vmull.s16 q4, d24, d20 @x4*s0 + + vshl.s16 d2, d5, #1 @ U_SHIFT(x2,1,shft) + + vadd.s16 d26, d0, d5 @x5 = U_SHIFT(x3,1,shft) + x2 + vmull.s16 q5, d26, d21 @x5*s1 + + vst1.s32 {q4}, [r2], r5 @save 4 pixels of row1 current buffer and increment pointer by stride + + vld1.s16 {q10}, [r6] @load 8-16 scaling coeffcients + + vsub.s16 d30, d7, d2 @x7 = x3 - U_SHIFT(x2,1,shft) + + vmull.s16 q6, d28, d20 @x6*s2 + vst1.s32 {q5}, [r2], r5 + + vmull.s16 q7, d30, d21 @x7*s3 + + + vst1.s32 {q6}, [r2], r5 + vst1.s32 {q7}, [r2] + + pop {r4-r12, pc} @pop back all variables + + + + +@***************************************************************************** +@* Function Name : ih264_resi_trans_8x8_a9 +@* Description : This function does cf8 followd by an approximate normalization of H264 +@* +@* Arguments : +@* R0 :pointer to src buffer +@ R1 :pointer to pred buffer +@ R2 :pointer to dst buffer +@ R3 :src_stride +@ STACk :pred_stride,dst_st +@* +@* +@* Values Returned : NONE +@* +@* Register Usage : +@* Stack Usage : +@* Cycles : Around +@* Interruptiaility : Interruptable +@* +@* Known Limitations +@* \Assumptions : +@* +@* Revision History : +@* DD MM YYYY Author(s) Changes +@* 30 12 2009 100633 First version +@* +@***************************************************************************** + + + .global ih264_resi_trans_8x8_a9 + .extern g_scal_coff_h264_8x8 +g_scal_coff_h264_8x8_addr: + .long g_scal_coff_h264_8x8 - 8x8lbl - 8 + + +ih264_resi_trans_8x8_a9: + + @R0 :pointer to src buffer + 
@R1 :pointer to pred buffer
+    @R2 :pointer to dst buffer
+    @R3 :src_stride
+    @STACk :pred_stride,dst_stride
+
+    push {r4-r12, lr} @push all the variables first
+
+    mov r6, sp
+    add r6, r6, #40 @ skip the 10 registers just pushed: r6 -> first stack argument
+    ldmfd r6, {r4-r5} @load the strides into registers
+    @R4 pred_stride
+    @R5 dst_stride
+
+    @ q4-q7 (d8-d15) are used as scratch below and are callee-saved under
+    @ AAPCS, so preserve them. Done after the stack arguments are read so
+    @ the #40 offset above stays valid.
+    vpush {d8-d15}
+
+    @ outputs are stored as 32-bit values (vst1.s32 below); the dst stride
+    @ is scaled by 4 further down (lsl r5, r5, #2)
+    @--------------------function loading done------------------------
+
+    @lets find residual
+    @ each vld1.u8 below loads one 8-pixel row
+    vld1.u8 d30, [r0], r3 @load 8 pixels of row1 current buffer
+    vld1.u8 d31, [r1], r4 @load 8 pixels of row1 pred buffer
+
+    vld1.u8 d28, [r0], r3 @src rw2
+    vld1.u8 d29, [r1], r4 @pred rw2
+    vsubl.u8 q0, d30, d31 @src-pred rw1
+
+    vld1.u8 d26, [r0], r3
+    vld1.u8 d27, [r1], r4
+    vsubl.u8 q1, d28, d29
+
+    vld1.u8 d24, [r0], r3
+    vld1.u8 d25, [r1], r4
+    vsubl.u8 q2, d26, d27
+
+    vld1.u8 d22, [r0], r3
+    vld1.u8 d23, [r1], r4
+    vsubl.u8 q3, d24, d25
+
+    vld1.u8 d20, [r0], r3
+    vld1.u8 d21, [r1], r4
+    vsubl.u8 q4, d22, d23
+
+    vld1.u8 d18, [r0], r3
+    vld1.u8 d19, [r1], r4
+    vsubl.u8 q5, d20, d21
+
+    vld1.u8 d16, [r0], r3
+    vld1.u8 d17, [r1], r4
+    vsubl.u8 q6, d18, d19
+
+    lsl r5, r5, #2 @ dst stride * 4
+
+
+    vsubl.u8 q7, d16, d17
+
+    @after this
+    @Q0 -> 1a
+    @Q1 -> 2a
+    @Q2 -> 3a
+    @Q3 -> 4a
+    @Q4 -> 5a
+    @Q5 -> 6a
+    @Q6 -> 7a
+    @Q7 -> 8a
+
+    @transpose the matrix so that we can do the horizontal transform first
+
+    @transpose the inner 2x2 blocks
+    vtrn.16 q0, q1
+    vtrn.16 q2, q3
+    vtrn.16 q4, q5
+    vtrn.16 q6, q7
+
+    @transpose the inner 4x4 blocks
+    vtrn.32 q0, q2
+    vtrn.32 q1, q3
+
+    vtrn.32 q4, q6
+    vtrn.32 q5, q7
+
+    @transpose the outer 8x8 blocks
+    vswp d1, d8
+    vswp d7, d14
+    vswp d3, d10
+    vswp d5, d12
+    @transpose done
+
+@@this point we will have data in Q0-Q7
+@Q7 will be populated within 2 clock cycle
+@all others are available @ this clock cycle
+
+    @we have loaded the residuals into the registers , now we need to add and subtract them
+    @let us do the horiz transform first
+
+    vadd.s16 q8, q0, q7 @ a0 = r0 + r7;
+    vadd.s16 q9, q1, q6 @ a1 = r1 + r6;
+    vadd.s16 q10, q2, q5 @ a2 = r2 + r5;
+    vadd.s16 q11, q3, q4 @ a3 = r3 + r4;
+
+    vsub.s16 q12, q0, q7 @ b0 = r0 - r7;
+    vsub.s16 q13, q1, q6 @ b1 = r1 - r6;
+    vsub.s16 q15, q3, q4 @ b3 = r3 - r4;
+    vsub.s16 q14, q2, q5 @ b2 = r2 - r5;
+
+    vadd.s16 q1, q8, q11 @ a4 = a0 + a3;
+    vadd.s16 q3, q9, q10 @ a5 = a1 + a2;
+    vsub.s16 q7, q9, q10 @ a7 = a1 - a2;
+    vsub.s16 q5, q8, q11 @ a6 = a0 - a3;
+
+    ldr r6, g_scal_coff_h264_8x8_addr
+8x8lbl:
+    add r6, r6, pc @ load the address of global array
+
+    vadd.s16 q0, q1, q3 @ pi2_res[0] = a4 + a5;
+    vshr.s16 q8, q7, #1 @ pi2_res[2] = a6 + D_SHIFT(a7,1,shft);
+
+    vsub.s16 q4, q1, q3 @ pi2_res[4] = a4 - a5;
+
+    vadd.s16 q2, q5, q8 @
+
+
+    vshr.s16 q9, q5, #1 @ pi2_res[6] = D_SHIFT(a6,1,shft) - a7;
+    vsub.s16 q6, q9, q7 @
+
+@do not change Q0,Q2.Q4,Q6 they contain results
+@Q1,Q3,Q5,Q7 TO STORE RESULTS
+@Q8 Q9 Q10 Q11 USE @WILL
+
+    vshr.s16 q1, q12, #1 @ D_SHIFT(b0,1,shft)
+    vshr.s16 q3, q13, #1 @ D_SHIFT(b1,1,shft)
+    vshr.s16 q5, q14, #1 @ D_SHIFT(b2,1,shft)
+    vshr.s16 q7, q15, #1 @ D_SHIFT(b3,1,shft)
+
+    vadd.s16 q8, q1, q12 @ (D_SHIFT(b0,1,shft) + b0);
+    vadd.s16 q9, q3, q13 @ (D_SHIFT(b1,1,shft) + b1);
+    vadd.s16 q10, q5, q14 @ (D_SHIFT(b2,1,shft) + b2);
+    vadd.s16 q11, q7, q15 @ (D_SHIFT(b3,1,shft) + b3);
+
+    vadd.s16 q1, q14, q8 @ b2 + (D_SHIFT(b0,1,shft) + b0);
+    vsub.s16 q5, q15, q9 @ b3 - (D_SHIFT(b1,1,shft) + b1);
+    vadd.s16 q3, q15, q10 @ b3 + (D_SHIFT(b2,1,shft) + b2);
+    vsub.s16 q7, q11, q14 @ -b2 + (D_SHIFT(b3,1,shft) + b3);
+
+    vadd.s16 q8, q13, q1 @ b4 = b1 + b2 + (D_SHIFT(b0,1,shft) + b0);
+    vsub.s16 q9, q12, q3 @ b5 = b0 - b3 - (D_SHIFT(b2,1,shft) + b2);
+    vadd.s16 q10, q12, q5 @ b6 = b0 + b3 - (D_SHIFT(b1,1,shft) + b1);
+    vadd.s16 q11, q13, q7 @ b7 = b1 - b2 + (D_SHIFT(b3,1,shft) + b3);
+
+    vshr.s16 q15, q8, #2 @ D_SHIFT(b4,2,shft)
+    vshr.s16 q14, q9, #2 @ D_SHIFT(b5,2,shft);
+    vshr.s16 q13, q10, #2 @ D_SHIFT(b6,2,shft);
+    vshr.s16 q12, q11, #2 @ D_SHIFT(b7,2,shft);
+
+
+    vadd.s16 q3, q9, q13 @ pi2_res[3] = b5 + D_SHIFT(b6,2,shft);
+    vsub.s16 q5, q10, q14 @ pi2_res[5] = b6 - D_SHIFT(b5,2,shft);
+    vadd.s16 q1, q8, q12 @ pi2_res[1] = b4 + D_SHIFT(b7,2,shft);
+    vsub.s16 q7, q15, q11 @ pi2_res[7] = D_SHIFT(b4,2,shft) - b7;
+
+    @------------horiz transform done-------------------------
+    @results are in Q0-Q7
+    @all other neon registers can be used at will
+
+@doing vertical transform
+@same sequence as the horiz transform above
+
+    @transpose the inner 2x2 blocks
+    vtrn.16 q0, q1
+    vtrn.16 q2, q3
+    vtrn.16 q4, q5
+    vtrn.16 q6, q7
+
+    @transpose the inner 4x4 blocks
+    vtrn.32 q0, q2
+    vtrn.32 q1, q3
+
+    vtrn.32 q4, q6
+    vtrn.32 q5, q7
+
+    @transpose the outer 8x8 blocks
+    vswp d1, d8
+    vswp d3, d10
+    vswp d5, d12
+    vswp d7, d14
+
+    @transpose done
+
+    vadd.s16 q8, q0, q7 @ a0 = r0 + r7;
+    vadd.s16 q9, q1, q6 @ a1 = r1 + r6;
+    vadd.s16 q10, q2, q5 @ a2 = r2 + r5;
+    vadd.s16 q11, q3, q4 @ a3 = r3 + r4;
+
+    vsub.s16 q12, q0, q7 @ b0 = r0 - r7;
+    vsub.s16 q13, q1, q6 @ b1 = r1 - r6;
+    vsub.s16 q14, q2, q5 @ b2 = r2 - r5;
+    vsub.s16 q15, q3, q4 @ b3 = r3 - r4;
+
+    vadd.s16 q1, q8, q11 @ a4 = a0 + a3;
+    vadd.s16 q3, q9, q10 @ a5 = a1 + a2;
+    vsub.s16 q5, q8, q11 @ a6 = a0 - a3;
+    vsub.s16 q7, q9, q10 @ a7 = a1 - a2;
+
+
+    vadd.s16 q0, q1, q3 @ pi2_res[0] = a4 + a5;
+
+    vshr.s16 q8, q7, #1 @ pi2_res[2] = a6 + D_SHIFT(a7,1,shft);
+    @DSHIFT_TO_0 Q8,Q7,#1,#0
+    vadd.s16 q2, q5, q8 @
+
+    vsub.s16 q4, q1, q3 @ pi2_res[4] = a4 - a5;
+
+    vshr.s16 q9, q5, #1 @ pi2_res[6] = D_SHIFT(a6,1,shft) - a7;
+    vsub.s16 q6, q9, q7 @
+
+@do not change Q0,Q2.Q4,Q6 they contain results
+@Q1,Q3,Q5,Q7 TO STORE RESULTS
+@Q8 Q9 Q10 Q11 USE @WILL
+
+    vshr.s16 q1, q12, #1 @ D_SHIFT(b0,1,shft)
+    vshr.s16 q3, q13, #1 @ D_SHIFT(b1,1,shft)
+    vshr.s16 q5, q14, #1 @ D_SHIFT(b2,1,shft)
+    vshr.s16 q7, q15, #1 @ D_SHIFT(b3,1,shft)
+
+
+    vadd.s16 q8, q1, q12 @ (D_SHIFT(b0,1,shft) + b0);
+    vadd.s16 q9, q3, q13 @ (D_SHIFT(b1,1,shft) + b1);
+    vadd.s16 q10, q5, q14 @ (D_SHIFT(b2,1,shft) + b2);
+    vadd.s16 q11, q7, q15 @ (D_SHIFT(b3,1,shft) + b3);
+
+    vadd.s16 q1, q14, q8 @ b2 + (D_SHIFT(b0,1,shft) + b0);
+    vadd.s16 q3, q15, q10 @ b3 + (D_SHIFT(b2,1,shft) + b2);
+    vsub.s16 q5, q15, q9 @ b3 - (D_SHIFT(b1,1,shft) + b1);
+    vsub.s16 q7, q11, q14 @ -b2 + (D_SHIFT(b3,1,shft) + b3);
+
+    vadd.s16 q8, q13, q1 @ b4 = b1 + b2 + (D_SHIFT(b0,1,shft) + b0);
+    vsub.s16 q9, q12, q3 @ b5 = b0 - b3 - (D_SHIFT(b2,1,shft) + b2);
+    vadd.s16 q10, q12, q5 @ b6 = b0 + b3 - (D_SHIFT(b1,1,shft) + b1);
+    vadd.s16 q11, q13, q7 @ b7 = b1 - b2 + (D_SHIFT(b3,1,shft) + b3);
+
+    vshr.s16 q15, q8, #2 @ D_SHIFT(b4,2,shft)
+    vshr.s16 q14, q9, #2 @ D_SHIFT(b5,2,shft);
+    vshr.s16 q13, q10, #2 @ D_SHIFT(b6,2,shft);
+    vshr.s16 q12, q11, #2 @ D_SHIFT(b7,2,shft);
+
+
+@the transform results stay 16 bit here; they are widened to 32 bit by the vmull in the scaling step
+    vsub.s16 q5, q10, q14 @ pi2_res[5] = b6 - D_SHIFT(b5,2,shft);
+    vsub.s16 q7, q15, q11 @ pi2_res[7] = D_SHIFT(b4,2,shft) - b7;
+    vadd.s16 q3, q9, q13 @ pi2_res[3] = b5 + D_SHIFT(b6,2,shft);
+    vadd.s16 q1, q8, q12 @ pi2_res[1] = b4 + D_SHIFT(b7,2,shft);
+
+    @------------vert transform done-------------------------
+    @results are in Q0-Q7
+    @all other neon registers can be used at will
+
+    @scaling
+    @since the 8x8 scaling matrix repeats in 1x4,1x4 block ,
+    @we need only load 4 values for each row and in total 4 rows
+    vld1.s16 {q14-q15}, [r6] @ d28..d31 = the four 4-entry scale rows
+
+    @since we need to get a 32 bit o/p for two 16 bit multiplications
+    @we need a VMULL instruction
+    @ rows 1/5 use d28, rows 2/6 use d29, rows 3/7 use d30, rows 4/8 use d31
+@-----------------------------first and second row
+
+    vmull.s16 q8, d0, d28 @scale the first row first 4 elem
+    vmull.s16 q9, d28, d1 @scale the first row last 4 elem
+
+    vmull.s16 q10, d2, d29 @ scale second row first 4 elem
+    vmull.s16 q11, d29, d3 @scale the second row last 4 elem
+    vmull.s16 q12, d4, d30 @scale third row first 4 elem
+
+    vst1.s32 {q8, q9}, [r2], r5 @ write the first row complete
+
+    vmull.s16 q13, d30, d5 @scale the third row last 4 elem
+    vmull.s16 q8, d6, d31 @scale the fourth row first 4 elem
+
+
+    vst1.s32 {q10, q11}, [r2], r5 @store the second row complete
+
+@------------------------------- 3rd and 4th row
+
+    vmull.s16 q9, d31, d7 @scale the fourth row last 4 elem
+
+    vst1.s32 {q12, q13}, [r2], r5 @store the third row complete
+
+    vmull.s16 q10, d8, d28 @scale the 5th row first 4 elem
+    vmull.s16 q11, d28, d9 @scale the 5th row last 4 elem
+
+    vmull.s16 q12, d10, d29 @scale the 6th row first 4 elem
+
+
+    vst1.s32 {q8, q9}, [r2], r5 @store the fourth row
+
+@--------------------------------5th and 6th row
+
+    vmull.s16 q13, d29, d11 @scale the 6th row last 4 elem
+
+    vmull.s16 q8, d12, d30 @scale the 7th row first 4 elem
+
+    vst1.s32 {q10, q11}, [r2], r5 @store the 5th row
+
+    vmull.s16 q9, d30, d13 @scale the 7th row last 4 elem
+    vmull.s16 q10, d14, d31 @scale the 8th row first 4 elem
+
+
+    vst1.s32 {q12, q13}, [r2], r5 @store the 6th row
+
+@----------------------------------7th and 8th row
+    vmull.s16 q11, d31, d15 @scale the 8th row last 4 elem
+
+    vst1.s32 {q8, q9}, [r2], r5 @store 7th row
+    vst1.s32 {q10, q11}, [r2], r5 @store 8th row
+
+@----------------------------------done writing
+
+    vpop {d8-d15} @ restore the callee-saved NEON registers saved in the prologue
+    pop {r4-r12, pc} @pop back all variables
+
+
+
+
+
+
diff --git a/common/arm/ih264_resi_trans_quant_a9.s b/common/arm/ih264_resi_trans_quant_a9.s
new file mode 100755
index 0000000..caf362e
--- /dev/null
+++ b/common/arm/ih264_resi_trans_quant_a9.s
@@ -0,0 +1,694 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@*******************************************************************************
+@* @file
+@*  ih264_resi_trans_quant_a9.s
+@*
+@* @brief
+@*  Contains function definitions for residue computation, forward transform and quantization
+@*
+@* @author
+@*  Ittiam
+@*
+@* @par List of Functions:
+@*  ih264_resi_trans_quant_4x4_a9
+@*  ih264_resi_trans_quant_8x8_a9 (listed here, but no such label is defined in this file)
+@*  ih264_resi_trans_quant_chroma_4x4_a9
+@*  ih264_hadamard_quant_4x4_a9
+@*  ih264_hadamard_quant_2x2_uv_a9
+@*
+@* @remarks
+@*  None
+@*
+@*******************************************************************************
+
+
+.text
+.p2align 2
+@*****************************************************************************
+@*
+@* Function Name     : ih264_resi_trans_quant_4x4_a9
+@* Description       : Residue (src - pred), forward 4x4 core transform (cf4 of H264) and quantization
+@*
+@* Arguments         : R0 :pointer to src buffer
+@                      R1 :pointer to pred buffer
+@                      R2 :pointer to dst buffer
+@                      R3 :source stride
+@                      STACK : pred stride,
+@                              pointer to scaling matrix,
+@                              pointer to threshold matrix (loaded into r6 but not referenced below),
+@                              qbits,
+@                              rounding factor,
+@                              pointer to store nnz,
+@                              pointer to store non quantized dc value
+@                              (seven words, read in this order; no dst stride is read)
+@ Values Returned   : NONE
+@
+@ Register Usage    :
+@ Stack Usage       : 104 bytes (40 for r4-r12 and lr, 64 for d8-d15)
+@ Cycles            : Around
+@ Interruptibility  : Interruptible
+@
+@ Known Limitations
+@ \Assumptions      :
+@
+@ Revision History  :
+@ DD MM YYYY Author(s) Changes
+@ 1 12 2013 100633 First version
+@ 20 1 2014 100633 Changes the API, Optimization
+@
+@*****************************************************************************
+
+    .global ih264_resi_trans_quant_4x4_a9
+ih264_resi_trans_quant_4x4_a9:
+
+    @R0 :pointer to src buffer
+    @R1 :pointer to pred buffer
+    @R2 :pointer to dst buffer
+    @R3 :Source stride
+    @STACK :pred stride
+    @ :scale matrix,
+    @ :threshold matrix
+    @ :qbits
+    @ :round factor
+    @ :nnz, then the alternate dc address
+
+    push {r4-r12, lr}                   @save r4-r12 and lr (10 words = 40 bytes)
+
+    add r11, sp, #40                    @r11 = address of the first stack argument (just above the 40 bytes pushed)
+    ldmfd r11, {r4-r10}                 @load the seven stack arguments into r4-r10 (r11 is not written back)
+
+    @R0 :pointer to src buffer
+    @R1 :pointer to pred buffer
+    @R2 :pointer to dst buffer
+    @R3 :Source stride
+    @R4 :Pred stride
+    @R5 :scale matrix,
+    @R6 :threshold matrix
+    @R7 :qbits
+    @R8 :round factor
+    @R9 :nnz (R10 :alternate dc address)
+
+    vpush {d8-d15}
+
+    mov r11, #0
+    sub r7, r11, r7                     @r7 = -qbits, so that VSHL by this count shifts right
+
+    @------------Function Loading done----------------;
+
+    vld1.u8 d30, [r0], r3               @load first 8 pix src row 1 (only the low 4 residues are used below)
+
+    vld1.u8 d31, [r1], r4               @load first 8 pix pred row 1
+
+    vld1.u8 d28, [r0], r3               @load first 8 pix src row 2
+
+    vld1.u8 d29, [r1], r4               @load first 8 pix pred row 2
+
+    vld1.u8 d26, [r0], r3               @load first 8 pix src row 3
+
+    vld1.u8 d27, [r1], r4               @load first 8 pix pred row 3
+    vsubl.u8 q0, d30, d31               @find residue row 1
+
+    vld1.u8 d24, [r0], r3               @load first 8 pix src row 4
+
+    vld1.u8 d25, [r1], r4               @load first 8 pix pred row 4
+    vsubl.u8 q1, d28, d29               @find residue row 2
+
+    vsubl.u8 q2, d26, d27               @find residue row 3
+    vsubl.u8 q3, d24, d25               @find residue row 4
+
+    vtrn.16 d0, d2                      @T12
+    vtrn.16 d4, d6                      @T34
+    vtrn.32 d0, d4                      @T13
+    vtrn.32 d2, d6                      @T24
+
+    vadd.s16 d8 , d0, d6                @x0 = x4+x7
+    vadd.s16 d9 , d2, d4                @x1 = x5+x6
+    vsub.s16 d10, d2, d4                @x2 = x5-x6
+    vsub.s16 d11, d0, d6                @x3 = x4-x7
+
+    vshl.s16 d12, d10, #1               @U_SHIFT(x2,1,shft)
+    vshl.s16 d13, d11, #1               @U_SHIFT(x3,1,shft)
+
+    vadd.s16 d14, d8, d9                @x4 = x0 + x1;
+    vsub.s16 d16, d8, d9                @x6 = x0 - x1;
+    vadd.s16 d15, d13, d10              @x5 = U_SHIFT(x3,1,shft) + x2;
+    vsub.s16 d17, d11, d12              @x7 = x3 - U_SHIFT(x2,1,shft);
+
+    @transpose again so that the same butterfly performs the vertical transform
+    vtrn.16 d14, d15                    @T12
+    vtrn.16 d16, d17                    @T34
+    vtrn.32 d14, d16                    @T13
+    vtrn.32 d15, d17                    @T24
+
+    @let us do vertical transform
+    @same code as horiz
+    vadd.s16 d18, d14, d17              @x0 = x4+x7
+    vadd.s16 d19, d15, d16              @x1 = x5+x6
+    vsub.s16 d20, d15, d16              @x2 = x5-x6
+    vsub.s16 d21, d14, d17              @x3 = x4-x7
+
+    vshl.s16 d22, d20, #1               @U_SHIFT(x2,1,shft)
+    vshl.s16 d23, d21, #1               @U_SHIFT(x3,1,shft)
+
+    vdup.s32 q4, r8                     @broadcast rounding value (accumulator for row 1)
+
+    vadd.s16 d24, d18, d19              @x5 = x0 + x1;
+    vsub.s16 d26, d18, d19              @x7 = x0 - x1;
+    vadd.s16 d25, d23, d20              @x6 = U_SHIFT(x3,1,shft) + x2;
+    vsub.s16 d27, d21, d22              @x8 = x3 - U_SHIFT(x2,1,shft);
+    vdup.s32 q10, r7                    @broadcast -qbits (shift count)
+
+    vst1.s16 d24[0], [r10]              @Store the unquantized dc value to the alternate dc address
+
+@core transform is done; d24-d27 hold the four transformed rows, quantize them
+    vld1.s16 {q14-q15}, [r5]            @load the 16 scaling values
+
+    vabs.s16 q0, q12                    @Abs val of rows 1,2 (q12 = d24:d25)
+
+    vabs.s16 q1, q13                    @Abs val of rows 3,4 (q13 = d26:d27)
+
+    vmov.s32 q5, q4                     @copy round fact for row 2
+
+    vmov.s32 q6, q4                     @copy round fact for row 3
+    vclt.s16 q2, q12, #0                @Get the sign mask of rows 1,2
+
+    vmov.s32 q7, q4                     @copy round fact for row 4
+    vclt.s16 q3, q13, #0                @Get the sign mask of rows 3,4
+
+    vmlal.s16 q4, d0, d28               @round + |coef| * scale, row 1
+    vmlal.s16 q5, d1, d29               @round + |coef| * scale, row 2
+    vmlal.s16 q6, d2, d30               @round + |coef| * scale, row 3
+    vmlal.s16 q7, d3, d31               @round + |coef| * scale, row 4
+
+    vshl.s32 q11, q4, q10               @Shift row 1 (negative count => right shift by qbits)
+    vshl.s32 q12, q5, q10               @Shift row 2
+    vshl.s32 q13, q6, q10               @Shift row 3
+    vshl.s32 q14, q7, q10               @Shift row 4
+
+    vmovn.s32 d30, q11                  @Narrow row 1
+    vmovn.s32 d31, q12                  @Narrow row 2
+    vmovn.s32 d0 , q13                  @Narrow row 3
+    vmovn.s32 d1 , q14                  @Narrow row 4
+
+    vneg.s16 q1, q15                    @Negated magnitudes, rows 1,2
+    vneg.s16 q4, q0                     @Negated magnitudes, rows 3,4
+
+    vceq.s16 q5, q15, #0                @I compare with zero rows 1 and 2
+    vceq.s16 q6, q0 , #0                @I compare with zero rows 3 and 4
+
+    vbsl.s16 q2, q1, q15                @Restore sign of row 1 and 2
+    vbsl.s16 q3, q4, q0                 @Restore sign of row 3 and 4
+
+
+    vmovn.u16 d14, q5                   @I Narrow the zero mask for rows 1 and 2 to bytes
+    vmovn.u16 d15, q6                   @I Narrow the zero mask for rows 3 and 4 to bytes
+
+    vshr.u8 q8, q7, #7                  @I Reduce each mask byte to a single bit: 1 = coefficient is zero
+
+    vpadd.u8 d18, d16, d17              @I pair add zero flags 1
+    vpadd.u8 d20, d18, d19              @I Pair add zero flags 2 (upper lanes take whatever d19 holds; only lane 0 is used at the end)
+    vpadd.u8 d22, d20, d21              @I Pair add zero flags 3
+    vpadd.u8 d24, d22, d23              @I Pair add zero flags 4: byte 0 = number of zero coefficients
+    vst1.s16 {q2-q3}, [r2]              @Store the 16 quantized coefficients
+
+    vmov.u8 d25, #16                    @I Get max nnz
+    vsub.u8 d26, d25, d24               @I nnz = 16 - number of zero coefficients
+
+    vst1.u8 d26[0], [r9]                @I Write nnz (one byte)
+
+    vpop {d8-d15}
+    pop {r4-r12, pc}
+
+
+
+@*****************************************************************************
+@*
+@* Function Name     : ih264_resi_trans_quant_chroma_4x4_a9
+@* Description       : This function does residue calculation, forward transform
+@*                     and quantization for 4x4 chroma block.
+@*
+@* Arguments         : R0 :pointer to src buffer
+@                      R1 :pointer to pred buffer
+@                      R2 :pointer to dst buffer
+@                      R3 :source stride
+@                      STACK : pred stride,
+@                              pointer to scaling matrix,
+@                              pointer to threshold matrix,
+@                              qbits,
+@                              rounding factor,
+@                              pointer to store nnz,
+@                              pointer to store unquantized dc values
+@                              (seven words, read in this order; no dst stride is read)
+@ Values Returned   : NONE
+@
+@ Register Usage    :
+@ Stack Usage       : 104 bytes (40 for r4-r12 and lr, 64 for d8-d15)
+@ Cycles            : Around
+@ Interruptibility  : Interruptible
+@
+@ Known Limitations
+@ \Assumptions      :
+@
+@ Revision History  :
+@ DD MM YYYY Author(s) Changes
+@ 11 2 2015 100664 First version
+@
+@*****************************************************************************
+
+    .global ih264_resi_trans_quant_chroma_4x4_a9
+ih264_resi_trans_quant_chroma_4x4_a9:
+
+    @R0 :pointer to src buffer
+    @R1 :pointer to pred buffer
+    @R2 :pointer to dst buffer
+    @R3 :Source stride
+    @STACK :pred stride
+    @ :scale matrix,
+    @ :threshold matrix
+    @ :qbits
+    @ :round factor
+    @ :nnz
+    @ :pu1_dc_alt_addr
+    push {r4-r12, lr}                   @push all the variables
first
+
+    add r11, sp, #40                    @r11 = address of the first stack argument (just above the 40 bytes pushed)
+    ldmfd r11, {r4-r10}                 @load the seven stack arguments into r4-r10 (r11 is not written back)
+
+    @R0 :pointer to src buffer
+    @R1 :pointer to pred buffer
+    @R2 :pointer to dst buffer
+    @R3 :Source stride
+    @R4 :Pred stride
+    @R5 :scale matrix,
+    @R6 :threshold matrix
+    @R7 :qbits
+    @R8 :round factor
+    @R9 :nnz (R10 :alternate dc address)
+    vpush {d8-d15}
+    mov r11, #0
+    sub r7, r11, r7                     @r7 = -qbits, so that VSHL by this count shifts right
+
+    @------------Function Loading done----------------;
+
+    @Each VLD2 below reads 16 bytes and de-interleaves them: the first register
+    @gets the even-indexed bytes, the second the odd-indexed bytes. The pred load
+    @overwrites the odd-byte register of the preceding src load, so only the
+    @even-indexed samples of src and pred reach the subtraction.
+    vld2.u8 {d10, d11}, [r0], r3        @de-interleave src row 1 (d10 = even bytes)
+
+    vld2.u8 {d11, d12}, [r1], r4        @de-interleave pred row 1 (d11 = even bytes)
+
+    vld2.u8 {d28, d29}, [r0], r3        @de-interleave src row 2 (d28 = even bytes)
+
+    vld2.u8 {d29, d30}, [r1], r4        @de-interleave pred row 2 (d29 = even bytes)
+
+    vld2.u8 {d25, d26}, [r0], r3        @de-interleave src row 3 (d25 = even bytes)
+
+    vld2.u8 {d26, d27}, [r1], r4        @de-interleave pred row 3 (d26 = even bytes)
+    vsubl.u8 q0, d10, d11               @find residue row 1
+
+    vld2.u8 {d22, d23}, [r0], r3        @de-interleave src row 4 (d22 = even bytes)
+
+    vld2.u8 {d23, d24}, [r1], r4        @de-interleave pred row 4 (d23 = even bytes)
+    vsubl.u8 q1, d28, d29               @find residue row 2
+
+    vsubl.u8 q2, d25, d26               @find residue row 3
+    vsubl.u8 q3, d22, d23               @find residue row 4
+
+    vtrn.16 d0, d2                      @T12
+    vtrn.16 d4, d6                      @T34
+    vtrn.32 d0, d4                      @T13
+    vtrn.32 d2, d6                      @T24
+
+    vadd.s16 d8 , d0, d6                @x0 = x4+x7
+    vadd.s16 d9 , d2, d4                @x1 = x5+x6
+    vsub.s16 d10, d2, d4                @x2 = x5-x6
+    vsub.s16 d11, d0, d6                @x3 = x4-x7
+
+    vshl.s16 d12, d10, #1               @U_SHIFT(x2,1,shft)
+    vshl.s16 d13, d11, #1               @U_SHIFT(x3,1,shft)
+
+    vadd.s16 d14, d8, d9                @x4 = x0 + x1;
+    vsub.s16 d16, d8, d9                @x6 = x0 - x1;
+    vadd.s16 d15, d13, d10              @x5 = U_SHIFT(x3,1,shft) + x2;
+    vsub.s16 d17, d11, d12              @x7 = x3 - U_SHIFT(x2,1,shft);
+
+    @transpose again so that the same butterfly performs the vertical transform
+    vtrn.16 d14, d15                    @T12
+    vtrn.16 d16, d17                    @T34
+    vtrn.32 d14, d16                    @T13
+    vtrn.32 d15, d17                    @T24
+
+    @let us do vertical transform
+    @same code as horiz
+    vadd.s16 d18, d14, d17              @x0 = x4+x7
+    vadd.s16 d19, d15, d16              @x1 = x5+x6
+    vsub.s16 d20, d15, d16              @x2 = x5-x6
+    vsub.s16 d21, d14, d17              @x3 = x4-x7
+
+    vshl.s16 d22, d20, #1               @U_SHIFT(x2,1,shft)
+    vshl.s16 d23, d21, #1               @U_SHIFT(x3,1,shft)
+
+    vdup.s32 q4, r8                     @broadcast rounding value (accumulator for row 1)
+
+    vadd.s16 d24, d18, d19              @x5 = x0 + x1;
+    vsub.s16 d26, d18, d19              @x7 = x0 - x1;
+    vadd.s16 d25, d23, d20              @x6 = U_SHIFT(x3,1,shft) + x2;
+    vsub.s16 d27, d21, d22              @x8 = x3 - U_SHIFT(x2,1,shft);
+    vdup.s32 q10, r7                    @broadcast -qbits (shift count)
+
+    vst1.s16 d24[0], [r10]              @Store unquantized dc value to the alternate dc address
+
+@core transform is done; d24-d27 hold the four transformed rows, quantize them
+    vld1.s16 {q14-q15}, [r5]            @load the 16 scaling values
+
+    vabs.s16 q0, q12                    @Abs val of rows 1,2 (q12 = d24:d25)
+
+    vabs.s16 q1, q13                    @Abs val of rows 3,4 (q13 = d26:d27)
+
+    vmov.s32 q5, q4                     @copy round fact for row 2
+
+    vmov.s32 q6, q4                     @copy round fact for row 3
+    vclt.s16 q2, q12, #0                @Get the sign mask of rows 1,2
+
+    vmov.s32 q7, q4                     @copy round fact for row 4
+    vclt.s16 q3, q13, #0                @Get the sign mask of rows 3,4
+
+    vmlal.s16 q4, d0, d28               @round + |coef| * scale, row 1
+    vmlal.s16 q5, d1, d29               @round + |coef| * scale, row 2
+    vmlal.s16 q6, d2, d30               @round + |coef| * scale, row 3
+    vmlal.s16 q7, d3, d31               @round + |coef| * scale, row 4
+
+    vshl.s32 q11, q4, q10               @Shift row 1 (negative count => right shift by qbits)
+    vshl.s32 q12, q5, q10               @Shift row 2
+    vshl.s32 q13, q6, q10               @Shift row 3
+    vshl.s32 q14, q7, q10               @Shift row 4
+
+    vmovn.s32 d30, q11                  @Narrow row 1
+    vmovn.s32 d31, q12                  @Narrow row 2
+    vmovn.s32 d0 , q13                  @Narrow row 3
+    vmovn.s32 d1 , q14                  @Narrow row 4
+
+    vneg.s16 q1, q15                    @Negated magnitudes, rows 1,2
+    vneg.s16 q4, q0                     @Negated magnitudes, rows 3,4
+
+    vceq.s16 q5, q15, #0                @I compare with zero rows 1 and 2
+    vceq.s16 q6, q0 , #0                @I compare with zero rows 3 and 4
+
+    vbsl.s16 q2, q1, q15                @Restore sign of row 1 and 2
+    vbsl.s16 q3, q4, q0                 @Restore sign of row 3 and 4
+
+    vmovn.u16 d14, q5                   @I Narrow the zero mask for rows 1 and 2 to bytes
+    vmovn.u16 d15, q6                   @I Narrow the zero mask for rows 3 and 4 to bytes
+
+    vshr.u8 q8, q7, #7                  @I Reduce each mask byte to a single bit: 1 = coefficient is zero
+
+    vpadd.u8 d18, d16, d17              @I pair add zero flags 1
+    vpadd.u8 d20, d18, d19              @I Pair add zero flags 2 (upper lanes take whatever d19 holds; only lane 0 is used at the end)
+    vpadd.u8 d22, d20, d21              @I Pair add zero flags 3
+    vpadd.u8 d24, d22, d23              @I Pair add zero flags 4: byte 0 = number of zero coefficients
+    vst1.s16 {q2-q3}, [r2]              @Store the 16 quantized coefficients
+
+    vmov.u8 d25, #16                    @I Get max nnz
+    vsub.u8 d26, d25, d24               @I nnz = 16 - number of zero coefficients
+
+    vst1.u8 d26[0], [r9]                @I Write nnz (one byte)
+
+    vpop {d8-d15}
+    pop {r4-r12, pc}
+
+
+
+@*****************************************************************************
+@*
+@* Function Name     : ih264_hadamard_quant_4x4_a9
+@* Description       : This function does forward hadamard transform and
+@*                     quantization for luma dc block
+@*
+@* Arguments         : R0 :pointer to src buffer
+@                      R1 :pointer to dst buffer
+@                      R2 :pu2_scale_matrix (only element 0 is read)
+@                      R3 :pu2_threshold_matrix (not read; r3 is reused as a scratch pointer)
+@                      STACK : u4_qbits
+@                              u4_round_factor
+@                              pu1_nnz
+@ Values Returned   : NONE
+@
+@ Register Usage    :
+@ Stack Usage       : 64 bytes (d8-d15)
+@ Cycles            : Around
+@ Interruptibility  : Interruptible
+@
+@ Known Limitations
+@ \Assumptions      :
+@
+@ Revision History  :
+@ DD MM YYYY Author(s) Changes
+@ 20 2 2015 100633 First version
+@
+@*****************************************************************************
+@ih264_hadamard_quant_4x4_a9(WORD16 *pi2_src, WORD16 *pi2_dst,
+@                           const UWORD16 *pu2_scale_matrix,
+@                           const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+@                           UWORD32 u4_round_factor,UWORD8 *pu1_nnz
+@                           )
+    .global ih264_hadamard_quant_4x4_a9
+ih264_hadamard_quant_4x4_a9:
+
+@Register usage
+@ r0 : src
+@ r1 : dst
+@ r2 : *pu2_scale_matrix
+@ r3 : *pu2_threshold_matrix
+
+    vld4.s16 {d0, d1, d2, d3}, [r0]!
@Load 4x4 block (VLD4 de-interleaves: d0..d3 each receive every 4th halfword)
+    vpush {d8-d15}                      @64 bytes: stack arguments are now at sp+64 (qbits), sp+68 (round factor), sp+72 (nnz)
+
+    vld1.u16 d30[0], [r2]               @load pu2_scale_matrix[0]
+
+    vaddl.s16 q3, d0, d3                @x0 = x4 + x7;
+    vaddl.s16 q4, d1, d2                @x1 = x5 + x6;
+    vsubl.s16 q5, d1, d2                @x2 = x5 - x6;
+    vsubl.s16 q6, d0, d3                @x3 = x4 - x7;
+
+    vdup.u16 d30, d30[0]                @broadcast pu2_scale_matrix[0] to all four lanes
+
+    vadd.s32 q7, q3, q4                 @pi2_dst[0] = x0 + x1;
+    vadd.s32 q8, q6, q5                 @pi2_dst[1] = x3 + x2;
+    add r3, sp, #68                     @Get address of u4_round_factor
+    vsub.s32 q9, q3, q4                 @pi2_dst[2] = x0 - x1;
+    vsub.s32 q10, q6, q5                @pi2_dst[3] = x3 - x2;
+
+    vtrn.s32 q7, q8                     @transpose 4x4 block (completed by the two VSWPs below)
+    vtrn.s32 q9, q10
+    vld1.s32 d0[0], [r3]                @load u4_round_factor
+    vswp d15, d18
+    vswp d17, d20
+
+    add r3, sp, #64                     @Get address of u4_qbits
+    vadd.s32 q11, q7, q10               @x0 = x4 + x7;
+    vadd.s32 q12, q8, q9                @x1 = x5 + x6;
+    vld1.s32 d31[0], [r3]               @load u4_qbits
+    vsub.s32 q13, q8, q9                @x2 = x5 - x6;
+    vsub.s32 q14, q7, q10               @x3 = x4 - x7;
+
+    vdup.s32 q7, d0[0]                  @u4_round_factor
+
+    vadd.s32 q0, q11, q12               @(x0 + x1)
+    vadd.s32 q1, q14, q13               @(x3 + x2)
+    vsub.s32 q2, q11, q12               @(x0 - x1)
+    vsub.s32 q3, q14, q13               @(x3 - x2)
+
+    vdup.s32 q11, d31[0]                @u4_qbits
+
+    vshrn.s32 d0, q0, #1                @i4_value = (x0 + x1) >> 1;
+    vshrn.s32 d1, q1, #1                @i4_value = (x3 + x2) >> 1;
+    vshrn.s32 d2, q2, #1                @i4_value = (x0 - x1) >> 1;
+    vshrn.s32 d3, q3, #1                @i4_value = (x3 - x2) >> 1;
+
+    vabs.s16 q5, q0                     @magnitudes, first 8 coefficients
+    vabs.s16 q6, q1                     @magnitudes, last 8 coefficients
+
+    vmov.s32 q8, q7                     @Get the round fact
+    vmov.s32 q9, q7
+    vmov.s32 q10, q7
+
+    vclt.s16 q3, q0, #0                 @get the sign row 1,2
+    vclt.s16 q4, q1, #0                 @get the sign row 3,4
+
+    vneg.s32 q11, q11                   @-u4_qbits, so that VSHL by this count shifts right
+
+    vmlal.u16 q7, d10, d30              @round + |coef| * scale[0]
+    vmlal.u16 q8, d11, d30
+    vmlal.u16 q9, d12, d30
+    vmlal.u16 q10, d13, d30
+
+    vshl.u32 q7, q7, q11                @>> qbits
+    vshl.u32 q8, q8, q11
+    vshl.u32 q9, q9, q11
+    vshl.u32 q10, q10, q11
+
+    vqmovn.u32 d22, q7                  @saturating narrow to 16 bit
+    vqmovn.u32 d23, q8
+    vqmovn.u32 d24, q9
+    vqmovn.u32 d25, q10
+
+    vneg.s16 q13, q11                   @negated magnitudes
+    vneg.s16 q14, q12
+
+    vbsl.s16 q3, q13, q11               @restore sign
+    vbsl.s16 q4, q14, q12
+
+    vceq.s16 q5, q11, #0                @zero mask of the quantized magnitudes
+    vceq.s16 q6, q12, #0
+
+    vst1.s16 {q3}, [r1]!
+
+    vshrn.u16 d14, q5, #8               @narrow the zero masks to bytes
+    vshrn.u16 d15, q6, #8
+
+    ldr r3, [sp, #72]                   @Load *pu1_nnz
+
+    vshr.u8 q7, q7, #7                  @1 = coefficient is zero
+
+    vst1.s16 {q4}, [r1]!
+
+    vadd.u8 d16, d14, d15               @sum the zero flags ...
+    vmov.u8 d20, #16
+    vpadd.u8 d17, d16, d16
+    vpadd.u8 d18, d17, d17
+    vpadd.u8 d19, d18, d18              @... byte 0 = number of zero coefficients
+    vsub.u8 d20, d20, d19               @nnz = 16 - number of zero coefficients
+    vst1.u8 d20[0], [r3]                @write nnz (one byte)
+
+    vpop {d8-d15}
+    bx lr
+
+
+
+
+@*****************************************************************************
+@*
+@* Function Name     : ih264_hadamard_quant_2x2_uv_a9
+@* Description       : This function does forward hadamard transform and
+@*                     quantization for dc block of chroma for both planes
+@*
+@* Arguments         : R0 :pointer to src buffer
+@                      R1 :pointer to dst buffer
+@                      R2 :pu2_scale_matrix (only element 0 is read)
+@                      R3 :pu2_threshold_matrix (not read; r3 is reused as a scratch pointer)
+@                      STACK : u4_qbits
+@                              u4_round_factor
+@                              pu1_nnz
+@ Values Returned   : NONE
+@
+@ Register Usage    :
+@ Stack Usage       : 64 bytes (d8-d15)
+@ Cycles            : Around
+@ Interruptibility  : Interruptible
+@
+@ Known Limitations
+@ \Assumptions      :
+@
+@ Revision History  :
+@ DD MM YYYY Author(s) Changes
+@ 20 2 2015 100633 First version
+@
+@*****************************************************************************
+@ ih264_hadamard_quant_2x2_uv_a9(WORD16 *pi2_src, WORD16 *pi2_dst,
+@                           const UWORD16 *pu2_scale_matrix,
+@                           const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+@                           UWORD32 u4_round_factor,UWORD8 *pu1_nnz
+@                           )
+
+    .global ih264_hadamard_quant_2x2_uv_a9
+ih264_hadamard_quant_2x2_uv_a9:
+
+    vpush {d8-d15}                      @64 bytes: stack arguments are now at sp+64 (qbits), sp+68 (round factor), sp+72 (nnz)
+    vld2.s16 {d0-d1}, [r0]              @load src (8 halfwords, de-interleaved into even/odd)
+
+    add r3, sp, #68                     @Get address of u4_round_factor
+
+    vaddl.s16 q3, d0, d1                @x0 = x4 + x5;, x2 = x6 + x7;
+    vld1.u16 d30[0], [r2]               @load pu2_scale_matrix[0]
+    vsubl.s16 q4, d0, d1                @x1 = x4 - x5; x3 = x6 - x7;
+
+    add r0, sp, #64                     @Get address of u4_qbits
+    vld1.s32 d28[0], [r3]               @load u4_round_factor
+    vtrn.s32 q3, q4                     @q3 -> x0 x1, q4 -> x2 x3 (comment previously named q1/q2)
+
+    vadd.s32 q0, q3, q4                 @ (x0 + x2) (x1 + x3) (y0 + y2); (y1 + y3);
+    vld1.s32 d24[0], [r0]               @load u4_qbits
+    vsub.s32 q1, q3, q4                 @ (x0 - x2) (x1 - x3) (y0 - y2); (y1 - y3);
+
+    vdup.u16 d30, d30[0]
@broadcast pu2_scale_matrix[0]
+
+    vabs.s32 q2, q0                     @magnitudes
+    vabs.s32 q3, q1
+
+    vdup.s32 q14, d28[0]                @u4_round_factor
+
+    vmovl.u16 q15, d30                  @pu2_scale_matrix[0] widened to 32 bit
+
+    vclt.s32 q4, q0, #0                 @get the sign row 1,2
+    vdup.s32 q12, d24[0]                @u4_qbits
+    vclt.s32 q5, q1, #0
+
+    vqmovn.u32 d8, q4                   @narrow the sign masks to 16 bit lanes
+    vqmovn.s32 d9, q5
+
+    vmov.s32 q13, q14                   @Get the round fact
+    vneg.s32 q12, q12                   @-u4_qbits, so that VSHL by this count shifts right
+
+    vmla.u32 q13, q2, q15               @round + |coef| * scale[0]
+    vmla.u32 q14, q3, q15
+
+    vshl.u32 q13, q13, q12              @>>qbit
+    vshl.u32 q14, q14, q12              @>>qbit
+
+    vqmovn.u32 d10, q13                 @saturating narrow to 16 bit
+    vqmovn.u32 d11, q14
+
+    vneg.s16 q6, q5                     @negated magnitudes
+
+    vbsl.s16 q4, q6, q5                 @*sign
+
+    vtrn.s32 d8, d9
+
+    vceq.s16 q7, q4, #0                 @zero mask of the 8 output coefficients
+
+    vshrn.u16 d14, q7, #8               @narrow the zero mask to bytes
+
+    ldr r3, [sp, #72]                   @Load *pu1_nnz
+    vshr.u8 d14, d14, #7                @1 = coefficient is zero
+    vmov.u8 d20, #4                     @Since we add zeros, we need to subtract from 4 to get nnz
+    vpadd.u8 d17, d14, d14              @Sum up zero flags
+
+    vst1.s16 {q4}, [r1]!                @Store the block
+
+    vpadd.u8 d17, d17, d17              @Sum up zero flags: byte 0 / byte 1 = zero count of coefficients 0-3 / 4-7
+    vsub.u8 d20, d20, d17               @4- numzeros
+    vst1.u16 d20[0], [r3]               @store nnz: two bytes, one per group of four coefficients
+
+    vpop {d8-d15}
+    bx lr
+
+
+
+
+
diff --git a/common/arm/ih264_weighted_bi_pred_a9q.s b/common/arm/ih264_weighted_bi_pred_a9q.s
new file mode 100755
index 0000000..ccae779
--- /dev/null
+++ b/common/arm/ih264_weighted_bi_pred_a9q.s
@@ -0,0 +1,642 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@****************************************************************************** +@* @file +@* ih264_weighted_bi_pred_a9q.s +@* +@* @brief +@* Contains function definitions for weighted biprediction. +@* +@* @author +@* Kaushik Senthoor R +@* +@* @par List of Functions: +@* +@* - ih264_weighted_bi_pred_luma_a9q() +@* - ih264_weighted_bi_pred_chroma_a9q() +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ +@******************************************************************************* +@* @function +@* ih264_weighted_bi_pred_luma_a9q() +@* +@* @brief +@* This routine performs the weighted biprediction as described in sec +@* 8.4.2.3.2 titled "Weighted sample prediction process" for luma. +@* +@* @par Description: +@* This function gets two ht x wd blocks, calculates the weighted samples, +@* rounds off, adds offset and stores it in the destination block. +@* +@* @param[in] pu1_src1 +@* UWORD8 Pointer to the buffer containing the input block 1. +@* +@* @param[in] pu1_src2 +@* UWORD8 Pointer to the buffer containing the input block 2. +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination where the output block is stored. 
+@*
+@* @param[in] src_strd1
+@*  Stride of the input buffer 1
+@*
+@* @param[in] src_strd2
+@*  Stride of the input buffer 2
+@*
+@* @param[in] dst_strd
+@*  Stride of the destination buffer
+@*
+@* @param[in] log_wd
+@*  number of bits to be rounded off (the code shifts right by log_wd + 1, with rounding)
+@*
+@* @param[in] wt1
+@*  weight for source 1 (the code sign-extends its low 8 bits)
+@*
+@* @param[in] wt2
+@*  weight for source 2 (the code sign-extends its low 8 bits)
+@*
+@* @param[in] ofst1
+@*  offset 1 used after rounding off (low 8 bits, sign-extended)
+@*
+@* @param[in] ofst2
+@*  offset 2 used after rounding off (low 8 bits, sign-extended)
+@*
+@* @param[in] ht
+@*  integer height of the array
+@*
+@* @param[in] wd
+@*  integer width of the array
+@*
+@* @returns
+@*  None
+@*
+@* @remarks
+@*  (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
+@*
+@*******************************************************************************
+@*/
+@void ih264_weighted_bi_pred_luma_a9q(UWORD8 *pu1_src1,
+@                                     UWORD8 *pu1_src2,
+@                                     UWORD8 *pu1_dst,
+@                                     WORD32 src_strd1,
+@                                     WORD32 src_strd2,
+@                                     WORD32 dst_strd,
+@                                     WORD32 log_wd,
+@                                     WORD32 wt1,
+@                                     WORD32 wt2,
+@                                     WORD32 ofst1,
+@                                     WORD32 ofst2,
+@                                     WORD32 ht,
+@                                     WORD32 wd)
+@
+@**************Variables Vs Registers*****************************************
+@ r0 => pu1_src1
+@ r1 => pu1_src2
+@ r2 => pu1_dst
+@ r3 => src_strd1
+@ [sp] => src_strd2 (r4)
+@ [sp+4] => dst_strd (r5)
+@ [sp+8] => log_wd (r6)
+@ [sp+12] => wt1 (r7)
+@ [sp+16] => wt2 (r8)
+@ [sp+20] => ofst1 (r9)
+@ [sp+24] => ofst2 (r10)
+@ [sp+28] => ht (r11)
+@ [sp+32] => wd (r12)
+@
+.text
+.p2align 2
+
+    .global ih264_weighted_bi_pred_luma_a9q
+
+ih264_weighted_bi_pred_luma_a9q:
+
+    stmfd sp!, {r4-r12, r14}            @save r4-r12 and lr (40 bytes); the stack arguments now start at [sp, #40]
+    ldr r6, [sp, #48]                   @Load log_wd in r6
+    ldr r7, [sp, #52]                   @Load wt1 in r7
+    ldr r8, [sp, #56]                   @Load wt2 in r8
+    ldr r9, [sp, #60]                   @Load ofst1 in r9
+
+    add r6, r6, #1                      @r6 = log_wd + 1
+    sxtb r7, r7                         @sign-extend the low 8 bits of wt1 to 32-bit. NOTE(review): comment used to say 16-bit; confirm weights always fit in 8 bits
+    ldr r4, [sp, #40]                   @Load src_strd2 in r4
+    ldr r5, [sp, #44]                   @Load dst_strd in r5
+    sxtb r9, r9                         @sign-extend 8-bit ofst1 to 32-bit
+    rsb r10, r6, #0                     @r10 = -(log_wd + 1)
+    ldr r11, [sp, #68]                  @Load ht in r11
+    ldr r12, [sp, #72]                  @Load wd in r12
+    vdup.16 q0, r10                     @Q0 = -(log_wd + 1) (16-bit lanes)
+    add r9, r9, #1                      @r9 = ofst1 + 1
+
+    ldr r10, [sp, #64]                  @Load ofst2 in r10
+    sxtb r8, r8                         @sign-extend the low 8 bits of wt2 to 32-bit. NOTE(review): comment used to say 16-bit; confirm weights always fit in 8 bits
+    cmp r12, #16                        @check if wd is 16 (flags stay live until the beq below: nothing in between sets them)
+    vpush {d8-d15}
+    sxtb r10, r10                       @sign-extend 8-bit ofst2 to 32-bit
+    add r9, r9, r10                     @r9 = ofst1 + ofst2 + 1
+    vmov d2, r7, r8                     @D2 = {wt1(32-bit), wt2(32-bit)}; read below as 16-bit lanes d2[0] and d2[2]
+    asr r9, r9, #1                      @r9 = ofst = (ofst1 + ofst2 + 1) >> 1
+    vdup.8 d3, r9                       @D3 = ofst (8-bit)
+    beq loop_16                         @branch if wd is 16
+
+    cmp r12, #8                         @check if wd is 8
+    beq loop_8                          @branch if wd is 8
+
+loop_4:                                 @each iteration processes four rows
+
+    vld1.32 d4[0], [r0], r3             @load row 1 in source 1
+    vld1.32 d4[1], [r0], r3             @load row 2 in source 1
+    vld1.32 d6[0], [r1], r4             @load row 1 in source 2
+    vld1.32 d6[1], [r1], r4             @load row 2 in source 2
+
+    vmovl.u8 q2, d4                     @converting rows 1,2 in source 1 to 16-bit
+    vld1.32 d8[0], [r0], r3             @load row 3 in source 1
+    vld1.32 d8[1], [r0], r3             @load row 4 in source 1
+    vmovl.u8 q3, d6                     @converting rows 1,2 in source 2 to 16-bit
+    vld1.32 d10[0], [r1], r4            @load row 3 in source 2
+    vld1.32 d10[1], [r1], r4            @load row 4 in source 2
+
+    vmovl.u8 q4, d8                     @converting rows 3,4 in source 1 to 16-bit
+    vmovl.u8 q5, d10                    @converting rows 3,4 in source 2 to 16-bit
+
+    vmul.s16 q2, q2, d2[0]              @weight 1 mult. for rows 1,2
+    vmla.s16 q2, q3, d2[2]              @weight 2 mult. for rows 1,2
+    vmul.s16 q4, q4, d2[0]              @weight 1 mult. for rows 3,4
+    vmla.s16 q4, q5, d2[2]              @weight 2 mult. for rows 3,4
+
+    subs r11, r11, #4                   @decrement ht by 4
+    vrshl.s16 q2, q2, q0                @rounds off the weighted samples from rows 1,2
+    vrshl.s16 q4, q4, q0                @rounds off the weighted samples from rows 3,4
+
+    vaddw.s8 q2, q2, d3                 @adding offset for rows 1,2
+    vaddw.s8 q4, q4, d3                 @adding offset for rows 3,4
+
+    vqmovun.s16 d4, q2                  @saturating rows 1,2 to unsigned 8-bit
+    vqmovun.s16 d8, q4                  @saturating rows 3,4 to unsigned 8-bit
+
+    vst1.32 d4[0], [r2], r5             @store row 1 in destination
+    vst1.32 d4[1], [r2], r5             @store row 2 in destination
+    vst1.32 d8[0], [r2], r5             @store row 3 in destination
+    vst1.32 d8[1], [r2], r5             @store row 4 in destination
+
+    bgt loop_4                          @if greater than 0 repeat the loop again
+
+    b end_loops
+
+loop_8:                                 @each iteration processes four rows
+
+    vld1.8 d4, [r0], r3                 @load row 1 in source 1
+    vld1.8 d6, [r1], r4                 @load row 1 in source 2
+    vld1.8 d8, [r0], r3                 @load row 2 in source 1
+    vld1.8 d10, [r1], r4                @load row 2 in source 2
+    vmovl.u8 q2, d4                     @converting row 1 in source 1 to 16-bit
+    vld1.8 d12, [r0], r3                @load row 3 in source 1
+    vld1.8 d14, [r1], r4                @load row 3 in source 2
+    vmovl.u8 q3, d6                     @converting row 1 in source 2 to 16-bit
+    vld1.8 d16, [r0], r3                @load row 4 in source 1
+    vld1.8 d18, [r1], r4                @load row 4 in source 2
+
+    vmovl.u8 q4, d8                     @converting row 2 in source 1 to 16-bit
+    vmovl.u8 q5, d10                    @converting row 2 in source 2 to 16-bit
+
+    vmul.s16 q2, q2, d2[0]              @weight 1 mult. for row 1
+    vmla.s16 q2, q3, d2[2]              @weight 2 mult. for row 1
+    vmovl.u8 q6, d12                    @converting row 3 in source 1 to 16-bit
+    vmovl.u8 q7, d14                    @converting row 3 in source 2 to 16-bit
+    vmul.s16 q4, q4, d2[0]              @weight 1 mult. for row 2
+    vmla.s16 q4, q5, d2[2]              @weight 2 mult. for row 2
+    vmovl.u8 q8, d16                    @converting row 4 in source 1 to 16-bit
+    vmovl.u8 q9, d18                    @converting row 4 in source 2 to 16-bit
+
+    vmul.s16 q6, q6, d2[0]              @weight 1 mult. for row 3
+    vmla.s16 q6, q7, d2[2]              @weight 2 mult. for row 3
+    vmul.s16 q8, q8, d2[0]              @weight 1 mult. for row 4
+    vmla.s16 q8, q9, d2[2]              @weight 2 mult. for row 4
+
+    vrshl.s16 q2, q2, q0                @rounds off the weighted samples from row 1
+    vrshl.s16 q4, q4, q0                @rounds off the weighted samples from row 2
+    vrshl.s16 q6, q6, q0                @rounds off the weighted samples from row 3
+    vaddw.s8 q2, q2, d3                 @adding offset for row 1
+    vrshl.s16 q8, q8, q0                @rounds off the weighted samples from row 4
+    vaddw.s8 q4, q4, d3                 @adding offset for row 2
+
+    vaddw.s8 q6, q6, d3                 @adding offset for row 3
+    vqmovun.s16 d4, q2                  @saturating row 1 to unsigned 8-bit
+    vaddw.s8 q8, q8, d3                 @adding offset for row 4
+    vqmovun.s16 d8, q4                  @saturating row 2 to unsigned 8-bit
+
+    vqmovun.s16 d12, q6                 @saturating row 3 to unsigned 8-bit
+    vqmovun.s16 d16, q8                 @saturating row 4 to unsigned 8-bit
+
+    vst1.8 d4, [r2], r5                 @store row 1 in destination
+    vst1.8 d8, [r2], r5                 @store row 2 in destination
+    subs r11, r11, #4                   @decrement ht by 4
+    vst1.8 d12, [r2], r5                @store row 3 in destination
+    vst1.8 d16, [r2], r5                @store row 4 in destination
+
+    bgt loop_8                          @if greater than 0 repeat the loop again
+
+    b end_loops
+
+loop_16:                                @each iteration processes four rows
+
+    vld1.8 {q2}, [r0], r3               @load row 1 in source 1
+    vld1.8 {q3}, [r1], r4               @load row 1 in source 2
+    vld1.8 {q4}, [r0], r3               @load row 2 in source 1
+    vld1.8 {q5}, [r1], r4               @load row 2 in source 2
+    vmovl.u8 q10, d4                    @converting row 1L in source 1 to 16-bit
+    vld1.8 {q6}, [r0], r3               @load row 3 in source 1
+    vld1.8 {q7}, [r1], r4               @load row 3 in source 2
+    vmovl.u8 q11, d6                    @converting row 1L in source 2 to 16-bit
+    vld1.8 {q8}, [r0], r3               @load row 4 in source 1
+    vld1.8 {q9}, [r1], r4               @load row 4 in source 2
+
+    vmovl.u8 q2, d5                     @converting row 1H in source 1 to 16-bit
+    vmovl.u8 q3, d7                     @converting row 1H in source 2 to 16-bit
+
+    vmul.s16 q10, q10, d2[0]            @weight 1 mult. for row 1L
+    vmla.s16 q10, q11, d2[2]            @weight 2 mult. for row 1L
+    vmovl.u8 q12, d8                    @converting row 2L in source 1 to 16-bit
+    vmovl.u8 q13, d10                   @converting row 2L in source 2 to 16-bit
+
+    vmul.s16 q2, q2, d2[0]              @weight 1 mult. for row 1H
+    vmla.s16 q2, q3, d2[2]              @weight 2 mult. for row 1H
+    vmovl.u8 q4, d9                     @converting row 2H in source 1 to 16-bit
+    vmovl.u8 q5, d11                    @converting row 2H in source 2 to 16-bit
+
+    vmul.s16 q12, q12, d2[0]            @weight 1 mult. for row 2L
+    vmla.s16 q12, q13, d2[2]            @weight 2 mult. for row 2L
+    vmovl.u8 q14, d12                   @converting row 3L in source 1 to 16-bit
+    vmovl.u8 q15, d14                   @converting row 3L in source 2 to 16-bit
+
+    vmul.s16 q4, q4, d2[0]              @weight 1 mult. for row 2H
+    vmla.s16 q4, q5, d2[2]              @weight 2 mult. for row 2H
+    vmovl.u8 q6, d13                    @converting row 3H in source 1 to 16-bit
+    vmovl.u8 q7, d15                    @converting row 3H in source 2 to 16-bit
+
+    vmul.s16 q14, q14, d2[0]            @weight 1 mult. for row 3L
+    vmla.s16 q14, q15, d2[2]            @weight 2 mult. for row 3L
+    vmovl.u8 q11, d16                   @converting row 4L in source 1 to 16-bit
+    vmovl.u8 q3, d18                    @converting row 4L in source 2 to 16-bit
+
+    vmul.s16 q6, q6, d2[0]              @weight 1 mult. for row 3H
+    vmla.s16 q6, q7, d2[2]              @weight 2 mult. for row 3H
+    vmovl.u8 q8, d17                    @converting row 4H in source 1 to 16-bit
+    vmovl.u8 q9, d19                    @converting row 4H in source 2 to 16-bit
+
+    vmul.s16 q11, q11, d2[0]            @weight 1 mult. for row 4L
+    vmla.s16 q11, q3, d2[2]             @weight 2 mult. for row 4L
+    vrshl.s16 q10, q10, q0              @rounds off the weighted samples from row 1L
+
+    vmul.s16 q8, q8, d2[0]              @weight 1 mult. for row 4H
+    vmla.s16 q8, q9, d2[2]              @weight 2 mult. for row 4H
+    vrshl.s16 q2, q2, q0                @rounds off the weighted samples from row 1H
+
+    vrshl.s16 q12, q12, q0              @rounds off the weighted samples from row 2L
+    vaddw.s8 q10, q10, d3               @adding offset for row 1L
+    vrshl.s16 q4, q4, q0                @rounds off the weighted samples from row 2H
+    vaddw.s8 q2, q2, d3                 @adding offset for row 1H
+    vrshl.s16 q14, q14, q0              @rounds off the weighted samples from row 3L
+    vaddw.s8 q12, q12, d3               @adding offset for row 2L
+    vrshl.s16 q6, q6, q0                @rounds off the weighted samples from row 3H
+    vaddw.s8 q4, q4, d3                 @adding offset for row 2H
+    vrshl.s16 q11, q11, q0              @rounds off the weighted samples from row 4L
+    vaddw.s8 q14, q14, d3               @adding offset for row 3L
+    vrshl.s16 q8, q8, q0                @rounds off the weighted samples from row 4H
+    vaddw.s8 q6, q6, d3                 @adding offset for row 3H
+
+    vqmovun.s16 d26, q10                @saturating row 1L to unsigned 8-bit
+    vaddw.s8 q11, q11, d3               @adding offset for row 4L
+    vqmovun.s16 d27, q2                 @saturating row 1H to unsigned 8-bit
+    vaddw.s8 q8, q8, d3                 @adding offset for row 4H
+
+    vqmovun.s16 d10, q12                @saturating row 2L to unsigned 8-bit
+    vqmovun.s16 d11, q4                 @saturating row 2H to unsigned 8-bit
+    vqmovun.s16 d30, q14                @saturating row 3L to unsigned 8-bit
+    vqmovun.s16 d31, q6                 @saturating row 3H to unsigned 8-bit
+    vst1.8 {q13}, [r2], r5              @store row 1 in destination
+    vqmovun.s16 d14, q11                @saturating row 4L to unsigned 8-bit
+    vqmovun.s16 d15, q8                 @saturating row 4H to unsigned 8-bit
+
+    vst1.8 {q5}, [r2], r5               @store row 2 in destination
+    subs r11, r11, #4                   @decrement ht by 4
+    vst1.8 {q15}, [r2], r5              @store row 3 in destination
+    vst1.8 {q7}, [r2], r5               @store row 4 in destination
+
+    bgt loop_16                         @if greater than 0 repeat the loop again
+
+end_loops:
+
+    vpop {d8-d15}
+    ldmfd sp!, {r4-r12, r15}            @restore r4-r12 and return (saved lr is popped into pc)
+
+
+@*******************************************************************************
+@* @function
+@*  ih264_weighted_bi_pred_chroma_a9q()
+@*
+@* @brief
+@*  This routine performs the weighted biprediction as described in sec
+@* 8.4.2.3.2 titled "Weighted sample prediction process" for chroma. +@* +@* @par Description: +@* This function gets two ht x wd blocks, calculates the weighted samples, +@* rounds off, adds offset and stores it in the destination block for U and V. +@* +@* @param[in] pu1_src1 +@* UWORD8 Pointer to the buffer containing the input block 1. +@* +@* @param[in] pu1_src2 +@* UWORD8 Pointer to the buffer containing the input block 2. +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination where the output block is stored. +@* +@* @param[in] src_strd1 +@* Stride of the input buffer 1 +@* +@* @param[in] src_strd2 +@* Stride of the input buffer 2 +@* +@* @param[in] dst_strd +@* Stride of the destination buffer +@* +@* @param[in] log_wd +@* number of bits to be rounded off +@* +@* @param[in] wt1 +@* weights for the weighted prediction in U and V +@* +@* @param[in] wt2 +@* weights for the weighted prediction in U and V +@* +@* @param[in] ofst1 +@* offset 1 used after rounding off for U an dV +@* +@* @param[in] ofst2 +@* offset 2 used after rounding off for U and V +@* +@* @param[in] ht +@* integer height of the array +@* +@* @param[in] wd +@* integer width of the array +@* +@* @returns +@* None +@* +@* @remarks +@* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8). 
+@* +@******************************************************************************* +@*/ +@void ih264_weighted_bi_pred_chroma_a9q(UWORD8 *pu1_src1, +@ UWORD8 *pu1_src2, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd1, +@ WORD32 src_strd2, +@ WORD32 dst_strd, +@ WORD32 log_wd, +@ WORD32 wt1, +@ WORD32 wt2, +@ WORD32 ofst1, +@ WORD32 ofst2, +@ WORD32 ht, +@ WORD32 wd) +@ +@**************Variables Vs Registers***************************************** +@ r0 => pu1_src1 +@ r1 => pu1_src2 +@ r2 => pu1_dst +@ r3 => src_strd1 +@ [sp] => src_strd2 (r4) +@ [sp+4] => dst_strd (r5) +@ [sp+8] => log_wd (r6) +@ [sp+12] => wt1 (r7) +@ [sp+16] => wt2 (r8) +@ [sp+20] => ofst1 (r9) +@ [sp+24] => ofst2 (r10) +@ [sp+28] => ht (r11) +@ [sp+32] => wd (r12) +@ + + + .global ih264_weighted_bi_pred_chroma_a9q + +ih264_weighted_bi_pred_chroma_a9q: + + stmfd sp!, {r4-r12, r14} @save r4-r12 and lr on the stack (40 bytes) + + ldr r6, [sp, #48] @Load log_wd in r6 + ldr r7, [sp, #52] @Load wt1 in r7 + ldr r8, [sp, #56] @Load wt2 in r8 + add r6, r6, #1 @r6 = log_wd + 1 + ldr r9, [sp, #60] @Load ofst1 in r9 + ldr r10, [sp, #64] @Load ofst2 in r10 + + rsb r12, r6, #0 @r12 = -(log_wd + 1) + ldr r4, [sp, #40] @Load src_strd2 in r4 + ldr r5, [sp, #44] @Load dst_strd in r5 + vdup.16 q0, r12 @Q0 = -(log_wd + 1) (16-bit) + + ldr r11, [sp, #68] @Load ht in r11 + vdup.32 q1, r7 @Q1 = (wt1_u, wt1_v) (32-bit) + ldr r12, [sp, #72] @Load wd in r12 + vdup.32 q2, r8 @Q2 = (wt2_u, wt2_v) (32-bit) + asr r7, r9, #8 @r7 = ofst1_v + asr r8, r10, #8 @r8 = ofst2_v + vpush {d8-d15} @save d8-d15 (q4-q7), which are used below + sxtb r9, r9 @sign-extend 8-bit ofst1_u to 32-bit + sxtb r10, r10 @sign-extend 8-bit ofst2_u to 32-bit + sxtb r7, r7 @sign-extend 8-bit ofst1_v to 32-bit + sxtb r8, r8 @sign-extend 8-bit ofst2_v to 32-bit + + add r9, r9, #1 @r9 = ofst1_u + 1 + add r7, r7, #1 @r7 = ofst1_v + 1 + add r9, r9, r10 @r9 = ofst1_u + ofst2_u + 1 + add r7, r7, r8 @r7 = ofst1_v + ofst2_v + 1 + asr r9, r9, #1 @r9 = ofst_u = (ofst1_u + ofst2_u + 1) >> 1 + asr r7, r7, #1 @r7 = 
ofst_v = (ofst1_v + ofst2_v + 1) >> 1 + cmp r12, #8 @check if wd is 8 + pkhbt r9, r9, r7, lsl #16 @r9 = {ofst_u(16-bit), ofst_v(16-bit)} + vdup.32 q3, r9 @Q3 = {ofst_u(16-bit), ofst_v(16-bit)} + beq loop_8_uv @branch if wd is 8 + + cmp r12, #4 @check if wd is 4 + beq loop_4_uv @branch if wd is 4 + +loop_2_uv: @each iteration processes two rows + + vld1.32 d8[0], [r0], r3 @load row 1 in source 1 + vld1.32 d8[1], [r0], r3 @load row 2 in source 1 + vld1.32 d10[0], [r1], r4 @load row 1 in source 2 + vld1.32 d10[1], [r1], r4 @load row 2 in source 2 + + vmovl.u8 q4, d8 @converting rows 1,2 in source 1 to 16-bit + vmovl.u8 q5, d10 @converting rows 1,2 in source 2 to 16-bit + + vmul.s16 q4, q4, q1 @weight 1 mult. for rows 1,2 + vmla.s16 q4, q5, q2 @weight 2 mult. for rows 1,2 + + vrshl.s16 q4, q4, q0 @rounds off the weighted samples from rows 1,2 + + vadd.s16 q4, q4, q3 @adding offset for rows 1,2 + + vqmovun.s16 d8, q4 @saturating rows 1,2 to unsigned 8-bit + + vst1.32 d8[0], [r2], r5 @store row 1 in destination + vst1.32 d8[1], [r2], r5 @store row 2 in destination + + subs r11, r11, #2 @decrement ht by 2 + bgt loop_2_uv @if greater than 0 repeat the loop again + + b end_loops_uv + +loop_4_uv: @each iteration processes two rows + + vld1.8 d8, [r0], r3 @load row 1 in source 1 + vld1.8 d10, [r1], r4 @load row 1 in source 2 + vmovl.u8 q4, d8 @converting row 1 in source 1 to 16-bit + vld1.8 d12, [r0], r3 @load row 2 in source 1 + vmovl.u8 q5, d10 @converting row 1 in source 2 to 16-bit + vld1.8 d14, [r1], r4 @load row 2 in source 2 + + vmovl.u8 q6, d12 @converting row 2 in source 1 to 16-bit + vmul.s16 q4, q4, q1 @weight 1 mult. for row 1 + vmla.s16 q4, q5, q2 @weight 2 mult. for row 1 + vmovl.u8 q7, d14 @converting row 2 in source 2 to 16-bit + + vmul.s16 q6, q6, q1 @weight 1 mult. for row 2 + vmla.s16 q6, q7, q2 @weight 2 mult. 
for row 2 + + subs r11, r11, #2 @decrement ht by 2 + vrshl.s16 q4, q4, q0 @rounds off the weighted samples from row 1 + vrshl.s16 q6, q6, q0 @rounds off the weighted samples from row 2 + vadd.s16 q4, q4, q3 @adding offset for row 1 + vadd.s16 q6, q6, q3 @adding offset for row 2 + + vqmovun.s16 d8, q4 @saturating row 1 to unsigned 8-bit + vqmovun.s16 d12, q6 @saturating row 2 to unsigned 8-bit + + vst1.8 d8, [r2], r5 @store row 1 in destination + vst1.8 d12, [r2], r5 @store row 2 in destination + + bgt loop_4_uv @if greater than 0 repeat the loop again + + b end_loops_uv + +loop_8_uv: @each iteration processes four rows + + vld1.8 {q4}, [r0], r3 @load row 1 in source 1 + vld1.8 {q5}, [r1], r4 @load row 1 in source 2 + vld1.8 {q6}, [r0], r3 @load row 2 in source 1 + vld1.8 {q7}, [r1], r4 @load row 2 in source 2 + vmovl.u8 q12, d8 @converting row 1L in source 1 to 16-bit + vld1.8 {q8}, [r0], r3 @load row 3 in source 1 + vld1.8 {q9}, [r1], r4 @load row 3 in source 2 + vmovl.u8 q13, d10 @converting row 1L in source 2 to 16-bit + vld1.8 {q10}, [r0], r3 @load row 4 in source 1 + vld1.8 {q11}, [r1], r4 @load row 4 in source 2 + + vmovl.u8 q4, d9 @converting row 1H in source 1 to 16-bit + vmovl.u8 q5, d11 @converting row 1H in source 2 to 16-bit + + vmul.s16 q12, q12, q1 @weight 1 mult. for row 1L + vmla.s16 q12, q13, q2 @weight 2 mult. for row 1L + vmovl.u8 q14, d12 @converting row 2L in source 1 to 16-bit + vmovl.u8 q15, d14 @converting row 2L in source 2 to 16-bit + + vmul.s16 q4, q4, q1 @weight 1 mult. for row 1H + vmla.s16 q4, q5, q2 @weight 2 mult. for row 1H + vmovl.u8 q6, d13 @converting row 2H in source 1 to 16-bit + vmovl.u8 q7, d15 @converting row 2H in source 2 to 16-bit + + vmul.s16 q14, q14, q1 @weight 1 mult. for row 2L + vmla.s16 q14, q15, q2 @weight 2 mult. for row 2L + vmovl.u8 q13, d16 @converting row 3L in source 1 to 16-bit + vmovl.u8 q5, d18 @converting row 3L in source 2 to 16-bit + + vmul.s16 q6, q6, q1 @weight 1 mult. 
for row 2H + vmla.s16 q6, q7, q2 @weight 2 mult. for row 2H + vmovl.u8 q8, d17 @converting row 3H in source 1 to 16-bit + vmovl.u8 q9, d19 @converting row 3H in source 2 to 16-bit + + vmul.s16 q13, q13, q1 @weight 1 mult. for row 3L + vmla.s16 q13, q5, q2 @weight 2 mult. for row 3L + vmovl.u8 q15, d20 @converting row 4L in source 1 to 16-bit + vmovl.u8 q7, d22 @converting row 4L in source 2 to 16-bit + + vmul.s16 q8, q8, q1 @weight 1 mult. for row 3H + vmla.s16 q8, q9, q2 @weight 2 mult. for row 3H + vmovl.u8 q10, d21 @converting row 4H in source 1 to 16-bit + vmovl.u8 q11, d23 @converting row 4H in source 2 to 16-bit + + vmul.s16 q15, q15, q1 @weight 1 mult. for row 4L + vmla.s16 q15, q7, q2 @weight 2 mult. for row 4L + vrshl.s16 q12, q12, q0 @rounds off the weighted samples from row 1L + + vmul.s16 q10, q10, q1 @weight 1 mult. for row 4H + vmla.s16 q10, q11, q2 @weight 2 mult. for row 4H + vrshl.s16 q4, q4, q0 @rounds off the weighted samples from row 1H + + vrshl.s16 q14, q14, q0 @rounds off the weighted samples from row 2L + vadd.s16 q12, q12, q3 @adding offset for row 1L + vrshl.s16 q6, q6, q0 @rounds off the weighted samples from row 2H + vadd.s16 q4, q4, q3 @adding offset for row 1H + vrshl.s16 q13, q13, q0 @rounds off the weighted samples from row 3L + vadd.s16 q14, q14, q3 @adding offset for row 2L + vrshl.s16 q8, q8, q0 @rounds off the weighted samples from row 3H + vadd.s16 q6, q6, q3 @adding offset for row 2H + vrshl.s16 q15, q15, q0 @rounds off the weighted samples from row 4L + vadd.s16 q13, q13, q3 @adding offset for row 3L + vrshl.s16 q10, q10, q0 @rounds off the weighted samples from row 4H + vadd.s16 q8, q8, q3 @adding offset for row 3H + + vqmovun.s16 d10, q12 @saturating row 1L to unsigned 8-bit + vadd.s16 q15, q15, q3 @adding offset for row 4L + vqmovun.s16 d11, q4 @saturating row 1H to unsigned 8-bit + vadd.s16 q10, q10, q3 @adding offset for row 4H + + vqmovun.s16 d18, q14 @saturating row 2L to unsigned 8-bit + vqmovun.s16 d19, q6 @saturating 
row 2H to unsigned 8-bit + vqmovun.s16 d14, q13 @saturating row 3L to unsigned 8-bit + vqmovun.s16 d15, q8 @saturating row 3H to unsigned 8-bit + vst1.8 {q5}, [r2], r5 @store row 1 in destination + vqmovun.s16 d22, q15 @saturating row 4L to unsigned 8-bit + vqmovun.s16 d23, q10 @saturating row 4H to unsigned 8-bit + + vst1.8 {q9}, [r2], r5 @store row 2 in destination + subs r11, r11, #4 @decrement ht by 4 + vst1.8 {q7}, [r2], r5 @store row 3 in destination + vst1.8 {q11}, [r2], r5 @store row 4 in destination + + bgt loop_8_uv @if greater than 0 repeat the loop again + +end_loops_uv: + + vpop {d8-d15} @restore d8-d15 + ldmfd sp!, {r4-r12, r15} @restore r4-r12 and return (saved lr is loaded into pc) + + diff --git a/common/arm/ih264_weighted_pred_a9q.s b/common/arm/ih264_weighted_pred_a9q.s new file mode 100755 index 0000000..1ce94d0 --- /dev/null +++ b/common/arm/ih264_weighted_pred_a9q.s @@ -0,0 +1,479 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@****************************************************************************** +@* @file +@* ih264_weighted_pred_a9q.s +@* +@* @brief +@* Contains function definitions for weighted prediction. 
+@* +@* @author +@* Kaushik Senthoor R +@* +@* @par List of Functions: +@* +@* - ih264_weighted_pred_luma_a9q() +@* - ih264_weighted_pred_chroma_a9q() +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ +@******************************************************************************* +@* @function +@* ih264_weighted_pred_luma_a9q() +@* +@* @brief +@* This routine performs the weighted prediction as described in sec +@* 8.4.2.3.2 titled "Weighted sample prediction process" for luma. +@* +@* @par Description: +@* This function gets a ht x wd block, calculates the weighted sample, rounds +@* off, adds offset and stores it in the destination block. +@* +@* @param[in] pu1_src +@* UWORD8 Pointer to the buffer containing the input block. +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination where the output block is stored. +@* +@* @param[in] src_strd +@* Stride of the input buffer +@* +@* @param[in] dst_strd +@* Stride of the destination buffer +@* +@* @param[in] log_wd +@* number of bits to be rounded off +@* +@* @param[in] wt +@* weight for the weighted prediction +@* +@* @param[in] ofst +@* offset used after rounding off +@* +@* @param[in] ht +@* integer height of the array +@* +@* @param[in] wd +@* integer width of the array +@* +@* @returns +@* None +@* +@* @remarks +@* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16). 
+@* +@******************************************************************************* +@*/ +@void ih264_weighted_pred_luma_a9q(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 log_wd, +@ WORD32 wt, +@ WORD32 ofst, +@ WORD32 ht, +@ WORD32 wd) +@ +@**************Variables Vs Registers***************************************** +@ r0 => pu1_src +@ r1 => pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ [sp] => log_wd (r4) +@ [sp+4] => wt (r5) +@ [sp+8] => ofst (r6) +@ [sp+12] => ht (r7) +@ [sp+16] => wd (r8) +@ +.text +.p2align 2 + + .global ih264_weighted_pred_luma_a9q + +ih264_weighted_pred_luma_a9q: + + stmfd sp!, {r4-r9, r14} @save r4-r9 and lr on the stack (28 bytes) + ldr r5, [sp, #32] @Load wt in r5 + ldr r4, [sp, #28] @Load log_wd in r4 + ldr r6, [sp, #36] @Load ofst in r6 + ldr r7, [sp, #40] @Load ht in r7 + ldr r8, [sp, #44] @Load wd in r8 + vpush {d8-d15} @save d8-d15 (q4-q7), which are used below + + vdup.16 d2, r5 @D2 = wt (16-bit) + rsb r9, r4, #0 @r9 = -log_wd + vdup.8 d3, r6 @D3 = ofst (8-bit) + cmp r8, #16 @check if wd is 16 + vdup.16 q0, r9 @Q0 = -log_wd (16-bit) + beq loop_16 @branch if wd is 16 + + cmp r8, #8 @check if wd is 8 + beq loop_8 @branch if wd is 8 + +loop_4: @each iteration processes four rows + + vld1.32 d4[0], [r0], r2 @load row 1 in source + vld1.32 d4[1], [r0], r2 @load row 2 in source + vld1.32 d6[0], [r0], r2 @load row 3 in source + vld1.32 d6[1], [r0], r2 @load row 4 in source + + vmovl.u8 q2, d4 @converting rows 1,2 to 16-bit + vmovl.u8 q3, d6 @converting rows 3,4 to 16-bit + + vmul.s16 q2, q2, d2[0] @weight mult. for rows 1,2 + vmul.s16 q3, q3, d2[0] @weight mult. 
for rows 3,4 + + subs r7, r7, #4 @decrement ht by 4 + vrshl.s16 q2, q2, q0 @rounds off the weighted samples from rows 1,2 + vrshl.s16 q3, q3, q0 @rounds off the weighted samples from rows 3,4 + + vaddw.s8 q2, q2, d3 @adding offset for rows 1,2 + vaddw.s8 q3, q3, d3 @adding offset for rows 3,4 + + vqmovun.s16 d4, q2 @saturating rows 1,2 to unsigned 8-bit + vqmovun.s16 d6, q3 @saturating rows 3,4 to unsigned 8-bit + + vst1.32 d4[0], [r1], r3 @store row 1 in destination + vst1.32 d4[1], [r1], r3 @store row 2 in destination + vst1.32 d6[0], [r1], r3 @store row 3 in destination + vst1.32 d6[1], [r1], r3 @store row 4 in destination + + bgt loop_4 @if greater than 0 repeat the loop again + + b end_loops + +loop_8: @each iteration processes four rows + + vld1.8 d4, [r0], r2 @load row 1 in source + vld1.8 d6, [r0], r2 @load row 2 in source + vld1.8 d8, [r0], r2 @load row 3 in source + vmovl.u8 q2, d4 @converting row 1 to 16-bit + vld1.8 d10, [r0], r2 @load row 4 in source + vmovl.u8 q3, d6 @converting row 2 to 16-bit + + vmovl.u8 q4, d8 @converting row 3 to 16-bit + vmul.s16 q2, q2, d2[0] @weight mult. for row 1 + vmovl.u8 q5, d10 @converting row 4 to 16-bit + vmul.s16 q3, q3, d2[0] @weight mult. for row 2 + vmul.s16 q4, q4, d2[0] @weight mult. for row 3 + vmul.s16 q5, q5, d2[0] @weight mult. 
for row 4 + + vrshl.s16 q2, q2, q0 @rounds off the weighted samples from row 1 + vrshl.s16 q3, q3, q0 @rounds off the weighted samples from row 2 + vrshl.s16 q4, q4, q0 @rounds off the weighted samples from row 3 + vaddw.s8 q2, q2, d3 @adding offset for row 1 + vrshl.s16 q5, q5, q0 @rounds off the weighted samples from row 4 + vaddw.s8 q3, q3, d3 @adding offset for row 2 + + vaddw.s8 q4, q4, d3 @adding offset for row 3 + vqmovun.s16 d4, q2 @saturating row 1 to unsigned 8-bit + vaddw.s8 q5, q5, d3 @adding offset for row 4 + vqmovun.s16 d6, q3 @saturating row 2 to unsigned 8-bit + vqmovun.s16 d8, q4 @saturating row 3 to unsigned 8-bit + vqmovun.s16 d10, q5 @saturating row 4 to unsigned 8-bit + + vst1.8 d4, [r1], r3 @store row 1 in destination + vst1.8 d6, [r1], r3 @store row 2 in destination + subs r7, r7, #4 @decrement ht by 4 + vst1.8 d8, [r1], r3 @store row 3 in destination + vst1.8 d10, [r1], r3 @store row 4 in destination + + bgt loop_8 @if greater than 0 repeat the loop again + + b end_loops + +loop_16: @each iteration processes four rows + + vld1.8 {q2}, [r0], r2 @load row 1 in source + vld1.8 {q3}, [r0], r2 @load row 2 in source + vmovl.u8 q6, d4 @converting row 1L to 16-bit + vld1.8 {q4}, [r0], r2 @load row 3 in source + vmovl.u8 q7, d5 @converting row 1H to 16-bit + vld1.8 {q5}, [r0], r2 @load row 4 in source + + vmovl.u8 q8, d6 @converting row 2L to 16-bit + vmul.s16 q6, q6, d2[0] @weight mult. for row 1L + vmovl.u8 q9, d7 @converting row 2H to 16-bit + vmul.s16 q7, q7, d2[0] @weight mult. for row 1H + vmovl.u8 q10, d8 @converting row 3L to 16-bit + vmul.s16 q8, q8, d2[0] @weight mult. for row 2L + vmovl.u8 q11, d9 @converting row 3H to 16-bit + vmul.s16 q9, q9, d2[0] @weight mult. for row 2H + vmovl.u8 q12, d10 @converting row 4L to 16-bit + vmul.s16 q10, q10, d2[0] @weight mult. for row 3L + vmovl.u8 q13, d11 @converting row 4H to 16-bit + vmul.s16 q11, q11, d2[0] @weight mult. for row 3H + + vmul.s16 q12, q12, d2[0] @weight mult. 
for row 4L + vrshl.s16 q6, q6, q0 @rounds off the weighted samples from row 1L + vmul.s16 q13, q13, d2[0] @weight mult. for row 4H + + vrshl.s16 q7, q7, q0 @rounds off the weighted samples from row 1H + vrshl.s16 q8, q8, q0 @rounds off the weighted samples from row 2L + vaddw.s8 q6, q6, d3 @adding offset for row 1L + vrshl.s16 q9, q9, q0 @rounds off the weighted samples from row 2H + vaddw.s8 q7, q7, d3 @adding offset for row 1H + vqmovun.s16 d4, q6 @saturating row 1L to unsigned 8-bit + vrshl.s16 q10, q10, q0 @rounds off the weighted samples from row 3L + vaddw.s8 q8, q8, d3 @adding offset for row 2L + vqmovun.s16 d5, q7 @saturating row 1H to unsigned 8-bit + vrshl.s16 q11, q11, q0 @rounds off the weighted samples from row 3H + vaddw.s8 q9, q9, d3 @adding offset for row 2H + vqmovun.s16 d6, q8 @saturating row 2L to unsigned 8-bit + vrshl.s16 q12, q12, q0 @rounds off the weighted samples from row 4L + vaddw.s8 q10, q10, d3 @adding offset for row 3L + vqmovun.s16 d7, q9 @saturating row 2H to unsigned 8-bit + vrshl.s16 q13, q13, q0 @rounds off the weighted samples from row 4H + vaddw.s8 q11, q11, d3 @adding offset for row 3H + + vqmovun.s16 d8, q10 @saturating row 3L to unsigned 8-bit + vaddw.s8 q12, q12, d3 @adding offset for row 4L + vqmovun.s16 d9, q11 @saturating row 3H to unsigned 8-bit + vaddw.s8 q13, q13, d3 @adding offset for row 4H + + vqmovun.s16 d10, q12 @saturating row 4L to unsigned 8-bit + vst1.8 {q2}, [r1], r3 @store row 1 in destination + vqmovun.s16 d11, q13 @saturating row 4H to unsigned 8-bit + vst1.8 {q3}, [r1], r3 @store row 2 in destination + subs r7, r7, #4 @decrement ht by 4 + vst1.8 {q4}, [r1], r3 @store row 3 in destination + vst1.8 {q5}, [r1], r3 @store row 4 in destination + + bgt loop_16 @if greater than 0 repeat the loop again + +end_loops: + + vpop {d8-d15} @restore d8-d15 + ldmfd sp!, {r4-r9, r15} @restore r4-r9 and return (saved lr is loaded into pc) + + +@******************************************************************************* +@* @function +@* 
ih264_weighted_pred_chroma_a9q() +@* +@* @brief +@* This routine performs the weighted prediction as described in sec +@* 8.4.2.3.2 titled "Weighted sample prediction process" for chroma. +@* +@* @par Description: +@* This function gets a ht x wd block, calculates the weighted sample, rounds +@* off, adds offset and stores it in the destination block for U and V. +@* +@* @param[in] pu1_src +@* UWORD8 Pointer to the buffer containing the input block. +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination where the output block is stored. +@* +@* @param[in] src_strd +@* Stride of the input buffer +@* +@* @param[in] dst_strd +@* Stride of the destination buffer +@* +@* @param[in] log_wd +@* number of bits to be rounded off +@* +@* @param[in] wt +@* weights for the weighted prediction for U and V +@* +@* @param[in] ofst +@* offsets used after rounding off for U and V +@* +@* @param[in] ht +@* integer height of the array +@* +@* @param[in] wd +@* integer width of the array +@* +@* @returns +@* None +@* +@* @remarks +@* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8). 
+@* +@******************************************************************************* +@*/ +@void ih264_weighted_pred_chroma_a9q(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 log_wd, +@ WORD32 wt, +@ WORD32 ofst, +@ WORD32 ht, +@ WORD32 wd) +@ +@**************Variables Vs Registers***************************************** +@ r0 => pu1_src +@ r1 => pu1_dst +@ r2 => src_strd +@ r3 => dst_strd +@ [sp] => log_wd (r4) +@ [sp+4] => wt (r5) +@ [sp+8] => ofst (r6) +@ [sp+12] => ht (r7) +@ [sp+16] => wd (r8) +@ + + + .global ih264_weighted_pred_chroma_a9q + +ih264_weighted_pred_chroma_a9q: + + stmfd sp!, {r4-r9, r14} @save r4-r9 and lr on the stack (28 bytes) + + ldr r4, [sp, #28] @Load log_wd in r4 + ldr r5, [sp, #32] @Load wt = {wt_u (16-bit), wt_v (16-bit)} + ldr r6, [sp, #36] @Load ofst = {ofst_u (8-bit), ofst_v (8-bit)} + ldr r8, [sp, #44] @Load wd in r8 + + rsb r9, r4, #0 @r9 = -log_wd + vdup.32 q1, r5 @Q1 = {wt_u (16-bit), wt_v (16-bit)} + ldr r7, [sp, #40] @Load ht in r7 + vpush {d8-d15} @save d8-d15 (q4-q7), which are used below + vdup.16 d4, r6 @D4 = {ofst_u (8-bit), ofst_v (8-bit)} + cmp r8, #8 @check if wd is 8 + vdup.16 q0, r9 @Q0 = -log_wd (16-bit) + beq loop_8_uv @branch if wd is 8 + + cmp r8, #4 @check if wd is 4 + beq loop_4_uv @branch if wd is 4 + +loop_2_uv: @each iteration processes two rows + + vld1.32 d6[0], [r0], r2 @load row 1 in source + vld1.32 d6[1], [r0], r2 @load row 2 in source + + vmovl.u8 q3, d6 @converting rows 1,2 to 16-bit + + vmul.s16 q3, q3, q1 @weight mult. 
for rows 1,2 + + vrshl.s16 q3, q3, q0 @rounds off the weighted samples from rows 1,2 + + vaddw.s8 q3, q3, d4 @adding offset for rows 1,2 + + vqmovun.s16 d6, q3 @saturating rows 1,2 to unsigned 8-bit + + subs r7, r7, #2 @decrement ht by 2 + vst1.32 d6[0], [r1], r3 @store row 1 in destination + vst1.32 d6[1], [r1], r3 @store row 2 in destination + + bgt loop_2_uv @if greater than 0 repeat the loop again + + b end_loops_uv + +loop_4_uv: @each iteration processes two rows + + vld1.8 d6, [r0], r2 @load row 1 in source + vld1.8 d8, [r0], r2 @load row 2 in source + + vmovl.u8 q3, d6 @converting row 1 to 16-bit + vmovl.u8 q4, d8 @converting row 2 to 16-bit + + vmul.s16 q3, q3, q1 @weight mult. for row 1 + vmul.s16 q4, q4, q1 @weight mult. for row 2 + + subs r7, r7, #2 @decrement ht by 2 + vrshl.s16 q3, q3, q0 @rounds off the weighted samples from row 1 + vrshl.s16 q4, q4, q0 @rounds off the weighted samples from row 2 + + vaddw.s8 q3, q3, d4 @adding offset for row 1 + vaddw.s8 q4, q4, d4 @adding offset for row 2 + + vqmovun.s16 d6, q3 @saturating row 1 to unsigned 8-bit + vqmovun.s16 d8, q4 @saturating row 2 to unsigned 8-bit + + vst1.8 d6, [r1], r3 @store row 1 in destination + vst1.8 d8, [r1], r3 @store row 2 in destination + + bgt loop_4_uv @if greater than 0 repeat the loop again + + b end_loops_uv + +loop_8_uv: @each iteration processes four rows + + vld1.8 {q3}, [r0], r2 @load row 1 in source + vld1.8 {q4}, [r0], r2 @load row 2 in source + vmovl.u8 q7, d6 @converting row 1L to 16-bit + vld1.8 {q5}, [r0], r2 @load row 3 in source + vmovl.u8 q8, d7 @converting row 1H to 16-bit + vld1.8 {q6}, [r0], r2 @load row 4 in source + + vmul.s16 q7, q7, q1 @weight mult. for row 1L + vmovl.u8 q9, d8 @converting row 2L to 16-bit + vmul.s16 q8, q8, q1 @weight mult. for row 1H + vmovl.u8 q10, d9 @converting row 2H to 16-bit + vmul.s16 q9, q9, q1 @weight mult. for row 2L + vmovl.u8 q11, d10 @converting row 3L to 16-bit + vmul.s16 q10, q10, q1 @weight mult. 
for row 2H + vmovl.u8 q12, d11 @converting row 3H to 16-bit + vmul.s16 q11, q11, q1 @weight mult. for row 3L + vmovl.u8 q13, d12 @converting row 4L to 16-bit + vmul.s16 q12, q12, q1 @weight mult. for row 3H + vmovl.u8 q14, d13 @converting row 4H to 16-bit + + vmul.s16 q13, q13, q1 @weight mult. for row 4L + vrshl.s16 q7, q7, q0 @rounds off the weighted samples from row 1L + vmul.s16 q14, q14, q1 @weight mult. for row 4H + + vrshl.s16 q8, q8, q0 @rounds off the weighted samples from row 1H + vrshl.s16 q9, q9, q0 @rounds off the weighted samples from row 2L + vaddw.s8 q7, q7, d4 @adding offset for row 1L + vrshl.s16 q10, q10, q0 @rounds off the weighted samples from row 2H + vaddw.s8 q8, q8, d4 @adding offset for row 1H + vqmovun.s16 d6, q7 @saturating row 1L to unsigned 8-bit + vrshl.s16 q11, q11, q0 @rounds off the weighted samples from row 3L + vaddw.s8 q9, q9, d4 @adding offset for row 2L + vqmovun.s16 d7, q8 @saturating row 1H to unsigned 8-bit + vrshl.s16 q12, q12, q0 @rounds off the weighted samples from row 3H + vaddw.s8 q10, q10, d4 @adding offset for row 2H + vqmovun.s16 d8, q9 @saturating row 2L to unsigned 8-bit + vrshl.s16 q13, q13, q0 @rounds off the weighted samples from row 4L + vaddw.s8 q11, q11, d4 @adding offset for row 3L + vqmovun.s16 d9, q10 @saturating row 2H to unsigned 8-bit + vrshl.s16 q14, q14, q0 @rounds off the weighted samples from row 4H + vaddw.s8 q12, q12, d4 @adding offset for row 3H + + vqmovun.s16 d10, q11 @saturating row 3L to unsigned 8-bit + vaddw.s8 q13, q13, d4 @adding offset for row 4L + vqmovun.s16 d11, q12 @saturating row 3H to unsigned 8-bit + vaddw.s8 q14, q14, d4 @adding offset for row 4H + + vqmovun.s16 d12, q13 @saturating row 4L to unsigned 8-bit + vst1.8 {q3}, [r1], r3 @store row 1 in destination + vqmovun.s16 d13, q14 @saturating row 4H to unsigned 8-bit + vst1.8 {q4}, [r1], r3 @store row 2 in destination + subs r7, r7, #4 @decrement ht by 4 + vst1.8 {q5}, [r1], r3 @store row 3 in destination + vst1.8 {q6}, [r1], r3 
@store row 4 in destination + + bgt loop_8_uv @if greater than 0 repeat the loop again + +end_loops_uv: + + vpop {d8-d15} @restore d8-d15 + ldmfd sp!, {r4-r9, r15} @restore r4-r9 and return (saved lr is loaded into pc) + + diff --git a/common/armv8/ih264_deblk_chroma_av8.s b/common/armv8/ih264_deblk_chroma_av8.s new file mode 100755 index 0000000..3021556 --- /dev/null +++ b/common/armv8/ih264_deblk_chroma_av8.s @@ -0,0 +1,585 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///*****************************************************************************/ +///* */ +///* File Name : ih264_deblk_chroma_av8.s */ +///* */ +///* Description : Contains function definitions for deblocking chroma */ +///* edge. Functions are coded in NEON assembly and can */ +///* be compiled using ARM RVDS. 
*/ +///* */ +///* List of Functions : ih264_deblk_chroma_vert_bs4_av8() */ +///* ih264_deblk_chroma_vert_bslt4_av8() */ +///* ih264_deblk_chroma_horz_bs4_av8() */ +///* ih264_deblk_chroma_horz_bslt4_av8() */ +///* Issues / Problems : None */ +///* */ +///* Revision History : */ +///* */ +///* DD MM YYYY Author(s) Changes (Describe the changes made) */ +///* 28 11 2013 Ittiam Draft */ +///*****************************************************************************/ + + +.text +.p2align 2 +.include "ih264_neon_macros.s" + +///** +//******************************************************************************* +//* +//* @brief +//* Performs filtering of a chroma block horizontal edge when the +//* boundary strength is set to 4 in high profile +//* +//* @par Description: +//* This operation is described in Sec. 8.7.2.4 under the title +//* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. +//* +//* @param[in] x0 - pu1_src +//* Pointer to the src sample q0 +//* +//* @param[in] x1 - src_strd +//* Source stride +//* +//* @param[in] x2 - alpha_cb +//* Alpha Value for the boundary in U +//* +//* @param[in] x3 - beta_cb +//* Beta Value for the boundary in U +//* +//* @param[in] x4 - alpha_cr +//* Alpha Value for the boundary in V +//* +//* @param[in] x5 - beta_cr +//* Beta Value for the boundary in V +//* +//* @returns +//* None +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + + .global ih264_deblk_chroma_horz_bs4_av8 + +ih264_deblk_chroma_horz_bs4_av8: + + // STMFD sp!,{x4-x6,x14} // + push_v_regs + stp x19, x20, [sp, #-16]! 
+ mov x6, x5 //x6 = beta_cr (arrives in x5) + mov x5, x4 //x5 = alpha_cr (arrives in x4) + sub x0, x0, x1, lsl #1 //x0 = uc_edgePixel pointing to p1 of chroma + ld2 {v6.8b, v7.8b}, [x0], x1 //D6 = p1u , D7 = p1v + mov x4, x0 //Keeping a backup of the pointer p0 of chroma + ld2 {v4.8b, v5.8b}, [x0], x1 //D4 = p0u , D5 = p0v + dup v20.8b, w2 //D20 contains alpha_cb + dup v21.8b, w5 //D21 contains alpha_cr + mov v20.d[1], v21.d[0] + ld2 {v0.8b, v1.8b}, [x0], x1 //D0 = q0u , D1 = q0v + uaddl v8.8h, v6.8b, v0.8b // + uaddl v10.8h, v7.8b, v1.8b //Q4,Q5 = q0 + p1 + movi v31.8b, #2 // + ld2 {v2.8b, v3.8b}, [x0] //D2 = q1u , D3 = q1v + mov v0.d[1], v1.d[0] + mov v2.d[1], v3.d[0] + mov v4.d[1], v5.d[0] + mov v6.d[1], v7.d[0] + uabd v26.16b, v6.16b , v4.16b //Q13 = ABS(p1 - p0) + umlal v8.8h, v2.8b, v31.8b // + umlal v10.8h, v3.8b, v31.8b //Q5,Q4 = (X2(q1U) + q0U + p1U) + uabd v22.16b, v4.16b , v0.16b //Q11 = ABS(p0 - q0) + uabd v24.16b, v2.16b , v0.16b //Q12 = ABS(q1 - q0) + uaddl v14.8h, v4.8b, v2.8b // + uaddl v28.8h, v5.8b, v3.8b //Q14,Q7 = P0 + Q1 + dup v16.8b, w3 //D16 contains beta_cb + dup v17.8b, w6 //D17 contains beta_cr + mov v16.d[1], v17.d[0] + umlal v14.8h, v6.8b, v31.8b // + umlal v28.8h, v7.8b, v31.8b //Q14,Q7 = (X2(p1U) + p0U + q1U) + cmhs v18.16b, v22.16b, v20.16b + cmhs v24.16b, v24.16b, v16.16b + cmhs v26.16b, v26.16b, v16.16b + rshrn v8.8b, v8.8h, #2 // + rshrn v9.8b, v10.8h, #2 //Q4 = (X2(q1U) + q0U + p1U + 2) >> 2 + mov v8.d[1], v9.d[0] + orr v18.16b, v18.16b , v24.16b //Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) + rshrn v10.8b, v14.8h, #2 // + rshrn v11.8b, v28.8h, #2 //Q5 = (X2(p1U) + p0U + q1U + 2) >> 2 + mov v10.d[1], v11.d[0] + orr v18.16b, v18.16b , v26.16b //Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta ) + bit v10.16b, v4.16b , v18.16b // + bit v8.16b, v0.16b , v18.16b // + mov v11.d[0], v10.d[1] + mov v9.d[0], v8.d[1] + st2 {v10.8b, v11.8b}, [x4], x1 // + st2 {v8.8b, v9.8b}, [x4] // + // LDMFD sp!,{x4-x6,pc} // + ldp x19, x20, [sp], #16 + pop_v_regs + ret 
+ + + +///** +//******************************************************************************* +//* +//* @brief +//* Performs filtering of a chroma block vertical edge when the +//* boundary strength is set to 4 in high profile +//* +//* @par Description: +//* This operation is described in Sec. 8.7.2.4 under the title +//* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. +//* +//* @param[in] x0 - pu1_src +//* Pointer to the src sample q0 +//* +//* @param[in] x1 - src_strd +//* Source stride +//* +//* @param[in] x2 - alpha_cb +//* Alpha Value for the boundary in U +//* +//* @param[in] x3 - beta_cb +//* Beta Value for the boundary in U +//* +//* @param[in] x4 - alpha_cr +//* Alpha Value for the boundary in V +//* +//* @param[in] x5 - beta_cr +//* Beta Value for the boundary in V +//* +//* @returns +//* None +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + + .global ih264_deblk_chroma_vert_bs4_av8 + +ih264_deblk_chroma_vert_bs4_av8: + + // STMFD sp!,{x4,x5,x12,x14} + push_v_regs + stp x19, x20, [sp, #-16]! + + sub x0, x0, #4 //point x0 to p1u of row0. 
+ mov x12, x0 //keep a back up of x0 for buffer write + + add x2, x2, x4, lsl #8 //x2 = (alpha_cr,alpha_cb) + add x3, x3, x5, lsl #8 //x3 = (beta_cr,beta_cb) + + ld4 {v0.h, v1.h, v2.h, v3.h}[0], [x0], x1 + ld4 {v0.h, v1.h, v2.h, v3.h}[1], [x0], x1 + ld4 {v0.h, v1.h, v2.h, v3.h}[2], [x0], x1 + ld4 {v0.h, v1.h, v2.h, v3.h}[3], [x0], x1 + + ld4 {v4.h, v5.h, v6.h, v7.h}[0], [x0], x1 + ld4 {v4.h, v5.h, v6.h, v7.h}[1], [x0], x1 + ld4 {v4.h, v5.h, v6.h, v7.h}[2], [x0], x1 + ld4 {v4.h, v5.h, v6.h, v7.h}[3], [x0], x1 + + mov v10.16b, v2.16b + mov v2.16b, v1.16b + mov v1.16b, v4.16b + mov v4.16b, v10.16b + mov v10.16b, v6.16b + mov v6.16b, v3.16b + mov v3.16b, v5.16b + mov v5.16b, v10.16b + + dup v22.8h, w2 //Q11 = alpha + dup v24.8h, w3 //Q12 = beta + movi v31.8b, #2 + + mov v0.d[1], v1.d[0] + mov v2.d[1], v3.d[0] + mov v4.d[1], v5.d[0] + mov v6.d[1], v7.d[0] + + uabd v8.16b, v2.16b , v4.16b //|p0-q0| + uabd v10.16b, v6.16b , v4.16b //|q1-q0| + uabd v12.16b, v0.16b , v2.16b //|p1-p0| + uaddl v14.8h, v2.8b, v6.8b + uaddl v16.8h, v3.8b, v7.8b //(p0 + q1) + cmhi v8.16b, v22.16b , v8.16b //|p0-q0| < alpha ? + cmhi v10.16b, v24.16b , v10.16b //|q1-q0| < beta ? + cmhi v12.16b, v24.16b , v12.16b //|p1-p0| < beta ? 
+ umlal v14.8h, v0.8b, v31.8b + umlal v16.8h, v1.8b, v31.8b //2*p1 + (p0 + q1) + uaddl v18.8h, v0.8b, v4.8b + uaddl v20.8h, v1.8b, v5.8b //(p1 + q0) + and v8.16b, v8.16b , v10.16b //|p0-q0| < alpha && |q1-q0| < beta + umlal v18.8h, v6.8b, v31.8b + umlal v20.8h, v7.8b, v31.8b //2*q1 + (p1 + q0) + + rshrn v14.8b, v14.8h, #2 + rshrn v15.8b, v16.8h, #2 //(2*p1 + (p0 + q1) + 2) >> 2 + mov v14.d[1], v15.d[0] + and v8.16b, v8.16b , v12.16b //|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta + rshrn v18.8b, v18.8h, #2 + rshrn v19.8b, v20.8h, #2 //(2*q1 + (p1 + q0) + 2) >> 2 + mov v18.d[1], v19.d[0] + bit v2.16b, v14.16b , v8.16b + bit v4.16b, v18.16b , v8.16b + + mov v1.d[0], v0.d[1] + mov v3.d[0], v2.d[1] + mov v5.d[0], v4.d[1] + mov v7.d[0], v6.d[1] + + mov v10.16b, v1.16b + mov v1.16b, v2.16b + mov v2.16b, v4.16b + mov v4.16b, v10.16b + mov v10.16b, v3.16b + mov v3.16b, v6.16b + mov v6.16b, v5.16b + mov v5.16b, v10.16b + + st4 {v0.h, v1.h, v2.h, v3.h}[0], [x12], x1 + st4 {v0.h, v1.h, v2.h, v3.h}[1], [x12], x1 + st4 {v0.h, v1.h, v2.h, v3.h}[2], [x12], x1 + st4 {v0.h, v1.h, v2.h, v3.h}[3], [x12], x1 + + st4 {v4.h, v5.h, v6.h, v7.h}[0], [x12], x1 + st4 {v4.h, v5.h, v6.h, v7.h}[1], [x12], x1 + st4 {v4.h, v5.h, v6.h, v7.h}[2], [x12], x1 + st4 {v4.h, v5.h, v6.h, v7.h}[3], [x12], x1 + + // LDMFD sp!,{x4,x5,x12,pc} + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + +///** +//******************************************************************************* +//* +//* @brief +//* Performs filtering of a chroma block horizontal edge for cases where the +//* boundary strength is less than 4 in high profile +//* +//* @par Description: +//* This operation is described in Sec. 8.7.2.4 under the title +//* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. 
+//* +//* @param[in] x0 - pu1_src +//* Pointer to the src sample q0 +//* +//* @param[in] x1 - src_strd +//* Source stride +//* +//* @param[in] x2 - alpha_cb +//* Alpha Value for the boundary in U +//* +//* @param[in] x3 - beta_cb +//* Beta Value for the boundary in U +//* +//* @param[in] sp(0) - alpha_cr +//* Alpha Value for the boundary in V +//* +//* @param[in] sp(4) - beta_cr +//* Beta Value for the boundary in V +//* +//* @param[in] sp(8) - u4_bs +//* Packed Boundary strength array +//* +//* @param[in] sp(12) - pu1_cliptab_cb +//* tc0_table for U +//* +//* @param[in] sp(16) - pu1_cliptab_cr +//* tc0_table for V +//* +//* @returns +//* None +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + + .global ih264_deblk_chroma_horz_bslt4_av8 + +ih264_deblk_chroma_horz_bslt4_av8: + + // STMFD sp!,{x4-x9,x14} // + push_v_regs + stp x19, x20, [sp, #-16]! + mov x8, x7 + mov x7, x6 + ldr x9, [sp, #80] + sub x0, x0, x1, lsl #1 //x0 = uc_edgePixelU pointing to p1 of chroma U + rev w7, w7 // + mov v12.2s[0], w7 //D12[0] = ui_Bs + ld1 {v16.s}[0], [x8] //D16[0] contains cliptab_cb + ld1 {v17.s}[0], [x9] //D17[0] contains cliptab_cr + ld2 {v6.8b, v7.8b}, [x0], x1 //Q3=p1 + tbl v14.8b, {v16.16b}, v12.8b //Retreiving cliptab values for U + tbl v28.8b, {v17.16b}, v12.8b //Retrieving cliptab values for V + uxtl v12.8h, v12.8b //Q6 = uc_Bs in each 16 bit scalar + mov x6, x0 //Keeping a backup of the pointer to chroma U P0 + ld2 {v4.8b, v5.8b}, [x0], x1 //Q2=p0 + movi v30.8b, #1 // + dup v20.8b, w2 //D20 contains alpha_cb + dup v21.8b, w4 //D21 contains alpha_cr + mov v20.d[1], v21.d[0] + ld2 {v0.8b, v1.8b}, [x0], x1 //Q0=q0 + uxtl v14.8h, v14.8b // + uxtl v28.8h, v28.8b // + mov v15.d[0], v28.d[0] //D14 has cliptab values for U, D15 for V + mov v14.d[1], v28.d[0] + ld2 {v2.8b, v3.8b}, [x0] //Q1=q1 + usubl v10.8h, v1.8b, v5.8b // + usubl v8.8h, v0.8b, v4.8b //Q5,Q4 = (q0 - p0) + mov v6.d[1], v7.d[0] + mov v4.d[1], 
v5.d[0] + uabd v26.16b, v6.16b , v4.16b //Q13 = ABS(p1 - p0) + shl v10.8h, v10.8h, #2 //Q5 = (q0 - p0)<<2 + mov v0.d[1], v1.d[0] + uabd v22.16b, v4.16b , v0.16b //Q11 = ABS(p0 - q0) + shl v8.8h, v8.8h, #2 //Q4 = (q0 - p0)<<2 + mov v14.d[1], v15.d[0] + sli v14.8h, v14.8h, #8 + mov v15.d[0], v14.d[1] + mov v2.d[1], v3.d[0] + uabd v24.16b, v2.16b , v0.16b //Q12 = ABS(q1 - q0) + cmhs v18.16b, v22.16b, v20.16b + usubl v20.8h, v6.8b, v2.8b //Q10 = (p1 - q1)L + usubl v6.8h, v7.8b, v3.8b //Q3 = (p1 - q1)H + dup v16.8b, w3 //Q8 contains beta_cb + dup v17.8b, w5 //Q8 contains beta_cr + mov v16.d[1], v17.d[0] + add v8.8h, v8.8h , v20.8h // + add v10.8h, v10.8h , v6.8h //Q5,Q4 = [ (q0 - p0)<<2 ] + (p1 - q1) + cmhs v24.16b, v24.16b, v16.16b + cmgt v12.4h, v12.4h, #0 + sqrshrn v8.8b, v8.8h, #3 // + sqrshrn v9.8b, v10.8h, #3 //Q4 = i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3 + mov v8.d[1], v9.d[0] + add v14.8b, v14.8b , v30.8b //D14 = C = C0+1 for U + cmhs v26.16b, v26.16b, v16.16b + orr v18.16b, v18.16b , v24.16b //Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) + abs v6.16b, v8.16b //Q4 = ABS (i_macro) + add v15.8b, v15.8b , v30.8b //D15 = C = C0+1 for V + mov v14.d[1], v15.d[0] + mov v13.8b, v12.8b + mov v12.d[1], v13.d[0] // + orr v18.16b, v18.16b , v26.16b //Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta ) + umin v14.16b, v6.16b , v14.16b //Q7 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro) + bic v12.16b, v12.16b , v18.16b //final condition + cmge v8.16b, v8.16b, #0 + and v14.16b, v14.16b , v12.16b //Making delta zero in places where values shouldn be filterd + uqadd v16.16b, v4.16b , v14.16b //Q8 = p0 + delta + uqsub v4.16b, v4.16b , v14.16b //Q2 = p0 - delta + uqadd v18.16b, v0.16b , v14.16b //Q9 = q0 + delta + uqsub v0.16b, v0.16b , v14.16b //Q0 = q0 - delta + bif v16.16b, v4.16b , v8.16b //Q8 = (i_macro >= 0 ) ? (p0+delta) : (p0-delta) + bif v0.16b, v18.16b , v8.16b //Q0 = (i_macro >= 0 ) ? 
(q0-delta) : (q0+delta) + mov v17.d[0], v16.d[1] + mov v1.d[0], v0.d[1] + st2 {v16.8b, v17.8b}, [x6], x1 // + st2 {v0.8b, v1.8b}, [x6] // + + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + +///** +//******************************************************************************* +//* +//* @brief +//* Performs filtering of a chroma block vertical edge for cases where the +//* boundary strength is less than 4 in high profile +//* +//* @par Description: +//* This operation is described in Sec. 8.7.2.3 under the title +//* "Filtering process for edges with bS less than 4" in ITU T Rec H.264. +//* +//* @param[in] x0 - pu1_src +//* Pointer to the src sample q0 +//* +//* @param[in] x1 - src_strd +//* Source stride +//* +//* @param[in] x2 - alpha_cb +//* Alpha Value for the boundary in U +//* +//* @param[in] x3 - beta_cb +//* Beta Value for the boundary in U +//* +//* @param[in] x4 - alpha_cr +//* Alpha Value for the boundary in V +//* +//* @param[in] x5 - beta_cr +//* Beta Value for the boundary in V +//* +//* @param[in] x6 - u4_bs +//* Packed Boundary strength array +//* +//* @param[in] x7 - pu1_cliptab_cb +//* tc0_table for U +//* +//* @param[in] stack - pu1_cliptab_cr +//* tc0_table for V; passed on the stack and loaded from [sp, #80] after the prologue's register saves +//* +//* @returns +//* None +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + + .global ih264_deblk_chroma_vert_bslt4_av8 + +ih264_deblk_chroma_vert_bslt4_av8: + + // STMFD sp!,{x4-x7,x10-x12,x14} + push_v_regs + stp x19, x20, [sp, #-16]! + mov x10, x7 + ldr x11, [sp, #80] //x11 = pu1_cliptab_cr (stack argument) + sub x0, x0, #4 //point x0 to p1u of row0.
+ add x2, x2, x4, lsl #8 + add x3, x3, x5, lsl #8 + mov x12, x0 //keep a back up of x0 for buffer write + ld4 {v0.h, v1.h, v2.h, v3.h}[0], [x0], x1 + ld4 {v0.h, v1.h, v2.h, v3.h}[1], [x0], x1 + ld4 {v0.h, v1.h, v2.h, v3.h}[2], [x0], x1 + ld4 {v0.h, v1.h, v2.h, v3.h}[3], [x0], x1 + + ld4 {v4.h, v5.h, v6.h, v7.h}[0], [x0], x1 + ld4 {v4.h, v5.h, v6.h, v7.h}[1], [x0], x1 + ld4 {v4.h, v5.h, v6.h, v7.h}[2], [x0], x1 + ld4 {v4.h, v5.h, v6.h, v7.h}[3], [x0], x1 + + mov v10.16b, v2.16b + mov v2.16b, v1.16b + mov v1.16b, v4.16b + mov v4.16b, v10.16b + mov v10.16b, v6.16b + mov v6.16b, v3.16b + mov v3.16b, v5.16b + mov v5.16b, v10.16b + dup v22.8h, w2 //Q11 = alpha, each halfword = alpha_cb plus (alpha_cr * 256) + mov v2.d[1], v3.d[0] + mov v4.d[1], v5.d[0] + uabd v8.16b, v2.16b , v4.16b //|p0-q0| + dup v24.8h, w3 //Q12 = beta, each halfword = beta_cb plus (beta_cr * 256) + mov v25.d[0], v24.d[1] + mov v6.d[1], v7.d[0] + mov v0.d[1], v1.d[0] + uabd v10.16b, v6.16b , v4.16b //|q1-q0| + uabd v12.16b, v0.16b , v2.16b //|p1-p0| + cmhi v8.16b, v22.16b , v8.16b //|p0-q0| < alpha ? + usubl v14.8h, v0.8b, v6.8b + cmhi v10.16b, v24.16b , v10.16b //|q1-q0| < beta ? + usubl v16.8h, v1.8b, v7.8b //(p1 - q1) + cmhi v12.16b, v24.16b , v12.16b //|p1-p0| < beta ? 
+ usubl v18.8h, v4.8b, v2.8b + and v8.16b, v8.16b , v10.16b //|p0-q0| < alpha && |q1-q0| < beta + usubl v20.8h, v5.8b, v3.8b //(q0 - p0) + movi v28.8h, #4 + ld1 {v24.s}[0], [x10] //Load ClipTable for U + ld1 {v25.s}[0], [x11] //Load ClipTable for V + rev w6, w6 //Blocking strengths + and v8.16b, v8.16b , v12.16b //|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta + mov v10.s[0], w6 + mla v14.8h, v18.8h , v28.8h + mla v16.8h, v20.8h , v28.8h //4*(q0 - p0) + (p1 - q1) + uxtl v10.8h, v10.8b + sli v10.4h, v10.4h, #8 + tbl v12.8b, {v24.16b}, v10.8b //tC0 for U + tbl v13.8b, {v25.16b}, v10.8b //tC0 for V + zip1 v31.8b, v12.8b, v13.8b + zip2 v13.8b, v12.8b, v13.8b + mov v12.8b, v31.8b + mov v12.d[1], v13.d[0] + uxtl v10.4s, v10.4h + sli v10.4s, v10.4s, #16 + movi v24.16b, #1 + add v12.16b, v12.16b , v24.16b //tC0 + 1 + cmhs v10.16b, v10.16b , v24.16b + and v8.16b, v8.16b , v10.16b //|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0 + // Q0 - Q3(inputs), + // Q4 (|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0), + // Q6 (tC) + srshr v14.8h, v14.8h, #3 + srshr v16.8h, v16.8h, #3 //(((q0 - p0) << 2) + (p1 - q1) + 4) >> 3) + cmgt v18.8h, v14.8h , #0 + cmgt v20.8h, v16.8h , #0 + xtn v18.8b, v18.8h + xtn v19.8b, v20.8h //Q9 = mask of lanes where delta > 0 + mov v18.d[1], v19.d[0] + abs v14.8h, v14.8h + abs v16.8h, v16.8h + xtn v14.8b, v14.8h + xtn v15.8b, v16.8h + mov v14.d[1], v15.d[0] + umin v14.16b, v14.16b , v12.16b //Q7 = min(|delta|, tC0 + 1) + uqadd v20.16b, v2.16b , v14.16b //p0+|delta| + uqadd v22.16b, v4.16b , v14.16b //q0+|delta| + uqsub v24.16b, v2.16b , v14.16b //p0-|delta| + uqsub v26.16b, v4.16b , v14.16b //q0-|delta| + bit v24.16b, v20.16b , v18.16b //p0 + delta + bit v22.16b, v26.16b , v18.16b //q0 - delta + bit v2.16b, v24.16b , v8.16b + bit v4.16b, v22.16b , v8.16b + mov v1.d[0], v0.d[1] + mov v3.d[0], v2.d[1] + mov v5.d[0], v4.d[1] + mov v7.d[0], v6.d[1] + mov v10.16b, v1.16b + mov v1.16b, v2.16b + mov v2.16b, v4.16b + mov v4.16b, v10.16b + mov v10.16b, 
v3.16b + mov v3.16b, v6.16b + mov v6.16b, v5.16b + mov v5.16b, v10.16b + st4 {v0.h, v1.h, v2.h, v3.h}[0], [x12], x1 + st4 {v0.h, v1.h, v2.h, v3.h}[1], [x12], x1 + st4 {v0.h, v1.h, v2.h, v3.h}[2], [x12], x1 + st4 {v0.h, v1.h, v2.h, v3.h}[3], [x12], x1 + + st4 {v4.h, v5.h, v6.h, v7.h}[0], [x12], x1 + st4 {v4.h, v5.h, v6.h, v7.h}[1], [x12], x1 + st4 {v4.h, v5.h, v6.h, v7.h}[2], [x12], x1 + st4 {v4.h, v5.h, v6.h, v7.h}[3], [x12], x1 + + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + diff --git a/common/armv8/ih264_deblk_luma_av8.s b/common/armv8/ih264_deblk_luma_av8.s new file mode 100755 index 0000000..bcdb03f --- /dev/null +++ b/common/armv8/ih264_deblk_luma_av8.s @@ -0,0 +1,1084 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///*****************************************************************************/ +///* */ +///* File Name : ih264_deblk_luma_av8.s */ +///* */ +///* Description : Contains function definitions for deblocking luma */ +///* edge. Functions are coded in NEON assembly and can */ +///* be compiled using ARM RVDS. 
*/ +///* */ +///* List of Functions : ih264_deblk_luma_vert_bs4_av8() */ +///* ih264_deblk_luma_vert_bslt4_av8() */ +///* ih264_deblk_luma_horz_bs4_av8() */ +///* ih264_deblk_luma_horz_bslt4_av8() */ +///* */ +///* Issues / Problems : None */ +///* */ +///* Revision History : */ +///* */ +///* DD MM YYYY Author(s) Changes (Describe the changes made) */ +///* 28 11 2013 Ittiam Draft */ +///* */ +///*****************************************************************************/ + + +.text +.p2align 2 +.include "ih264_neon_macros.s" + + + +///** +//******************************************************************************* +//* +//* @brief +//* Performs filtering of a luma block horizontal edge for cases where the +//* boundary strength is less than 4 +//* +//* @par Description: +//* This operation is described in Sec. 8.7.2.3 under the title +//* "Filtering process for edges with bS less than 4" in ITU T Rec H.264. +//* +//* @param[in] x0 - pu1_src +//* Pointer to the src sample q0 +//* +//* @param[in] x1 - src_strd +//* Source stride +//* +//* @param[in] x2 - alpha +//* Alpha Value for the boundary +//* +//* @param[in] x3 - beta +//* Beta Value for the boundary +//* +//* @param[in] x4 - u4_bs +//* Packed Boundary strength array +//* +//* @param[in] x5 - pu1_cliptab +//* tc0_table +//* +//* @returns +//* None +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + + .global ih264_deblk_luma_horz_bslt4_av8 + +ih264_deblk_luma_horz_bslt4_av8: + + // STMFD sp!,{x4-x7,x14} + push_v_regs + stp x19, x20, [sp, #-16]! 
+ + //LDRD x4,x5,[SP,#0x14] //x4 = ui_Bs , x5 = *puc_ClpTab + sub x0, x0, x1, lsl #1 //x1 = uc_Horizonpad + sub x0, x0, x1 //x0 pointer to p2 + rev w4, w4 // + ld1 {v10.8b, v11.8b}, [x0], x1 //p2 values are loaded into q5 + mov v12.2s[0], w4 //d12[0] = ui_Bs + mov x6, x0 //keeping backup of pointer to p1 + ld1 {v8.8b, v9.8b}, [x0], x1 //p1 values are loaded into q4 + mov x7, x0 //keeping backup of pointer to p0 + ld1 {v6.8b, v7.8b}, [x0], x1 //p0 values are loaded into q3 + uxtl v12.8h, v12.8b //q6 = uc_Bs in each 16 bt scalar + ld1 {v0.8b, v1.8b}, [x0], x1 //q0 values are loaded into q0 + mov v10.d[1], v11.d[0] + mov v8.d[1], v9.d[0] + mov v6.d[1], v7.d[0] + uabd v26.16b, v8.16b, v6.16b + ld1 {v2.8b, v3.8b}, [x0], x1 //q1 values are loaded into q1 + mov v0.d[1], v1.d[0] + mov v2.d[1], v3.d[0] + uabd v22.16b, v6.16b, v0.16b + ld1 {v16.s}[0], [x5] //D16[0] contains cliptab + uabd v24.16b, v2.16b, v0.16b + ld1 {v4.8b, v5.8b}, [x0], x1 //q2 values are loaded into q2 + tbl v14.8b, {v16.16b}, v12.8b // + mov v4.d[1], v5.d[0] + dup v20.16b, w2 //Q10 contains alpha + dup v16.16b, w3 //Q8 contains beta + uxtl v12.4s, v12.4h // + uxtl v14.4s, v14.4h // + uabd v28.16b, v10.16b, v6.16b + uabd v30.16b, v4.16b, v0.16b + cmgt v12.4s, v12.4s, #0 + sli v14.4s, v14.4s, #8 + cmhs v18.16b, v22.16b, v20.16b + cmhs v24.16b, v24.16b, v16.16b + cmhs v26.16b, v26.16b, v16.16b + cmhi v20.16b, v16.16b , v28.16b //Q10=(Ap= Alpha ) | ( ABS(q1 - q0) >= Beta ) + usubl v30.8h, v1.8b, v7.8b // + usubl v24.8h, v0.8b, v6.8b //Q15,Q12 = (q0 - p0) + orr v18.16b, v18.16b , v26.16b //Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta ) + usubl v28.8h, v8.8b, v2.8b //Q14 = (p1 - q1)L + shl v26.8h, v30.8h, #2 //Q13 = (q0 - p0)<<2 + shl v24.8h, v24.8h, #2 //Q12 = (q0 - p0)<<2 + usubl v30.8h, v9.8b, v3.8b //Q15 = (p1 - q1)H + bic v12.16b, v12.16b , v18.16b //final condition + add v24.8h, v24.8h , v28.8h // + add v26.8h, v26.8h , v30.8h //Q13,Q12 = [ (q0 - p0)<<2 ] + (p1 - 
q1) + sub v18.16b, v14.16b , v20.16b //Q9 = C0 + (Ap < Beta) + urhadd v16.16b, v6.16b , v0.16b //Q8 = ((p0+q0+1) >> 1) + mov v17.d[0], v16.d[1] + sqrshrn v24.8b, v24.8h, #3 // + sqrshrn v25.8b, v26.8h, #3 //Q12 = i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3 + mov v24.d[1], v25.d[0] + sub v18.16b, v18.16b , v22.16b //Q9 = C0 + (Ap < Beta) + (Aq < Beta) + and v20.16b, v20.16b , v12.16b // + and v22.16b, v22.16b , v12.16b // + abs v26.16b, v24.16b //Q13 = ABS (i_macro) + uaddl v28.8h, v17.8b, v11.8b // + uaddl v10.8h, v16.8b, v10.8b //Q14,Q5 = p2 + (p0+q0+1)>>1 + uaddl v30.8h, v17.8b, v5.8b // + umin v18.16b, v26.16b , v18.16b //Q9 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro) + ushll v26.8h, v9.8b, #1 // + uaddl v4.8h, v16.8b, v4.8b //Q15,Q2 = q2 + (p0+q0+1)>>1 + ushll v16.8h, v8.8b, #1 //Q13,Q8 = (p1<<1) + and v18.16b, v18.16b , v12.16b //Making delta zero in places where values shouldn be filterd + sub v28.8h, v28.8h , v26.8h //Q14,Q5 = [p2 + (p0+q0+1)>>1] - (p1<<1) + sub v10.8h, v10.8h , v16.8h // + ushll v16.8h, v2.8b, #1 // + ushll v26.8h, v3.8b, #1 //Q13,Q8 = (q1<<1) + sqshrn v29.8b, v28.8h, #1 // + sqshrn v28.8b, v10.8h, #1 //Q14 = i_macro_p1 + mov v28.d[1], v29.d[0] + sub v4.8h, v4.8h , v16.8h // + sub v30.8h, v30.8h , v26.8h //Q15,Q2 = [q2 + (p0+q0+1)>>1] - (q1<<1) + neg v26.16b, v14.16b //Q13 = -C0 + smin v28.16b, v28.16b , v14.16b //Q14 = min(C0,i_macro_p1) + cmge v24.16b, v24.16b, #0 + sqshrn v31.8b, v30.8h, #1 // + sqshrn v30.8b, v4.8h, #1 //Q15 = i_macro_q1 + mov v30.d[1], v31.d[0] + smax v28.16b, v28.16b , v26.16b //Q14 = max( - C0 , min(C0, i_macro_p1) ) + uqadd v16.16b, v6.16b , v18.16b //Q8 = p0 + delta + uqsub v6.16b, v6.16b , v18.16b //Q3 = p0 - delta + smin v30.16b, v30.16b , v14.16b //Q15 = min(C0,i_macro_q1) + and v28.16b, v20.16b , v28.16b //condition check Ap= 0 ) ? (p0+delta) : (p0-delta) + bif v0.16b, v14.16b , v24.16b //Q0 = (i_macro >= 0 ) ? 
(q0-delta) : (q0+delta) + add v28.16b, v28.16b , v8.16b // + and v30.16b, v22.16b , v30.16b //condition check Aq= Alpha + cmhs v14.16b, v14.16b , v2.16b //ABS(q1 - q0) >= Beta + cmhs v16.16b, v16.16b , v2.16b //ABS(q1 - q0) >= Beta + movi v20.16b, #2 + orr v18.16b, v18.16b , v14.16b //ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta + ld1 {v14.8b, v15.8b}, [x0], x1 //load q2 to Q7, q0 = q0 + src_strd + mov v14.d[1] , v15.d[0] + orr v18.16b, v18.16b , v16.16b //ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta + usra v20.16b, v0.16b, #2 //alpha >>2 +2 + uabd v22.16b , v14.16b, v4.16b + uaddl v24.8h, v4.8b, v6.8b //p0+q0 L + uaddl v26.8h, v5.8b, v7.8b //p0+q0 H + cmhi v22.16b, v2.16b , v22.16b //Aq < Beta + cmhi v20.16b, v20.16b , v12.16b //(ABS(p0 - q0) <((Alpha >>2) + 2)) + // Deblock Filtering q0', q1', q2' + uaddw v28.8h, v24.8h , v8.8b //p0+q0+q1 L + uaddw v30.8h, v26.8h , v9.8b //p0+q0+q1 H + and v22.16b, v22.16b , v20.16b //(Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) + // q0' if (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) TRUE + add v16.8h, v28.8h , v28.8h //2*(p0+q0+q1)L + add v0.8h, v30.8h , v30.8h //2*(p0+q0+q1)H + uaddw v16.8h, v16.8h , v14.8b //2*(p0+q0+q1)+q2 L + uaddw v0.8h, v0.8h , v15.8b //2*(p0+q0+q1)+q2 H + uaddw v16.8h, v16.8h , v10.8b //2*(p0+q0+q1)+q2 +p1 L + uaddw v0.8h, v0.8h , v11.8b //2*(p0+q0+q1)+q2 +p1 H + rshrn v12.8b, v16.8h, #3 //(2*(p0+q0+q1)+q2 +p1 +4)>> 3 L [q0'] + rshrn v13.8b, v0.8h, #3 //(2*(p0+q0+q1)+q2 +p1 +4)>> 3 H [q0'] + mov v12.d[1] , v13.d[0] + // q0" if (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) FALSE + uaddl v16.8h, v8.8b, v8.8b //2*q1 L + uaddl v0.8h, v9.8b, v9.8b //2*q1 H + uaddw v16.8h, v16.8h , v4.8b //2*q1+q0 L + uaddw v0.8h, v0.8h , v5.8b //2*q1+q0 H + uaddw v16.8h, v16.8h , v10.8b //2*q1+q0+p1 L + uaddw v0.8h, v0.8h , v11.8b //2*q1+q0+p1 H + rshrn v16.8b, v16.8h, #2 //(2*q1+q0+p1+2)>>2 L [q0"] + rshrn v17.8b, v0.8h, #2 //(2*q1+q0+p1+2)>>2 H [q0"] + mov v16.d[1] , v17.d[0] + uaddw v28.8h, 
v28.8h , v14.8b //p0+q0+q1+q2 L + uaddw v30.8h, v30.8h , v15.8b //p0+q0+q1+q2 H + ld1 {v0.8b, v1.8b}, [x0], x1 //load q3 to Q0, q0 = q0 + src_strd + mov v0.d[1] , v1.d[0] + bit v16.16b, v12.16b , v22.16b //choosing between q0' and q0" depending on condn + sub x0, x0, x1, lsl #2 //pointer to q0 + bic v22.16b, v22.16b , v18.16b //((ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta)) + // && (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) + rshrn v12.8b, v28.8h, #2 //(p0+q0+q1+q2+2)>>2 L [q1'] + rshrn v13.8b, v30.8h, #2 //(p0+q0+q1+q2+2)>>2 H [q1'] + mov v12.d[1] , v13.d[0] + bif v4.16b, v16.16b , v18.16b //choose q0 or filtered q0 + mov v5.d[0] , v4.d[1] + uaddl v16.8h, v14.8b, v0.8b //q2+q3,L + uaddl v0.8h, v15.8b, v1.8b //q2+q3,H + add v28.8h, v28.8h , v16.8h //p0+q0+q1+2*q2+q3 L + st1 {v4.8b, v5.8b}, [x0], x1 //store q0 + add v30.8h, v30.8h , v0.8h //p0+q0+q1+2*q2+q3 H + add v28.8h, v28.8h , v16.8h //p0+q0+q1+3*q2+2*q3 L + add v30.8h, v30.8h , v0.8h //p0+q0+q1+3*q2+2*q3 H + rshrn v0.8b, v28.8h, #3 //(p0+q0+q1+3*q2+2*q3+4)>>3 L [q2'] + rshrn v1.8b, v30.8h, #3 //(p0+q0+q1+3*q2+2*q3+4)>>3 H [q2'] + mov v0.d[1] , v1.d[0] + ld1 {v30.8b, v31.8b}, [x3] //load p2 to Q15 + mov v30.d[1] , v31.d[0] + bif v12.16b, v8.16b , v22.16b //choose q1 or filtered value of q1 + mov v13.d[0] , v12.d[1] + uabd v16.16b , v30.16b, v6.16b + uaddw v24.8h, v24.8h , v10.8b //p0+q0+p1 L + bif v0.16b, v14.16b , v22.16b //choose q2 or filtered q2 + mov v1.d[0] , v0.d[1] + uaddw v26.8h, v26.8h , v11.8b //p0+q0+p1 H + st1 {v12.8b, v13.8b}, [x0], x1 //store q1 + cmhi v16.16b, v2.16b , v16.16b //Ap < Beta + add v28.8h, v24.8h , v24.8h //2*(p0+q0+p1) L + add v4.8h, v26.8h , v26.8h //2*(p0+q0+p1) H + st1 {v0.8b, v1.8b}, [x0], x1 //store q2 + and v20.16b, v20.16b , v16.16b //((Ap < Beta) && (ABS(p0 - q0) <((Alpha >>2) + 2))) + uaddw v28.8h, v28.8h , v30.8b //2*(p0+q0+p1)+p2 l + uaddw v4.8h, v4.8h , v31.8b //2*(p0+q0+p1)+p2 H + uaddw v28.8h, v28.8h , v8.8b //2*(p0+q0+p1)+p2+q1 L + 
uaddw v4.8h, v4.8h , v9.8b //2*(p0+q0+p1)+p2+q1 H + rshrn v28.8b, v28.8h, #3 //(2*(p0+q0+p1)+p2+q1+4)>>3 L,p0' + rshrn v29.8b, v4.8h, #3 //(2*(p0+q0+p1)+p2+q1+4)>>3 H,p0' + mov v28.d[1] , v29.d[0] + movi v0.8b, #2 + movi v1.4h, #2 + uaddl v2.8h, v6.8b, v8.8b //p0+q1 L + umlal v2.8h, v10.8b, v0.8b //2*p1+p0+q1 L + uaddl v16.8h, v7.8b, v9.8b //p0+q1 H + umlal v16.8h, v11.8b, v0.8b //2*p1+p0+q1 H + uaddw v12.8h, v24.8h , v30.8b //(p0+q0+p1) +p2 L + ld1 {v24.8b, v25.8b}, [x2] //load p3,Q12 + mov v24.d[1] , v25.d[0] + uaddw v4.8h, v26.8h , v31.8b //(p0+q0+p1) +p2 H + uaddl v8.8h, v30.8b, v24.8b //p2+p3 L + rshrn v26.8b, v12.8h, #2 //((p0+q0+p1)+p2 +2)>>2,p1' L + rshrn v2.8b, v2.8h, #2 //(2*p1+p0+q1+2)>>2,p0"L + rshrn v27.8b, v4.8h, #2 //((p0+q0+p1)+p2 +2)>>2,p1' H + rshrn v3.8b, v16.8h, #2 //(2*p1+p0+q1+2)>>2,p0" H + mov v26.d[1] , v27.d[0] + mov v2.d[1] , v3.d[0] + uaddl v16.8h, v31.8b, v25.8b //p2+p3 H + mla v12.8h, v8.8h , v1.4h[0] //(p0+q0+p1)+3*p2+2*p3 L + mla v4.8h, v16.8h , v1.4h[0] //(p0+q0+p1)+3*p2+2*p3 H + bic v16.16b, v20.16b , v18.16b //((ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta)) + mov v17.d[0] , v16.d[1] //&& (Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) + bit v2.16b, v28.16b , v20.16b //choosing between po' and p0" + mov v3.d[0] , v2.d[1] + rshrn v12.8b, v12.8h, #3 //((p0+q0+p1)+3*p2+2*p3+4)>>3 L p2' + rshrn v13.8b, v4.8h, #3 //((p0+q0+p1)+3*p2+2*p3+4)>>3 H p2' + mov v12.d[1] , v13.d[0] + bif v6.16b, v2.16b , v18.16b //choosing between p0 and filtered value of p0 + bit v10.16b, v26.16b , v16.16b //choosing between p1 and p1' + bit v30.16b, v12.16b , v16.16b //choosing between p2 and p2' + st1 {v6.16b}, [x12] //store p0 + st1 {v10.16b}, [x14] //store p1 + st1 {v30.16b}, [x3] //store p2 + + // LDMFD sp!,{x12,pc} + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + +///** +//******************************************************************************* +//* +//* @brief +//* Performs filtering of a luma block vertical edge for 
cases where the +//* boundary strength is less than 4 +//* +//* @par Description: +//* This operation is described in Sec. 8.7.2.3 under the title +//* "Filtering process for edges with bS less than 4" in ITU T Rec H.264. +//* +//* @param[in] x0 - pu1_src +//* Pointer to the src sample q0 +//* +//* @param[in] x1 - src_strd +//* Source stride +//* +//* @param[in] x2 - alpha +//* Alpha Value for the boundary +//* +//* @param[in] x3 - beta +//* Beta Value for the boundary +//* +//* @param[in] x4 - u4_bs +//* Packed Boundary strength array +//* +//* @param[in] x5 - pu1_cliptab +//* tc0_table +//* +//* @returns +//* None +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + + .global ih264_deblk_luma_vert_bslt4_av8 + +ih264_deblk_luma_vert_bslt4_av8: + + // STMFD sp!,{x12,x14} + push_v_regs + stp x19, x20, [sp, #-16]! + + sub x0, x0, #4 //pointer uc_edgePixel-4 + mov x12, x4 //x12 = u4_bs (packed boundary strengths) + mov x14, x5 //x14 = pu1_cliptab + mov x17, x0 + //loading p3:p2:p1:p0:q0:q1:q2:q3 for every row + ld1 {v0.8b}, [x0], x1 //row1 + ld1 {v2.8b}, [x0], x1 //row2 + ld1 {v4.8b}, [x0], x1 //row3 + rev w12, w12 //reversing ui_bs + ld1 {v6.8b}, [x0], x1 //row4 + mov v18.2s[0], w12 //d12[0] = ui_Bs + ld1 {v16.s}[0], [x14] //D16[0] contains cliptab + ld1 {v8.8b}, [x0], x1 //row5 + uxtl v18.8h, v18.8b //q6 = uc_Bs in each 16 bit scalar + ld1 {v10.8b}, [x0], x1 //row6 + ld1 {v12.8b}, [x0], x1 //row7 + tbl v16.8b, {v16.16b}, v18.8b //puc_ClipTab[uc_Bs] + ld1 {v14.8b}, [x0], x1 //row8 + ld1 {v1.8b}, [x0], x1 //row9 + uxtl v16.4s, v16.4h // + ld1 {v3.8b}, [x0], x1 //row10 + ld1 {v5.8b}, [x0], x1 //row11 + ld1 {v7.8b}, [x0], x1 //row12 + sli v16.4s, v16.4s, #8 // + ld1 {v9.8b}, [x0], x1 //row13 + ld1 {v11.8b}, [x0], x1 //row14 + ld1 {v13.8b}, [x0], x1 //row15 + sli v16.4s, v16.4s, #16 + ld1 {v15.8b}, [x0], x1 //row16 + + + //taking two 8x8 transposes + //2X2 transposes + trn1 v21.8b, v0.8b, v2.8b + trn2 v2.8b, v0.8b, v2.8b //row1 &2 + mov v0.8b, v21.8b + trn1
v21.8b, v4.8b, v6.8b + trn2 v6.8b, v4.8b, v6.8b //row3&row4 + mov v4.8b, v21.8b + trn1 v21.8b, v8.8b, v10.8b + trn2 v10.8b, v8.8b, v10.8b //row5&6 + mov v8.8b, v21.8b + trn1 v21.8b, v12.8b, v14.8b + trn2 v14.8b, v12.8b, v14.8b //row7 & 8 + mov v12.8b, v21.8b + trn1 v21.8b, v1.8b, v3.8b + trn2 v3.8b, v1.8b, v3.8b //row9 &10 + mov v1.8b, v21.8b + trn1 v21.8b, v5.8b, v7.8b + trn2 v7.8b, v5.8b, v7.8b //row11 & 12 + mov v5.8b, v21.8b + trn1 v21.8b, v9.8b, v11.8b + trn2 v11.8b, v9.8b, v11.8b //row13 &14 + mov v9.8b, v21.8b + trn1 v21.8b, v13.8b, v15.8b + trn2 v15.8b, v13.8b, v15.8b //row15 & 16 + mov v13.8b, v21.8b + //4x4 transposes + trn1 v21.4h, v2.4h, v6.4h + trn2 v6.4h, v2.4h, v6.4h //row2 & row4 + mov v2.8b, v21.8b + trn1 v21.4h, v10.4h, v14.4h + trn2 v14.4h, v10.4h, v14.4h //row6 & row8 + mov v10.8b, v21.8b + trn1 v21.4h, v3.4h, v7.4h + trn2 v7.4h, v3.4h, v7.4h //row10 & 12 + mov v3.8b, v21.8b + trn1 v21.4h, v11.4h, v15.4h + trn2 v15.4h, v11.4h, v15.4h //row14 & row16 + mov v11.8b, v21.8b + trn1 v21.2s, v6.2s, v14.2s + trn2 v14.2s, v6.2s, v14.2s //row4 & 8 + mov v6.8b, v21.8b + trn1 v21.2s, v7.2s, v15.2s + trn2 v15.2s, v7.2s, v15.2s //row 12 & 16 + mov v7.8b, v21.8b + //now Q3 ->p0 and Q7->q3 + trn1 v21.4h, v0.4h, v4.4h + trn2 v4.4h, v0.4h, v4.4h //row1 & 3 + mov v0.8b, v21.8b + trn1 v21.4h, v8.4h, v12.4h + trn2 v12.4h, v8.4h, v12.4h //row 5 & 7 + mov v8.8b, v21.8b + trn1 v21.4h, v1.4h, v5.4h + trn2 v5.4h, v1.4h, v5.4h //row9 & row11 + mov v1.8b, v21.8b + trn1 v21.4h, v9.4h, v13.4h + trn2 v13.4h, v9.4h, v13.4h //row13 & row15 + mov v9.8b, v21.8b + trn1 v21.2s, v0.2s, v8.2s + trn2 v8.2s, v0.2s, v8.2s //row1 & row5 + mov v0.8b, v21.8b + trn1 v21.2s, v1.2s, v9.2s + trn2 v9.2s, v1.2s, v9.2s //row9 & 13 + mov v1.8b, v21.8b + //now Q0->p3 & Q4->q0 + //starting processing as p0 and q0 are now ready + trn1 v21.2s, v2.2s, v10.2s + trn2 v10.2s, v2.2s, v10.2s //row2 &6 + mov v2.8b, v21.8b + mov v6.d[1] , v7.d[0] + mov v8.d[1] , v9.d[0] + urhadd v20.16b, v6.16b , v8.16b
//((p0 + q0 + 1) >> 1) + mov v21.d[0], v20.d[1] + trn1 v31.2s, v3.2s, v11.2s + trn2 v11.2s, v3.2s, v11.2s //row10&row14 + mov v3.8b, v31.8b + movi v19.8b, #2 + mov v18.d[1], v19.d[0] + //now Q1->p2 & Q5->q1 + trn1 v31.2s, v4.2s, v12.2s + trn2 v12.2s, v4.2s, v12.2s //row3 & 7 + mov v4.8b, v31.8b + uabd v22.16b , v6.16b, v8.16b //ABS(p0 - q0) + trn1 v31.2s, v5.2s, v13.2s + trn2 v13.2s, v5.2s, v13.2s //row11 & row15 + mov v5.8b, v31.8b + mov v0.d[1] , v1.d[0] + mov v2.d[1] , v3.d[0] + mov v4.d[1] , v5.d[0] + mov v10.d[1] , v11.d[0] + mov v12.d[1] , v13.d[0] + mov v14.d[1] , v15.d[0] + uaddl v24.8h, v20.8b, v2.8b //(p2 + ((p0 + q0 + 1) >> 1) L + //now Q2->p1,Q6->q2 + uaddl v26.8h, v21.8b, v3.8b //(p2 + ((p0 + q0 + 1) >> 1) H + umlsl v24.8h, v4.8b, v19.8b //(p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) L + umlsl v26.8h, v5.8b, v19.8b //(p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) H + dup v28.16b, w2 //alpha + cmhs v22.16b, v22.16b , v28.16b //ABS(p0 - q0) >= Alpha(Alpha <=ABS(p0 - q0)) + dup v28.16b, w3 //beta + uabd v30.16b , v10.16b, v8.16b //ABS(q1 - q0) + sqshrn v24.8b, v24.8h, #1 //((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1) L + sqshrn v25.8b, v26.8h, #1 //((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1) H + mov v24.d[1], v25.d[0] + cmhs v30.16b, v30.16b , v28.16b //ABS(q1 - q0) >= Beta + uabd v26.16b , v4.16b, v6.16b //ABS(p1 - p0) + + smin v24.16b, v24.16b , v16.16b //min(deltap1 ,C0) + orr v22.16b, v22.16b , v30.16b //ABS(q1 - q0) >= Beta ||ABS(p0 - q0) >= Alpha + neg v30.16b, v16.16b //-C0 + cmhs v26.16b, v26.16b , v28.16b //ABS(p1 - p0) >= Beta + smax v24.16b, v24.16b , v30.16b //max(deltap1,-C0) + orr v22.16b, v22.16b , v26.16b //ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta) + uxtl v26.4s, v18.4h //ui_bs + uaddl v18.8h, v20.8b, v12.8b //q2 + ((p0 + q0 + 1) >> 1) L + cmeq v26.4s, v26.4s , #0 //ui_bs == 0 + usubw v18.8h, v18.8h , v10.8b //(q2 + ((p0 + q0 + 1) >> 1) -
q1) L + uaddl v20.8h, v21.8b, v13.8b //q2 + ((p0 + q0 + 1) >> 1) H + usubw v18.8h, v18.8h , v10.8b //(q2 + ((p0 + q0 + 1) >> 1) - 2*q1)L + usubw v20.8h, v20.8h , v11.8b //(q2 + ((p0 + q0 + 1) >> 1) - q1) H + orr v26.16b, v26.16b , v22.16b //(ui_bs == 0) || (ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta) + usubw v20.8h, v20.8h , v11.8b //(q2 + ((p0 + q0 + 1) >> 1) - 2*q1) H + sqshrn v18.8b, v18.8h, #1 //((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1) L + uabd v22.16b , v2.16b, v6.16b //Ap = ABS(p2 - p0) + sqshrn v19.8b, v20.8h, #1 //((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1) H + mov v18.d[1], v19.d[0] + uabd v20.16b , v12.16b, v8.16b //Aq = ABS(q2 - q0) + cmhi v22.16b, v28.16b , v22.16b //Ap < Beta + smin v18.16b, v18.16b , v16.16b //min(delatq1,C0) + cmhi v20.16b, v28.16b , v20.16b //Aq < Beta -- NOTE(review): the text between this comment and the rshrn below looks truncated in this copy (the 16-bit delta accumulation feeding v28/v30 is not visible); confirm against the upstream file + rshrn v29.8b, v30.8h, #3 //delta = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3) H + mov v28.d[1], v29.d[0] + sub v16.16b, v16.16b , v20.16b //C0 + (Ap < Beta) + (Aq < Beta) + bic v20.16b, v20.16b , v26.16b //final condition for q1 + abs v30.16b, v28.16b //abs(delta) + and v24.16b, v24.16b , v22.16b //delatp1 + and v18.16b, v18.16b , v20.16b //delta q1 + umin v30.16b, v30.16b , v16.16b //min((abs(delta),C) + add v4.16b, v4.16b , v24.16b //p1+deltap1 + add v10.16b, v10.16b , v18.16b //q1+deltaq1 + mov v5.d[0], v4.d[1] + mov v11.d[0], v10.d[1] + bic v30.16b, v30.16b , v26.16b //abs(delta) of pixels to be changed only + // VCGE.S8 Q14, Q14,#0 //sign(delta) + cmge v28.16b, v28.16b , #0 + uqsub v22.16b, v6.16b , v30.16b //clip(p0-delta) + + trn1 v21.8b, v0.8b, v2.8b + trn2 v2.8b, v0.8b, v2.8b //row1 &2 + mov v0.8b, v21.8b + uqadd v6.16b, v6.16b , v30.16b //clip(p0+delta) + + trn1 v21.8b, v1.8b, v3.8b + trn2 v3.8b, v1.8b, v3.8b //row9 &10 + mov v1.8b, v21.8b + uqadd v24.16b, v8.16b , v30.16b //clip(q0+delta) + trn1 v21.8b, v12.8b, v14.8b + trn2 v14.8b, v12.8b, v14.8b //row7 & 8 + mov v12.8b, v21.8b + uqsub v8.16b, v8.16b , v30.16b //clip(q0-delta) + trn1 v21.8b, v13.8b,
v15.8b + trn2 v15.8b, v13.8b, v15.8b //row15 & 16 + mov v13.8b, v21.8b + bif v6.16b, v22.16b , v28.16b //p0 + bif v8.16b, v24.16b , v28.16b //q0 + mov v7.d[0], v6.d[1] + mov v9.d[0], v8.d[1] + trn1 v21.8b, v4.8b, v6.8b + trn2 v6.8b, v4.8b, v6.8b //row3&row4 + mov v4.8b, v21.8b + trn1 v21.8b, v8.8b, v10.8b + trn2 v10.8b, v8.8b, v10.8b //row5&6 + mov v8.8b, v21.8b + trn1 v21.8b, v5.8b, v7.8b + trn2 v7.8b, v5.8b, v7.8b //row11 & 12 + mov v5.8b, v21.8b + trn1 v21.8b, v9.8b, v11.8b + trn2 v11.8b, v9.8b, v11.8b //row13 &14 + mov v9.8b, v21.8b + trn1 v21.4h, v2.4h, v6.4h + trn2 v6.4h, v2.4h, v6.4h //row2 & row4 + mov v2.8b, v21.8b + trn1 v21.4h, v10.4h, v14.4h + trn2 v14.4h, v10.4h, v14.4h //row6 & row8 + mov v10.8b, v21.8b + trn1 v21.4h, v3.4h, v7.4h + trn2 v7.4h, v3.4h, v7.4h //row10 & 12 + mov v3.8b, v21.8b + trn1 v21.4h, v11.4h, v15.4h + trn2 v15.4h, v11.4h, v15.4h //row14 & row16 + mov v11.8b, v21.8b + trn1 v21.2s, v6.2s, v14.2s + trn2 v14.2s, v6.2s, v14.2s //row4 & 8 + mov v6.8b, v21.8b + trn1 v21.2s, v7.2s, v15.2s + trn2 v15.2s, v7.2s, v15.2s //row 12 & 16 + mov v7.8b, v21.8b + //now Q3 ->p0 and Q7->q3 + trn1 v21.4h, v0.4h, v4.4h + trn2 v4.4h, v0.4h, v4.4h //row1 & 3 + mov v0.8b, v21.8b + trn1 v21.4h, v8.4h, v12.4h + trn2 v12.4h, v8.4h, v12.4h //row 5 & 7 + mov v8.8b, v21.8b + trn1 v21.4h, v1.4h, v5.4h + trn2 v5.4h, v1.4h, v5.4h //row9 & row11 + mov v1.8b, v21.8b + trn1 v21.4h, v9.4h, v13.4h + trn2 v13.4h, v9.4h, v13.4h //row13 & row15 + mov v9.8b, v21.8b + sub x0, x0, x1, lsl#4 //restore pointer + trn1 v21.2s, v0.2s, v8.2s + trn2 v8.2s, v0.2s, v8.2s //row1 & row5 + mov v0.8b, v21.8b + trn1 v21.2s, v1.2s, v9.2s + trn2 v9.2s, v1.2s, v9.2s //row9 & 13 + mov v1.8b, v21.8b + trn1 v21.2s, v2.2s, v10.2s + trn2 v10.2s, v2.2s, v10.2s //row2 &6 + mov v2.8b, v21.8b + trn1 v21.2s, v3.2s, v11.2s + trn2 v11.2s, v3.2s, v11.2s //row10&row14 + mov v3.8b, v21.8b + trn1 v21.2s, v4.2s, v12.2s + trn2 v12.2s, v4.2s, v12.2s //row3 & 7 + mov v4.8b, v21.8b + trn1 v21.2s, v5.2s, v13.2s +
trn2 v13.2s, v5.2s, v13.2s //row11 & row15 + mov v5.8b, v21.8b + st1 {v0.8b}, [x0], x1 //row1 + st1 {v2.8b}, [x0], x1 //row2 + st1 {v4.8b}, [x0], x1 //row3 + st1 {v6.8b}, [x0], x1 //row4 + st1 {v8.8b}, [x0], x1 //row5 + st1 {v10.8b}, [x0], x1 //row6 + st1 {v12.8b}, [x0], x1 //row7 + st1 {v14.8b}, [x0], x1 //row8 + st1 {v1.8b}, [x0], x1 //row9 + st1 {v3.8b}, [x0], x1 //row10 + st1 {v5.8b}, [x0], x1 //row11 + st1 {v7.8b}, [x0], x1 //row12 + st1 {v9.8b}, [x0], x1 //row13 + st1 {v11.8b}, [x0], x1 //row14 + st1 {v13.8b}, [x0], x1 //row15 + st1 {v15.8b}, [x0], x1 //row16 + + // LDMFD sp!,{x12,pc} + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + +///** +//******************************************************************************* +//* +//* @brief +//* Performs filtering of a luma block vertical edge when the +//* boundary strength is set to 4 +//* +//* @par Description: +//* This operation is described in Sec. 8.7.2.4 under the title +//* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. +//* +//* @param[in] x0 - pu1_src +//* Pointer to the src sample q0 +//* +//* @param[in] x1 - src_strd +//* Source stride +//* +//* @param[in] x2 - alpha +//* Alpha Value for the boundary +//* +//* @param[in] x3 - beta +//* Beta Value for the boundary +//* +//* @returns +//* None +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + + .global ih264_deblk_luma_vert_bs4_av8 + +ih264_deblk_luma_vert_bs4_av8: + + // STMFD sp!,{x12,x14} + push_v_regs + stp x19, x20, [sp, #-16]!
+ + sub x0, x0, #4 //pointer uc_edgePixel-4 + mov x17, x0 + //loading p3:p2:p1:p0:q0:q1:q2:q3 for every row + ld1 {v0.8b}, [x0], x1 //row1 + ld1 {v2.8b}, [x0], x1 //row2 + ld1 {v4.8b}, [x0], x1 //row3 + ld1 {v6.8b}, [x0], x1 //row4 + ld1 {v8.8b}, [x0], x1 //row5 + ld1 {v10.8b}, [x0], x1 //row6 + ld1 {v12.8b}, [x0], x1 //row7 + ld1 {v14.8b}, [x0], x1 //row8 + ld1 {v1.8b}, [x0], x1 //row9 + ld1 {v3.8b}, [x0], x1 //row10 + ld1 {v5.8b}, [x0], x1 //row11 + ld1 {v7.8b}, [x0], x1 //row12 + ld1 {v9.8b}, [x0], x1 //row13 + ld1 {v11.8b}, [x0], x1 //row14 + ld1 {v13.8b}, [x0], x1 //row15 + ld1 {v15.8b}, [x0], x1 //row16 + + //taking two 8x8 transposes + //2X2 transposes + trn1 v21.8b, v0.8b, v2.8b + trn2 v2.8b, v0.8b, v2.8b //row1 &2 + mov v0.8b, v21.8b + trn1 v21.8b, v4.8b, v6.8b + trn2 v6.8b, v4.8b, v6.8b //row3&row4 + mov v4.8b, v21.8b + trn1 v21.8b, v8.8b, v10.8b + trn2 v10.8b, v8.8b, v10.8b //row5&6 + mov v8.8b, v21.8b + trn1 v21.8b, v12.8b, v14.8b + trn2 v14.8b, v12.8b, v14.8b //row7 & 8 + mov v12.8b, v21.8b + trn1 v21.8b, v1.8b, v3.8b + trn2 v3.8b, v1.8b, v3.8b //row9 &10 + mov v1.8b , v21.8b + trn1 v21.8b, v5.8b, v7.8b + trn2 v7.8b, v5.8b, v7.8b //row11 & 12 + mov v5.8b , v21.8b + trn1 v21.8b, v9.8b, v11.8b + trn2 v11.8b, v9.8b, v11.8b //row13 &14 + mov v9.8b , v21.8b + trn1 v21.8b, v13.8b, v15.8b + trn2 v15.8b, v13.8b, v15.8b //row15 & 16 + mov v13.8b , v21.8b + //4x4 transposes + trn1 v21.4h, v2.4h, v6.4h + trn2 v6.4h, v2.4h, v6.4h //row2 & row4 + mov v2.8b, v21.8b + trn1 v21.4h, v10.4h, v14.4h + trn2 v14.4h, v10.4h, v14.4h //row6 & row8 + mov v10.8b , v21.8b + trn1 v21.4h, v3.4h, v7.4h + trn2 v7.4h, v3.4h, v7.4h //row10 & 12 + mov v3.8b, v21.8b + trn1 v21.4h, v11.4h, v15.4h + trn2 v15.4h, v11.4h, v15.4h //row14 & row16 + mov v11.8b, v21.8b + trn1 v21.2s, v6.2s, v14.2s + trn2 v14.2s, v6.2s, v14.2s //row4 & 8 + mov v6.8b, v21.8b + trn1 v21.2s, v7.2s, v15.2s + trn2 v15.2s, v7.2s, v15.2s //row 12 & 16 + mov v7.8b, v21.8b + //now Q3 ->p0 and Q7->q3 + trn1 v21.4h, v0.4h,
v4.4h + trn2 v4.4h, v0.4h, v4.4h //row1 & 3 + mov v0.8b , v21.8b + trn1 v21.4h, v8.4h, v12.4h + trn2 v12.4h, v8.4h, v12.4h //row 5 & 7 + mov v8.8b, v21.8b + trn1 v21.4h, v1.4h, v5.4h + trn2 v5.4h, v1.4h, v5.4h //row9 & row11 + mov v1.8b, v21.8b + trn1 v21.4h, v9.4h, v13.4h + trn2 v13.4h, v9.4h, v13.4h //row13 & row15 + mov v9.8b , v21.8b + trn1 v21.2s, v0.2s, v8.2s + trn2 v8.2s, v0.2s, v8.2s //row1 & row5 + mov v0.8b, v21.8b + trn1 v21.2s, v1.2s, v9.2s + trn2 v9.2s, v1.2s, v9.2s //row9 & 13 + mov v1.8b, v21.8b + //now Q0->p3 & Q4->q0 + //starting processing as p0 and q0 are now ready + //now Q1->p2 & Q5->q1 + mov v31.d[0], v14.d[0] + mov v31.d[1], v15.d[0] + trn1 v21.2s, v4.2s, v12.2s + trn2 v12.2s, v4.2s, v12.2s //row3 & 7 + mov v4.8b, v21.8b + movi v28.8h, #2 + trn1 v21.2s, v5.2s, v13.2s + trn2 v13.2s, v5.2s, v13.2s //row11 & row15 + mov v5.8b, v21.8b + uaddl v16.8h, v6.8b, v8.8b //p0+q0 L + trn1 v21.2s, v2.2s, v10.2s + trn2 v10.2s, v2.2s, v10.2s //row2 &6 + mov v2.8b, v21.8b + uaddl v18.8h, v7.8b, v9.8b //p0+q0 H + trn1 v21.2s, v3.2s, v11.2s + trn2 v11.2s, v3.2s, v11.2s //row10&row14 + mov v3.8b, v21.8b + uaddw v20.8h, v16.8h , v4.8b //p0+q0+p1 L + uaddw v22.8h, v18.8h , v5.8b //p0+q0+p1 H + uaddl v24.8h, v2.8b, v10.8b //p2+q1 L + uaddl v26.8h, v3.8b, v11.8b //p2+q1 H + mla v24.8h, v20.8h , v28.8h //p2 + X2(p1) + X2(p0) + X2(q0) + q1 L + mla v26.8h, v22.8h , v28.8h //p2 + X2(p1) + X2(p0) + X2(q0) + q1 H + movi v28.16b, #2 + uaddw v16.8h, v20.8h , v2.8b //p0+q0+p1+p2 L + uaddw v18.8h, v22.8h , v3.8b //p0+q0+p1+p2 H + dup v30.16b, w2 //duplicate alpha + rshrn v20.8b, v16.8h, #2 //(p2 + p1 + p0 + q0 + 2) >> 2)L p1' + rshrn v21.8b, v18.8h, #2 //(p2 + p1 + p0 + q0 + 2) >> 2)H p1' + mov v20.d[1] , v21.d[0] + mov v0.d[1] , v1.d[0] + mov v2.d[1] , v3.d[0] + mov v4.d[1] , v5.d[0] + mov v6.d[1] , v7.d[0] + mov v8.d[1] , v9.d[0] + mov v10.d[1] , v11.d[0] + mov v12.d[1] , v13.d[0] + mov v14.d[1] , v15.d[0] + uabd v22.16b , v6.16b, v8.16b + usra v28.16b, v30.16b, #2 //alpha
>>2 +2 + uabd v30.16b , v2.16b, v6.16b + rshrn v24.8b, v24.8h, #3 //((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3) L p0' + rshrn v25.8b, v26.8h, #3 //((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3) H p0' + mov v24.d[1] , v25.d[0] + dup v26.16b, w3 //beta + cmhi v28.16b, v28.16b , v22.16b //ABS(p0 - q0) <((Alpha >>2) + 2) + uaddl v22.8h, v6.8b, v10.8b //p0+q1 L + cmhi v14.16b, v26.16b , v30.16b //beta>Ap + uaddl v30.8h, v7.8b, v11.8b //p0+q1 H + uaddw v22.8h, v22.8h , v4.8b //p0+q1+p1 L + uaddw v30.8h, v30.8h , v5.8b //p0+q1+p1 H + uaddw v22.8h, v22.8h , v4.8b //p0+q1+2*p1 L + uaddw v30.8h, v30.8h , v5.8b //p0+q1+2*p1 H + and v14.16b, v14.16b , v28.16b //(Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2) + rshrn v22.8b, v22.8h, #2 //((X2(p1) + p0 + q1 + 2) >> 2) L p0" + rshrn v23.8b, v30.8h, #2 //((X2(p1) + p0 + q1 + 2) >> 2) H p0" + mov v22.d[1] , v23.d[0] + uaddl v30.8h, v2.8b, v0.8b //p2+p3 L + bif v24.16b, v22.16b , v14.16b //p0' or p0 " + uaddl v22.8h, v3.8b, v1.8b //p2+p3 H + add v30.8h, v30.8h , v30.8h //2*(p2+p3) L + add v22.8h, v22.8h , v22.8h //2*(p2+p3)H + add v16.8h, v16.8h , v30.8h //(X2(p3) + X3(p2) + p1 + p0 + q0) L + add v18.8h, v18.8h , v22.8h //(X2(p3) + X3(p2) + p1 + p0 + q0) H + uabd v30.16b , v12.16b, v8.16b + uabd v22.16b , v10.16b, v8.16b + rshrn v16.8b, v16.8h, #3 //((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3); L p2' + rshrn v17.8b, v18.8h, #3 //((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3); H p2' + mov v16.d[1] , v17.d[0] + uabd v18.16b , v4.16b, v6.16b + cmhi v30.16b, v26.16b , v30.16b //Aq < Beta + cmhs v22.16b, v22.16b, v26.16b + cmhs v18.16b, v18.16b, v26.16b + dup v26.16b, w2 //duplicate alpha + and v30.16b, v30.16b , v28.16b //(Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) + uabd v28.16b , v6.16b, v8.16b + orr v22.16b, v22.16b , v18.16b //ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta + uaddl v18.8h, v6.8b, v8.8b //p0+q0 L + cmhs v28.16b, v28.16b, v26.16b + uaddl v26.8h, v7.8b, v9.8b //p0+q0 H + uaddw v18.8h, v18.8h , v10.8b //p0+q0+q1 L + orr
v22.16b, v22.16b , v28.16b //ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta||ABS(p0 - q0) >= Alpha + uaddw v26.8h, v26.8h , v11.8b //p0+q0+q1 H + bic v14.16b, v14.16b , v22.16b //final condn for p's + movi v28.16b, #2 + bif v6.16b, v24.16b , v22.16b //final p0 + bit v2.16b, v16.16b , v14.16b //final p2 + bif v20.16b, v4.16b , v14.16b //final p1 + mov v7.d[0] , v6.d[1] + mov v3.d[0] , v2.d[1] + mov v21.d[0] , v20.d[1] + uaddl v24.8h, v8.8b, v4.8b //q0+p1 L + umlal v24.8h, v10.8b, v28.8b //X2(q1) + q0 + p1 L + uaddl v16.8h, v9.8b, v5.8b //q0+p1 H + umlal v16.8h, v11.8b, v28.8b //X2(q1) + q0 + p1 H + movi v28.8h, #2 + uaddl v14.8h, v4.8b, v12.8b //p1+q2 L + mla v14.8h, v18.8h , v28.8h //p1 + X2(p0) + X2(q0) + X2(q1) + q2L + uaddl v4.8h, v5.8b, v13.8b //p1+q2H + mla v4.8h, v26.8h , v28.8h //p1 + X2(p0) + X2(q0) + X2(q1) + q2H + rshrn v24.8b, v24.8h, #2 //(X2(q1) + q0 + p1 + 2) >> 2; L q0' + rshrn v25.8b, v16.8h, #2 //(X2(q1) + q0 + p1 + 2) >> 2; H q0' + mov v24.d[1] , v25.d[0] + uaddw v18.8h, v18.8h , v12.8b //p0 + q0 + q1 + q2 L + uaddw v26.8h, v26.8h , v13.8b //p0 + q0 + q1 + q2 H + rshrn v16.8b, v14.8h, #3 //(p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3 L qo" + mov v14.16b, v31.16b + rshrn v17.8b, v4.8h, #3 //(p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3 H qo" + mov v16.d[1] , v17.d[0] + rshrn v4.8b, v18.8h, #2 //p0 + q0 + q1 + q2 + 2)>>2 L q1' + rshrn v5.8b, v26.8h, #2 //p0 + q0 + q1 + q2 + 2)>>2 H q1' + mov v4.d[1] , v5.d[0] + bit v24.16b, v16.16b , v30.16b //q0' or q0" + bic v30.16b, v30.16b , v22.16b //final condn for q's + trn1 v31.8b, v0.8b, v2.8b + trn2 v2.8b, v0.8b, v2.8b //row1 &2 + mov v0.8b, v31.8b + bit v10.16b, v4.16b , v30.16b + mov v11.d[0] , v10.d[1] + mov v25.d[0] , v24.d[1] + mov v31.d[0] , v30.d[1] + trn1 v31.8b, v1.8b, v3.8b + trn2 v3.8b, v1.8b, v3.8b //row9 &10 + mov v1.8b, v31.8b + uaddl v16.8h, v12.8b, v14.8b //q2+q3 L + trn1 v31.8b, v20.8b, v6.8b + trn2 v6.8b, v20.8b, v6.8b //row3&row4 + mov v20.8b , v31.8b + uaddl v4.8h, v13.8b, v15.8b //q2+q3
H + trn1 v31.8b, v21.8b, v7.8b + trn2 v7.8b, v21.8b, v7.8b //row11 & 12 + mov v21.8b , v31.8b + mla v18.8h, v16.8h , v28.8h //X2(q3) + X3(q2) + q1 + q0 + p0 L + trn1 v31.4h, v2.4h, v6.4h + trn2 v6.4h, v2.4h, v6.4h //row2 & row4 + mov v2.8b, v31.8b + mla v26.8h, v4.8h , v28.8h //X2(q3) + X3(q2) + q1 + q0 + p0 H + trn1 v31.4h, v3.4h, v7.4h + trn2 v7.4h, v3.4h, v7.4h //row10 & 12 + mov v3.8b , v31.8b + bif v8.16b, v24.16b , v22.16b //final q0 + mov v9.d[0] , v8.d[1] + trn1 v31.4h, v0.4h, v20.4h + trn2 v20.4h, v0.4h, v20.4h //row1 & 3 + mov v0.8b , v31.8b + rshrn v18.8b, v18.8h, #3 //(X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3; L + trn1 v31.4h, v1.4h, v21.4h + trn2 v21.4h, v1.4h, v21.4h //row9 & row11 + mov v1.8b, v31.8b + rshrn v19.8b, v26.8h, #3 //(X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3; H + mov v18.d[1] , v19.d[0] + trn1 v31.8b, v8.8b, v10.8b + trn2 v10.8b, v8.8b, v10.8b //row5&6 + mov v8.8b, v31.8b + bit v12.16b, v18.16b , v30.16b //final q2 + mov v13.d[0] , v12.d[1] + trn1 v31.8b, v9.8b, v11.8b + trn2 v11.8b, v9.8b, v11.8b //row13 &14 + mov v9.8b, v31.8b + trn1 v31.8b, v12.8b, v14.8b + trn2 v14.8b, v12.8b, v14.8b //row7 & 8 + mov v12.8b, v31.8b + trn1 v31.8b, v13.8b, v15.8b + trn2 v15.8b, v13.8b, v15.8b //row15 & 16 + mov v13.8b , v31.8b + trn1 v31.4h, v10.4h, v14.4h + trn2 v14.4h, v10.4h, v14.4h //row6 & row8 + mov v10.8b, v31.8b + trn1 v31.4h, v11.4h, v15.4h + trn2 v15.4h, v11.4h, v15.4h //row14 & row16 + mov v11.8b, v31.8b + //now Q3 ->p0 and Q7->q3 + trn1 v31.4h, v8.4h, v12.4h + trn2 v12.4h, v8.4h, v12.4h //row 5 & 7 + mov v8.8b, v31.8b + trn1 v31.4h, v9.4h, v13.4h + trn2 v13.4h, v9.4h, v13.4h //row13 & row15 + mov v9.8b, v31.8b + sub x0, x0, x1, lsl#4 //restore pointer + trn1 v31.2s, v6.2s, v14.2s + trn2 v14.2s, v6.2s, v14.2s //row4 & 8 + mov v6.8b , v31.8b + trn1 v31.2s, v7.2s, v15.2s + trn2 v15.2s, v7.2s, v15.2s //row 12 & 16 + mov v7.8b, v31.8b + trn1 v31.2s, v0.2s, v8.2s + trn2 v8.2s, v0.2s, v8.2s //row1 & row5 + mov v0.8b , v31.8b + trn1 v31.2s, v1.2s,
v9.2s + trn2 v9.2s, v1.2s, v9.2s //row9 & 13 + mov v1.8b , v31.8b + trn1 v31.2s, v2.2s, v10.2s + trn2 v10.2s, v2.2s, v10.2s //row2 &6 + mov v2.8b , v31.8b + trn1 v31.2s, v3.2s, v11.2s + trn2 v11.2s, v3.2s, v11.2s //row10&row14 + mov v3.8b , v31.8b + trn1 v31.2s, v20.2s, v12.2s + trn2 v12.2s, v20.2s, v12.2s //row3 & 7 + mov v20.8b , v31.8b + trn1 v31.2s, v21.2s, v13.2s + trn2 v13.2s, v21.2s, v13.2s //row11 & row15 + mov v21.8b, v31.8b + st1 {v0.8b}, [x0], x1 //row1 + st1 {v2.8b}, [x0], x1 //row2 + st1 {v20.8b}, [x0], x1 //row3 + st1 {v6.8b}, [x0], x1 //row4 + st1 {v8.8b}, [x0], x1 //row5 + st1 {v10.8b}, [x0], x1 //row6 + st1 {v12.8b}, [x0], x1 //row7 + st1 {v14.8b}, [x0], x1 //row8 + st1 {v1.8b}, [x0], x1 //row9 + st1 {v3.8b}, [x0], x1 //row10 + st1 {v21.8b}, [x0], x1 //row11 + st1 {v7.8b}, [x0], x1 //row12 + st1 {v9.8b}, [x0], x1 //row13 + st1 {v11.8b}, [x0], x1 //row14 + st1 {v13.8b}, [x0], x1 //row15 + st1 {v15.8b}, [x0], x1 //row16 + + // LDMFD sp!,{x12,pc} + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + diff --git a/common/armv8/ih264_default_weighted_pred_av8.s b/common/armv8/ih264_default_weighted_pred_av8.s new file mode 100755 index 0000000..aefb902 --- /dev/null +++ b/common/armv8/ih264_default_weighted_pred_av8.s @@ -0,0 +1,353 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License.
+//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_default_weighted_pred_av8.s +//* +//* @brief +//* Contains function definitions for default weighted prediction. +//* Functions are coded using NEON intrinsics and can be compiled using ARM RVCT +//* +//* @author +//* Kaushik Senthoor R +//* +//* @par List of Functions: +//* +//* - ih264_default_weighted_pred_luma_av8() +//* - ih264_default_weighted_pred_chroma_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ +//******************************************************************************* +//* @function +//* ih264_default_weighted_pred_luma_av8() +//* +//* @brief +//* This routine performs the default weighted prediction as described in sec +//* 8.4.2.3.1 titled "Default weighted sample prediction process" for luma. +//* +//* @par Description: +//* This function gets two ht x wd blocks, calculates their rounded-average and +//* stores it in the destination block. +//* +//* @param[in] puc_src1: +//* UWORD8 Pointer to the buffer containing the first input block. +//* +//* @param[in] puc_src2: +//* UWORD8 Pointer to the buffer containing the second input block. +//* +//* @param[out] puc_dst +//* UWORD8 pointer to the destination where the output block is stored. +//* +//* @param[in] src_strd1 +//* Stride of the first input buffer +//* +//* @param[in] src_strd2 +//* Stride of the second input buffer +//* +//* @param[in] dst_strd +//* Stride of the destination buffer +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @returns +//* None +//* +//* @remarks +//* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
+//* +//******************************************************************************* +//*/ +//void ih264_default_weighted_pred_luma_av8(UWORD8 *puc_src1, +// UWORD8 *puc_src2, +// UWORD8 *puc_dst, +// WORD32 src_strd1, +// WORD32 src_strd2, +// WORD32 dst_strd, +// UWORD8 ht, +// UWORD8 wd) +// +//**************Variables Vs Registers***************************************** +// x0 => puc_src1 +// x1 => puc_src2 +// x2 => puc_dst +// x3 => src_strd1 (WORD32 passed in w3, sign-extended on entry) +// x4 => src_strd2 (WORD32 passed in w4, sign-extended on entry) +// x5 => dst_strd (WORD32 passed in w5, sign-extended on entry) +// w6 => ht +// w7 => wd +// +.text +.p2align 2 +.include "ih264_neon_macros.s" + + + + .global ih264_default_weighted_pred_luma_av8 + +ih264_default_weighted_pred_luma_av8: + + push_v_regs + stp x19, x20, [sp, #-16]! + sxtw x3, w3 //sign-extend src_strd1: upper 32 bits of a 32-bit argument are unspecified + sxtw x4, w4 //sign-extend src_strd2 before it is used as a 64-bit post-index offset + sxtw x5, w5 //sign-extend dst_strd before it is used as a 64-bit post-index offset + cmp w7, #16 + beq loop_16 //branch if wd is 16 + cmp w7, #8 + beq loop_8 //branch if wd is 8 + +loop_4: //each iteration processes four rows + + ld1 {v0.s}[0], [x0], x3 //load row 1 in source 1 + ld1 {v0.s}[1], [x0], x3 //load row 2 in source 1 + ld1 {v2.s}[0], [x1], x4 //load row 1 in source 2 + ld1 {v2.s}[1], [x1], x4 //load row 2 in source 2 + ld1 {v1.s}[0], [x0], x3 //load row 3 in source 1 + ld1 {v1.s}[1], [x0], x3 //load row 4 in source 1 + urhadd v0.8b, v0.8b , v2.8b + ld1 {v3.s}[0], [x1], x4 //load row 3 in source 2 + ld1 {v3.s}[1], [x1], x4 //load row 4 in source 2 + subs w6, w6, #4 //decrement ht by 4 + st1 {v0.s}[0], [x2], x5 //store row 1 in destination + st1 {v0.s}[1], [x2], x5 //store row 2 in destination + urhadd v1.8b, v1.8b , v3.8b + st1 {v1.s}[0], [x2], x5 //store row 3 in destination + st1 {v1.s}[1], [x2], x5 //store row 4 in destination + bgt loop_4 //if greater than 0 repeat the loop again + b end_loops + +loop_8: //each iteration processes four rows + + ld1 {v0.8b}, [x0], x3 //load row 1 in source 1 + ld1 {v4.8b}, [x1], x4 //load row 1 in source 2 + ld1 {v1.8b}, [x0], x3 //load row 2 in source 1 + ld1 {v5.8b}, [x1], x4 //load row 2 in source 2 + ld1 {v2.8b}, [x0], x3 //load row 3 in source 1 + urhadd v0.16b,
v0.16b , v4.16b + urhadd v1.16b, v1.16b , v5.16b + ld1 {v6.8b}, [x1], x4 //load row 3 in source 2 + ld1 {v3.8b}, [x0], x3 //load row 4 in source 1 + urhadd v2.8b, v2.8b , v6.8b + ld1 {v7.8b}, [x1], x4 //load row 4 in source 2 + subs w6, w6, #4 //decrement ht by 4 + st1 {v0.8b}, [x2], x5 //store row 1 in destination + urhadd v3.8b, v3.8b , v7.8b + st1 {v1.8b}, [x2], x5 //store row 2 in destination + st1 {v2.8b}, [x2], x5 //store row 3 in destination + st1 {v3.8b}, [x2], x5 //store row 4 in destination + bgt loop_8 //if greater than 0 repeat the loop again + b end_loops + +loop_16: //each iteration processes eight rows + + ld1 {v0.8b, v1.8b}, [x0], x3 //load row 1 in source 1 + ld1 {v16.8b, v17.8b}, [x1], x4 //load row 1 in source 2 + ld1 {v2.8b, v3.8b}, [x0], x3 //load row 2 in source 1 + ld1 {v18.8b, v19.8b}, [x1], x4 //load row 2 in source 2 + urhadd v0.16b, v0.16b , v16.16b + urhadd v1.16b, v1.16b , v17.16b + ld1 {v4.8b, v5.8b}, [x0], x3 //load row 3 in source 1 + ld1 {v20.8b, v21.8b}, [x1], x4 //load row 3 in source 2 + urhadd v2.16b, v2.16b , v18.16b + urhadd v3.16b, v3.16b , v19.16b + ld1 {v6.8b, v7.8b}, [x0], x3 //load row 4 in source 1 + ld1 {v22.8b, v23.8b}, [x1], x4 //load row 4 in source 2 + urhadd v4.16b, v4.16b , v20.16b + urhadd v5.16b, v5.16b , v21.16b + ld1 {v8.8b, v9.8b}, [x0], x3 //load row 5 in source 1 + ld1 {v24.8b, v25.8b}, [x1], x4 //load row 5 in source 2 + urhadd v6.16b, v6.16b , v22.16b + urhadd v7.16b, v7.16b , v23.16b + ld1 {v10.8b, v11.8b}, [x0], x3 //load row 6 in source 1 + ld1 {v26.8b, v27.8b}, [x1], x4 //load row 6 in source 2 + urhadd v8.16b, v8.16b , v24.16b + urhadd v9.16b, v9.16b , v25.16b + ld1 {v12.8b, v13.8b}, [x0], x3 //load row 7 in source 1 + ld1 {v28.8b, v29.8b}, [x1], x4 //load row 7 in source 2 + urhadd v10.16b, v10.16b , v26.16b + urhadd v11.16b, v11.16b , v27.16b + ld1 {v14.8b, v15.8b}, [x0], x3 //load row 8 in source 1 + ld1 {v30.8b, v31.8b}, [x1], x4 //load row 8 in source 2 + urhadd v12.16b, v12.16b , v28.16b + urhadd
v13.16b, v13.16b , v29.16b + st1 {v0.8b, v1.8b}, [x2], x5 //store row 1 in destination + st1 {v2.8b, v3.8b}, [x2], x5 //store row 2 in destination + urhadd v14.16b, v14.16b , v30.16b + urhadd v15.16b, v15.16b , v31.16b + st1 {v4.8b, v5.8b}, [x2], x5 //store row 3 in destination + st1 {v6.8b, v7.8b}, [x2], x5 //store row 4 in destination + subs w6, w6, #8 //decrement ht by 8 + st1 {v8.8b, v9.8b}, [x2], x5 //store row 5 in destination + st1 {v10.8b, v11.8b}, [x2], x5 //store row 6 in destination + st1 {v12.8b, v13.8b}, [x2], x5 //store row 7 in destination + st1 {v14.8b, v15.8b}, [x2], x5 //store row 8 in destination + bgt loop_16 //if greater than 0 repeat the loop again + +end_loops: + + // LDMFD sp!,{x4-x7,x15} //Reload the registers from sp + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + +//******************************************************************************* +//* @function +//* ih264_default_weighted_pred_chroma_av8() +//* +//* @brief +//* This routine performs the default weighted prediction as described in sec +//* 8.4.2.3.1 titled "Default weighted sample prediction process" for chroma. +//* +//* @par Description: +//* This function gets two ht x wd blocks, calculates their rounded-average and +//* stores it in the destination block for U and V. +//* +//* @param[in] puc_src1: +//* UWORD8 Pointer to the buffer containing the first input block. +//* +//* @param[in] puc_src2: +//* UWORD8 Pointer to the buffer containing the second input block. +//* +//* @param[out] puc_dst +//* UWORD8 pointer to the destination where the output block is stored.
+//* +//* @param[in] src_strd1 +//* Stride of the first input buffer +//* +//* @param[in] src_strd2 +//* Stride of the second input buffer +//* +//* @param[in] dst_strd +//* Stride of the destination buffer +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @returns +//* None +//* +//* @remarks +//* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8). +//* +//******************************************************************************* +//*/ +//void ih264_default_weighted_pred_chroma_av8(UWORD8 *puc_src1, +// UWORD8 *puc_src2, +// UWORD8 *puc_dst, +// WORD32 src_strd1, +// WORD32 src_strd2, +// WORD32 dst_strd, +// UWORD8 ht, +// UWORD8 wd) +// +//**************Variables Vs Registers***************************************** +// x0 => puc_src1 +// x1 => puc_src2 +// x2 => puc_dst +// x3 => src_strd1 (WORD32 passed in w3, sign-extended on entry) +// x4 => src_strd2 (WORD32 passed in w4, sign-extended on entry) +// x5 => dst_strd (WORD32 passed in w5, sign-extended on entry) +// w6 => ht +// w7 => wd +// + + + + + .global ih264_default_weighted_pred_chroma_av8 + +ih264_default_weighted_pred_chroma_av8: + + push_v_regs + stp x19, x20, [sp, #-16]! + sxtw x3, w3 //sign-extend src_strd1: upper 32 bits of a 32-bit argument are unspecified + sxtw x4, w4 //sign-extend src_strd2 before it is used as a 64-bit post-index offset + sxtw x5, w5 //sign-extend dst_strd before it is used as a 64-bit post-index offset
+ cmp w7, #8 + beq loop_8_uv //branch if wd is 8 + cmp w7, #4 + beq loop_4_uv //branch if wd is 4 + +loop_2_uv: //each iteration processes two rows + + ld1 {v0.s}[0], [x0], x3 //load row 1 in source 1 + ld1 {v0.s}[1], [x0], x3 //load row 2 in source 1 + ld1 {v1.s}[0], [x1], x4 //load row 1 in source 2 + ld1 {v1.s}[1], [x1], x4 //load row 2 in source 2 + urhadd v0.8b, v0.8b , v1.8b + subs w6, w6, #2 //decrement ht by 2 + st1 {v0.s}[0], [x2], x5 //store row 1 in destination + st1 {v0.s}[1], [x2], x5 //store row 2 in destination + bgt loop_2_uv //if greater than 0 repeat the loop again + b end_loops_uv + +loop_4_uv: //each iteration processes two rows + + ld1 {v0.8b}, [x0], x3 //load row 1 in source 1 + ld1 {v2.8b}, [x1], x4 //load row 1 in source 2 + ld1 {v1.8b}, [x0], x3 //load row 2 in source 1 + urhadd v0.8b, v0.8b , v2.8b + ld1 {v3.8b}, [x1], x4 //load row 2 in source 2 + urhadd v1.8b, v1.8b , v3.8b + st1 {v0.8b}, [x2], x5 //store row 1 in destination + subs w6, w6, #2 //decrement ht by 2 + st1 {v1.8b}, [x2], x5 //store row 2 in destination + bgt loop_4_uv //if greater than 0 repeat the loop again + b end_loops_uv + +loop_8_uv: //each iteration processes four rows + + ld1 {v0.8b, v1.8b}, [x0], x3 //load row 1 in source 1 + ld1 {v8.8b, v9.8b}, [x1], x4 //load row 1 in source 2 + ld1 {v2.8b, v3.8b}, [x0], x3 //load row 2 in source 1 + urhadd v0.16b, v0.16b , v8.16b + urhadd v1.16b, v1.16b , v9.16b + ld1 {v10.8b, v11.8b}, [x1], x4 //load row 2 in source 2 + ld1 {v4.8b, v5.8b}, [x0], x3 //load row 3 in source 1 + urhadd v2.16b, v2.16b , v10.16b + urhadd v3.16b, v3.16b , v11.16b + ld1 {v12.8b, v13.8b}, [x1], x4 //load row 3 in source 2 + ld1 {v6.8b, v7.8b}, [x0], x3 //load row 4 in source 1 + urhadd v4.16b, v4.16b , v12.16b + urhadd v5.16b, v5.16b , v13.16b + ld1 {v14.8b, v15.8b}, [x1], x4 //load row 4 in source 2 + st1 {v0.8b, v1.8b}, [x2], x5 //store row 1 in destination + urhadd v6.16b, v6.16b , v14.16b + urhadd v7.16b, v7.16b , v15.16b + st1 {v2.8b, v3.8b}, [x2], x5
//store row 2 in destination + subs w6, w6, #4 //decrement ht by 4 + st1 {v4.8b, v5.8b}, [x2], x5 //store row 3 in destination + st1 {v6.8b, v7.8b}, [x2], x5 //store row 4 in destination + bgt loop_8_uv //if greater than 0 repeat the loop again + +end_loops_uv: + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + diff --git a/common/armv8/ih264_ihadamard_scaling_av8.s b/common/armv8/ih264_ihadamard_scaling_av8.s new file mode 100755 index 0000000..712c9ae --- /dev/null +++ b/common/armv8/ih264_ihadamard_scaling_av8.s @@ -0,0 +1,250 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +//*/ +///** +// ******************************************************************************* +// * @file +// * ih264_ihadamard_scaling_av8.s +// * +// * @brief +// * Contains function definitions for inverse hadamard transform on 4x4 DC outputs +// * of 16x16 intra-prediction +// * +// * @author +// * Mohit +// * +// * @par List of Functions: +// * - ih264_ihadamard_scaling_4x4_av8(), ih264_ihadamard_scaling_2x2_uv_av8() +// * +// * @remarks +// * None +// * +.include "ih264_neon_macros.s" + +// ******************************************************************************* +// */ +// * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients +// * of a 16x16 intra prediction macroblock, and then performs scaling. +// * prediction buffer +// * +// * @par Description: +// * The DC coefficients pass through a 2-stage inverse hadamard transform. +// * This inverse transformed content is then scaled based on the Qp value. +// * +// * @param[in] pi2_src +// * input 4x4 block of DC coefficients +// * +// * @param[out] pi2_out +// * output 4x4 block +// * +// * @param[in] pu2_iscal_mat +// * pointer to scaling list (only element [0] is read) +// * +// * @param[in] pu2_weigh_mat +// * pointer to weight matrix (only element [0] is read) +// * +// * @param[in] u4_qp_div_6 +// * Floor (qp/6) +// * +// * @param[in] pi4_tmp +// * temporary buffer of size 1*16 (not referenced in the code below) +// * +// * @returns none +// * +// * @remarks none +// * +// ******************************************************************************* +// */ +// * +// ******************************************************************************* +// */ +// void ih264_ihadamard_scaling_4x4_av8(WORD16* pi2_src, +// WORD16* pi2_out, +// const UWORD16 *pu2_iscal_mat, +// const UWORD16 *pu2_weigh_mat, +// UWORD32 u4_qp_div_6, +// WORD32* pi4_tmp) +//**************variables vs registers***************************************** +//x0 => *pi2_src +//x1 => *pi2_out +//x2 => *pu2_iscal_mat +//x3 => *pu2_weigh_mat +//x4 => u4_qp_div_6 (x5 => pi4_tmp, not referenced) + +.text +.p2align 2 + + .global
ih264_ihadamard_scaling_4x4_av8 +ih264_ihadamard_scaling_4x4_av8: + +//only one shift is done in horizontal inverse because, +//if u4_qp_div_6 is lesser than 4 then shift value will be negative and do negative left shift, in this case rnd_factor has value +//if u4_qp_div_6 is greater than 4 then shift value will be positive and do left shift, here rnd_factor is 0 (NOTE(review): the code below always shifts left by u4_qp_div_6 and then does a rounding right shift by 6; this description looks stale - confirm) + push_v_regs + +//=======================inverse hadamard transform================================ + + ld4 {v0.4h-v3.4h}, [x0] //load x4,x5,x6,x7 (de-interleaving load: v0..v3 hold every 4th coefficient starting at index 0..3) + + dup v14.4s, w4 // populate the u4_qp_div_6 + ld1 {v15.h}[0], [x3] // pu2_weigh_mat[0] + ld1 {v16.h}[0], [x2] //pu2_iscal_mat[0] + + saddl v4.4s, v0.4h, v3.4h //x0 = x4 + x7 + saddl v5.4s, v1.4h, v2.4h //x1 = x5 + x6 + ssubl v6.4s, v1.4h, v2.4h //x2 = x5 - x6 + ssubl v7.4s, v0.4h, v3.4h //x3 = x4 - x7 + + add v0.4s, v4.4s, v5.4s //pi4_tmp_ptr[0] = x0 + x1 + add v1.4s, v7.4s, v6.4s //pi4_tmp_ptr[1] = x3 + x2 + sub v2.4s, v4.4s, v5.4s //pi4_tmp_ptr[2] = x0 - x1 + sub v3.4s, v7.4s, v6.4s //pi4_tmp_ptr[3] = x3 - x2 + + umull v15.4s, v15.4h, v16.4h + dup v15.4s, v15.s[0] //pu2_weigh_mat[0]*pu2_iscal_mat[0] + + //transpose + trn1 v4.4s, v0.4s, v1.4s + trn2 v5.4s, v0.4s, v1.4s + trn1 v6.4s, v2.4s, v3.4s + trn2 v7.4s, v2.4s, v3.4s + + trn1 v0.2d, v4.2d, v6.2d + trn2 v2.2d, v4.2d, v6.2d + trn1 v1.2d, v5.2d, v7.2d + trn2 v3.2d, v5.2d, v7.2d + //end transpose + + add v4.4s, v0.4s, v3.4s //x0 = x4+x7 + add v5.4s, v1.4s, v2.4s //x1 = x5+x6 + sub v6.4s, v1.4s, v2.4s //x2 = x5-x6 + sub v7.4s, v0.4s, v3.4s //x3 = x4-x7 + + add v0.4s, v4.4s, v5.4s //pi4_tmp_ptr[0] = x0 + x1 + add v1.4s, v7.4s, v6.4s //pi4_tmp_ptr[1] = x3 + x2 + sub v2.4s, v4.4s, v5.4s //pi4_tmp_ptr[2] = x0 - x1 + sub v3.4s, v7.4s, v6.4s //pi4_tmp_ptr[3] = x3 - x2 + + mul v0.4s, v0.4s, v15.4s // q0 = p[i] = (x[i] * trns_coeff[i]) where i = 0..3 + mul v1.4s, v1.4s, v15.4s // q1 = p[i] = (x[i] * trns_coeff[i]) where i = 4..7 + mul v2.4s, v2.4s, v15.4s // q2 = p[i] = (x[i] * trns_coeff[i]) where i = 8..11 + mul
v3.4s, v3.4s, v15.4s // q3 = p[i] = (x[i] * trns_coeff[i]) where i = 12..15 + + sshl v0.4s, v0.4s, v14.4s // q0 = q[i] = (p[i] << (qp/6)) where i = 0..3 + sshl v1.4s, v1.4s, v14.4s // q1 = q[i] = (p[i] << (qp/6)) where i = 4..7 + sshl v2.4s, v2.4s, v14.4s // q2 = q[i] = (p[i] << (qp/6)) where i = 8..11 + sshl v3.4s, v3.4s, v14.4s // q3 = q[i] = (p[i] << (qp/6)) where i = 12..15 + + sqrshrn v0.4h, v0.4s, #6 // d0 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 0..3 + sqrshrn v1.4h, v1.4s, #6 // d1 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 4..7 + sqrshrn v2.4h, v2.4s, #6 // d2 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 8..11 + sqrshrn v3.4h, v3.4s, #6 // d3 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 12..15 + + st1 {v0.4h-v3.4h}, [x1] //store the result + + pop_v_regs + ret + + +// ******************************************************************************* +// */ +// * @brief This function performs a 2x2 inverse hadamard transform for chroma block +// * +// * @par Description: +// * The DC coefficients pass through a 2-stage inverse hadamard transform. +// * This inverse transformed content is then scaled based on the Qp value. +// * Both DC blocks of U and V blocks are processed +// * +// * @param[in] pi2_src +// * input 1x8 block of coeffs.
First 4 are from U and next from V +// * +// * @param[out] pi2_out +// * output 1x8 block (8 WORD16 values, 16 bytes: 4 for U followed by 4 for V) +// * +// * @param[in] pu2_iscal_mat +// * pointer to scaling list (only element [0] is read) +// * +// * @param[in] pu2_weigh_mat +// * pointer to weight matrix (only element [0] is read) +// * +// * @param[in] u4_qp_div_6 +// * Floor (qp/6) +// * +// * @returns none +// * +// * @remarks none +// * +// ******************************************************************************* +// */ +// * +// ******************************************************************************* +// */ +// void ih264_ihadamard_scaling_2x2_uv(WORD16* pi2_src, +// WORD16* pi2_out, +// const UWORD16 *pu2_iscal_mat, +// const UWORD16 *pu2_weigh_mat, +// UWORD32 u4_qp_div_6) + + .global ih264_ihadamard_scaling_2x2_uv_av8 +ih264_ihadamard_scaling_2x2_uv_av8: + +//Registers used +// x0 : *pi2_src +// x1 : *pi2_out +// x2 : *pu2_iscal_mat +// x3 : *pu2_weigh_mat +// x4 : u4_qp_div_6 + push_v_regs + ld1 {v26.h}[0], [x2] //pu2_iscal_mat[0] + ld1 {v27.h}[0], [x3] //pu2_weigh_mat[0] + + sub w4, w4, #5 //qp/6 - 5 (combined shift; a negative count makes sshl shift right) + dup v28.4s, w4 //broadcast the shift count (qp/6 - 5) + + ld2 {v0.4h, v1.4h}, [x0] //load 8 dc coeffs + //i2_x4,i2_x6,i2_y4,i2_y6 -> v0.4h + //i2_x5,i2_x7,i2_y5,i2_y7 -> v1.4h + + saddl v2.4s, v0.4h, v1.4h //i4_x0 = i4_x4 + i4_x5;...x2 + ssubl v4.4s, v0.4h, v1.4h //i4_x1 = i4_x4 - i4_x5;...x3 + + umull v30.4s, v26.4h, v27.4h //pu2_iscal_mat[0]*pu2_weigh_mat[0] + dup v30.4s, v30.s[0] + + trn1 v0.4s, v2.4s, v4.4s + trn2 v1.4s, v2.4s, v4.4s //i4_x0 i4_x1 -> q1 + + add v2.4s, v0.4s, v1.4s //i4_x4 = i4_x0+i4_x2;.. i4_x5 + sub v3.4s, v0.4s, v1.4s //i4_x6 = i4_x0-i4_x2;..
i4_x7 + mul v2.4s, v2.4s, v30.4s //scale by pu2_iscal_mat[0]*pu2_weigh_mat[0] + mul v3.4s, v3.4s, v30.4s + + sshl v2.4s, v2.4s, v28.4s //shift by (qp/6 - 5) + sshl v3.4s, v3.4s, v28.4s + + xtn v0.4h, v2.4s //i4_x4 i4_x5 i4_y4 i4_y5 + xtn v1.4h, v3.4s //i4_x6 i4_x7 i4_y6 i4_y7 + + st2 {v0.2s, v1.2s}, [x1] //store exactly 8 results (16 bytes) as x4 x5 x6 x7 y4 y5 y6 y7; the .4s form wrote 16 extra zero bytes past the output block + pop_v_regs + ret + + + diff --git a/common/armv8/ih264_inter_pred_chroma_av8.s b/common/armv8/ih264_inter_pred_chroma_av8.s new file mode 100755 index 0000000..714e271 --- /dev/null +++ b/common/armv8/ih264_inter_pred_chroma_av8.s @@ -0,0 +1,392 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_inter_pred_chroma_av8.s +//* +//* @brief +//* Contains function definitions for inter prediction interpolation.
+//* +//* @author +//* Ittiam +//* +//* @par List of Functions: +//* +//* - ih264_inter_pred_chroma_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +///* All the functions here are replicated from ih264_inter_pred_filters.c +// + +///** +///** +///** +// +///** +//******************************************************************************* +//* +//* @brief +//* Interprediction chroma filter +//* +//* @par Description: +//* Applies filtering to chroma samples as mentioned in +//* sec 8.4.2.2.2 titled "chroma sample interpolation process" +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source containing alternate U and V samples +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] u1_dx +//* dx value where the sample is to be produced(refer sec 8.4.2.2.2 ) +//* +//* @param[in] u1_dy +//* dy value where the sample is to be produced(refer sec 8.4.2.2.2 ) +//* +//* @param[in] ht +//* integer height of the array (number of output rows) +//* +//* @param[in] wd +//* integer width of the array; wd = 2 and 4 have dedicated paths, any other value takes the 8-wide path (each output row is 2*wd bytes) +//* +//* @returns None +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +//void ih264_inter_pred_chroma(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// UWORD8 u1_dx, +// UWORD8 u1_dy, +// WORD32 ht, +// WORD32 wd) +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => u1_dx +// x5 => u1_dy +// x6 => height +// x7 => width +// +.text +.p2align 2 +.include "ih264_neon_macros.s" + + + + .global ih264_inter_pred_chroma_av8 + +ih264_inter_pred_chroma_av8: + + + + // STMFD sp!, {x4-x12, x14} //store register values to stack + push_v_regs + stp x19, x20,
[sp, #-16]! + sxtw x2, w2 //src_strd is a WORD32: sign-extend before x2 is used as a 64-bit post-index + sxtw x3, w3 //dst_strd is a WORD32: sign-extend before x3 is used as a 64-bit post-index + sxtw x6, w6 //ht is a WORD32: sign-extend before the 64-bit compares below + sxtw x7, w7 //wd is a WORD32: sign-extend before the 64-bit compares below + + sub x20, x4, #8 //x20 = u1_dx - 8 + neg x8, x20 //x8 = 8 - u1_dx + sub x20, x5, #8 //x20 = u1_dy - 8 + neg x9, x20 //x9 = 8 - u1_dy + mul x10, x8, x9 //(8 - dx) * (8 - dy) + mul x11, x4, x9 //dx * (8 - dy) + + dup v28.8b, w10 //weight of the current sample + dup v29.8b, w11 //weight of the next sample (2 bytes ahead) in the same row + + mul x10, x8, x5 //(8 - dx) * dy + mul x11, x4, x5 //dx * dy + + dup v30.8b, w10 //weight of the sample one row below + dup v31.8b, w11 //weight of the next sample (2 bytes ahead) one row below + + subs x12, x7, #2 //if wd=2 branch to loop_2 + beq loop_2 + subs x12, x7, #4 //if wd=4 branch to loop_4, otherwise fall through to loop_8 + beq loop_4 + +loop_8: + ld1 {v0.8b, v1.8b, v2.8b}, [x0], x2 //// Load row0 ; + ext v3.8b, v0.8b , v1.8b , #2 //row0 advanced by 2 bytes (one U/V pair) + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row1; + umull v20.8h, v0.8b, v28.8b + ext v8.8b, v5.8b , v6.8b , #2 + umlal v20.8h, v3.8b, v29.8b + ext v9.8b, v6.8b , v7.8b , #2 + umlal v20.8h, v5.8b, v30.8b + ext v4.8b, v1.8b , v2.8b , #2 + umlal v20.8h, v8.8b, v31.8b + sqrshrun v26.8b, v20.8h, #6 //(weighted sum + 32) >> 6, saturated to unsigned 8 bits + umull v22.8h, v1.8b, v28.8b + ld1 {v10.8b, v11.8b, v12.8b}, [x0], x2 //// Load row2 ; + umlal v22.8h, v4.8b, v29.8b + ext v13.8b, v10.8b , v11.8b , #2 + umlal v22.8h, v6.8b, v30.8b + ext v14.8b, v11.8b , v12.8b , #2 + umlal v22.8h, v9.8b, v31.8b + sqrshrun v27.8b, v22.8h, #6 + umull v24.8h, v5.8b, v28.8b + st1 { v26.8b, v27.8b}, [x1], x3 ////Store dest row0 + umlal v24.8h, v8.8b, v29.8b + ld1 {v0.8b, v1.8b, v2.8b}, [x0], x2 //// Load row3 ; + umlal v24.8h, v10.8b, v30.8b + ext v3.8b, v0.8b , v1.8b , #2 + umlal v24.8h, v13.8b, v31.8b + ext v4.8b, v1.8b , v2.8b , #2 + umull v16.8h, v6.8b, v28.8b + sqrshrun v18.8b, v24.8h, #6 + umlal v16.8h, v9.8b, v29.8b + umlal v16.8h, v11.8b, v30.8b + umlal v16.8h, v14.8b, v31.8b + sqrshrun v19.8b, v16.8h, #6 + st1 {v18.8b, v19.8b}, [x1], x3 // store row 1 + umull v20.8h, v10.8b, v28.8b + umlal v20.8h, v13.8b, v29.8b + umlal v20.8h, v0.8b, v30.8b + umlal v20.8h, v3.8b, v31.8b + sqrshrun v26.8b, v20.8h, #6 + umull v24.8h, v11.8b, v28.8b + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row4; + umlal v24.8h, v14.8b, v29.8b + ext v8.8b, v5.8b , v6.8b , #2 + umlal v24.8h, v1.8b, v30.8b + ext v9.8b, v6.8b , v7.8b , #2 + umlal v24.8h, v4.8b, v31.8b + umull v20.8h,
v0.8b, v28.8b + sqrshrun v27.8b, v24.8h, #6 + umlal v20.8h, v3.8b, v29.8b + st1 { v26.8b, v27.8b}, [x1], x3 ////Store dest row2 + umlal v20.8h, v5.8b, v30.8b + umlal v20.8h, v8.8b, v31.8b + umull v22.8h, v1.8b, v28.8b + umlal v22.8h, v4.8b, v29.8b + umlal v22.8h, v6.8b, v30.8b + sqrshrun v26.8b, v20.8h, #6 + umlal v22.8h, v9.8b, v31.8b + subs x12, x6, #4 + sqrshrun v27.8b, v22.8h, #6 + st1 { v26.8b, v27.8b}, [x1], x3 ////Store dest row3 + + beq end_func //If ht=4 + + ld1 {v10.8b, v11.8b, v12.8b}, [x0], x2 //// Load row5 + ext v13.8b, v10.8b , v11.8b , #2 + umull v24.8h, v5.8b, v28.8b + ext v14.8b, v11.8b , v12.8b , #2 + ld1 {v0.8b, v1.8b, v2.8b}, [x0], x2 //// Load row6; + umlal v24.8h, v8.8b, v29.8b + umlal v24.8h, v10.8b, v30.8b + umlal v24.8h, v13.8b, v31.8b + ext v3.8b, v0.8b , v1.8b , #2 + umull v16.8h, v6.8b, v28.8b + sqrshrun v18.8b, v24.8h, #6 + umlal v16.8h, v9.8b, v29.8b + umlal v16.8h, v11.8b, v30.8b + umlal v16.8h, v14.8b, v31.8b + ext v4.8b, v1.8b , v2.8b , #2 + sqrshrun v19.8b, v16.8h, #6 + st1 { v18.8b, v19.8b}, [x1], x3 // store row 4 + umull v20.8h, v10.8b, v28.8b + umlal v20.8h, v13.8b, v29.8b + umlal v20.8h, v0.8b, v30.8b + umlal v20.8h, v3.8b, v31.8b + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row7; + sqrshrun v26.8b, v20.8h, #6 + umull v24.8h, v11.8b, v28.8b + umlal v24.8h, v14.8b, v29.8b + ext v8.8b, v5.8b , v6.8b , #2 + umlal v24.8h, v1.8b, v30.8b + umlal v24.8h, v4.8b, v31.8b + ext v9.8b, v6.8b , v7.8b , #2 + sqrshrun v27.8b, v24.8h, #6 + st1 {v26.8b, v27.8b}, [x1], x3 ////Store dest row5 + umull v20.8h, v0.8b, v28.8b + umlal v20.8h, v3.8b, v29.8b + umlal v20.8h, v5.8b, v30.8b + umlal v20.8h, v8.8b, v31.8b + ld1 {v10.8b, v11.8b, v12.8b}, [x0], x2 //// Load row8 ; + sqrshrun v26.8b, v20.8h, #6 + umull v22.8h, v1.8b, v28.8b + umlal v22.8h, v4.8b, v29.8b + umlal v22.8h, v6.8b, v30.8b + ext v13.8b, v10.8b , v11.8b , #2 + umlal v22.8h, v9.8b, v31.8b + ext v14.8b, v11.8b , v12.8b , #2 + sqrshrun v27.8b, v22.8h, #6 + st1 { v26.8b, v27.8b},
[x1], x3 ////Store dest row6 + umull v24.8h, v5.8b, v28.8b + umlal v24.8h, v8.8b, v29.8b + umlal v24.8h, v10.8b, v30.8b + umlal v24.8h, v13.8b, v31.8b + umull v16.8h, v6.8b, v28.8b + sqrshrun v18.8b, v24.8h, #6 + umlal v16.8h, v9.8b, v29.8b + umlal v16.8h, v11.8b, v30.8b + umlal v16.8h, v14.8b, v31.8b + sqrshrun v19.8b, v16.8h, #6 + st1 { v18.8b, v19.8b}, [x1], x3 // store row 7 + b end_func + +loop_4: + ld1 {v0.8b, v1.8b}, [x0], x2 //// Load row0 ; + ext v2.8b, v0.8b , v1.8b , #2 + ld1 {v3.8b, v4.8b}, [x0], x2 //// Load row1; + ext v5.8b, v3.8b , v4.8b , #2 + umull v20.8h, v0.8b, v28.8b + umlal v20.8h, v2.8b, v29.8b + umlal v20.8h, v3.8b, v30.8b + umlal v20.8h, v5.8b, v31.8b + ld1 {v6.8b, v7.8b}, [x0], x2 //// Load row2 + sqrshrun v26.8b, v20.8h, #6 + ext v8.8b, v6.8b , v7.8b , #2 + st1 {v26.8b}, [x1], x3 ////Store dest row0 + umull v22.8h, v3.8b, v28.8b + umlal v22.8h, v5.8b, v29.8b + umlal v22.8h, v6.8b, v30.8b + umlal v22.8h, v8.8b, v31.8b + subs x12, x6, #2 + sqrshrun v27.8b, v22.8h, #6 + st1 {v27.8b}, [x1], x3 ////Store dest row1 + beq end_func //If ht=2 + + ld1 {v9.8b, v10.8b}, [x0], x2 //// Load row3; + ext v11.8b, v9.8b , v10.8b , #2 + umull v24.8h, v6.8b, v28.8b + umlal v24.8h, v8.8b, v29.8b + umlal v24.8h, v9.8b, v30.8b + umlal v24.8h, v11.8b, v31.8b + ld1 {v0.8b, v1.8b}, [x0], x2 //// Load row4 ; + sqrshrun v16.8b, v24.8h, #6 + ext v2.8b, v0.8b , v1.8b , #2 + st1 {v16.8b}, [x1], x3 ////Store dest row2 + umull v18.8h, v9.8b, v28.8b + umlal v18.8h, v11.8b, v29.8b + umlal v18.8h, v0.8b, v30.8b + umlal v18.8h, v2.8b, v31.8b + subs x12, x6, #4 + sqrshrun v17.8b, v18.8h, #6 + st1 {v17.8b}, [x1], x3 ////Store dest row3 + beq end_func //If ht=4 + + ld1 {v3.8b, v4.8b}, [x0], x2 //// Load row5; + ext v5.8b, v3.8b , v4.8b , #2 + umull v20.8h, v0.8b, v28.8b + umlal v20.8h, v2.8b, v29.8b + umlal v20.8h, v3.8b, v30.8b + umlal v20.8h, v5.8b, v31.8b + ld1 {v6.8b, v7.8b}, [x0], x2 //// Load row6 ; + sqrshrun v26.8b, v20.8h, #6 + ext v8.8b, v6.8b , v7.8b , #2 + st1
{v26.8b}, [x1], x3 ////Store dest row4 + umull v22.8h, v3.8b, v28.8b + umlal v22.8h, v5.8b, v29.8b + umlal v22.8h, v6.8b, v30.8b + umlal v22.8h, v8.8b, v31.8b + ld1 {v9.8b, v10.8b}, [x0], x2 //// Load row7; + sqrshrun v27.8b, v22.8h, #6 + ext v11.8b, v9.8b , v10.8b , #2 + st1 {v27.8b}, [x1], x3 ////Store dest row5 + umull v24.8h, v6.8b, v28.8b + umlal v24.8h, v8.8b, v29.8b + umlal v24.8h, v9.8b, v30.8b + umlal v24.8h, v11.8b, v31.8b + ld1 {v0.8b, v1.8b}, [x0], x2 //// Load row8; + sqrshrun v16.8b, v24.8h, #6 + ext v2.8b, v0.8b , v1.8b , #2 + st1 {v16.8b}, [x1], x3 ////Store dest row6 + umull v18.8h, v9.8b, v28.8b + umlal v18.8h, v11.8b, v29.8b + umlal v18.8h, v0.8b, v30.8b + umlal v18.8h, v2.8b, v31.8b + sqrshrun v17.8b, v18.8h, #6 + st1 {v17.8b}, [x1], x3 ////Store dest row7 + b end_func + +loop_2: + ld1 {v0.8b}, [x0], x2 //// Load row0 ; + ext v2.8b, v0.8b , v0.8b , #2 + ld1 {v3.8b}, [x0], x2 //// Load row1; + ext v5.8b, v3.8b , v3.8b , #2 + umull v20.8h, v0.8b, v28.8b + umlal v20.8h, v2.8b, v29.8b + umlal v20.8h, v3.8b, v30.8b + umlal v20.8h, v5.8b, v31.8b + ld1 {v6.8b}, [x0], x2 //// Load row2 + sqrshrun v26.8b, v20.8h, #6 + ext v8.8b, v6.8b , v6.8b , #2 + st1 {v26.s}[0], [x1], x3 ////Store dest row0 + umull v22.8h, v3.8b, v28.8b + umlal v22.8h, v5.8b, v29.8b + umlal v22.8h, v6.8b, v30.8b + umlal v22.8h, v8.8b, v31.8b + subs x12, x6, #2 + sqrshrun v27.8b, v22.8h, #6 + st1 {v27.s}[0], [x1], x3 ////Store dest row1 + beq end_func //If ht=2 + + ld1 {v9.8b}, [x0], x2 //// Load row3; + ext v11.8b, v9.8b , v9.8b , #2 + umull v24.8h, v6.8b, v28.8b + umlal v24.8h, v8.8b, v29.8b + umlal v24.8h, v9.8b, v30.8b + umlal v24.8h, v11.8b, v31.8b + ld1 {v0.8b}, [x0], x2 //// Load row4 ; + sqrshrun v16.8b, v24.8h, #6 + ext v2.8b, v0.8b , v0.8b , #2 + st1 {v16.s}[0], [x1], x3 ////Store dest row2 + umull v18.8h, v9.8b, v28.8b + umlal v18.8h, v11.8b, v29.8b + umlal v18.8h, v0.8b, v30.8b + umlal v18.8h, v2.8b, v31.8b + sqrshrun v17.8b, v18.8h, #6 + st1 {v17.s}[0], [x1], x3 ////Store
dest row3 + + +end_func: + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + diff --git a/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s b/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s new file mode 100755 index 0000000..6ad463a --- /dev/null +++ b/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s @@ -0,0 +1,530 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_inter_pred_filters_luma_horz_av8.s +//* +//* @brief +//* Contains function definitions for inter prediction interpolation.
+//* +//* @author +//* Ittiam +//* +//* @par List of Functions: +//* +//* - ih264_inter_pred_luma_horz_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +///* All the functions here are replicated from ih264_inter_pred_filters.c +// + +///** +///** +//******************************************************************************* +//* +//* @brief +//* Interprediction luma filter for horizontal input +//* +//* @par Description: +//* Applies a 6 tap horizontal filter .The output is clipped to 8 bits +//* sec 8.4.2.2.1 titled "Luma sample interpolation process" +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @returns +//* +// @remarks +//* None +//* +//******************************************************************************* +//*/ + +//void ih264_inter_pred_luma_horz ( +// UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ht, +// WORD32 wd ) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ht +// x5 => wd + +.text +.p2align 2 + +.include "ih264_neon_macros.s" + + + + .global ih264_inter_pred_luma_horz_av8 + +ih264_inter_pred_luma_horz_av8: + + + + + // STMFD sp!, {x4-x12, x14} //store register values to stack + push_v_regs + stp x19, x20, [sp, #-16]! 
+ sub x0, x0, #2 //pu1_src-2 + sub x14, x4, #16 + movi v0.8b, #5 //filter coeff + subs x12, x5, #8 //if wd=8 branch to loop_8 + movi v1.8b, #20 //filter coeff + beq loop_8 + + subs x12, x5, #4 //if wd=4 branch to loop_4 + beq loop_4 + +loop_16: //when wd=16 + //// Processing row0 and row1 + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row0 + add x14, x14, #1 //for checking loop + ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row0) + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row1 + ext v30.8b, v3.8b , v4.8b, #5 ////extract a[5] (column2,row0) + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row0) + ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row1) + uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row0) + ext v27.8b, v6.8b , v7.8b, #5 ////extract a[5] (column2,row1) + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row1) + ext v31.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row0) + uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row1) + ext v30.8b, v3.8b , v4.8b, #2 ////extract a[2] (column2,row0) + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0) + ext v28.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row1) + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0) + ext v27.8b, v6.8b , v7.8b, #2 ////extract a[2] (column2,row1) + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row1) + ext v31.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row0) + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row1) + ext v30.8b, v3.8b , v4.8b, #3 ////extract a[3] (column2,row0) + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0) + ext v28.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row1) + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0) + ext v27.8b, v6.8b , v7.8b, #3 ////extract a[3] (column2,row1) + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1) + ext v31.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row0) + umlal v16.8h, v27.8b, 
v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row1) + ext v30.8b, v3.8b , v4.8b, #1 ////extract a[1] (column2,row0) + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + ext v28.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row1) + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + ext v27.8b, v6.8b , v7.8b, #1 ////extract a[1] (column2,row1) + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + ext v31.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row0) + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1) + ext v30.8b, v3.8b , v4.8b, #4 ////extract a[4] (column2,row0) + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + ext v28.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row1) + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + ext v27.8b, v6.8b , v7.8b, #4 ////extract a[4] (column2,row1) + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row2 + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1) + + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row3 + sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row2) + st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row0 + sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1) + ext v30.8b, v3.8b , v4.8b, #5 ////extract a[5] (column2,row2) + sqrshrun v24.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1) + + + +//// Processing row2 and row3 + ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row3) + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row2) + st1 {v23.8b, v24.8b}, 
[x1], x3 ////Store dest row1 + uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row2) + ext v27.8b, v6.8b , v7.8b, #5 ////extract a[5] (column2,row3) + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row3) + ext v31.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row2) + uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row3) + ext v30.8b, v3.8b , v4.8b, #2 ////extract a[2] (column2,row2) + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row2) + ext v27.8b, v6.8b , v7.8b, #2 ////extract a[2] (column2,row3) + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row2) + ext v28.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row3) + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row3) + ext v31.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row2) + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row3) + ext v30.8b, v3.8b , v4.8b, #3 ////extract a[3] (column2,row2) + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row2) + ext v28.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row3) + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row2) + ext v27.8b, v6.8b , v7.8b, #3 ////extract a[3] (column2,row3) + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row3) + ext v31.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row2) + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row3) + ext v30.8b, v3.8b , v4.8b, #1 ////extract a[1] (column2,row2) + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row2) + ext v28.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row3) + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row2) + ext v27.8b, v6.8b , v7.8b, #1 ////extract a[1] (column2,row3) + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row3) + ext v31.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row2) + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row3) + ext v30.8b, v3.8b , v4.8b, #4 ////extract 
a[4] (column2,row2) + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row2) + ext v28.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row3) + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row2) + ext v27.8b, v6.8b , v7.8b, #4 ////extract a[4] (column2,row3) + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row3) + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row4 + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row3) + + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row2) + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row5 + sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row2) + ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row4) + st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row2 + sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row3) + ext v30.8b, v3.8b , v4.8b, #5 ////extract a[5] (column2,row4) + sqrshrun v24.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row3) + + +//// Processing row4 and row5 + ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row5) + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row4) + st1 {v23.8b, v24.8b}, [x1], x3 ////Store dest row3 + uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row4) + ext v27.8b, v6.8b , v7.8b, #5 ////extract a[5] (column2,row5) + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row5) + ext v31.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row4) + uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row5) + ext v30.8b, v3.8b , v4.8b, #2 ////extract a[2] (column2,row4) + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row4) + ext v27.8b, v6.8b , v7.8b, #2 ////extract a[2] (column2,row5) + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row4) + ext v28.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row5) + umlal 
v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row5) + ext v31.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row4) + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row5) + ext v30.8b, v3.8b , v4.8b, #3 ////extract a[3] (column2,row4) + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row4) + ext v28.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row5) + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row4) + ext v27.8b, v6.8b , v7.8b, #3 ////extract a[3] (column2,row5) + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row5) + ext v31.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row4) + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row5) + ext v30.8b, v3.8b , v4.8b, #1 ////extract a[1] (column2,row4) + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row4) + ext v28.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row5) + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row4) + ext v27.8b, v6.8b , v7.8b, #1 ////extract a[1] (column2,row5) + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row4) + ext v31.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row4) + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row5) + ext v30.8b, v3.8b , v4.8b, #4 ////extract a[4] (column2,row4) + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row4) + ext v28.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row5) + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row4) + ext v27.8b, v6.8b , v7.8b, #4 ////extract a[4] (column2,row5) + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row5) + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row6 + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row5) + + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row4) + ld1 {v5.8b, v6.8b, 
v7.8b}, [x0], x2 //// Load row7 + sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row4) + ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row6) + st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row2 + sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row5) + ext v30.8b, v3.8b , v4.8b, #5 ////extract a[5] (column2,row6) + sqrshrun v24.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row5) + + + + //// Processing row6 and row7 + + ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row7) + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row6) + st1 {v23.8b, v24.8b}, [x1], x3 ////Store dest row5 + uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row6) + ext v27.8b, v6.8b , v7.8b, #5 ////extract a[5] (column2,row7) + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row7) + ext v31.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row6) + uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row7) + ext v30.8b, v3.8b , v4.8b, #2 ////extract a[2] (column2,row6) + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row6) + ext v27.8b, v6.8b , v7.8b, #2 ////extract a[2] (column2,row7) + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row6) + ext v28.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row7) + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row7) + ext v31.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row6) + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row7) + ext v30.8b, v3.8b , v4.8b, #3 ////extract a[3] (column2,row6) + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row6) + ext v28.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row7) + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row6) + ext v27.8b, v6.8b , v7.8b, #3 ////extract a[3] (column2,row7) + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row7) + ext v31.8b, v2.8b , v3.8b, #1 ////extract a[1] 
(column1,row6) + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row7) + ext v30.8b, v3.8b , v4.8b, #1 ////extract a[1] (column2,row6) + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row6) + ext v28.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row7) + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row6) + ext v27.8b, v6.8b , v7.8b, #1 ////extract a[1] (column2,row7) + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row6) + ext v31.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row6) + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row7) + ext v30.8b, v3.8b , v4.8b, #4 ////extract a[4] (column2,row6) + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row6) + ext v28.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row7) + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row6) + ext v27.8b, v6.8b , v7.8b, #4 ////extract a[4] (column2,row6) + + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row6) + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row7) + sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row6) + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row7) + sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row7) + st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row6 + sqrshrun v24.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row7) + subs x12, x14, #1 // if height==16 - looping + st1 {v23.8b, v24.8b}, [x1], x3 ////Store dest row7 + + + + beq loop_16 + b end_func + + + +loop_8: +//// Processing row0 and row1 + + + ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row1 + add x14, x14, #1 //for checking loop + ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row1) + ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row0 + 
ext v25.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row1) + ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row0) + ext v24.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row1) + ext v23.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row1) + ext v22.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row1) + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row1) + ext v29.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row0) + umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row1) + umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1) + umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + ext v30.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row0) + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row0) + ext v27.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row0) + ext v26.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row0) + ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row2 + umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0) + umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0) + umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row3 + sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + + //// Processing row2 and row3 + ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row3) + ext v25.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row3) + ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row2) + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row3) + st1 {v23.8b}, [x1], x3 ////Store dest row0 + ext v24.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row2) + ext v23.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row3) + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 
16) >> 5 (column1,row1) + ext v22.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row3) + ext v29.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row2) + umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row3) + umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row3) + umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row3) + umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row3) + st1 {v20.8b}, [x1], x3 ////Store dest row1 + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row2) + ext v30.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row2) + ext v27.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row2) + ext v26.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row2) + ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row4 + umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row2) + umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column1,row2) + umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row2) + umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row2) + ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row3 + subs x9, x4, #4 + sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row3) + ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row5) + ext v25.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row5) + ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row4) + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row5) + ext v24.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row5) + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row2) + ext v22.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row5) + ext v29.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row4) + st1 {v20.8b}, [x1], x3 ////Store dest row2 + ext v30.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row4) + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row4) + st1 {v23.8b}, [x1], x3 ////Store dest row3 + beq 
end_func // Branch if height==4 + +//// Processing row4 and row5 + ext v23.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row5) + umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row5) + umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row5) + umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row5) + umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row5) + ext v27.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row4) + ext v26.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row4) + ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row6 + umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row4) + umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column1,row4) + umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row4) + umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row4) + sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row5) + ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row7 + ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row6) + ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row7) + ext v25.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row7) + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row7) + ext v24.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row7) + ext v22.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row7) + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row4) + ext v29.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row6) + ext v30.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row6) + st1 {v20.8b}, [x1], x3 ////Store dest row4 + ext v27.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row6) + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row6) + ext v26.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row6) + umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row6) + umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 
(column1,row6) + umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row6) + umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row6) + //// Processing row6 and row7 + st1 {v23.8b}, [x1], x3 ////Store dest row5 + ext v23.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row7) + umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row7) + umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row7) + umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row7) + umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row7) + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row6) + subs x12, x14, #1 + st1 {v20.8b}, [x1], x3 ////Store dest row6 + sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row7) + st1 {v23.8b}, [x1], x3 ////Store dest row7 + + beq loop_8 //looping if height ==16 + + b end_func +loop_4: + ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row1 + ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row1) + ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row0 + ext v25.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row1) + ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row0) + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row1) + ext v24.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row1) + ext v23.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row1) + ext v22.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row1) + ext v29.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row0) + umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row1) + umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1) + umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row0) + ext v30.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row0) + 
ext v27.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row0) + ext v26.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row0) + ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row2 + umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0) + umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0) + umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row3 + ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row3) + ext v25.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row3) + sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row2) + ext v24.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row2) + st1 {v23.s}[0], [x1], x3 ////Store dest row0 + ext v23.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row3) + ext v22.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row3) + ext v29.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row2) + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1) + ext v30.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row2) + ext v27.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row2) + + //// Processing row2 and row3 + st1 {v20.s}[0], [x1], x3 ////Store dest row1 + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row3) + ext v26.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row2) + umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row3) + umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row3) + umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row3) + umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row3) + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row2) + umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row2) + umlal v8.8h, v30.8b, v1.8b //// a0 + a5 
+ 20a2 (column1,row2) + umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row2) + umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row2) + sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row3) + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row2) + st1 {v20.s}[0], [x1], x3 ////Store dest row2 + subs x4, x4, #8 // Loop if height =8 + st1 {v23.s}[0], [x1], x3 ////Store dest row3 + beq loop_4 + +end_func: + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + diff --git a/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s b/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s new file mode 100755 index 0000000..38934c9 --- /dev/null +++ b/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s @@ -0,0 +1,452 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//*  ih264_inter_pred_filters_luma_vert_av8.s
+//*
+//* @brief
+//*  Contains function definitions for inter prediction interpolation.
+//*
+//* @author
+//*  Ittiam
+//*
+//* @par List of Functions:
+//*
+//*  - ih264_inter_pred_luma_vert_av8()
+//*
+//* @remarks
+//*  None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_inter_pred_filters.c
+//
+
+///**
+///**
+///**
+// *******************************************************************************
+// *
+// * @brief
+// *     Interprediction luma filter for vertical input
+// *
+// * @par Description:
+// *    Applies a 6 tap vertical filter (1, -5, 20, 20, -5, 1); each output is CLIP_U8((sum + 16) >> 5)
+// *    sec 8.4.2.2.1 titled "Luma sample interpolation process"
+// *
+// * @param[in] pu1_src
+// *  UWORD8 pointer to the source
+// *
+// * @param[out] pu1_dst
+// *  UWORD8 pointer to the destination
+// *
+// * @param[in] src_strd
+// *  integer source stride
+// *
+// * @param[in] dst_strd
+// *  integer destination stride
+// *
+// * @param[in] ht
+// *  integer height of the array
+// *
+// * @param[in] wd
+// *  integer width of the array
+// *
+// * @returns
+// *  None; the filtered rows are stored through pu1_dst
+// * @remarks
+// *  Source rows are read starting at pu1_src - 2 * src_strd.
+// *  wd == 8 and wd == 4 take dedicated paths; any other wd runs the 16-wide path.
+// *******************************************************************************
+
+//void ih264_inter_pred_luma_vert (
+//                            UWORD8 *pu1_src,
+//                            UWORD8 *pu1_dst,
+//                            WORD32 src_strd,
+//                            WORD32 dst_strd,
+//                            WORD32 ht,
+//                            WORD32 wd   )
+
+//**************Variables Vs Registers*****************************************
+//    x0 => *pu1_src
+//    x1 => *pu1_dst
+//    x2 =>  src_strd
+//    x3 =>  dst_strd
+//    x4 =>  ht
+//    x5 =>  wd
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+// push_v_regs / pop_v_regs / swp are macros from ih264_neon_macros.s (not shown here).
+// v8-v15 are written below, so the NEON save/restore around the body is needed.
+    .global ih264_inter_pred_luma_vert_av8
+
+ih264_inter_pred_luma_vert_av8:
+
+    // STMFD sp!, {x4-x12, x14} //store register values to stack
+    push_v_regs
+    stp x19, x20, [sp, #-16]!
+
+    sub x0, x0, x2, lsl #1 //pu1_src-2*src_strd
+
+    sub x14, x4, #16 // x14 = ht - 16, used by the "repeat once if ht == 16" checks
+    movi v22.8h, #20 // Filter coeff 20 (0x14) in every halfword lane of v22
+
+    subs x12, x5, #8 //if wd=8 branch to loop_8_start
+    movi v24.8h, #5 // Filter coeff 5 in every halfword lane of v24
+    beq loop_8_start
+
+    subs x12, x5, #4 //if wd=4 branch to loop_4_start
+    beq loop_4_start
+
+// 16-wide path: each row is loaded as a register pair (columns 0-7, columns 8-15)
+    ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[0_0]
+    ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[1_0]
+    ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[2_0]
+    ld1 {v6.2s, v7.2s}, [x0], x2 // Vector load from src[3_0]
+    add x14, x14, #1 //for checking loop
+    ld1 {v8.2s, v9.2s}, [x0], x2 // Vector load from src[4_0]
+    uaddl v12.8h, v4.8b, v6.8b // temp1 = src[2_0] + src[3_0]
+    ld1 {v10.2s, v11.2s}, [x0], x2 // Vector load from src[5_0]
+
+loop_16: //when wd=16
+// One pass stores 8 rows of 16 pixels; the pass repeats once when ht == 16.
+    uaddl v14.8h, v0.8b, v10.8b // temp = src[0_0] + src[5_0]
+    uaddl v16.8h, v2.8b, v8.8b // temp2 = src[1_0] + src[4_0]
+    mla v14.8h, v12.8h, v22.8h // temp += temp1 * 20
+    uaddl v20.8h, v1.8b, v11.8b // temp4 = src[0_8] + src[5_8]
+    uaddl v18.8h, v5.8b, v7.8b // temp3 = src[2_8] + src[3_8]
+    mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+    ld1 {v0.2s, v1.2s}, [x0], x2
+    uaddl v26.8h, v3.8b, v9.8b // temp5 = src[1_8] + src[4_8]
+    uaddl v12.8h, v6.8b, v8.8b
+    mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5
+    uaddl v16.8h, v2.8b, v0.8b
+    uaddl v18.8h, v4.8b, v10.8b
+    mla v16.8h, v12.8h , v22.8h
+    mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+    uaddl v26.8h, v5.8b, v11.8b
+    uaddl v12.8h, v7.8b, v9.8b
+    sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+    uaddl v14.8h, v3.8b, v1.8b
+    ld1 {v2.2s, v3.2s}, [x0], x2
+    mla v14.8h, v12.8h , v22.8h
+    mls v16.8h, v18.8h , v24.8h
+    sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+    uaddl v18.8h, v4.8b, v2.8b
+    uaddl v12.8h, v8.8b, v10.8b
+
+    st1 {v30.2s, v31.2s}, [x1], x3 // Vector store to dst[0_0]
+    mla v18.8h, v12.8h , v22.8h
+    uaddl v20.8h, v6.8b, v0.8b
+    mls v14.8h, v26.8h , v24.8h
+    sqrshrun v30.8b, v16.8h, #5
+    uaddl v12.8h, v9.8b, v11.8b
+    uaddl v16.8h, v5.8b, v3.8b
+    uaddl v26.8h, v7.8b, v1.8b
+    mla v16.8h, v12.8h , v22.8h
+    mls v18.8h, v20.8h , v24.8h
+    ld1 {v4.2s, v5.2s}, [x0], x2
+
+    sqrshrun v31.8b, v14.8h, #5
+    uaddl v12.8h, v10.8b, v0.8b
+    uaddl v14.8h, v6.8b, v4.8b
+    uaddl v20.8h, v8.8b, v2.8b
+    mla v14.8h, v12.8h , v22.8h
+    mls v16.8h, v26.8h , v24.8h
+    st1 {v30.2s, v31.2s}, [x1], x3 //store row 1
+    sqrshrun v30.8b, v18.8h, #5
+    uaddl v18.8h, v7.8b, v5.8b
+    uaddl v12.8h, v11.8b, v1.8b
+    mla v18.8h, v12.8h , v22.8h
+    uaddl v26.8h, v9.8b, v3.8b
+    mls v14.8h, v20.8h , v24.8h
+    ld1 {v6.2s, v7.2s}, [x0], x2
+    sqrshrun v31.8b, v16.8h, #5
+    mls v18.8h, v26.8h , v24.8h
+    uaddl v12.8h, v0.8b, v2.8b // temp1 = src[2_0] + src[3_0]
+    st1 {v30.2s, v31.2s}, [x1], x3 //store row 2
+    uaddl v16.8h, v10.8b, v4.8b // temp2 = src[1_0] + src[4_0]
+    uaddl v20.8h, v9.8b, v7.8b // temp4 = src[0_8] + src[5_8]
+    sqrshrun v30.8b, v14.8h, #5
+    uaddl v26.8h, v5.8b, v11.8b // temp5 = src[1_8] + src[4_8]
+    uaddl v14.8h, v8.8b, v6.8b // temp = src[0_0] + src[5_0]
+    sqrshrun v31.8b, v18.8h, #5
+    mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20
+    uaddl v18.8h, v1.8b, v3.8b // temp3 = src[2_8] + src[3_8]
+    st1 {v30.2s, v31.2s}, [x1], x3 //store row 3
+    // 4 rows processed
+    mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+    ld1 {v8.2s, v9.2s}, [x0], x2
+    uaddl v12.8h, v2.8b, v4.8b
+    uaddl v18.8h, v3.8b, v5.8b
+    mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5
+    uaddl v28.8h, v9.8b, v11.8b
+    uaddl v16.8h, v6.8b, v0.8b
+    mla v28.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+    mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+    uaddl v26.8h, v1.8b, v7.8b
+    uaddl v18.8h, v5.8b, v7.8b
+    sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+    uaddl v14.8h, v8.8b, v10.8b
+
+    sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+    ld1 {v10.2s, v11.2s}, [x0], x2
+    mls v28.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+    st1 {v30.2s, v31.2s}, [x1], x3 // store row 4
+    mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20
+    uaddl v20.8h, v11.8b, v1.8b
+    uaddl v26.8h, v3.8b, v9.8b
+    mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+    uaddl v12.8h, v6.8b, v4.8b
+    uaddl v18.8h, v7.8b, v9.8b
+    sqrshrun v31.8b, v28.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+    mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5
+    uaddl v16.8h, v8.8b, v2.8b
+    sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+    mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+    uaddl v14.8h, v10.8b, v0.8b
+    st1 {v30.2s, v31.2s}, [x1], x3 // store row 5
+    mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20
+    ld1 {v0.2s, v1.2s}, [x0], x2
+    uaddl v26.8h, v5.8b, v11.8b
+    uaddl v12.8h, v8.8b, v6.8b
+    uaddl v28.8h, v0.8b, v2.8b
+    sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+    mla v28.8h, v12.8h , v22.8h // temp += temp1 * 20
+    uaddl v20.8h, v1.8b, v3.8b
+    mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5
+    mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+    uaddl v16.8h, v10.8b, v4.8b
+    sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+    mov v2.8b, v6.8b
+    mov v3.8b, v7.8b
+    mls v28.8h, v16.8h , v24.8h // temp -= temp2 * 5
+    st1 {v30.2s, v31.2s}, [x1], x3 // store row 6
+    sqrshrun v30.8b, v28.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+// swp is a macro from ih264_neon_macros.s; presumably it exchanges its two operands - TODO confirm
+    swp v0.8b v4.8b
+    swp v1.8b v5.8b
+
+
+
+    mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+    mov v6.8b, v10.8b
+    mov v7.8b, v11.8b
+    subs x12, x14, #1 // if height==16 - looping
+
+    swp v4.8b v8.8b
+    swp v5.8b v9.8b
+
+
+    sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+    st1 {v30.2s, v31.2s}, [x1], x3 // store row 7
+    bne end_func //if height =8 end function
+    add x14, x14, #1 //for checking loop
+    ld1 {v10.2s, v11.2s}, [x0], x2
+    uaddl v12.8h, v4.8b, v6.8b // temp1 = src[2_0] + src[3_0]
+
+    b loop_16 // looping if height =16
+
+loop_8_start:
+//// Processing row0 and row1
+// 8-wide path: one 8-byte register per source row, v0..v5 = first six rows
+    ld1 {v0.2s}, [x0], x2 // Vector load from src[0_0]
+    ld1 {v1.2s}, [x0], x2 // Vector load from src[1_0]
+    ld1 {v2.2s}, [x0], x2 // Vector load from src[2_0]
+    ld1 {v3.2s}, [x0], x2 // Vector load from src[3_0]
+    add x14, x14, #1 //for checking loop
+    ld1 {v4.2s}, [x0], x2 // Vector load from src[4_0]
+    ld1 {v5.2s}, [x0], x2 // Vector load from src[5_0]
+
+loop_8:
+    //for checking loop
+    uaddl v6.8h, v2.8b, v3.8b // temp1 = src[2_0] + src[3_0]
+    uaddl v8.8h, v0.8b, v5.8b // temp = src[0_0] + src[5_0]
+    uaddl v10.8h, v1.8b, v4.8b // temp2 = src[1_0] + src[4_0]
+    mla v8.8h, v6.8h , v22.8h // temp += temp1 * 20
+    ld1 {v6.2s}, [x0], x2
+    uaddl v14.8h, v3.8b, v4.8b
+    uaddl v16.8h, v1.8b, v6.8b
+    uaddl v18.8h, v2.8b, v5.8b
+    mls v8.8h, v10.8h , v24.8h // temp -= temp2 * 5
+    mla v16.8h, v14.8h , v22.8h
+    ld1 {v7.2s}, [x0], x2
+    uaddl v20.8h, v4.8b, v5.8b
+    uaddl v12.8h, v2.8b, v7.8b
+    uaddl v10.8h, v3.8b, v6.8b
+    mls v16.8h, v18.8h , v24.8h
+    sqrshrun v26.8b, v8.8h, #5 // dst[0_0] = CLIP_U8( (temp + 16) >> 5)
+    mla v12.8h, v20.8h , v22.8h
+    ld1 {v0.2s}, [x0], x2
+    uaddl v14.8h, v5.8b, v6.8b
+    sqrshrun v27.8b, v16.8h, #5
+    uaddl v20.8h, v3.8b, v0.8b
+    mls v12.8h, v10.8h , v24.8h
+    st1 {v26.2s}, [x1], x3 // Vector store to dst[0_0]
+    uaddl v18.8h, v4.8b, v7.8b
+    mla v20.8h, v14.8h , v22.8h
+    st1 {v27.2s}, [x1], x3
+    sqrshrun v28.8b, v12.8h, #5
+    st1 {v28.2s}, [x1], x3
+    mls v20.8h, v18.8h , v24.8h
+    ld1 {v1.2s}, [x0], x2
+    sqrshrun v29.8b, v20.8h, #5
+    subs x9, x4, #4
+    st1 {v29.2s}, [x1], x3 //store row 3
+
+
+    beq end_func // Branch if height==4
+
+// rows 4-7: the register roles have rotated, so the pair labels below follow the filter taps
+    uaddl v14.8h, v6.8b, v7.8b // temp1 = src[2_0] + src[3_0]
+    uaddl v16.8h, v0.8b, v5.8b // temp2 = src[1_0] + src[4_0]
+    uaddl v18.8h, v1.8b, v4.8b // temp = src[0_0] + src[5_0]
+    mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20
+    ld1 {v2.2s}, [x0], x2
+    mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5
+    uaddl v8.8h, v0.8b, v7.8b
+    uaddl v10.8h, v1.8b, v6.8b
+    uaddl v12.8h, v2.8b, v5.8b
+    sqrshrun v26.8b, v18.8h, #5
+    mla v12.8h, v8.8h , v22.8h
+    ld1 {v3.2s}, [x0], x2
+    mls v12.8h, v10.8h , v24.8h
+    st1 {v26.2s}, [x1], x3
+    sqrshrun v27.8b, v12.8h, #5
+    st1 {v27.2s}, [x1], x3
+    uaddl v14.8h, v0.8b, v1.8b // temp1 = src[2_0] + src[3_0]
+    uaddl v16.8h, v2.8b, v7.8b // temp2 = src[1_0] + src[4_0]
+    uaddl v18.8h, v3.8b, v6.8b // temp = src[0_0] + src[5_0]
+    mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20
+    ld1 {v4.2s}, [x0], x2
+    mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5
+    uaddl v8.8h, v2.8b, v1.8b
+    uaddl v10.8h, v3.8b, v0.8b
+    uaddl v12.8h, v4.8b, v7.8b
+    sqrshrun v26.8b, v18.8h, #5
+    mla v12.8h, v8.8h , v22.8h
+    ld1 {v5.2s}, [x0], x2
+    mls v12.8h, v10.8h , v24.8h
+    st1 {v26.2s}, [x1], x3
+    sqrshrun v27.8b, v12.8h, #5
+    subs x12, x14, #1
+    st1 {v27.2s}, [x1], x3
+    add x14, x14, #1
+    beq loop_8 //looping if height ==16
+
+    b end_func
+
+
+loop_4_start:
+//// Processing row0 and row1
+// 4-wide path: 32-bit lane loads/stores, same row schedule as loop_8, no repeat (4 or 8 rows)
+
+    ld1 {v0.s}[0], [x0], x2 // Vector load from src[0_0]
+    ld1 {v1.s}[0], [x0], x2 // Vector load from src[1_0]
+    ld1 {v2.s}[0], [x0], x2 // Vector load from src[2_0]
+    ld1 {v3.s}[0], [x0], x2 // Vector load from src[3_0]
+    ld1 {v4.s}[0], [x0], x2 // Vector load from src[4_0]
+    ld1 {v5.s}[0], [x0], x2 // Vector load from src[5_0]
+
+    uaddl v6.8h, v2.8b, v3.8b // temp1 = src[2_0] + src[3_0]
+    uaddl v8.8h, v0.8b, v5.8b // temp = src[0_0] + src[5_0]
+    uaddl v10.8h, v1.8b, v4.8b // temp2 = src[1_0] + src[4_0]
+    mla v8.8h, v6.8h , v22.8h // temp += temp1 * 20
+    ld1 {v6.s}[0], [x0], x2 // 4-byte lane load like every other row here; only lane 0 of each result is stored
+    uaddl v14.8h, v3.8b, v4.8b
+    uaddl v16.8h, v1.8b, v6.8b
+    uaddl v18.8h, v2.8b, v5.8b
+    mls v8.8h, v10.8h , v24.8h // temp -= temp2 * 5
+    ld1 {v7.s}[0], [x0], x2
+    mla v16.8h, v14.8h , v22.8h
+    uaddl v20.8h, v4.8b, v5.8b
+    uaddl v12.8h, v2.8b, v7.8b
+    uaddl v10.8h, v3.8b, v6.8b
+    mls v16.8h, v18.8h , v24.8h
+    sqrshrun v26.8b, v8.8h, #5 // dst[0_0] = CLIP_U8( (temp + 16) >> 5)
+    mla v12.8h, v20.8h , v22.8h
+    ld1 {v0.s}[0], [x0], x2
+    uaddl v14.8h, v5.8b, v6.8b
+    sqrshrun v27.8b, v16.8h, #5
+    uaddl v20.8h, v3.8b, v0.8b
+    mls v12.8h, v10.8h , v24.8h
+    st1 {v26.s}[0], [x1], x3 // Vector store to dst[0_0]
+    uaddl v18.8h, v4.8b, v7.8b
+    mla v20.8h, v14.8h , v22.8h
+    st1 {v27.s}[0], [x1], x3
+    sqrshrun v28.8b, v12.8h, #5
+    st1 {v28.s}[0], [x1], x3
+    mls v20.8h, v18.8h , v24.8h
+    ld1 {v1.s}[0], [x0], x2
+    sqrshrun v29.8b, v20.8h, #5
+    st1 {v29.s}[0], [x1], x3 //store row 3
+
+    subs x9, x4, #4
+    beq end_func // Branch if height==4
+
+// rows 4-7 (reached for any ht other than 4)
+    uaddl v14.8h, v6.8b, v7.8b // temp1 = src[2_0] + src[3_0]
+    uaddl v16.8h, v0.8b, v5.8b // temp2 = src[1_0] + src[4_0]
+    uaddl v18.8h, v1.8b, v4.8b // temp = src[0_0] + src[5_0]
+    mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20
+    ld1 {v2.s}[0], [x0], x2
+    mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5
+    uaddl v8.8h, v0.8b, v7.8b
+    uaddl v10.8h, v1.8b, v6.8b
+    uaddl v12.8h, v2.8b, v5.8b
+    sqrshrun v26.8b, v18.8h, #5
+    mla v12.8h, v8.8h , v22.8h
+    ld1 {v3.s}[0], [x0], x2
+    mls v12.8h, v10.8h , v24.8h
+    st1 {v26.s}[0], [x1], x3
+    sqrshrun v27.8b, v12.8h, #5
+    st1 {v27.s}[0], [x1], x3
+    uaddl v14.8h, v0.8b, v1.8b // temp1 = src[2_0] + src[3_0]
+    uaddl v16.8h, v2.8b, v7.8b // temp2 = src[1_0] + src[4_0]
+    uaddl v18.8h, v3.8b, v6.8b // temp = src[0_0] + src[5_0]
+    mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20
+    ld1 {v4.s}[0], [x0], x2
+    mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5
+    uaddl v8.8h, v2.8b, v1.8b
+    uaddl v10.8h, v3.8b, v0.8b
+    uaddl v12.8h, v4.8b, v7.8b
+    sqrshrun v26.8b, v18.8h, #5
+    mla v12.8h, v8.8h , v22.8h
+    ld1 {v5.s}[0], [x0], x2 // NOTE(review): v5 is not read again before ret on this path
+    mls v12.8h, v10.8h , v24.8h
+    st1 {v26.s}[0], [x1], x3
+    sqrshrun v27.8b, v12.8h, #5
+    st1 {v27.s}[0], [x1], x3
+
+
+end_func:
+    // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack
+    ldp x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+
diff --git a/common/armv8/ih264_inter_pred_luma_copy_av8.s b/common/armv8/ih264_inter_pred_luma_copy_av8.s
new file mode 100755
index 0000000..1a76c1c
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_luma_copy_av8.s
@@ -0,0 +1,267 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android
Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+///**
+//*******************************************************************************
+//*
+//* @brief
+//*     Interprediction luma function for copy
+//*
+//* @par Description:
+//*   Copies the array of width 'wd' and height 'ht' from the location pointed
+//*   by 'src' to the location pointed by 'dst'
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//*  UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//*  integer source stride
+//*
+//* @param[in] dst_strd
+//*  integer destination stride
+//*
+//*
+//* @param[in] ht
+//*  integer height of the array
+//*
+//* @param[in] wd
+//*  integer width of the array
+//*
+//* @returns
+//*  None; returns immediately when ht <= 0
+//* @remarks
+//*  Copies 4 rows per step in 16-byte columns (wd a multiple of 16), 8-byte columns
+//*  (wd a multiple of 8) or 4-byte columns (any other wd); ht is consumed 4 rows at a time.
+//*******************************************************************************
+//*/
+//void ih264_inter_pred_luma_copy (
+//                            UWORD8 *pu1_src,
+//                            UWORD8 *pu1_dst,
+//                            WORD32 src_strd,
+//                            WORD32 dst_strd,
+//                            WORD32 ht,
+//                            WORD32 wd   )
+
+//**************Variables Vs Registers*****************************************
+//    x0 => *pu1_src
+//    x1 => *pu1_dst
+//    x2 =>  src_strd
+//    x3 =>  dst_strd
+//    x7 =>  ht (moved from x4 at entry)
+//    x12 => wd (moved from x5 at entry)
+// x4 => width left in the current 4-row band, x5/x6 => src/dst row cursors, x11 => wd - column width
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+// push_v_regs / pop_v_regs are macros from ih264_neon_macros.s (not shown here).
+    .global ih264_inter_pred_luma_copy_av8
+
+ih264_inter_pred_luma_copy_av8:
+
+    push_v_regs
+    stp x19, x20, [sp, #-16]!
+// x19/x20 are saved here and restored at every exit; neither is written in this function
+    mov x12, x5
+    mov x7, x4
+    cmp x7, #0 //checks ht <= 0
+    ble end_loops
+    tst x12, #15 //low 4 bits of wd clear => wd is a multiple of 16
+    beq core_loop_wd_16
+    tst x12, #7 //low 3 bits of wd clear => wd is a multiple of 8
+    beq core_loop_wd_8
+    sub x11, x12, #4
+
+outer_loop_wd_4:
+    subs x4, x12, #0 //checks wd <= 0
+    ble end_inner_loop_wd_4
+
+inner_loop_wd_4:
+    ld1 {v0.s}[0], [x0] //vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp, 0)
+    add x5, x0, x2 //pu1_src_tmp += src_strd
+    add x6, x1, x3 //pu1_dst_tmp += dst_strd
+    st1 {v0.s}[0], [x1] //vst1_lane_u32((uint32_t *)pu1_dst_tmp, src_tmp, 0)
+    ld1 {v0.s}[0], [x5], x2 //vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp, 0)
+    add x0, x0, #4 //pu1_src += 4
+    st1 {v0.s}[0], [x6], x3 //vst1_lane_u32((uint32_t *)pu1_dst_tmp, src_tmp, 0)
+    ld1 {v0.s}[0], [x5], x2 //vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp, 0)
+    subs x4, x4, #4 //(wd -4)
+    st1 {v0.s}[0], [x6], x3 //vst1_lane_u32((uint32_t *)pu1_dst_tmp, src_tmp, 0)
+    ld1 {v0.s}[0], [x5], x2 //vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp, 0)
+    add x1, x1, #4 //pu1_dst += 4
+    st1 {v0.s}[0], [x6], x3 //vst1_lane_u32((uint32_t *)pu1_dst_tmp, src_tmp, 0)
+
+    bgt inner_loop_wd_4
+
+end_inner_loop_wd_4:
+    subs x7, x7, #4 //ht - 4
+    sub x0, x5, x11 //pu1_src = pu1_src_tmp - (wd - 4): start of the next 4-row band
+    sub x1, x6, x11 //pu1_dst = pu1_dst_tmp - (wd - 4)
+    bgt outer_loop_wd_4
+
+end_loops:
+    // LDMFD sp!,{x4-x12,x15} //Reload the registers from SP
+    ldp x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+core_loop_wd_8:
+    sub x11, x12, #8
+
+outer_loop_wd_8:
+    subs x4, x12, #0 //checks wd
+    ble end_inner_loop_wd_8
+
+inner_loop_wd_8:
+    add x5, x0, x2 //pu1_src_tmp += src_strd
+    ld1 {v0.8b}, [x0], #8 //vld1_u8(pu1_src_tmp)
+    add x6, x1, x3 //pu1_dst_tmp += dst_strd
+    st1 {v0.8b}, [x1], #8 //vst1_u8(pu1_dst_tmp, tmp_src)
+    ld1 {v1.8b}, [x5], x2 //vld1_u8(pu1_src_tmp)
+    st1 {v1.8b}, [x6], x3 //vst1_u8(pu1_dst_tmp, tmp_src)
+    subs x4, x4, #8 //wd - 8(Loop condition)
+    ld1 {v2.8b}, [x5], x2 //vld1_u8(pu1_src_tmp)
+    st1 {v2.8b}, [x6], x3 //vst1_u8(pu1_dst_tmp, tmp_src)
+    ld1 {v3.8b}, [x5], x2 //vld1_u8(pu1_src_tmp)
+    st1 {v3.8b}, [x6], x3 //vst1_u8(pu1_dst_tmp, tmp_src)
+    bgt inner_loop_wd_8
+
+end_inner_loop_wd_8:
+    subs x7, x7, #4 //ht -= 4
+    sub x0, x5, x11 //pu1_src = pu1_src_tmp - (wd - 8)
+    sub x1, x6, x11 //pu1_dst = pu1_dst_tmp - (wd - 8)
+    bgt outer_loop_wd_8
+
+    // LDMFD sp!,{x4-x12,x15} //Reload the registers from SP
+    ldp x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+core_loop_wd_16:
+    sub x11, x12, #16
+
+outer_loop_wd_16:
+    subs x4, x12, #0 //checks wd
+    ble end_inner_loop_wd_16
+
+inner_loop_wd_16:
+    add x5, x0, x2 //pu1_src_tmp += src_strd
+    ld1 { v0.16b}, [x0], #16 //vld1q_u8(pu1_src_tmp)
+    add x6, x1, x3 //pu1_dst_tmp += dst_strd
+    st1 { v0.16b}, [x1], #16 //vst1q_u8(pu1_dst_tmp, tmp_src)
+    ld1 { v2.16b}, [x5], x2 //vld1q_u8(pu1_src_tmp)
+    st1 { v2.16b}, [x6], x3 //vst1q_u8(pu1_dst_tmp, tmp_src)
+    subs x4, x4, #16 //wd - 16(Loop condition)
+    ld1 { v4.16b}, [x5], x2 //vld1q_u8(pu1_src_tmp)
+    st1 { v4.16b}, [x6], x3 //vst1q_u8(pu1_dst_tmp, tmp_src)
+    ld1 { v6.16b}, [x5], x2 //vld1q_u8(pu1_src_tmp)
+    st1 { v6.16b}, [x6], x3 //vst1q_u8(pu1_dst_tmp, tmp_src)
+    bgt inner_loop_wd_16
+
+end_inner_loop_wd_16:
+    subs x7, x7, #4 //ht -= 4
+    sub x0, x5, x11 //pu1_src = pu1_src_tmp - (wd - 16)
+    sub x1, x6, x11 //pu1_dst = pu1_dst_tmp - (wd - 16)
+    bgt outer_loop_wd_16
+
+
+    ldp x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+// /*
+// ********************************************************************************
+// *
+// * @brief This function copies a 4x4 block to destination
+// *
+// * @par Description:
+// *   Copies a 4x4 block to destination, where both src and dst are interleaved
+// *   (4 rows of 8 bytes: the even-indexed bytes of each row come from src, the odd-indexed bytes of out are kept)
+// * @param[in] pi2_src
+// *  Source
+// *
+// * @param[in] pu1_out
+// *  Output pointer
+// *
+// * @param[in] pred_strd,
+// *  Prediction buffer stride
+// *
+// * @param[in] out_strd
+// *  output buffer Stride
+// *
+// * @returns none
+// *
+// * @remarks none
+// * Currently wd and height is not used, ie a 4x4 block is always copied
+// *
+// *******************************************************************************
+// */
+// void ih264_interleave_copy(WORD16 *pi2_src,
+//                            UWORD8 *pu1_out,
+//                            WORD32 pred_strd,
+//                            WORD32 out_strd,
+//                            WORD32 wd,
+//                            WORD32 ht)
+// Register Usage
+// x0         : pi2_src
+// x1         : pu1_out
+// x2         : src_strd
+// x3         : out_strd
+// Neon registers v2-v5, v18-v21 and v30 are used
+// NOTE(review): v8-v15 are not written here, yet the body is still bracketed by push_v_regs/pop_v_regs
+
+    .global ih264_interleave_copy_av8
+ih264_interleave_copy_av8:
+    push_v_regs
+    ld1 {v2.8b}, [x0], x2 //load src row 0 (8 bytes) into the low half of v2
+    ld1 {v3.8b}, [x0], x2
+    mov v2.d[1], v3.d[0]
+    ld1 {v4.8b}, [x0], x2
+    ld1 {v5.8b}, [x0], x2
+    mov v4.d[1], v5.d[0]
+
+    mov x0, x1
+
+    ld1 {v18.8b}, [x1], x3 //load current out row 0 (8 bytes)
+    ld1 {v19.8b}, [x1], x3
+    mov v18.d[1], v19.d[0]
+    movi v30.8h, #0x00ff
+    ld1 {v20.8b}, [x1], x3
+    ld1 {v21.8b}, [x1], x3
+    mov v20.d[1], v21.d[0]
+
+    bit v18.16b, v2.16b , v30.16b
+    bit v20.16b, v4.16b , v30.16b
+
+    st1 {v18.8b}, [x0], x3 //store out
+    st1 {v18.d}[1], [x0], x3
+    st1 {v20.8b}, [x0], x3
+    st1 {v20.d}[1], [x0], x3
+
+    pop_v_regs
+    ret
+
+
+
diff --git a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
new file mode 100755
index 0000000..ea7645e
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
@@ -0,0 +1,820 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s +//* +//* @brief +//* Contains function definitions for inter prediction interpolation. +//* +//* @author +//* Mohit +//* +//* @par List of Functions: +//* +//* - ih264_inter_pred_luma_horz_hpel_vert_hpel_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + + + +//void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ht, +// WORD32 wd, +// UWORD8* pu1_tmp, +// UWORD32 dydx) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ht +// x5 => wd + + +.text +.p2align 2 +.include "ih264_neon_macros.s" + + + + .global ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 + +ih264_inter_pred_luma_horz_hpel_vert_hpel_av8: + + //store register values to stack + push_v_regs + stp x19, x20, [sp, #-16]!
+ + sub x0, x0, x2, lsl #1 //pu1_src-2*src_strd + sub x0, x0, #2 //pu1_src-2 + + movi v26.8h, #0x14 // Filter coeff 20 into v26 + movi v24.8h, #0x5 // Filter coeff 5 into v24 + movi v27.8h, #0x14 // Filter coeff 20 into v27 + movi v25.8h, #0x5 // Filter coeff 5 into v25 + mov x7, #0x20 + mov x8, #0x30 + subs x12, x5, #4 //if wd=4 branch to loop_4 + beq loop_4_start + + subs x12, x5, #8 //if wd=8 branch to loop_8 + beq loop_8_start + + //when wd=16 (any wd other than 4 or 8 falls through to this path) + movi v28.8h, #0x14 // Filter coeff 20 into v28 + movi v30.8h, #0x5 // Filter coeff 5 into v30 + sub x2, x2, #16 //each row is read as 16 + 8 bytes, so the stride is pre-reduced by the first 16 + ld1 {v0.2s, v1.2s}, [x0], #16 // Vector load from src[0_0] + ld1 {v12.2s}, [x0], x2 // Vector load from src[0_0] + ld1 {v2.2s, v3.2s}, [x0], #16 // Vector load from src[1_0] + ld1 {v13.2s}, [x0], x2 // Vector load from src[1_0] + ld1 {v4.2s, v5.2s}, [x0], #16 // Vector load from src[2_0] + ld1 {v14.2s}, [x0], x2 // Vector load from src[2_0] + ld1 {v6.2s, v7.2s}, [x0], #16 // Vector load from src[3_0] + ld1 {v15.2s}, [x0], x2 // Vector load from src[3_0] + ld1 {v8.2s, v9.2s}, [x0], #16 // Vector load from src[4_0] + ld1 {v16.2s}, [x0], x2 // Vector load from src[4_0] +loop_16: + + ld1 {v10.2s, v11.2s}, [x0], #16 // Vector load from src[5_0] + ld1 {v17.2s}, [x0], x2 // Vector load from src[5_0] + + + uaddl v20.8h, v4.8b, v6.8b + uaddl v18.8h, v0.8b, v10.8b + uaddl v22.8h, v2.8b, v8.8b + mla v18.8h, v20.8h , v28.8h + uaddl v24.8h, v5.8b, v7.8b + uaddl v20.8h, v1.8b, v11.8b + uaddl v26.8h, v3.8b, v9.8b + mla v20.8h, v24.8h , v28.8h + uaddl v24.8h, v14.8b, v15.8b + mls v18.8h, v22.8h , v30.8h + uaddl v22.8h, v12.8b, v17.8b + mls v20.8h, v26.8h , v30.8h + uaddl v26.8h, v13.8b, v16.8b + mla v22.8h, v24.8h , v28.8h + mls v22.8h, v26.8h , v30.8h + + ext v24.16b, v18.16b , v20.16b , #4 + ext v26.16b, v18.16b , v20.16b , #6 + + ext v23.16b, v18.16b , v20.16b , #10 + add v0.8h, v24.8h , v26.8h + ext v24.16b, v18.16b , v20.16b , #2 + ext v26.16b, v18.16b , v20.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s,
v18.4h, v23.4h + smlal v26.4s, v0.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v23.4s, v18.8h, v23.8h + smlal2 v23.4s, v0.8h, v28.8h + smlsl2 v23.4s, v24.8h, v30.8h + + sqrshrun v18.4h, v26.4s, #10 + sqrshrun v19.4h, v23.4s, #10 + + + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + ext v24.16b, v20.16b , v22.16b , #4 + ext v26.16b, v20.16b , v22.16b , #6 + ext v0.16b, v20.16b , v22.16b , #10 + + add v25.8h, v24.8h , v26.8h + ext v24.16b, v20.16b , v22.16b , #2 + ext v26.16b, v20.16b , v22.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v0.4h, v20.4h + smlal v26.4s, v25.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v22.4s, v0.8h, v20.8h + smlal2 v22.4s, v25.8h, v28.8h + smlsl2 v22.4s, v24.8h, v30.8h + + sqrshrun v19.4h, v26.4s, #10 + sqrshrun v25.4h, v22.4s, #10 + + uaddl v24.8h, v7.8b, v9.8b + + + + uqxtn v19.8b, v19.8h + uqxtn v25.8b, v25.8h + mov v19.2s[1], v25.2s[0] + + uaddl v22.8h, v4.8b, v10.8b + ld1 {v0.2s, v1.2s}, [x0], #16 // Vector load from src[6_0] + + + ld1 {v12.2s}, [x0], x2 // Vector load from src[6_0] + uaddl v20.8h, v6.8b, v8.8b + uaddl v26.8h, v5.8b, v11.8b + st1 {v18.2s, v19.2s}, [x1], x3 // store row 0 + + +//ROW_2 + + + uaddl v18.8h, v2.8b, v0.8b + + mla v18.8h, v20.8h , v28.8h + + uaddl v20.8h, v3.8b, v1.8b + + mla v20.8h, v24.8h , v28.8h + uaddl v24.8h, v15.8b, v16.8b + mls v18.8h, v22.8h , v30.8h + uaddl v22.8h, v13.8b, v12.8b + mls v20.8h, v26.8h , v30.8h + uaddl v26.8h, v14.8b, v17.8b + mla v22.8h, v24.8h , v28.8h + mls v22.8h, v26.8h , v30.8h + + ext v24.16b, v18.16b , v20.16b , #4 + ext v26.16b, v18.16b , v20.16b , #6 + + ext v23.16b, v18.16b , v20.16b , #10 + add v2.8h, v24.8h , v26.8h + ext v24.16b, v18.16b , v20.16b , #2 + ext v26.16b, v18.16b , v20.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v18.4h, v23.4h + smlal v26.4s, v2.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v23.4s, v18.8h, v23.8h + smlal2 v23.4s, v2.8h, v28.8h + smlsl2 v23.4s, v24.8h, v30.8h + +
sqrshrun v18.4h, v26.4s, #10 + sqrshrun v19.4h, v23.4s, #10 + + + + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + ext v24.16b, v20.16b , v22.16b , #4 + ext v26.16b, v20.16b , v22.16b , #6 + ext v2.16b, v20.16b , v22.16b , #10 + + add v25.8h, v24.8h , v26.8h + ext v24.16b, v20.16b , v22.16b , #2 + ext v26.16b, v20.16b , v22.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v2.4h, v20.4h + smlal v26.4s, v25.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v22.4s, v2.8h, v20.8h + smlal2 v22.4s, v25.8h, v28.8h + smlsl2 v22.4s, v24.8h, v30.8h + + sqrshrun v19.4h, v26.4s, #10 + sqrshrun v25.4h, v22.4s, #10 + uaddl v24.8h, v9.8b, v11.8b + + uqxtn v19.8b, v19.8h + uqxtn v25.8b, v25.8h + mov v19.2s[1], v25.2s[0] + + + uaddl v22.8h, v6.8b, v0.8b + ld1 {v2.2s, v3.2s}, [x0], #16 // Vector load from src[7_0] + + + ld1 {v13.2s}, [x0], x2 // Vector load from src[7_0] + uaddl v20.8h, v8.8b, v10.8b + uaddl v26.8h, v7.8b, v1.8b + st1 {v18.2s, v19.2s}, [x1], x3 // store row 1 + +//ROW_3 + + + uaddl v18.8h, v4.8b, v2.8b + + mla v18.8h, v20.8h , v28.8h + + uaddl v20.8h, v5.8b, v3.8b + + mla v20.8h, v24.8h , v28.8h + uaddl v24.8h, v16.8b, v17.8b + mls v18.8h, v22.8h , v30.8h + uaddl v22.8h, v14.8b, v13.8b + mls v20.8h, v26.8h , v30.8h + uaddl v26.8h, v15.8b, v12.8b + mla v22.8h, v24.8h , v28.8h + mls v22.8h, v26.8h , v30.8h + + ext v24.16b, v18.16b , v20.16b , #4 + ext v26.16b, v18.16b , v20.16b , #6 + + ext v23.16b, v18.16b , v20.16b , #10 + add v4.8h, v24.8h , v26.8h + ext v24.16b, v18.16b , v20.16b , #2 + ext v26.16b, v18.16b , v20.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v18.4h, v23.4h + smlal v26.4s, v4.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v23.4s, v18.8h, v23.8h + smlal2 v23.4s, v4.8h, v28.8h + smlsl2 v23.4s, v24.8h, v30.8h + + sqrshrun v18.4h, v26.4s, #10 + sqrshrun v19.4h, v23.4s, #10 + + + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + + ext v24.16b, v20.16b , v22.16b , #4 + ext
v26.16b, v20.16b , v22.16b , #6 + ext v4.16b, v20.16b , v22.16b , #10 + + add v25.8h, v24.8h , v26.8h + ext v24.16b, v20.16b , v22.16b , #2 + ext v26.16b, v20.16b , v22.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v4.4h, v20.4h + smlal v26.4s, v25.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v22.4s, v4.8h, v20.8h + smlal2 v22.4s, v25.8h, v28.8h + smlsl2 v22.4s, v24.8h, v30.8h + + sqrshrun v19.4h, v26.4s, #10 + sqrshrun v25.4h, v22.4s, #10 + + uaddl v24.8h, v11.8b, v1.8b + + + uqxtn v19.8b, v19.8h + uqxtn v25.8b, v25.8h + mov v19.2s[1], v25.2s[0] + + + + uaddl v22.8h, v8.8b, v2.8b + ld1 {v4.2s, v5.2s}, [x0], #16 // Vector load from src[8_0] + + + ld1 {v14.2s}, [x0], x2 // Vector load from src[8_0] + uaddl v20.8h, v10.8b, v0.8b + uaddl v26.8h, v9.8b, v3.8b + st1 {v18.2s, v19.2s}, [x1], x3 // store row 2 + + +//ROW_4 + + uaddl v18.8h, v6.8b, v4.8b + + mla v18.8h, v20.8h , v28.8h + + uaddl v20.8h, v7.8b, v5.8b + + mla v20.8h, v24.8h , v28.8h + uaddl v24.8h, v17.8b, v12.8b + mls v18.8h, v22.8h , v30.8h + uaddl v22.8h, v15.8b, v14.8b + mls v20.8h, v26.8h , v30.8h + uaddl v26.8h, v16.8b, v13.8b + mla v22.8h, v24.8h , v28.8h + mls v22.8h, v26.8h , v30.8h + + ext v24.16b, v18.16b , v20.16b , #4 + ext v26.16b, v18.16b , v20.16b , #6 + + ext v23.16b, v18.16b , v20.16b , #10 + add v6.8h, v24.8h , v26.8h + ext v24.16b, v18.16b , v20.16b , #2 + ext v26.16b, v18.16b , v20.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v18.4h, v23.4h + smlal v26.4s, v6.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v23.4s, v18.8h, v23.8h + smlal2 v23.4s, v6.8h, v28.8h + smlsl2 v23.4s, v24.8h, v30.8h + + sqrshrun v18.4h, v26.4s, #10 + sqrshrun v19.4h, v23.4s, #10 + + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + + ext v24.16b, v20.16b , v22.16b , #4 + ext v26.16b, v20.16b , v22.16b , #6 + ext v6.16b, v20.16b , v22.16b , #10 + + add v25.8h, v24.8h , v26.8h + ext v24.16b, v20.16b , v22.16b , #2 + ext v26.16b, v20.16b , v22.16b , #8 +
add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v6.4h, v20.4h + smlal v26.4s, v25.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v22.4s, v6.8h, v20.8h + smlal2 v22.4s, v25.8h, v28.8h + smlsl2 v22.4s, v24.8h, v30.8h + + mov v6.16b, v2.16b + mov v7.16b, v3.16b + + mov v2.16b, v10.16b + mov v3.16b, v11.16b + + subs x4, x4, #4 + sqrshrun v19.4h, v26.4s, #10 + sqrshrun v25.4h, v22.4s, #10 + mov v10.16b, v0.16b + mov v11.16b, v1.16b + + mov v24.8b, v14.8b + + mov v14.16b, v12.16b + mov v15.16b, v13.16b + + + uqxtn v19.8b, v19.8h + uqxtn v25.8b, v25.8h + mov v19.2s[1], v25.2s[0] + + + + mov v0.16b, v8.16b + mov v1.16b, v9.16b + + mov v8.16b, v4.16b + mov v9.16b, v5.16b + + mov v12.16b, v16.16b + mov v13.16b, v17.16b + + mov v4.16b, v10.16b + mov v5.16b, v11.16b + + mov v16.8b, v24.8b + st1 {v18.2s, v19.2s}, [x1], x3 // store row 3 + + bgt loop_16 // loop while rows remain (ht was reduced by 4 above) + b end_func + +loop_8_start: + ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[0_0] + ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[1_0] + ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[2_0] + ld1 {v6.2s, v7.2s}, [x0], x2 // Vector load from src[3_0] + ld1 {v8.2s, v9.2s}, [x0], x2 // Vector load from src[4_0] + +loop_8: + + ld1 {v10.2s, v11.2s}, [x0], x2 // Vector load from src[5_0] + uaddl v14.8h, v4.8b, v6.8b + uaddl v12.8h, v0.8b, v10.8b + uaddl v16.8h, v2.8b, v8.8b + mla v12.8h, v14.8h , v26.8h + uaddl v18.8h, v5.8b, v7.8b + uaddl v14.8h, v1.8b, v11.8b + uaddl v22.8h, v3.8b, v9.8b + mla v14.8h, v18.8h , v26.8h + mls v12.8h, v16.8h , v24.8h + ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[6_0] + uaddl v16.8h, v6.8b, v8.8b + mls v14.8h, v22.8h , v24.8h + uaddl v28.8h, v2.8b, v0.8b + + ext v22.16b, v12.16b , v14.16b , #10 + uaddl v18.8h, v4.8b, v10.8b + mla v28.8h, v16.8h , v26.8h + saddl v30.4s, v12.4h, v22.4h + + saddl2 v22.4s, v12.8h, v22.8h + ext v16.16b, v12.16b , v14.16b , #4 + mls v28.8h, v18.8h , v24.8h + ext v18.16b, v12.16b , v14.16b , #6 + ext v20.16b, v12.16b ,
v14.16b , #8 + ext v14.16b, v12.16b , v14.16b , #2 + add v16.8h, v16.8h , v18.8h + add v18.8h, v14.8h , v20.8h + uaddl v20.8h, v7.8b, v9.8b + smlal v30.4s, v16.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal2 v22.4s, v16.8h, v26.8h + smlsl2 v22.4s, v18.8h, v24.8h + uaddl v14.8h, v3.8b, v1.8b + + mla v14.8h, v20.8h , v26.8h + sqrshrun v12.4h, v30.4s, #10 + uaddl v16.8h, v5.8b, v11.8b + sqrshrun v13.4h, v22.4s, #10 + mls v14.8h, v16.8h , v24.8h + ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[7_0] + uqxtn v25.8b, v12.8h + uqxtn v13.8b, v13.8h + mov v25.2s[1], v13.2s[0] + uaddl v16.8h, v8.8b, v10.8b + + + ext v22.16b, v28.16b , v14.16b , #10 + uaddl v20.8h, v4.8b, v2.8b + saddl v30.4s, v28.4h, v22.4h + mla v20.8h, v16.8h , v26.8h + + saddl2 v22.4s, v28.8h, v22.8h + ext v16.16b, v28.16b , v14.16b , #4 + ext v18.16b, v28.16b , v14.16b , #6 + ext v12.16b, v28.16b , v14.16b , #8 + ext v14.16b, v28.16b , v14.16b , #2 + add v16.8h, v16.8h , v18.8h + add v18.8h, v12.8h , v14.8h + + smlal v30.4s, v16.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal2 v22.4s, v16.8h, v26.8h + smlsl2 v22.4s, v18.8h, v24.8h + + + uaddl v18.8h, v6.8b, v0.8b + sqrshrun v16.4h, v30.4s, #10 + + sqrshrun v17.4h, v22.4s, #10 + + mov v12.8b, v25.8b + mov v25.8b, v24.8b + + uaddl v28.8h, v9.8b, v11.8b + uqxtn v13.8b, v16.8h + uqxtn v17.8b, v17.8h + mov v13.2s[1], v17.2s[0] + + + uaddl v14.8h, v5.8b, v3.8b + uaddl v22.8h, v7.8b, v1.8b + mls v20.8h, v18.8h , v24.8h + st1 {v12.2s}, [x1], x3 // store row 0 + mla v14.8h, v28.8h , v26.8h + ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[8_0] + uaddl v30.8h, v10.8b, v0.8b + uaddl v28.8h, v6.8b, v4.8b + mls v14.8h, v22.8h , v24.8h + st1 {v13.2s}, [x1], x3 // store row 1 + mla v28.8h, v30.8h , v26.8h + + ext v22.16b, v20.16b , v14.16b , #10 + saddl v30.4s, v20.4h, v22.4h + + saddl2 v22.4s, v20.8h, v22.8h + ext v16.16b, v20.16b , v14.16b , #4 + ext v18.16b, v20.16b , v14.16b , #6 + ext v12.16b, v20.16b , v14.16b , #8 + ext v14.16b, v20.16b , v14.16b
, #2 + add v16.8h, v16.8h , v18.8h + add v18.8h, v14.8h , v12.8h + uaddl v20.8h, v8.8b, v2.8b + smlal v30.4s, v16.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal2 v22.4s, v16.8h, v26.8h + smlsl2 v22.4s, v18.8h, v24.8h + uaddl v18.8h, v11.8b, v1.8b + uaddl v16.8h, v7.8b, v5.8b + sqrshrun v12.4h, v30.4s, #10 + uaddl v30.8h, v9.8b, v3.8b + mla v16.8h, v18.8h , v26.8h + sqrshrun v13.4h, v22.4s, #10 + mls v28.8h, v20.8h , v24.8h + + mls v16.8h, v30.8h , v24.8h + uqxtn v27.8b, v12.8h + uqxtn v13.8b, v13.8h + mov v27.2s[1], v13.2s[0] + + + ext v22.16b, v28.16b , v16.16b , #10 + + saddl v30.4s, v28.4h, v22.4h + + saddl2 v22.4s, v28.8h, v22.8h + ext v12.16b, v28.16b , v16.16b , #4 + ext v18.16b, v28.16b , v16.16b , #6 + ext v20.16b, v28.16b , v16.16b , #8 + ext v28.16b, v28.16b , v16.16b , #2 + add v12.8h, v12.8h , v18.8h + add v18.8h, v28.8h , v20.8h + + smlal v30.4s, v12.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal2 v22.4s, v12.8h, v26.8h + smlsl2 v22.4s, v18.8h, v24.8h + + + mov v12.8b, v27.8b + mov v27.8b, v26.8b + + sqrshrun v16.4h, v30.4s, #10 + + mov v6.16b, v2.16b + mov v7.16b, v3.16b + + sqrshrun v17.4h, v22.4s, #10 + + mov v2.16b, v10.16b + mov v3.16b, v11.16b + + mov v10.16b, v0.16b + mov v11.16b, v1.16b + + subs x4, x4, #4 + uqxtn v13.8b, v16.8h + uqxtn v17.8b, v17.8h + mov v13.2s[1], v17.2s[0] + + + mov v0.16b, v8.16b + mov v1.16b, v9.16b + + mov v8.16b, v4.16b + mov v9.16b, v5.16b + + mov v4.16b, v10.16b + mov v5.16b, v11.16b + + st1 {v12.2s}, [x1], x3 // store row 2 + st1 {v13.2s}, [x1], x3 // store row 3 + + bgt loop_8 // loop while rows remain (ht was reduced by 4 above) + b end_func + +loop_4_start: + ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[0_0] + ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[1_0] + ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[2_0] + ld1 {v6.2s, v7.2s}, [x0], x2 // Vector load from src[3_0] + ld1 {v8.2s, v9.2s}, [x0], x2 // Vector load from src[4_0] + +loop_4: + ld1 {v10.2s, v11.2s}, [x0], x2 // Vector load from src[5_0] + uaddl v14.8h,
v4.8b, v6.8b // temp1 = src[2_0] + src[3_0] + uaddl v12.8h, v0.8b, v10.8b // temp = src[0_0] + src[5_0] + uaddl v16.8h, v2.8b, v8.8b // temp2 = src[1_0] + src[4_0] + mla v12.8h, v14.8h , v26.8h // temp += temp1 * 20 + uaddl v18.8h, v5.8b, v7.8b // temp1 = src[2_0] + src[3_0] + uaddl v14.8h, v1.8b, v11.8b // temp = src[0_0] + src[5_0] + uaddl v22.8h, v3.8b, v9.8b // temp2 = src[1_0] + src[4_0] + mla v14.8h, v18.8h , v26.8h // temp += temp1 * 20 + mls v12.8h, v16.8h , v24.8h // temp -= temp2 * 5 + ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[6_0] + uaddl v16.8h, v6.8b, v8.8b + mls v14.8h, v22.8h , v24.8h // temp -= temp2 * 5 + //v12 and v14 have filtered values + uaddl v28.8h, v2.8b, v0.8b + + ext v22.16b, v12.16b , v14.16b , #10 + uaddl v18.8h, v4.8b, v10.8b + mla v28.8h, v16.8h , v26.8h + saddl v30.4s, v12.4h, v22.4h + + saddl v22.4s, v13.4h, v23.4h + ext v16.16b, v12.16b , v14.16b , #4 + mls v28.8h, v18.8h , v24.8h + ext v18.16b, v12.16b , v14.16b , #6 + ext v20.16b, v12.16b , v14.16b , #8 + ext v14.16b, v12.16b , v14.16b , #2 + add v16.8h, v16.8h , v18.8h + add v18.8h, v14.8h , v20.8h + uaddl v20.8h, v7.8b, v9.8b + smlal v30.4s, v16.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal v22.4s, v17.4h, v26.4h + smlsl v22.4s, v19.4h, v24.4h + uaddl v14.8h, v3.8b, v1.8b + + mla v14.8h, v20.8h , v26.8h + sqrshrun v12.4h, v30.4s, #10 + uaddl v16.8h, v5.8b, v11.8b + sqrshrun v13.4h, v22.4s, #10 + mls v14.8h, v16.8h , v24.8h + ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[7_0] + uqxtn v25.8b, v12.8h + uaddl v16.8h, v8.8b, v10.8b + + ext v22.16b, v28.16b , v14.16b , #10 + uaddl v20.8h, v4.8b, v2.8b + saddl v30.4s, v28.4h, v22.4h + mla v20.8h, v16.8h , v26.8h + + saddl v22.4s, v29.4h, v23.4h + ext v16.16b, v28.16b , v14.16b , #4 + ext v18.16b, v28.16b , v14.16b , #6 + ext v12.16b, v28.16b , v14.16b , #8 + ext v14.16b, v28.16b , v14.16b , #2 + add v16.8h, v16.8h , v18.8h + add v18.8h, v12.8h , v14.8h + + smlal v30.4s, v16.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h
+ smlal v22.4s, v17.4h, v26.4h + smlsl v22.4s, v19.4h, v24.4h + + + uaddl v18.8h, v6.8b, v0.8b + sqrshrun v16.4h, v30.4s, #10 + + sqrshrun v17.4h, v22.4s, #10 + + mov v12.8b, v25.8b + mov v25.8b, v24.8b + + uaddl v28.8h, v9.8b, v11.8b + uqxtn v13.8b, v16.8h + + + + uaddl v14.8h, v5.8b, v3.8b + uaddl v22.8h, v7.8b, v1.8b + mls v20.8h, v18.8h , v24.8h + st1 {v12.s}[0], [x1], x3 // store row 0 + mla v14.8h, v28.8h , v26.8h + ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[8_0] + uaddl v30.8h, v10.8b, v0.8b + uaddl v28.8h, v6.8b, v4.8b + mls v14.8h, v22.8h , v24.8h + st1 {v13.s}[0], [x1], x3 //store row 1 + mla v28.8h, v30.8h , v26.8h + + ext v22.16b, v20.16b , v14.16b , #10 + saddl v30.4s, v20.4h, v22.4h + + saddl v22.4s, v21.4h, v23.4h + ext v16.16b, v20.16b , v14.16b , #4 + ext v18.16b, v20.16b , v14.16b , #6 + ext v12.16b, v20.16b , v14.16b , #8 + ext v14.16b, v20.16b , v14.16b , #2 + add v16.8h, v16.8h , v18.8h + add v18.8h, v14.8h , v12.8h + uaddl v20.8h, v8.8b, v2.8b + smlal v30.4s, v16.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal v22.4s, v17.4h, v26.4h + smlsl v22.4s, v19.4h, v24.4h + uaddl v18.8h, v11.8b, v1.8b + uaddl v16.8h, v7.8b, v5.8b + sqrshrun v12.4h, v30.4s, #10 + uaddl v30.8h, v9.8b, v3.8b + mla v16.8h, v18.8h , v26.8h + sqrshrun v13.4h, v22.4s, #10 + mls v28.8h, v20.8h , v24.8h + + mls v16.8h, v30.8h , v24.8h + uqxtn v27.8b, v12.8h + + ext v22.16b, v28.16b , v16.16b , #10 + + saddl v30.4s, v28.4h, v22.4h + + saddl v22.4s, v29.4h, v23.4h + ext v12.16b, v28.16b , v16.16b , #4 + ext v18.16b, v28.16b , v16.16b , #6 + ext v20.16b, v28.16b , v16.16b , #8 + ext v28.16b, v28.16b , v16.16b , #2 + add v12.8h, v12.8h , v18.8h + add v18.8h, v28.8h , v20.8h + + smlal v30.4s, v12.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal v22.4s, v13.4h, v26.4h + smlsl v22.4s, v19.4h, v24.4h + + + mov v12.8b, v27.8b + mov v27.8b, v26.8b + + sqrshrun v16.4h, v30.4s, #10 + + mov v6.16b, v2.16b + mov v7.16b, v3.16b + + sqrshrun v17.4h, v22.4s, #10 + + mov v2.16b,
v10.16b + mov v3.16b, v11.16b + + mov v10.16b, v0.16b + mov v11.16b, v1.16b + + subs x4, x4, #4 + uqxtn v13.8b, v16.8h + + mov v0.16b, v8.16b + mov v1.16b, v9.16b + + mov v8.16b, v4.16b + mov v9.16b, v5.16b + + + mov v4.16b, v10.16b + mov v5.16b, v11.16b + + + st1 {v12.s}[0], [x1], x3 // store row 2 + st1 {v13.s}[0], [x1], x3 // store row 3 + + bgt loop_4 + +end_func: + //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + diff --git a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s new file mode 100755 index 0000000..3737e3f --- /dev/null +++ b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s @@ -0,0 +1,1120 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s +//* +//* @brief +//* Contains function definitions for inter prediction interpolation.
+//* +//* @author +//* Mohit +//* +//* @par List of Functions: +//* +//* - ih264_inter_pred_luma_horz_hpel_vert_qpel_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +///* All the functions here are replicated from ih264_inter_pred_filters.c +// + +///** +///** +///** +//******************************************************************************* +//* +//* @brief +//* This function implements a two stage cascaded six tap filter. It +//* applies the six tap filter in the horizontal direction on the +//* predictor values, followed by applying the same filter in the +//* vertical direction on the output of the first stage. It then averages +//* the output of the 1st stage and the output of the 2nd stage to obtain +//* the quarter pel values. The six tap filtering operation is described +//* in sec 8.4.2.2.1 titled "Luma sample interpolation process". +//* +//* @par Description: +//* This function is called to obtain pixels lying at the following +//* location (1/2,1/4) or (1/2,3/4). The function interpolates +//* the predictors first in the horizontal direction and then in the +//* vertical direction to output the (1/2,1/2). It then averages +//* the output of the 2nd stage and (1/2,1/2) value to obtain (1/2,1/4) +//* or (1/2,3/4) depending on the offset. 
+//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @param[in] pu1_tmp: temporary buffer +//* +//* @param[in] dydx: x and y reference offset for qpel calculations +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/; + +//void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd,, +// WORD32 dst_strd, +// WORD32 ht, +// WORD32 wd, +// UWORD8* pu1_tmp, +// UWORD32 dydx) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ht +// x5 => wd +// x7 => dydx +// x9 => *pu1_tmp + +.text +.p2align 2 +.include "ih264_neon_macros.s" + + + + .global ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 + +ih264_inter_pred_luma_horz_hpel_vert_qpel_av8: + + + // store register values to stack + push_v_regs + stp x19, x20, [sp, #-16]! 
+ + + + sub x0, x0, x2, lsl #1 // pu1_src-2*src_strd + sub x0, x0, #2 // pu1_src-2 + + mov x9, x6 + + lsr x7, x7, #3 // dydx >> 2 followed by dydx & 0x3 and dydx>>1 to obtain the deciding bit + + add x7, x7, #2 + mov x6, #48 + madd x7, x7, x6, x9 + + subs x12, x5, #4 //if wd=4 branch to loop_4 + beq loop_4_start + + subs x12, x5, #8 //if wd=8 branch to loop_8 + beq loop_8_start + + //when wd=16 + movi v22.8h, #20 // Filter coeff 0x14 into Q11 + movi v24.8h, #5 // Filter coeff 0x5 into Q12 + add x8, x0, #8 + add x14, x1, #8 + add x10, x9, #8 + mov x12, x4 + add x11, x7, #8 +loop_16_lowhalf_start: + ld1 {v0.2s, v1.2s}, [x0], x2 // row -2 load for horizontal filter + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v6.8h, v0.8b, v5.8b + + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v8.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v6.8h, v8.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v8.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x0], x2 // row -1 load for horizontal filter + mls v6.8h, v8.8h , v24.8h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v8.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v10.8h, v2.8b, v3.8b + + st1 {v6.4s}, [x9], x6 // store temp buffer 0 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v8.8h, v10.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v10.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x0], x2 // row 0 load for horizontal filter + mls v8.8h, v10.8h , v24.8h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v10.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v12.8h, v2.8b, v3.8b + + st1 {v8.4s}, [x9], x6 // store temp buffer 1 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v10.8h, v12.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v12.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x0], x2 // row 1 load for horizontal filter + mls v10.8h, v12.8h , v24.8h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v12.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b 
, v1.8b , #3 + uaddl v14.8h, v2.8b, v3.8b + + st1 {v10.4s}, [x9], x6 // store temp buffer 2 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v12.8h, v14.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v14.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x0], x2 // row 2 load for horizontal filter + mls v12.8h, v14.8h , v24.8h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v14.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v16.8h, v2.8b, v3.8b + + st1 {v12.4s}, [x9], x6 // store temp buffer 3 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v14.8h, v16.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v16.8h, v1.8b, v4.8b + + mls v14.8h, v16.8h , v24.8h +loop_16_lowhalf: + + ld1 {v0.2s, v1.2s}, [x0], x2 // row 3 load for horizontal filter + ext v5.8b, v0.8b , v1.8b , #5 + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v16.8h, v0.8b, v5.8b + + st1 {v14.4s}, [x9], x6 // store temp buffer 4 + + uaddl v18.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v16.8h, v18.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + add v28.8h, v8.8h , v14.8h + uaddl v18.8h, v1.8b, v4.8b + add v30.8h, v10.8h , v12.8h + mls v16.8h, v18.8h , v24.8h + ld1 {v0.2s, v1.2s}, [x0], x2 // row 4 load for hoorizontal filter + ext v5.8b, v0.8b , v1.8b , #5 + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v20.8h, v0.8b, v5.8b + + st1 {v16.4s}, [x9], x6 // store temp buffer x5 + + saddl v18.4s, v6.4h, v16.4h + + ld1 {v26.4s}, [x7], x6 // load from temp buffer 0 + + saddl2 v6.4s, v6.8h, v16.8h + + sqrshrun v26.8b, v26.8h, #5 + + smlal v18.4s, v30.4h, v22.4h + smlsl v18.4s, v28.4h, v24.4h + smlal2 v6.4s, v30.8h, v22.8h + smlsl2 v6.4s, v28.8h, v24.8h + uaddl v2.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v20.8h, v2.8h , v22.8h + sqrshrun v18.4h, v18.4s, #10 + ext v1.8b, v0.8b , v1.8b , #1 + sqrshrun v19.4h, v6.4s, #10 + add v28.8h, v10.8h , v16.8h + uaddl v2.8h, v1.8b, v4.8b + add v30.8h, v12.8h , v14.8h + mls v20.8h, v2.8h , v24.8h 
+ + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + ld1 {v0.2s, v1.2s}, [x0], x2 // row 5 load for horizontal filter + + urhadd v26.8b, v18.8b , v26.8b + + ext v5.8b, v0.8b , v1.8b , #5 + ext v2.8b, v0.8b , v1.8b , #2 + + st1 {v20.4s}, [x9], x6 // store temp buffer x6 + + saddl v18.4s, v8.4h, v20.4h + + saddl2 v6.4s, v8.8h, v20.8h + + ld1 {v8.4s}, [x7], x6 //load from temp buffer 1 + + + st1 {v26.2s}, [x1], x3 // store row 0 + + smlal v18.4s, v30.4h, v22.4h + smlsl v18.4s, v28.4h, v24.4h + smlal2 v6.4s, v30.8h, v22.8h + smlsl2 v6.4s, v28.8h, v24.8h + + sqrshrun v28.8b, v8.8h, #5 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v8.8h, v0.8b, v5.8b + uaddl v2.8h, v2.8b, v3.8b + sqrshrun v18.4h, v18.4s, #10 + ext v4.8b, v0.8b , v1.8b , #4 + sqrshrun v19.4h, v6.4s, #10 + mla v8.8h, v2.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + add v26.8h, v12.8h , v20.8h + uaddl v2.8h, v1.8b, v4.8b + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + add v30.8h, v14.8h , v16.8h + mls v8.8h, v2.8h , v24.8h + ld1 {v0.2s, v1.2s}, [x0], x2 // row 6 load for horizontal filter + + urhadd v28.8b, v28.8b , v18.8b + + ext v5.8b, v0.8b , v1.8b , #5 + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + + st1 {v28.2s}, [x1], x3 // store row 1 + + uaddl v28.8h, v0.8b, v5.8b + + st1 {v8.4s}, [x9], x6 // store temp buffer x7 + + saddl v18.4s, v10.4h, v8.4h + saddl2 v6.4s, v10.8h, v8.8h + + ld1 {v10.4s}, [x7], x6 // load from temp buffer 2 + + smlal v18.4s, v30.4h, v22.4h + smlsl v18.4s, v26.4h, v24.4h + + smlal2 v6.4s, v30.8h, v22.8h + smlsl2 v6.4s, v26.8h, v24.8h + + sqrshrun v26.8b, v10.8h, #5 + + uaddl v2.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v28.8h, v2.8h , v22.8h + sqrshrun v18.4h, v18.4s, #10 + ext v1.8b, v0.8b , v1.8b , #1 + sqrshrun v19.4h, v6.4s, #10 + add v10.8h, v14.8h , v8.8h + uaddl v2.8h, v1.8b, v4.8b + add v30.8h, v16.8h , v20.8h + mls v28.8h, v2.8h , v24.8h + uqxtn v27.8b, v18.8h + uqxtn v19.8b, v19.8h + mov 
v27.2s[1], v19.2s[0] + saddl v18.4s, v12.4h, v28.4h + saddl2 v6.4s, v12.8h, v28.8h + + urhadd v26.8b, v26.8b , v27.8b + + smlal v18.4s, v30.4h, v22.4h + smlsl v18.4s, v10.4h, v24.4h + smlal2 v6.4s, v30.8h, v22.8h + smlsl2 v6.4s, v10.8h, v24.8h + + st1 {v26.2s}, [x1], x3 // store row 2 + + st1 {v28.2s, v29.2s}, [x9] + + + sqrshrun v18.4h, v18.4s, #10 + + mov v10.16b, v20.16b + mov v11.16b, v21.16b + ld1 {v30.4s}, [x7], x6 // load from temp buffer 3 + + sqrshrun v19.4h, v6.4s, #10 + subs x4, x4, #4 + + sqrshrun v30.8b, v30.8h, #5 + + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + mov v12.16b, v8.16b + mov v13.16b, v9.16b + mov v6.16b, v14.16b + mov v7.16b, v15.16b + + urhadd v30.8b, v18.8b , v30.8b + + mov v8.16b, v16.16b + mov v9.16b, v17.16b + mov v14.16b, v28.16b + mov v15.16b, v29.16b + + st1 {v30.2s}, [x1], x3 // store row 3 + + bgt loop_16_lowhalf // looping if height =16 + + +loop_16_highhalf_start: + ld1 {v0.2s, v1.2s}, [x8], x2 + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v6.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v8.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v6.8h, v8.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v8.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x8], x2 + mls v6.8h, v8.8h , v24.8h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v8.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v10.8h, v2.8b, v3.8b + + st1 {v6.4s}, [x10], x6 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v8.8h, v10.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v10.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x8], x2 + mls v8.8h, v10.8h , v24.8h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v10.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v12.8h, v2.8b, v3.8b + + st1 {v8.4s}, [x10], x6 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v10.8h, v12.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v12.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, 
[x8], x2 + mls v10.8h, v12.8h , v24.8h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v12.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v14.8h, v2.8b, v3.8b + + st1 {v10.4s}, [x10], x6 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v12.8h, v14.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v14.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x8], x2 + mls v12.8h, v14.8h , v24.8h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v14.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v16.8h, v2.8b, v3.8b + + st1 {v12.4s}, [x10], x6 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v14.8h, v16.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v16.8h, v1.8b, v4.8b + + mls v14.8h, v16.8h , v24.8h + +loop_16_highhalf: + + ld1 {v0.2s, v1.2s}, [x8], x2 + ext v5.8b, v0.8b , v1.8b , #5 + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v16.8h, v0.8b, v5.8b + + st1 {v14.4s}, [x10], x6 + + uaddl v18.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v16.8h, v18.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + add v28.8h, v8.8h , v14.8h + uaddl v18.8h, v1.8b, v4.8b + add v30.8h, v10.8h , v12.8h + mls v16.8h, v18.8h , v24.8h + ld1 {v0.2s, v1.2s}, [x8], x2 + ext v5.8b, v0.8b , v1.8b , #5 + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v20.8h, v0.8b, v5.8b + + st1 {v16.4s}, [x10], x6 + + saddl v18.4s, v6.4h, v16.4h + + ld1 {v26.4s}, [x11], x6 + + saddl2 v6.4s, v6.8h, v16.8h + + sqrshrun v26.8b, v26.8h, #5 + + smlal v18.4s, v30.4h, v22.4h + smlsl v18.4s, v28.4h, v24.4h + smlal2 v6.4s, v30.8h, v22.8h + smlsl2 v6.4s, v28.8h, v24.8h + uaddl v2.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v20.8h, v2.8h , v22.8h + sqrshrun v18.4h, v18.4s, #10 + ext v1.8b, v0.8b , v1.8b , #1 + sqrshrun v19.4h, v6.4s, #10 + add v28.8h, v10.8h , v16.8h + uaddl v2.8h, v1.8b, v4.8b + add v30.8h, v12.8h , v14.8h + mls v20.8h, v2.8h , v24.8h + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], 
v19.2s[0] + ld1 {v0.2s, v1.2s}, [x8], x2 + + urhadd v26.8b, v18.8b , v26.8b + + ext v5.8b, v0.8b , v1.8b , #5 + ext v2.8b, v0.8b , v1.8b , #2 + + st1 {v20.4s}, [x10], x6 + + saddl v18.4s, v8.4h, v20.4h + saddl2 v6.4s, v8.8h, v20.8h + + ld1 {v8.4s}, [x11], x6 + + + st1 {v26.2s}, [x14], x3 //store row 0 + + smlal v18.4s, v30.4h, v22.4h + smlsl v18.4s, v28.4h, v24.4h + smlal2 v6.4s, v30.8h, v22.8h + smlsl2 v6.4s, v28.8h, v24.8h + sqrshrun v28.8b, v8.8h, #5 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v8.8h, v0.8b, v5.8b + uaddl v2.8h, v2.8b, v3.8b + sqrshrun v18.4h, v18.4s, #10 + ext v4.8b, v0.8b , v1.8b , #4 + sqrshrun v19.4h, v6.4s, #10 + mla v8.8h, v2.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + add v26.8h, v12.8h , v20.8h + uaddl v2.8h, v1.8b, v4.8b + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + add v30.8h, v14.8h , v16.8h + mls v8.8h, v2.8h , v24.8h + ld1 {v0.2s, v1.2s}, [x8], x2 + + urhadd v28.8b, v28.8b , v18.8b + + ext v5.8b, v0.8b , v1.8b , #5 + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + + st1 {v28.2s}, [x14], x3 //store row 1 + + uaddl v28.8h, v0.8b, v5.8b + + st1 {v8.4s}, [x10], x6 + + saddl v18.4s, v10.4h, v8.4h + saddl2 v6.4s, v10.8h, v8.8h + + ld1 {v10.4s}, [x11], x6 + + smlal v18.4s, v30.4h, v22.4h + smlsl v18.4s, v26.4h, v24.4h + smlal2 v6.4s, v30.8h, v22.8h + smlsl2 v6.4s, v26.8h, v24.8h + + sqrshrun v26.8b, v10.8h, #5 + uaddl v2.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v28.8h, v2.8h , v22.8h + sqrshrun v18.4h, v18.4s, #10 + ext v1.8b, v0.8b , v1.8b , #1 + sqrshrun v19.4h, v6.4s, #10 + add v10.8h, v14.8h , v8.8h + uaddl v2.8h, v1.8b, v4.8b + add v30.8h, v16.8h , v20.8h + mls v28.8h, v2.8h , v24.8h + uqxtn v27.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v27.2s[1], v19.2s[0] + + + saddl v18.4s, v12.4h, v28.4h + saddl2 v6.4s, v12.8h, v28.8h + + urhadd v26.8b, v26.8b , v27.8b + + smlal v18.4s, v30.4h, v22.4h + smlsl v18.4s, v10.4h, v24.4h + smlal2 v6.4s, v30.8h, v22.8h + smlsl2 v6.4s, v10.8h, v24.8h + 
+ st1 {v26.2s}, [x14], x3 // store row 2 + + st1 {v28.4s}, [x10] + + sqrshrun v18.4h, v18.4s, #10 + mov v10.16b, v20.16b + mov v11.16b, v21.16b + ld1 {v30.4s}, [x11], x6 + + sqrshrun v19.4h, v6.4s, #10 + subs x12, x12, #4 + + sqrshrun v30.8b, v30.8h, #5 + + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + mov v12.16b, v8.16b + mov v13.16b, v9.16b + mov v6.16b, v14.16b + mov v7.16b, v15.16b + urhadd v30.8b, v18.8b , v30.8b + + mov v8.16b, v16.16b + mov v9.16b, v17.16b + mov v14.16b, v28.16b + mov v15.16b, v29.16b + st1 {v30.2s}, [x14], x3 // store row 3 + + bgt loop_16_highhalf // looping if height = 8 or 16 + b end_func + +loop_8_start: + + movi v22.8h, #0x14 // Filter coeff 20 into Q11 + movi v24.8h, #5 // Filter coeff 5 into Q12 + ld1 {v0.2s, v1.2s}, [x0], x2 // row -2 load for horizontal filter + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v6.8h, v0.8b, v5.8b + + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v8.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v6.8h, v8.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v8.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x0], x2 // row -1 load for horizontal filter + mls v6.8h, v8.8h , v24.8h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v8.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v10.8h, v2.8b, v3.8b + + st1 {v6.4s}, [x9], x6 // store temp buffer 0 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v8.8h, v10.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v10.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x0], x2 // row 0 load for horizontal filter + mls v8.8h, v10.8h , v24.8h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v10.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v12.8h, v2.8b, v3.8b + + st1 {v8.4s}, [x9], x6 // store temp buffer 1 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v10.8h, v12.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v12.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x0], x2 // row 1 load 
for horizontal filter + mls v10.8h, v12.8h , v24.8h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v12.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v14.8h, v2.8b, v3.8b + + st1 {v10.4s}, [x9], x6 // store temp buffer 2 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v12.8h, v14.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v14.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x0], x2 // row 2 load for horizontal filter + mls v12.8h, v14.8h , v24.8h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v14.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v16.8h, v2.8b, v3.8b + + st1 {v12.4s}, [x9], x6 // store temp buffer 3 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v14.8h, v16.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v16.8h, v1.8b, v4.8b + + mls v14.8h, v16.8h , v24.8h +loop_8: + + ld1 {v0.2s, v1.2s}, [x0], x2 // row 3 load for horizontal filter + ext v5.8b, v0.8b , v1.8b , #5 + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v16.8h, v0.8b, v5.8b + + st1 {v14.4s}, [x9], x6 // store temp buffer 4 + + uaddl v18.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v16.8h, v18.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + add v28.8h, v8.8h , v14.8h + uaddl v18.8h, v1.8b, v4.8b + add v30.8h, v10.8h , v12.8h + mls v16.8h, v18.8h , v24.8h + ld1 {v0.2s, v1.2s} , [x0], x2 // row 4 load for hoorizontal filter + ext v5.8b, v0.8b , v1.8b , #5 + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v20.8h, v0.8b, v5.8b + + st1 {v16.4s}, [x9], x6 // store temp buffer x5 + + saddl v18.4s, v6.4h, v16.4h + + ld1 {v26.4s}, [x7], x6 // load from temp buffer 0 + + saddl2 v6.4s, v6.8h, v16.8h + + sqrshrun v26.8b, v26.8h, #5 + + smlal v18.4s, v30.4h, v22.4h + smlsl v18.4s, v28.4h, v24.4h + smlal2 v6.4s, v30.8h, v22.8h + smlsl2 v6.4s, v28.8h, v24.8h + uaddl v2.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v20.8h, v2.8h , v22.8h + sqrshrun v18.4h, v18.4s, #10 + ext v1.8b, v0.8b , 
v1.8b , #1 + sqrshrun v19.4h, v6.4s, #10 + add v28.8h, v10.8h , v16.8h + uaddl v2.8h, v1.8b, v4.8b + add v30.8h, v12.8h , v14.8h + mls v20.8h, v2.8h , v24.8h + + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + ld1 {v0.2s, v1.2s}, [x0], x2 // row 5 load for horizontal filter + + urhadd v26.8b, v18.8b , v26.8b + + ext v5.8b, v0.8b , v1.8b , #5 + ext v2.8b, v0.8b , v1.8b , #2 + + st1 {v20.4s}, [x9], x6 // store temp buffer x6 + + saddl v18.4s, v8.4h, v20.4h + + saddl2 v6.4s, v8.8h, v20.8h + + ld1 {v8.4s}, [x7], x6 //load from temp buffer 1 + + + st1 {v26.2s}, [x1], x3 // store row 0 + + smlal v18.4s, v30.4h, v22.4h + smlsl v18.4s, v28.4h, v24.4h + + + + smlal2 v6.4s, v30.8h, v22.8h + smlsl2 v6.4s, v28.8h, v24.8h + + sqrshrun v28.8b, v8.8h, #5 + + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v8.8h, v0.8b, v5.8b + uaddl v2.8h, v2.8b, v3.8b + sqrshrun v18.4h, v18.4s, #10 + ext v4.8b, v0.8b , v1.8b , #4 + sqrshrun v19.4h, v6.4s, #10 + mla v8.8h, v2.8h , v22.8h + ext v1.8b, v0.8b , v1.8b , #1 + add v26.8h, v12.8h , v20.8h + uaddl v2.8h, v1.8b, v4.8b + + + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + add v30.8h, v14.8h , v16.8h + mls v8.8h, v2.8h , v24.8h + ld1 {v0.2s, v1.2s}, [x0], x2 // row 6 load for horizontal filter + + urhadd v28.8b, v28.8b , v18.8b + + ext v5.8b, v0.8b , v1.8b , #5 + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + + st1 {v28.2s}, [x1], x3 // store row 1 + + uaddl v28.8h, v0.8b, v5.8b + + st1 {v8.4s}, [x9], x6 // store temp buffer x7 + + saddl v18.4s, v10.4h, v8.4h + saddl2 v6.4s, v10.8h, v8.8h + + ld1 {v10.4s}, [x7], x6 // load from temp buffer 2 + + smlal v18.4s, v30.4h, v22.4h + smlsl v18.4s, v26.4h, v24.4h + smlal2 v6.4s, v30.8h, v22.8h + smlsl2 v6.4s, v26.8h, v24.8h + + sqrshrun v26.8b, v10.8h, #5 + uaddl v2.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v28.8h, v2.8h , v22.8h + sqrshrun v18.4h, v18.4s, #10 + ext v1.8b, v0.8b , v1.8b , #1 + sqrshrun v19.4h, v6.4s, #10 + add 
v10.8h, v14.8h , v8.8h + uaddl v2.8h, v1.8b, v4.8b + add v30.8h, v16.8h , v20.8h + mls v28.8h, v2.8h , v24.8h + + uqxtn v27.8b, v18.8h + uqxtn v19.8b, v19.8h + + mov v27.2s[1], v19.2s[0] + + saddl v18.4s, v12.4h, v28.4h + saddl2 v6.4s, v12.8h, v28.8h + + urhadd v26.8b, v26.8b , v27.8b + + smlal v18.4s, v30.4h, v22.4h + smlsl v18.4s, v10.4h, v24.4h + smlal2 v6.4s, v30.8h, v22.8h + smlsl2 v6.4s, v10.8h, v24.8h + + st1 {v26.2s}, [x1], x3 // store row 2 + + st1 {v28.2s, v29.2s}, [x9] + + + sqrshrun v18.4h, v18.4s, #10 + mov v10.16b, v20.16b + mov v11.16b, v21.16b + ld1 {v30.4s}, [x7], x6 // load from temp buffer 3 + + sqrshrun v19.4h, v6.4s, #10 + subs x4, x4, #4 + + sqrshrun v30.8b, v30.8h, #5 + + + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + + mov v12.16b, v8.16b + mov v13.16b, v9.16b + mov v6.16b, v14.16b + mov v7.16b, v15.16b + + urhadd v30.8b, v18.8b , v30.8b + mov v8.16b, v16.16b + mov v9.16b, v17.16b + mov v14.16b, v28.16b + mov v15.16b, v29.16b + st1 {v30.2s}, [x1], x3 // store row 3 + + bgt loop_8 //if height =8 or 16 loop + b end_func + +loop_4_start: + movi v22.8h, #20 // Filter coeff 20 into D22 + movi v23.8h, #5 // Filter coeff 5 into D23 + + ld1 {v0.2s, v1.2s}, [x0], x2 //row -2 load + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v6.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v8.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v6.4h, v8.4h , v22.4h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v8.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x0], x2 // row -1 load + mls v6.4h, v8.4h , v23.4h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v8.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v10.8h, v2.8b, v3.8b + + st1 {v6.2s}, [x9], x6 // store temp buffer 0 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v8.4h, v10.4h , v22.4h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v10.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x0], x2 // row 0 load + mls v8.4h, v10.4h , v23.4h + ext 
v5.8b, v0.8b , v1.8b , #5 + uaddl v10.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v12.8h, v2.8b, v3.8b + + st1 {v8.2s}, [x9], x6 // store temp buffer 1 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v10.4h, v12.4h , v22.4h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v12.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x0], x2 // row 1 load + mls v10.4h, v12.4h , v23.4h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v12.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v14.8h, v2.8b, v3.8b + + st1 {v10.2s}, [x9], x6 // store temp buffer 2 + + ext v4.8b, v0.8b , v1.8b , #4 + mla v12.4h, v14.4h , v22.4h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v14.8h, v1.8b, v4.8b + ld1 {v0.2s, v1.2s}, [x0], x2 // row 2 load + mls v12.4h, v14.4h , v23.4h + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v14.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v16.8h, v2.8b, v3.8b + ext v4.8b, v0.8b , v1.8b , #4 + mla v14.4h, v16.4h , v22.4h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v16.8h, v1.8b, v4.8b + + st1 {v12.2s}, [x9], x6 // store temp buffer 3 + + mls v14.4h, v16.4h , v23.4h + +loop_4: + + ld1 {v0.2s, v1.2s}, [x0], x2 // row 3 load + ext v5.8b, v0.8b , v1.8b , #5 + uaddl v16.8h, v0.8b, v5.8b + ext v2.8b, v0.8b , v1.8b , #2 + ext v3.8b, v0.8b , v1.8b , #3 + uaddl v18.8h, v2.8b, v3.8b + st1 {v14.2s}, [x9], x6 // store temp buffer 4 + ext v4.8b, v0.8b , v1.8b , #4 + mla v16.4h, v18.4h , v22.4h + ext v1.8b, v0.8b , v1.8b , #1 + uaddl v18.8h, v1.8b, v4.8b + add v2.4h, v10.4h , v12.4h + mls v16.4h, v18.4h , v23.4h + add v3.4h, v8.4h , v14.4h + ld1 {v18.2s, v19.2s}, [x0], x2 // row 4 load + ext v25.8b, v18.8b , v19.8b , #5 + uaddl v26.8h, v18.8b, v25.8b + ext v20.8b, v18.8b , v19.8b , #2 + + st1 {v16.2s}, [x9], x6 // store temp buffer 5 + + saddl v0.4s, v6.4h, v16.4h + smlal v0.4s, v2.4h, v22.4h + ext v21.8b, v18.8b , v19.8b , #3 + uaddl v28.8h, v20.8b, v21.8b + ext v24.8b, v18.8b , v19.8b , #4 + smlsl 
v0.4s, v3.4h, v23.4h + mla v26.4h, v28.4h , v22.4h + ext v19.8b, v18.8b , v19.8b , #1 + uaddl v28.8h, v19.8b, v24.8b + add v2.4h, v12.4h , v14.4h + mls v26.4h, v28.4h , v23.4h + sqrshrun v0.4h, v0.4s, #0xa + add v3.4h, v10.4h , v16.4h + ld1 {v18.2s, v19.2s}, [x0], x2 // row 5 load + ext v25.8b, v18.8b , v19.8b , #5 + uqxtn v11.8b, v0.8h + uaddl v28.8h, v18.8b, v25.8b + + st1 {v26.2s}, [x9], x6 // store temp buffer 6 + + //Q3 available here + ld1 {v6.2s}, [x7], x6 // load from temp buffer 0 + ld1 {v7.2s}, [x7], x6 // load from temp buffer 1 + + sqrshrun v9.8b, v6.8h, #5 + sqrshrun v7.8b, v7.8h, #5 + mov v9.2s[1], v7.2s[0] + + ext v20.8b, v18.8b , v19.8b , #2 + + saddl v0.4s, v8.4h, v26.4h + smlal v0.4s, v2.4h, v22.4h + ext v21.8b, v18.8b , v19.8b , #3 + uaddl v6.8h, v20.8b, v21.8b + ext v24.8b, v18.8b , v19.8b , #4 + smlsl v0.4s, v3.4h, v23.4h + mla v28.4h, v6.4h , v22.4h + ext v19.8b, v18.8b , v19.8b , #1 + uaddl v6.8h, v19.8b, v24.8b + add v2.4h, v14.4h , v16.4h + mls v28.4h, v6.4h , v23.4h + sqrshrun v0.4h, v0.4s, #0xa + add v3.4h, v12.4h , v26.4h + ld1 {v18.2s, v19.2s}, [x0], x2 // row 6 load + ext v25.8b, v18.8b , v19.8b , #5 + uqxtn v13.8b, v0.8h + + trn1 v11.2s, v11.2s, v13.2s + trn2 v13.2s, v11.2s, v13.2s + saddl v0.4s, v10.4h, v28.4h + urhadd v9.8b, v9.8b , v11.8b + + st1 {v28.2s}, [x9], x6 // store temp buffer 7 + + smlal v0.4s, v2.4h, v22.4h + uaddl v30.8h, v18.8b, v25.8b + + st1 {v9.s}[0], [x1], x3 // store row 0 + + ext v20.8b, v18.8b , v19.8b , #2 + + st1 {v9.s}[1], [x1], x3 // store row 1 + + ext v21.8b, v18.8b , v19.8b , #3 + smlsl v0.4s, v3.4h, v23.4h + uaddl v8.8h, v20.8b, v21.8b + ext v24.8b, v18.8b , v19.8b , #4 + mla v30.4h, v8.4h , v22.4h + ext v19.8b, v18.8b , v19.8b , #1 + uaddl v8.8h, v19.8b, v24.8b + sqrshrun v0.4h, v0.4s, #0xa + add v2.4h, v16.4h , v26.4h + mls v30.4h, v8.4h , v23.4h + uqxtn v4.8b, v0.8h + + add v3.4h, v14.4h , v28.4h + + + saddl v0.4s, v12.4h, v30.4h + + st1 {v30.2s}, [x9] + + smlal v0.4s, v2.4h, v22.4h + + ld1 {v8.2s}, 
[x7], x6 // load from temp buffer 2 + ld1 {v9.2s}, [x7], x6 // load from temp buffer 3 + smlsl v0.4s, v3.4h, v23.4h + subs x4, x4, #4 + + sqrshrun v10.8b, v8.8h, #5 + sqrshrun v9.8b, v9.8h, #5 + mov v10.2s[1], v9.2s[0] + + mov v12.8b, v28.8b + + sqrshrun v0.4h, v0.4s, #0xa + mov v6.8b, v14.8b + mov v8.8b, v16.8b + + uqxtn v5.8b, v0.8h + + trn1 v4.2s, v4.2s, v5.2s + trn2 v5.2s, v4.2s, v5.2s + urhadd v4.8b, v4.8b , v10.8b + mov v10.8b, v26.8b + mov v14.8b, v30.8b + + st1 {v4.s}[0], [x1], x3 // store row 2 + st1 {v4.s}[1], [x1], x3 // store row 3 + + bgt loop_4 + +end_func: + //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s new file mode 100755 index 0000000..39e3253 --- /dev/null +++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s @@ -0,0 +1,597 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_inter_pred_luma_horz_qpel_av8.s +//* +//* @brief +//* Contains the function definition for inter prediction horizontal quarter-pel interpolation. +//* +//* @author +//* Mohit +//* +//* @par List of Functions: +//* +//* - ih264_inter_pred_luma_horz_qpel_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +///* All the functions here are replicated from ih264_inter_pred_filters.c +// + +///** +///** +//******************************************************************************* +//* +//* @brief +//* Quarter-pel inter prediction luma filter for horizontal input +//* +//* @par Description: +//* Applies a 6-tap horizontal filter. The output is clipped to 8 bits. +//* sec 8.4.2.2.1 titled "Luma sample interpolation process" +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @param[in] pu1_tmp: temporary buffer: UNUSED in this function +//* +//* @param[in] dydx: x and y reference offset for qpel calculations.
+//* @returns +//* +// @remarks +//* None +//* +//******************************************************************************* +//*/ + +//void ih264_inter_pred_luma_horz ( +// UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ht, +// WORD32 wd, +// UWORD8* pu1_tmp, +// UWORD32 dydx) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ht +// x5 => wd +// x7 => dydx + +.text +.p2align 2 +.include "ih264_neon_macros.s" + + + + + .global ih264_inter_pred_luma_horz_qpel_av8 + +ih264_inter_pred_luma_horz_qpel_av8: + + + push_v_regs + stp x19, x20, [sp, #-16]! + + + and x7, x7, #3 //Finds x-offset + add x7, x0, x7, lsr #1 //pu1_src + (x_offset>>1) + sub x0, x0, #2 //pu1_src-2 + sub x14, x4, #16 + movi v0.16b, #5 //filter coeff + subs x12, x5, #8 //if wd=8 branch to loop_8 + movi v1.16b, #20 //filter coeff + + beq loop_8 + + subs x12, x5, #4 //if wd=4 branch to loop_4 + beq loop_4 + +loop_16: //when wd=16 + //// Processing row0 and row1 + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row0 + add x14, x14, #1 //for checking loop + ext v31.8b, v2.8b , v3.8b , #5 + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row1 + ext v30.8b, v3.8b , v4.8b , #5 + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row0) + ext v28.8b, v5.8b , v6.8b , #5 + uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row0) + ext v27.8b, v6.8b , v7.8b , #5 + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row1) + ext v31.8b, v2.8b , v3.8b , #2 + uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row1) + ext v30.8b, v3.8b , v4.8b , #2 + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0) + ext v28.8b, v5.8b , v6.8b , #2 + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0) + ext v27.8b, v6.8b , v7.8b , #2 + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row1) + ext v31.8b, v2.8b , v3.8b , #3 + umlal v16.8h, v27.8b, 
v1.8b //// a0 + a5 + 20a2 (column2,row1) + ext v30.8b, v3.8b , v4.8b , #3 + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0) + ext v28.8b, v5.8b , v6.8b , #3 + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0) + ext v27.8b, v6.8b , v7.8b , #3 + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1) + ext v31.8b, v2.8b , v3.8b , #1 + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row1) + ext v30.8b, v3.8b , v4.8b , #1 + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + ext v28.8b, v5.8b , v6.8b , #1 + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + ext v27.8b, v6.8b , v7.8b , #1 + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + ext v31.8b, v2.8b , v3.8b , #4 + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1) + ext v30.8b, v3.8b , v4.8b , #4 + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + ext v28.8b, v5.8b , v6.8b , #4 + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + ext v27.8b, v6.8b , v7.8b , #4 + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row2 + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1) + + ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row0) + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row3 + sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + ext v31.8b, v2.8b , v3.8b , #5 + urhadd v20.16b, v12.16b , v20.16b //Interpolation step for qpel calculation + urhadd v21.16b, v13.16b , v21.16b //Interpolation step for qpel calculation + + sqrshrun v18.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 
(column1,row1) + st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row0 + ext v30.8b, v3.8b , v4.8b , #5 + sqrshrun v19.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1) + + + +//// Processing row2 and row3 + ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row1) + ext v28.8b, v5.8b , v6.8b , #5 + urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation + urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation + + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row2) + st1 {v18.8b, v19.8b}, [x1], x3 ////Store dest row1 + uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row2) + ext v27.8b, v6.8b , v7.8b , #5 + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row3) + ext v31.8b, v2.8b , v3.8b , #2 + uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row3) + ext v30.8b, v3.8b , v4.8b , #2 + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row2) + ext v27.8b, v6.8b , v7.8b , #2 + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row2) + ext v28.8b, v5.8b , v6.8b , #2 + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row3) + ext v31.8b, v2.8b , v3.8b , #3 + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row3) + ext v30.8b, v3.8b , v4.8b , #3 + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row2) + ext v28.8b, v5.8b , v6.8b , #3 + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row2) + ext v27.8b, v6.8b , v7.8b , #3 + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row3) + ext v31.8b, v2.8b , v3.8b , #1 + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row3) + ext v30.8b, v3.8b , v4.8b , #1 + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row2) + ext v28.8b, v5.8b , v6.8b , #1 + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row2) + ext v27.8b, v6.8b , v7.8b , #1 + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row3) + 
ext v31.8b, v2.8b , v3.8b , #4 + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row3) + ext v30.8b, v3.8b , v4.8b , #4 + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row2) + ext v28.8b, v5.8b , v6.8b , #4 + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row2) + ext v27.8b, v6.8b , v7.8b , #4 + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row3) + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row4 + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row3) + + ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row2) + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row2) + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row5 + sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row2) + ext v31.8b, v2.8b , v3.8b , #5 + urhadd v20.16b, v12.16b , v20.16b //Interpolation step for qpel calculation + urhadd v21.16b, v13.16b , v21.16b //Interpolation step for qpel calculation + + sqrshrun v18.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row3) + ext v30.8b, v3.8b , v4.8b , #5 + st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row2 + sqrshrun v19.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row3) + ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row3) + +//// Processing row4 and row5 + ext v28.8b, v5.8b , v6.8b , #5 + urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation + urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation + + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row4) + st1 {v18.8b, v19.8b}, [x1], x3 ////Store dest row3 + uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row4) + ext v27.8b, v6.8b , v7.8b , #5 + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row5) + ext v31.8b, v2.8b , v3.8b , #2 + uaddl 
v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row5) + ext v30.8b, v3.8b , v4.8b , #2 + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row4) + ext v27.8b, v6.8b , v7.8b , #2 + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row4) + ext v28.8b, v5.8b , v6.8b , #2 + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row5) + ext v31.8b, v2.8b , v3.8b , #3 + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row5) + ext v30.8b, v3.8b , v4.8b , #3 + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row4) + ext v28.8b, v5.8b , v6.8b , #3 + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row4) + ext v27.8b, v6.8b , v7.8b , #3 + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row5) + ext v31.8b, v2.8b , v3.8b , #1 + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row5) + ext v30.8b, v3.8b , v4.8b , #1 + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row4) + ext v28.8b, v5.8b , v6.8b , #1 + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row4) + ext v27.8b, v6.8b , v7.8b , #1 + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row4) + ext v31.8b, v2.8b , v3.8b , #4 + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row5) + ext v30.8b, v3.8b , v4.8b , #4 + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row4) + ext v28.8b, v5.8b , v6.8b , #4 + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row4) + ext v27.8b, v6.8b , v7.8b , #4 + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row5) + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row6 + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row5) + ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row4) + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row4) + ld1 {v5.8b, v6.8b, v7.8b}, [x0], 
x2 //// Load row7 + sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row4) + ext v31.8b, v2.8b , v3.8b , #5 + urhadd v20.16b, v12.16b , v20.16b //Interpolation step for qpel calculation + urhadd v21.16b, v13.16b , v21.16b //Interpolation step for qpel calculation + + sqrshrun v18.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row5) + st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row4 + ext v30.8b, v3.8b , v4.8b , #5 + sqrshrun v19.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row5) + ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row5) + + + //// Processing row6 and row7 + + ext v28.8b, v5.8b , v6.8b , #5 + urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation + urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation + + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row6) + st1 {v18.8b, v19.8b}, [x1], x3 ////Store dest row5 + uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row6) + ext v27.8b, v6.8b , v7.8b , #5 + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row7) + ext v31.8b, v2.8b , v3.8b , #2 + uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row7) + ext v30.8b, v3.8b , v4.8b , #2 + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row6) + ext v27.8b, v6.8b , v7.8b , #2 + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row6) + ext v28.8b, v5.8b , v6.8b , #2 + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row7) + ext v31.8b, v2.8b , v3.8b , #3 + umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row7) + ext v30.8b, v3.8b , v4.8b , #3 + umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row6) + ext v28.8b, v5.8b , v6.8b , #3 + umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row6) + ext v27.8b, v6.8b , v7.8b , #3 + umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row7) + ext v31.8b, v2.8b , v3.8b , #1 + umlal v16.8h, 
v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row7) + ext v30.8b, v3.8b , v4.8b , #1 + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row6) + ext v28.8b, v5.8b , v6.8b , #1 + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row6) + ext v27.8b, v6.8b , v7.8b , #1 + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row6) + ext v31.8b, v2.8b , v3.8b , #4 + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row7) + ext v30.8b, v3.8b , v4.8b , #4 + umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row6) + ext v28.8b, v5.8b , v6.8b , #4 + umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row6) + ext v27.8b, v6.8b , v7.8b , #4 + ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row6) + sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row6) + umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row7) + sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row6) + umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row7) + urhadd v20.16b, v12.16b , v20.16b //Interpolation step for qpel calculation + urhadd v21.16b, v13.16b , v21.16b //Interpolation step for qpel calculation + + ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row7) + sqrshrun v18.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row7) + st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row6 + sqrshrun v19.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row7) + urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation + urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation + + subs x12, x14, #1 // if height==16 - looping + st1 {v18.8b, v19.8b}, [x1], x3 ////Store dest row7 + + + + beq loop_16 + b end_func + +loop_8: +//// 
Processing row0 and row1 + + ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row1 + add x14, x14, #1 //for checking loop + ext v28.8b, v5.8b , v6.8b , #5 + ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row0 + ext v25.8b, v5.8b , v6.8b , #2 + ext v31.8b, v2.8b , v3.8b , #5 + ext v24.8b, v5.8b , v6.8b , #3 + ext v23.8b, v5.8b , v6.8b , #1 + ext v22.8b, v5.8b , v6.8b , #4 + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row1) + ext v29.8b, v2.8b , v3.8b , #3 + umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row1) + umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1) + umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + ext v30.8b, v2.8b , v3.8b , #2 + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row0) + ext v27.8b, v2.8b , v3.8b , #1 + ext v26.8b, v2.8b , v3.8b , #4 + ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row2 + umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0) + umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0) + umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row3 + sqrshrun v18.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + + //// Processing row2 and row3 + ext v28.8b, v5.8b , v6.8b , #5 + ext v25.8b, v5.8b , v6.8b , #2 + ext v31.8b, v2.8b , v3.8b , #5 + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row3) + ld1 {v12.2s}, [x7], x2 //Load value for interpolation (column1,row0) + ld1 {v13.2s}, [x7], x2 //Load value for interpolation (column1,row1) + ext v24.8b, v5.8b , v6.8b , #3 + ext v23.8b, v5.8b , v6.8b , #1 + sqrshrun v19.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1) + ext v22.8b, v5.8b , v6.8b , #4 + ext v29.8b, v2.8b , v3.8b , #3 + umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 
(column1,row3) + umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row3) + umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row3) + umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row3) + urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation + urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation + + st1 {v18.8b}, [x1], x3 ////Store dest row0 + st1 {v19.8b}, [x1], x3 ////Store dest row1 + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row2) + ext v30.8b, v2.8b , v3.8b , #2 + ext v27.8b, v2.8b , v3.8b , #1 + ext v26.8b, v2.8b , v3.8b , #4 + ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row4 + umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row2) + umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column1,row2) + umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row2) + umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row2) + ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row5 + subs x9, x4, #4 + sqrshrun v19.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row3) + ld1 {v12.2s}, [x7], x2 //Load value for interpolation (column1,row2) + ld1 {v13.2s}, [x7], x2 //Load value for interpolation (column1,row3) + ext v28.8b, v5.8b , v6.8b , #5 + ext v25.8b, v5.8b , v6.8b , #2 + ext v31.8b, v2.8b , v3.8b , #5 + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row5) + ext v24.8b, v5.8b , v6.8b , #3 + sqrshrun v18.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row2) + ext v22.8b, v5.8b , v6.8b , #4 + ext v29.8b, v2.8b , v3.8b , #3 + urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation + urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation + + st1 {v18.8b}, [x1], x3 ////Store dest row2 + ext v30.8b, v2.8b , v3.8b , #2 + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row4) + st1 {v19.8b}, [x1], x3 ////Store dest row3 + beq end_func 
// Branch if height==4 + +//// Processing row4 and row5 + ext v23.8b, v5.8b , v6.8b , #1 + umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row5) + umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row5) + umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row5) + umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row5) + ext v27.8b, v2.8b , v3.8b , #1 + ext v26.8b, v2.8b , v3.8b , #4 + ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row6 + umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row4) + umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column1,row4) + umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row4) + umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row4) + sqrshrun v19.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row5) + ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row7 + ext v31.8b, v2.8b , v3.8b , #5 + ext v28.8b, v5.8b , v6.8b , #5 + ld1 {v12.2s}, [x7], x2 //Load value for interpolation (column1,row4) + ld1 {v13.2s}, [x7], x2 //Load value for interpolation (column1,row5) + ext v25.8b, v5.8b , v6.8b , #2 + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row7) + ext v24.8b, v5.8b , v6.8b , #3 + ext v22.8b, v5.8b , v6.8b , #4 + sqrshrun v18.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row4) + ext v29.8b, v2.8b , v3.8b , #3 + ext v30.8b, v2.8b , v3.8b , #2 + urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation + urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation + + st1 {v18.8b}, [x1], x3 ////Store dest row4 + ext v27.8b, v2.8b , v3.8b , #1 + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row6) + ext v26.8b, v2.8b , v3.8b , #4 + umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row6) + umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column1,row6) + umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 
(column1,row6) + umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row6) + //// Processing row6 and row7 + st1 {v19.8b}, [x1], x3 ////Store dest row5 + ext v23.8b, v5.8b , v6.8b , #1 + umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row7) + umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row7) + umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row7) + umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row7) + ld1 {v12.2s}, [x7], x2 //Load value for interpolation (column1,row6) + ld1 {v13.2s}, [x7], x2 //Load value for interpolation (column1,row7) + sqrshrun v18.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row6) + subs x12, x14, #1 + sqrshrun v19.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row7) + urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation + urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation + + st1 {v18.8b}, [x1], x3 ////Store dest row6 + st1 {v19.8b}, [x1], x3 ////Store dest row7 + + beq loop_8 //looping if height ==16 + + b end_func + +loop_4: + ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row1 + ext v28.8b, v5.8b , v6.8b , #5 + ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row0 + ext v25.8b, v5.8b , v6.8b , #2 + ext v31.8b, v2.8b , v3.8b , #5 + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row1) + ext v24.8b, v5.8b , v6.8b , #3 + ext v23.8b, v5.8b , v6.8b , #1 + ext v22.8b, v5.8b , v6.8b , #4 + ext v29.8b, v2.8b , v3.8b , #3 + umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row1) + umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1) + umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row0) + ext v30.8b, v2.8b , v3.8b , #2 + ld1 {v12.2s}, [x7], x2 //Load value for interpolation 
(column1,row0) + ld1 {v13.2s}, [x7], x2 //Load value for interpolation (column1,row1) + ext v27.8b, v2.8b , v3.8b , #1 + ext v26.8b, v2.8b , v3.8b , #4 + ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row2 + umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0) + umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0) + umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row3 + ext v28.8b, v5.8b , v6.8b , #5 + ext v25.8b, v5.8b , v6.8b , #2 + sqrshrun v18.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + ext v31.8b, v2.8b , v3.8b , #5 + ext v24.8b, v5.8b , v6.8b , #3 + + ext v23.8b, v5.8b , v6.8b , #1 + ext v22.8b, v5.8b , v6.8b , #4 + ext v29.8b, v2.8b , v3.8b , #3 + sqrshrun v19.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1) + ext v30.8b, v2.8b , v3.8b , #2 + ext v27.8b, v2.8b , v3.8b , #1 + + //// Processing row2 and row3 + urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation + urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation + + st1 {v18.s}[0], [x1], x3 ////Store dest row0 + st1 {v19.s}[0], [x1], x3 ////Store dest row1 + uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row3) + ext v26.8b, v2.8b , v3.8b , #4 + ld1 {v12.2s}, [x7], x2 //Load value for interpolation (column1,row2) + ld1 {v13.2s}, [x7], x2 //Load value for interpolation (column1,row3) + + umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row3) + umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row3) + umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row3) + umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row3) + uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row2) + umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row2) + umlal v8.8h, v30.8b, 
v1.8b //// a0 + a5 + 20a2 (column1,row2) + umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row2) + umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row2) + sqrshrun v19.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row3) + sqrshrun v18.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row2) + urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation + urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation + + st1 {v18.s}[0], [x1], x3 ////Store dest row2 + subs x4, x4, #8 // Loop if height =8 + st1 {v19.s}[0], [x1], x3 ////Store dest row3 + + beq loop_4 + +end_func: + + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s new file mode 100755 index 0000000..b1e4866 --- /dev/null +++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s @@ -0,0 +1,910 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s +//* +//* @brief +//* Contains function definitions for inter prediction interpolation. +//* +//* @author +//* Mohit +//* +//* @par List of Functions: +//* +//* - ih264_inter_pred_luma_horz_qpel_vert_hpel_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +///* All the functions here are replicated from ih264_inter_pred_filters.c +// + +///** +///** +///** +//******************************************************************************* +//* +//* @brief +//* This function implements a two stage cascaded six tap filter. It +//* applies the six tap filter in the vertical direction on the +//* predictor values, followed by applying the same filter in the +//* horizontal direction on the output of the first stage. It then averages +//* the output of the 1st stage and the final stage to obtain the quarter +//* pel values. The six tap filtering operation is described in sec 8.4.2.2.1 +//* titled "Luma sample interpolation process". +//* +//* @par Description: +//* This function is called to obtain pixels lying at the following +//* location (1/4,1/2) or (3/4,1/2). The function interpolates +//* the predictors first in the vertical direction and then in the +//* horizontal direction to output the (1/2,1/2). It then averages +//* the output of the 1st stage and the (1/2,1/2) value to obtain (1/4,1/2) +//* or (3/4,1/2) depending on the offset.
+//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @param[in] pu1_tmp: temporary buffer +//* +//* @param[in] dydx: x and y reference offset for qpel calculations +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/; + +//void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd,, +// WORD32 dst_strd, +// WORD32 ht, +// WORD32 wd, +// UWORD8* pu1_tmp, +// UWORD32 dydx) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ht +// x5 => wd +// x6 => dydx +// x9 => *pu1_tmp + +.text +.p2align 2 +.include "ih264_neon_macros.s" + + + + .global ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 + +ih264_inter_pred_luma_horz_qpel_vert_hpel_av8: + + // STMFD sp!, {x4-x12, x14} //store register values to stack + push_v_regs + stp x19, x20, [sp, #-16]! 
+ + sub x0, x0, x2, lsl #1 //pu1_src-2*src_strd + sub x0, x0, #2 //pu1_src-2 + mov x9, x6 + mov x6, x7 + + and x6, x6, #2 // dydx & 0x3 followed by dydx>>1 and dydx<<1 + + add x7, x9, #4 + add x6, x7, x6 // pi16_pred1_temp += (x_offset>>1) + + movi v26.8h, #0x14 // Filter coeff 20 into Q13 + movi v24.8h, #0x5 // Filter coeff 5 into Q12 + movi v27.8h, #0x14 // Filter coeff 20 into Q13 + movi v25.8h, #0x5 // Filter coeff 5 into Q12 + mov x7, #0x20 + mov x8, #0x30 + subs x12, x5, #4 //if wd=4 branch to loop_4 + beq loop_4_start + + subs x12, x5, #8 //if wd=8 branch to loop_8 + beq loop_8_start + + //when wd=16 + movi v28.8h, #0x14 // Filter coeff 20 into Q13 + movi v30.8h, #0x5 // Filter coeff 5 into Q12 + sub x2, x2, #16 + ld1 {v0.2s, v1.2s}, [x0], #16 // Vector load from src[0_0] + ld1 {v12.2s}, [x0], x2 // Vector load from src[0_0] + ld1 {v2.2s, v3.2s}, [x0], #16 // Vector load from src[1_0] + ld1 {v13.2s}, [x0], x2 // Vector load from src[1_0] + ld1 {v4.2s, v5.2s}, [x0], #16 // Vector load from src[2_0] + ld1 {v14.2s}, [x0], x2 // Vector load from src[2_0] + ld1 {v6.2s, v7.2s}, [x0], #16 // Vector load from src[3_0] + ld1 {v15.2s}, [x0], x2 // Vector load from src[3_0] + ld1 {v8.2s, v9.2s}, [x0], #16 // Vector load from src[4_0] + ld1 {v16.2s}, [x0], x2 // Vector load from src[4_0] + +loop_16: + + ld1 {v10.2s, v11.2s}, [x0], #16 // Vector load from src[5_0] + ld1 {v17.2s}, [x0], x2 // Vector load from src[5_0] + + + uaddl v20.8h, v4.8b, v6.8b + uaddl v18.8h, v0.8b, v10.8b + uaddl v22.8h, v2.8b, v8.8b + mla v18.8h, v20.8h , v28.8h + uaddl v24.8h, v5.8b, v7.8b + uaddl v20.8h, v1.8b, v11.8b + uaddl v26.8h, v3.8b, v9.8b + mla v20.8h, v24.8h , v28.8h + uaddl v24.8h, v14.8b, v15.8b + mls v18.8h, v22.8h , v30.8h + uaddl v22.8h, v12.8b, v17.8b + mls v20.8h, v26.8h , v30.8h + uaddl v26.8h, v13.8b, v16.8b + mla v22.8h, v24.8h , v28.8h + mls v22.8h, v26.8h , v30.8h + st1 {v18.4s }, [x9], #16 + st1 {v20.4s}, [x9], #16 + ext v24.16b, v18.16b , v20.16b , #4 + ext v26.16b, 
v18.16b , v20.16b , #6 + st1 {v22.4s}, [x9] + ext v22.16b, v18.16b , v20.16b , #10 + add v0.8h, v24.8h , v26.8h + ext v24.16b, v18.16b , v20.16b , #2 + ext v26.16b, v18.16b , v20.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v18.4h, v22.4h + smlal v26.4s, v0.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v22.4s, v18.8h, v22.8h + smlal2 v22.4s, v0.8h, v28.8h + smlsl2 v22.4s, v24.8h, v30.8h + + sqrshrun v18.4h, v26.4s, #10 + sqrshrun v19.4h, v22.4s, #10 + ld1 {v22.4s}, [x9], #16 + + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + ext v24.16b, v20.16b , v22.16b , #4 + ext v26.16b, v20.16b , v22.16b , #6 + ext v0.16b, v20.16b , v22.16b , #10 + st1 {v18.2s}, [x1] + add v18.8h, v24.8h , v26.8h + ext v24.16b, v20.16b , v22.16b , #2 + ext v26.16b, v20.16b , v22.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v0.4h, v20.4h + smlal v26.4s, v18.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v22.4s, v0.8h, v20.8h + smlal2 v22.4s, v18.8h, v28.8h + smlsl2 v22.4s, v24.8h, v30.8h + + sqrshrun v19.4h, v26.4s, #10 + sqrshrun v18.4h, v22.4s, #10 + + uaddl v24.8h, v7.8b, v9.8b + ld1 {v20.4s}, [x6], #16 + ld1 {v22.4s}, [x6], x7 + + + uqxtn v19.8b, v19.8h + uqxtn v18.8b, v18.8h + mov v19.2s[1], v18.2s[0] + + ld1 {v18.2s}, [x1] + sqrshrun v20.8b, v20.8h, #5 + sqrshrun v21.8b, v22.8h, #5 + uaddl v22.8h, v4.8b, v10.8b + ld1 {v0.2s, v1.2s}, [x0], #16 // Vector load from src[6_0] + urhadd v18.16b, v18.16b , v20.16b + urhadd v19.16b, v19.16b , v21.16b + + ld1 {v12.2s}, [x0], x2 // Vector load from src[6_0] + uaddl v20.8h, v6.8b, v8.8b + uaddl v26.8h, v5.8b, v11.8b + st1 {v18.2s, v19.2s}, [x1], x3 // store row 0 + + +//ROW_2 + + + uaddl v18.8h, v2.8b, v0.8b + + mla v18.8h, v20.8h , v28.8h + + uaddl v20.8h, v3.8b, v1.8b + + mla v20.8h, v24.8h , v28.8h + uaddl v24.8h, v15.8b, v16.8b + mls v18.8h, v22.8h , v30.8h + uaddl v22.8h, v13.8b, v12.8b + mls v20.8h, v26.8h , v30.8h + uaddl v26.8h, v14.8b, v17.8b + mla v22.8h, v24.8h , v28.8h + mls 
v22.8h, v26.8h , v30.8h + st1 {v18.4s}, [x9], #16 + st1 {v20.4s}, [x9], #16 + ext v24.16b, v18.16b , v20.16b , #4 + ext v26.16b, v18.16b , v20.16b , #6 + st1 {v22.4s}, [x9] + ext v22.16b, v18.16b , v20.16b , #10 + add v2.8h, v24.8h , v26.8h + ext v24.16b, v18.16b , v20.16b , #2 + ext v26.16b, v18.16b , v20.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v18.4h, v22.4h + smlal v26.4s, v2.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v22.4s, v18.8h, v22.8h + smlal2 v22.4s, v2.8h, v28.8h + smlsl2 v22.4s, v24.8h, v30.8h + + sqrshrun v18.4h, v26.4s, #10 + sqrshrun v19.4h, v22.4s, #10 + + ld1 {v22.4s}, [x9], #16 + + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + ext v24.16b, v20.16b , v22.16b , #4 + ext v26.16b, v20.16b , v22.16b , #6 + ext v2.16b, v20.16b , v22.16b , #10 + st1 {v18.2s}, [x1] + add v18.8h, v24.8h , v26.8h + ext v24.16b, v20.16b , v22.16b , #2 + ext v26.16b, v20.16b , v22.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v2.4h, v20.4h + smlal v26.4s, v18.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v22.4s, v2.8h, v20.8h + smlal2 v22.4s, v18.8h, v28.8h + smlsl2 v22.4s, v24.8h, v30.8h + + sqrshrun v19.4h, v26.4s, #10 + sqrshrun v18.4h, v22.4s, #10 + uaddl v24.8h, v9.8b, v11.8b + ld1 {v20.4s}, [x6], #16 + ld1 {v22.4s}, [x6], x7 + uqxtn v19.8b, v19.8h + uqxtn v18.8b, v18.8h + mov v19.2s[1], v18.2s[0] + ld1 {v18.4s}, [x1] + sqrshrun v20.8b, v20.8h, #5 + sqrshrun v21.8b, v22.8h, #5 + + uaddl v22.8h, v6.8b, v0.8b + ld1 {v2.2s, v3.2s}, [x0], #16 // Vector load from src[7_0] + + urhadd v18.16b, v18.16b , v20.16b + urhadd v19.16b, v19.16b , v21.16b + ld1 {v13.2s}, [x0], x2 // Vector load from src[7_0] + uaddl v20.8h, v8.8b, v10.8b + uaddl v26.8h, v7.8b, v1.8b + st1 {v18.2s, v19.2s}, [x1], x3 // store row 1 + +//ROW_3 + + + uaddl v18.8h, v4.8b, v2.8b + + mla v18.8h, v20.8h , v28.8h + + uaddl v20.8h, v5.8b, v3.8b + + mla v20.8h, v24.8h , v28.8h + uaddl v24.8h, v16.8b, v17.8b + mls v18.8h, v22.8h , v30.8h + 
uaddl v22.8h, v14.8b, v13.8b + mls v20.8h, v26.8h , v30.8h + uaddl v26.8h, v15.8b, v12.8b + mla v22.8h, v24.8h , v28.8h + mls v22.8h, v26.8h , v30.8h + st1 {v18.4s}, [x9], #16 + st1 {v20.4s}, [x9], #16 + ext v24.16b, v18.16b , v20.16b , #4 + ext v26.16b, v18.16b , v20.16b , #6 + st1 {v22.4s}, [x9] + ext v22.16b, v18.16b , v20.16b , #10 + add v4.8h, v24.8h , v26.8h + ext v24.16b, v18.16b , v20.16b , #2 + ext v26.16b, v18.16b , v20.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v18.4h, v22.4h + smlal v26.4s, v4.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v22.4s, v18.8h, v22.8h + smlal2 v22.4s, v4.8h, v28.8h + smlsl2 v22.4s, v24.8h, v30.8h + + sqrshrun v18.4h, v26.4s, #10 + sqrshrun v19.4h, v22.4s, #10 + ld1 {v22.4s}, [x9], #16 + + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + + ext v24.16b, v20.16b , v22.16b , #4 + ext v26.16b, v20.16b , v22.16b , #6 + ext v4.16b, v20.16b , v22.16b , #10 + st1 {v18.2s}, [x1] + add v18.8h, v24.8h , v26.8h + ext v24.16b, v20.16b , v22.16b , #2 + ext v26.16b, v20.16b , v22.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v4.4h, v20.4h + smlal v26.4s, v18.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v22.4s, v4.8h, v20.8h + smlal2 v22.4s, v18.8h, v28.8h + smlsl2 v22.4s, v24.8h, v30.8h + + sqrshrun v19.4h, v26.4s, #10 + sqrshrun v18.4h, v22.4s, #10 + + uaddl v24.8h, v11.8b, v1.8b + ld1 {v20.4s}, [x6], #16 + ld1 {v22.4s}, [x6], x7 + + uqxtn v19.8b, v19.8h + uqxtn v18.8b, v18.8h + mov v19.2s[1], v18.2s[0] + + ld1 {v18.2s}, [x1] + sqrshrun v20.8b, v20.8h, #5 + sqrshrun v21.8b, v22.8h, #5 + + uaddl v22.8h, v8.8b, v2.8b + ld1 {v4.2s, v5.2s}, [x0], #16 // Vector load from src[8_0] + + urhadd v18.16b, v18.16b , v20.16b + urhadd v19.16b, v19.16b , v21.16b + ld1 {v14.2s}, [x0], x2 // Vector load from src[8_0] + uaddl v20.8h, v10.8b, v0.8b + uaddl v26.8h, v9.8b, v3.8b + st1 {v18.2s, v19.2s}, [x1], x3 // store row 2 + + +//ROW_4 + + uaddl v18.8h, v6.8b, v4.8b + + mla v18.8h, v20.8h , 
v28.8h + + uaddl v20.8h, v7.8b, v5.8b + + mla v20.8h, v24.8h , v28.8h + uaddl v24.8h, v17.8b, v12.8b + mls v18.8h, v22.8h , v30.8h + uaddl v22.8h, v15.8b, v14.8b + mls v20.8h, v26.8h , v30.8h + uaddl v26.8h, v16.8b, v13.8b + mla v22.8h, v24.8h , v28.8h + mls v22.8h, v26.8h , v30.8h + st1 {v18.4s}, [x9], #16 + st1 {v20.4s}, [x9], #16 + ext v24.16b, v18.16b , v20.16b , #4 + ext v26.16b, v18.16b , v20.16b , #6 + st1 {v22.4s}, [x9] + ext v22.16b, v18.16b , v20.16b , #10 + add v6.8h, v24.8h , v26.8h + ext v24.16b, v18.16b , v20.16b , #2 + ext v26.16b, v18.16b , v20.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v18.4h, v22.4h + smlal v26.4s, v6.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v22.4s, v18.8h, v22.8h + smlal2 v22.4s, v6.8h, v28.8h + smlsl2 v22.4s, v24.8h, v30.8h + + sqrshrun v18.4h, v26.4s, #10 + sqrshrun v19.4h, v22.4s, #10 + ld1 {v22.4s}, [x9], #16 + uqxtn v18.8b, v18.8h + uqxtn v19.8b, v19.8h + mov v18.2s[1], v19.2s[0] + + + ext v24.16b, v20.16b , v22.16b , #4 + ext v26.16b, v20.16b , v22.16b , #6 + ext v6.16b, v20.16b , v22.16b , #10 + st1 {v18.2s}, [x1] + add v18.8h, v24.8h , v26.8h + ext v24.16b, v20.16b , v22.16b , #2 + ext v26.16b, v20.16b , v22.16b , #8 + add v24.8h, v24.8h , v26.8h + + saddl v26.4s, v6.4h, v20.4h + smlal v26.4s, v18.4h, v28.4h + smlsl v26.4s, v24.4h, v30.4h + + saddl2 v22.4s, v6.8h, v20.8h + smlal2 v22.4s, v18.8h, v28.8h + smlsl2 v22.4s, v24.8h, v30.8h + + mov v6.16b, v2.16b + mov v7.16b, v3.16b + + mov v2.16b, v10.16b + mov v3.16b, v11.16b + + subs x4, x4, #4 + sqrshrun v19.4h, v26.4s, #10 + sqrshrun v18.4h, v22.4s, #10 + mov v10.16b, v0.16b + mov v11.16b, v1.16b + + mov v24.8b, v14.8b + + mov v14.16b, v12.16b + mov v15.16b, v13.16b + + + uqxtn v19.8b, v19.8h + uqxtn v18.8b, v18.8h + mov v19.2s[1], v18.2s[0] + + ld1 {v20.4s}, [x6], #16 + ld1 {v22.4s}, [x6], x7 + ld1 {v18.2s}, [x1] + sqrshrun v20.8b, v20.8h, #5 + sqrshrun v21.8b, v22.8h, #5 + + mov v0.16b, v8.16b + mov v1.16b, v9.16b + + mov v8.16b, v4.16b + mov 
v9.16b, v5.16b + + mov v12.16b, v16.16b + mov v13.16b, v17.16b + urhadd v18.16b, v18.16b , v20.16b + urhadd v19.16b, v19.16b , v21.16b + + mov v4.16b, v10.16b + mov v5.16b, v11.16b + + mov v16.8b, v24.8b + st1 {v18.2s, v19.2s}, [x1], x3 // store row 3 + + bgt loop_16 // looping if height =16 + b end_func + +loop_8_start: + ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[0_0] + ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[1_0] + ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[2_0] + ld1 {v6.2s, v7.2s}, [x0], x2 // Vector load from src[3_0] + ld1 {v8.2s, v9.2s}, [x0], x2 // Vector load from src[4_0] + +loop_8: + + ld1 {v10.2s, v11.2s}, [x0], x2 // Vector load from src[5_0] + uaddl v14.8h, v4.8b, v6.8b + uaddl v12.8h, v0.8b, v10.8b + uaddl v16.8h, v2.8b, v8.8b + mla v12.8h, v14.8h , v26.8h + uaddl v18.8h, v5.8b, v7.8b + uaddl v14.8h, v1.8b, v11.8b + uaddl v22.8h, v3.8b, v9.8b + mla v14.8h, v18.8h , v26.8h + mls v12.8h, v16.8h , v24.8h + ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[6_0] + uaddl v16.8h, v6.8b, v8.8b + mls v14.8h, v22.8h , v24.8h + uaddl v28.8h, v2.8b, v0.8b + st1 {v12.4s}, [x9], #16 // store row 0 to temp buffer: col 0 + ext v22.16b, v12.16b , v14.16b , #10 + uaddl v18.8h, v4.8b, v10.8b + mla v28.8h, v16.8h , v26.8h + saddl v30.4s, v12.4h, v22.4h + st1 {v14.4s}, [x9], x7 // store row 0 to temp buffer: col 1 + saddl2 v22.4s, v12.8h, v22.8h + ext v16.16b, v12.16b , v14.16b , #4 + mls v28.8h, v18.8h , v24.8h + ext v18.16b, v12.16b , v14.16b , #6 + ext v20.16b, v12.16b , v14.16b , #8 + ext v14.16b, v12.16b , v14.16b , #2 + add v16.8h, v16.8h , v18.8h + add v18.8h, v14.8h , v20.8h + uaddl v20.8h, v7.8b, v9.8b + smlal v30.4s, v16.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal2 v22.4s, v16.8h, v26.8h + smlsl2 v22.4s, v18.8h, v24.8h + uaddl v14.8h, v3.8b, v1.8b + st1 {v28.4s}, [x9], #16 // store row 1 to temp buffer: col 0 + mla v14.8h, v20.8h , v26.8h + sqrshrun v12.4h, v30.4s, #10 + uaddl v16.8h, v5.8b, v11.8b + sqrshrun v13.4h, 
v22.4s, #10 + mls v14.8h, v16.8h , v24.8h + ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[7_0] + uqxtn v25.8b, v12.8h + uqxtn v13.8b, v13.8h + mov v25.2s[1], v13.2s[0] + uaddl v16.8h, v8.8b, v10.8b + + + ext v22.16b, v28.16b , v14.16b , #10 + uaddl v20.8h, v4.8b, v2.8b + saddl v30.4s, v28.4h, v22.4h + mla v20.8h, v16.8h , v26.8h + st1 {v14.4s}, [x9], x7 // store row 1 to temp buffer: col 1 + saddl2 v22.4s, v28.8h, v22.8h + ext v16.16b, v28.16b , v14.16b , #4 + ext v18.16b, v28.16b , v14.16b , #6 + ext v12.16b, v28.16b , v14.16b , #8 + ext v14.16b, v28.16b , v14.16b , #2 + add v16.8h, v16.8h , v18.8h + add v18.8h, v12.8h , v14.8h + ld1 {v14.4s, v15.4s}, [x6], x8 // load row 0 from temp buffer + smlal v30.4s, v16.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal2 v22.4s, v16.8h, v26.8h + smlsl2 v22.4s, v18.8h, v24.8h + sqrshrun v14.8b, v14.8h, #0x5 + ld1 {v28.4s, v29.4s}, [x6], x8 // load row 1 from temp buffer + uaddl v18.8h, v6.8b, v0.8b + sqrshrun v16.4h, v30.4s, #10 + sqrshrun v15.8b, v28.8h, #0x5 + sqrshrun v17.4h, v22.4s, #10 + + mov v12.8b, v25.8b + mov v25.8b, v24.8b + + uaddl v28.8h, v9.8b, v11.8b + uqxtn v13.8b, v16.8h + uqxtn v17.8b, v17.8h + mov v13.2s[1], v17.2s[0] + + urhadd v12.16b, v12.16b , v14.16b + urhadd v13.16b, v13.16b , v15.16b + uaddl v14.8h, v5.8b, v3.8b + uaddl v22.8h, v7.8b, v1.8b + mls v20.8h, v18.8h , v24.8h + st1 {v12.2s}, [x1], x3 // store row 0 + mla v14.8h, v28.8h , v26.8h + ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[8_0] + uaddl v30.8h, v10.8b, v0.8b + uaddl v28.8h, v6.8b, v4.8b + mls v14.8h, v22.8h , v24.8h + st1 {v13.2s}, [x1], x3 // store row 1 + mla v28.8h, v30.8h , v26.8h + st1 {v20.4s}, [x9], #16 // store row 2 to temp buffer: col 0 + ext v22.16b, v20.16b , v14.16b , #10 + saddl v30.4s, v20.4h, v22.4h + st1 {v14.2s, v15.2s}, [x9], x7 // store row 2 to temp buffer: col 0 + saddl2 v22.4s, v20.8h, v22.8h + ext v16.16b, v20.16b , v14.16b , #4 + ext v18.16b, v20.16b , v14.16b , #6 + ext v12.16b, v20.16b , v14.16b , 
#8 + ext v14.16b, v20.16b , v14.16b , #2 + add v16.8h, v16.8h , v18.8h + add v18.8h, v14.8h , v12.8h + uaddl v20.8h, v8.8b, v2.8b + smlal v30.4s, v16.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal2 v22.4s, v16.8h, v26.8h + smlsl2 v22.4s, v18.8h, v24.8h + uaddl v18.8h, v11.8b, v1.8b + uaddl v16.8h, v7.8b, v5.8b + sqrshrun v12.4h, v30.4s, #10 + uaddl v30.8h, v9.8b, v3.8b + mla v16.8h, v18.8h , v26.8h + sqrshrun v13.4h, v22.4s, #10 + mls v28.8h, v20.8h , v24.8h + ld1 {v14.4s, v15.4s}, [x6], x8 // load row 2 from temp buffer + mls v16.8h, v30.8h , v24.8h + uqxtn v27.8b, v12.8h + uqxtn v13.8b, v13.8h + mov v27.2s[1], v13.2s[0] + + sqrshrun v14.8b, v14.8h, #5 + ext v22.16b, v28.16b , v16.16b , #10 + st1 {v28.4s}, [x9], #16 // store row 3 to temp buffer: col 0 + saddl v30.4s, v28.4h, v22.4h + st1 {v16.2s, v17.2s}, [x9], x7 // store row 3 to temp buffer: col 1 + saddl2 v22.4s, v28.8h, v22.8h + ext v12.16b, v28.16b , v16.16b , #4 + ext v18.16b, v28.16b , v16.16b , #6 + ext v20.16b, v28.16b , v16.16b , #8 + ext v28.16b, v28.16b , v16.16b , #2 + add v12.8h, v12.8h , v18.8h + add v18.8h, v28.8h , v20.8h + ld1 {v16.4s, v17.4s}, [x6], x8 // load row 3 from temp buffer + smlal v30.4s, v12.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal2 v22.4s, v12.8h, v26.8h + smlsl2 v22.4s, v18.8h, v24.8h + sqrshrun v15.8b, v16.8h, #0x5 + + mov v12.8b, v27.8b + mov v27.8b, v26.8b + + sqrshrun v16.4h, v30.4s, #10 + + mov v6.16b, v2.16b + mov v7.16b, v3.16b + + sqrshrun v17.4h, v22.4s, #10 + + mov v2.16b, v10.16b + mov v3.16b, v11.16b + + mov v10.16b, v0.16b + mov v11.16b, v1.16b + + subs x4, x4, #4 + uqxtn v13.8b, v16.8h + uqxtn v17.8b, v17.8h + mov v13.2s[1], v17.2s[0] + urhadd v12.16b, v12.16b , v14.16b + urhadd v13.16b, v13.16b , v15.16b + + mov v0.16b, v8.16b + mov v1.16b, v9.16b + + mov v8.16b, v4.16b + mov v9.16b, v5.16b + + mov v4.16b, v10.16b + mov v5.16b, v11.16b + + st1 {v12.2s}, [x1], x3 // store row 2 + st1 {v13.2s}, [x1], x3 // store row 3 + + bgt loop_8 //if height =8 loop + b 
end_func + +loop_4_start: + ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[0_0] + ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[1_0] + ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[2_0] + ld1 {v6.2s, v7.2s}, [x0], x2 // Vector load from src[3_0] + ld1 {v8.2s, v9.2s}, [x0], x2 // Vector load from src[4_0] + +loop_4: + ld1 {v10.2s, v11.2s}, [x0], x2 // Vector load from src[5_0] + uaddl v14.8h, v4.8b, v6.8b // temp1 = src[2_0] + src[3_0] + uaddl v12.8h, v0.8b, v10.8b // temp = src[0_0] + src[5_0] + uaddl v16.8h, v2.8b, v8.8b // temp2 = src[1_0] + src[4_0] + mla v12.8h, v14.8h , v26.8h // temp += temp1 * 20 + uaddl v18.8h, v5.8b, v7.8b // temp1 = src[2_0] + src[3_0] + uaddl v14.8h, v1.8b, v11.8b // temp = src[0_0] + src[5_0] + uaddl v22.8h, v3.8b, v9.8b // temp2 = src[1_0] + src[4_0] + mla v14.8h, v18.8h , v26.8h // temp += temp1 * 20 + mls v12.8h, v16.8h , v24.8h // temp -= temp2 * 5 + ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[6_0] + uaddl v16.8h, v6.8b, v8.8b + mls v14.8h, v22.8h , v24.8h // temp -= temp2 * 5 + //Q6 and Q7 have filtered values + uaddl v28.8h, v2.8b, v0.8b + st1 {v12.4s}, [x9], #16 // store row 0 to temp buffer: col 0 + ext v22.16b, v12.16b , v14.16b , #10 + uaddl v18.8h, v4.8b, v10.8b + mla v28.8h, v16.8h , v26.8h + saddl v30.4s, v12.4h, v22.4h + st1 {v14.4s}, [x9], x7 // store row 0 to temp buffer: col 1 + saddl v22.4s, v13.4h, v23.4h + ext v16.16b, v12.16b , v14.16b , #4 + mls v28.8h, v18.8h , v24.8h + ext v18.16b, v12.16b , v14.16b , #6 + ext v20.16b, v12.16b , v14.16b , #8 + ext v14.16b, v12.16b , v14.16b , #2 + add v16.8h, v16.8h , v18.8h + add v18.8h, v14.8h , v20.8h + uaddl v20.8h, v7.8b, v9.8b + smlal v30.4s, v16.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal v22.4s, v17.4h, v26.4h + smlsl v22.4s, v19.4h, v24.4h + uaddl v14.8h, v3.8b, v1.8b + st1 {v28.4s}, [x9], #16 // store row 1 to temp buffer: col 0 + mla v14.8h, v20.8h , v26.8h + sqrshrun v12.4h, v30.4s, #10 + uaddl v16.8h, v5.8b, v11.8b + sqrshrun 
v13.4h, v22.4s, #10 + mls v14.8h, v16.8h , v24.8h + ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[7_0] + uqxtn v25.8b, v12.8h + uaddl v16.8h, v8.8b, v10.8b + + ext v22.16b, v28.16b , v14.16b , #10 + uaddl v20.8h, v4.8b, v2.8b + saddl v30.4s, v28.4h, v22.4h + mla v20.8h, v16.8h , v26.8h + st1 {v14.4s}, [x9], x7 // store row 1 to temp buffer: col 1 + saddl v22.4s, v29.4h, v23.4h + ext v16.16b, v28.16b , v14.16b , #4 + ext v18.16b, v28.16b , v14.16b , #6 + ext v12.16b, v28.16b , v14.16b , #8 + ext v14.16b, v28.16b , v14.16b , #2 + add v16.8h, v16.8h , v18.8h + add v18.8h, v12.8h , v14.8h + ld1 {v14.2s}, [x6], x8 //load row 0 from temp buffer + smlal v30.4s, v16.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal v22.4s, v17.4h, v26.4h + smlsl v22.4s, v19.4h, v24.4h + sqrshrun v14.8b, v14.8h, #0x5 + ld1 {v28.2s}, [x6], x8 //load row 1 from temp buffer + uaddl v18.8h, v6.8b, v0.8b + sqrshrun v16.4h, v30.4s, #10 + sqrshrun v15.8b, v28.8h, #0x5 + sqrshrun v17.4h, v22.4s, #10 + + mov v12.8b, v25.8b + mov v25.8b, v24.8b + + uaddl v28.8h, v9.8b, v11.8b + uqxtn v13.8b, v16.8h + + urhadd v12.16b, v12.16b , v14.16b + urhadd v13.16b, v13.16b , v15.16b + + uaddl v14.8h, v5.8b, v3.8b + uaddl v22.8h, v7.8b, v1.8b + mls v20.8h, v18.8h , v24.8h + st1 {v12.s}[0], [x1], x3 // store row 0 + mla v14.8h, v28.8h , v26.8h + ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[8_0] + uaddl v30.8h, v10.8b, v0.8b + uaddl v28.8h, v6.8b, v4.8b + mls v14.8h, v22.8h , v24.8h + st1 {v13.s}[0], [x1], x3 //store row 1 + mla v28.8h, v30.8h , v26.8h + st1 {v20.4s}, [x9], #16 // store row 2 to temp buffer: col 0 + ext v22.16b, v20.16b , v14.16b , #10 + saddl v30.4s, v20.4h, v22.4h + st1 {v14.4s}, [x9], x7 // store row 2 to temp buffer: col 1 + saddl v22.4s, v21.4h, v23.4h + ext v16.16b, v20.16b , v14.16b , #4 + ext v18.16b, v20.16b , v14.16b , #6 + ext v12.16b, v20.16b , v14.16b , #8 + ext v14.16b, v20.16b , v14.16b , #2 + add v16.8h, v16.8h , v18.8h + add v18.8h, v14.8h , v12.8h + uaddl v20.8h, 
v8.8b, v2.8b + smlal v30.4s, v16.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal v22.4s, v17.4h, v26.4h + smlsl v22.4s, v19.4h, v24.4h + uaddl v18.8h, v11.8b, v1.8b + uaddl v16.8h, v7.8b, v5.8b + sqrshrun v12.4h, v30.4s, #10 + uaddl v30.8h, v9.8b, v3.8b + mla v16.8h, v18.8h , v26.8h + sqrshrun v13.4h, v22.4s, #10 + mls v28.8h, v20.8h , v24.8h + ld1 {v14.2s}, [x6], x8 //load row 3 from temp buffer + mls v16.8h, v30.8h , v24.8h + uqxtn v27.8b, v12.8h + sqrshrun v14.8b, v14.8h, #5 + ext v22.16b, v28.16b , v16.16b , #10 + st1 {v28.4s}, [x9], #16 // store row 3 to temp buffer: col 0 + saddl v30.4s, v28.4h, v22.4h + st1 {v16.4s}, [x9], x7 // store row 3 to temp buffer: col 1 + saddl v22.4s, v29.4h, v23.4h + ext v12.16b, v28.16b , v16.16b , #4 + ext v18.16b, v28.16b , v16.16b , #6 + ext v20.16b, v28.16b , v16.16b , #8 + ext v28.16b, v28.16b , v16.16b , #2 + add v12.8h, v12.8h , v18.8h + add v18.8h, v28.8h , v20.8h + ld1 {v16.2s}, [x6], x8 //load row 4 from temp buffer + smlal v30.4s, v12.4h, v26.4h + smlsl v30.4s, v18.4h, v24.4h + smlal v22.4s, v13.4h, v26.4h + smlsl v22.4s, v19.4h, v24.4h + sqrshrun v15.8b, v16.8h, #0x5 + + mov v12.8b, v27.8b + mov v27.8b, v26.8b + + sqrshrun v16.4h, v30.4s, #10 + + mov v6.16b, v2.16b + mov v7.16b, v3.16b + + sqrshrun v17.4h, v22.4s, #10 + + mov v2.16b, v10.16b + mov v3.16b, v11.16b + + mov v10.16b, v0.16b + mov v11.16b, v1.16b + + subs x4, x4, #4 + uqxtn v13.8b, v16.8h + urhadd v12.16b, v12.16b , v14.16b + urhadd v13.16b, v13.16b , v15.16b + + mov v0.16b, v8.16b + mov v1.16b, v9.16b + + mov v8.16b, v4.16b + mov v9.16b, v5.16b + + + mov v4.16b, v10.16b + mov v5.16b, v11.16b + + + st1 {v12.s}[0], [x1], x3 // store row 2 + st1 {v13.s}[0], [x1], x3 // store row 3 + + bgt loop_4 + +end_func: + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s new 
file mode 100755 index 0000000..ab663d0 --- /dev/null +++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s @@ -0,0 +1,958 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s +//* +//* @brief +//* Contains function definitions for inter prediction interpolation. +//* +//* @author +//* Mohit +//* +//* @par List of Functions: +//* +//* - ih264_inter_pred_luma_horz_qpel_vert_qpel_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +///* All the functions here are replicated from ih264_inter_pred_filters.c +// + +///** +///** +///** +//******************************************************************************* +//* +//* @brief +//* This function implements two six tap filters. It +//* applies the six tap filter in the horizontal direction on the +//* predictor values, then applies the same filter in the +//* vertical direction on the predictor values. 
+//* It then averages these
+//* two outputs to obtain quarter pel values in horizontal and vertical direction.
+//* The six tap filtering operation is described in sec 8.4.2.2.1 titled
+//* "Luma sample interpolation process"
+//*
+//* @par Description:
+//* This function is called to obtain pixels lying at the following
+//* location (1/4,1/4) or (3/4,1/4) or (1/4,3/4) or (3/4,3/4).
+//* For every output row the function computes a vertical half-pel value and
+//* a horizontal half-pel value with the six tap filter (1, -5, 20, 20, -5, 1),
+//* rounds each with a right shift by 5, and stores their rounded average.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @param[in] pu1_tmp: temporary buffer (not used by this implementation)
+//*
+//* @param[in] dydx: x and y reference offset for qpel calculations.
+//* Only bits [3:0] are read:
+//*   vertical filter source   = pu1_src + ((dydx & 3) >> 1)
+//*   horizontal filter source = pu1_src + ((dydx & 12) >> 3) * src_strd
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* wd == 4 and wd == 8 take dedicated paths; any other wd takes the 16 wide path.
+//* The 16 wide path writes 8 rows per iteration and exits only when the
+//* remaining row count reaches exactly zero, so ht must be a multiple of 8.
+//* The 8 wide path writes rows in groups of 4.
+//* The 4 wide path writes 4 rows when ht == 4 and 8 rows otherwise.
+//* Each horizontal filter row is fetched with a 16 byte load.
+//*
+//*******************************************************************************
+//*/;
+
+//void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ht,
+// WORD32 wd,
+// UWORD8* pu1_tmp,
+// UWORD32 dydx)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ht
+// x5 => wd
+// x6 => *pu1_tmp (unused on entry; reused as pu1_pred_horz)
+// x7 => dydx (reused as pu1_pred_vert)
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+    .global ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
+
+ih264_inter_pred_luma_horz_qpel_vert_qpel_av8:
+
+    push_v_regs
+    stp       x19, x20, [sp, #-16]!
+    // The WORD32 arguments arrive in w2-w5; AAPCS64 leaves bits [63:32] of
+    // those registers unspecified. Widen them before they are used as 64-bit
+    // address offsets and loop counters.
+    sxtw      x2, w2                      // src_strd
+    sxtw      x3, w3                      // dst_strd
+    sxtw      x4, w4                      // ht
+    sxtw      x5, w5                      // wd
+    mov       x6, x7                      // dydx; only bits [3:0] survive the masks below
+    and       x7, x6, #3
+    add       x7, x0, x7, lsr #1          //pu1_pred_vert = pu1_src + (x_offset>>1)
+
+    and       x6, x6, #12                 //Finds y-offset
+    lsr       x6, x6, #3                  //(dydx & 12)>>3 = y_offset>>1
+    mul       x6, x2, x6
+    add       x6, x0, x6                  //pu1_pred_horz = pu1_src + (y_offset>>1)*src_strd
+    sub       x7, x7, x2, lsl #1          //pu1_pred_vert-2*src_strd
+    sub       x6, x6, #2                  //pu1_pred_horz-2
+    movi      v30.8b, #20                 // Filter coeff 20
+    movi      v31.8b, #5                  // Filter coeff 5
+
+    subs      x12, x5, #4                 //if wd=4 branch to loop_4
+    beq       loop_4_start
+    subs      x12, x5, #8                 //if wd=8 branch to loop_8
+    beq       loop_8_start
+
+    // wd == 16 path: v0-v9 hold vertical source rows 0..4 (two 8 byte halves each)
+    ld1       {v0.2s, v1.2s}, [x7], x2    // Vector load from src[0_0]
+    ld1       {v2.2s, v3.2s}, [x7], x2    // Vector load from src[1_0]
+
+    ld1       {v4.2s, v5.2s}, [x7], x2    // Vector load from src[2_0]
+    ld1       {v6.2s, v7.2s}, [x7], x2    // Vector load from src[3_0]
+    ld1       {v8.2s, v9.2s}, [x7], x2    // Vector load from src[4_0]
+    add       x11, x6, #8                 // horizontal source for the right 8 columns
+loop_16:
+    ld1       {v10.2s, v11.2s}, [x7], x2  // Vector load from src[5_0]
+    ld1       {v18.2s, v19.2s}, [x6], x2  // horz row0, col 0
+    uaddl     v24.8h, v0.8b, v10.8b
+    umlal     v24.8h, v4.8b, v30.8b
+    umlal     v24.8h, v6.8b, v30.8b
+    umlsl     v24.8h, v2.8b, v31.8b
+    umlsl     v24.8h, v8.8b, v31.8b
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+    sqrshrun  v26.8b, v24.8h, #5
+    uaddl     v28.8h, v18.8b, v23.8b
+    umlal     v28.8h, v20.8b, v30.8b
+    umlal     v28.8h, v21.8b, v30.8b
+    umlsl     v28.8h, v19.8b, v31.8b
+    umlsl     v28.8h, v22.8b, v31.8b
+    ld1       {v18.2s, v19.2s}, [x11], x2 // horz row 0, col 1
+    uaddl     v24.8h, v1.8b, v11.8b
+    umlal     v24.8h, v5.8b, v30.8b
+    umlal     v24.8h, v7.8b, v30.8b
+    umlsl     v24.8h, v3.8b, v31.8b
+    umlsl     v24.8h, v9.8b, v31.8b
+    sqrshrun  v28.8b, v28.8h, #5
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+
+    sqrshrun  v27.8b, v24.8h, #5
+    ld1       {v12.2s, v13.2s}, [x7], x2  // src[6_0]
+
+    uaddl     v24.8h, v18.8b, v23.8b
+    umlal     v24.8h, v20.8b, v30.8b
+    umlal     v24.8h, v21.8b, v30.8b
+    umlsl     v24.8h, v19.8b, v31.8b
+    umlsl     v24.8h, v22.8b, v31.8b
+
+    uaddl     v16.8h, v2.8b, v12.8b
+    umlal     v16.8h, v6.8b, v30.8b
+    umlal     v16.8h, v8.8b, v30.8b
+    umlsl     v16.8h, v4.8b, v31.8b
+    umlsl     v16.8h, v10.8b, v31.8b
+
+    sqrshrun  v29.8b, v24.8h, #5
+    ld1       {v18.2s, v19.2s}, [x6], x2  // horz row 1, col 0
+
+    uaddl     v24.8h, v3.8b, v13.8b
+    umlal     v24.8h, v7.8b, v30.8b
+    umlal     v24.8h, v9.8b, v30.8b
+    umlsl     v24.8h, v5.8b, v31.8b
+    umlsl     v24.8h, v11.8b, v31.8b
+    urhadd    v28.16b, v28.16b, v26.16b
+    urhadd    v29.16b, v29.16b, v27.16b
+    sqrshrun  v26.8b, v16.8h, #5
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    st1       {v28.2s, v29.2s}, [x1], x3  // store row 0
+    ext       v19.8b, v18.8b, v19.8b, #1
+
+    sqrshrun  v27.8b, v24.8h, #5
+
+    uaddl     v28.8h, v18.8b, v23.8b
+    umlal     v28.8h, v20.8b, v30.8b
+    umlal     v28.8h, v21.8b, v30.8b
+    umlsl     v28.8h, v19.8b, v31.8b
+    umlsl     v28.8h, v22.8b, v31.8b
+
+    ld1       {v18.2s, v19.2s}, [x11], x2 // horz row 1, col 1
+    ld1       {v14.2s, v15.2s}, [x7], x2  // src[7_0]
+
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+
+    sqrshrun  v28.8b, v28.8h, #5
+    uaddl     v24.8h, v18.8b, v23.8b
+    umlal     v24.8h, v20.8b, v30.8b
+    umlal     v24.8h, v21.8b, v30.8b
+    umlsl     v24.8h, v19.8b, v31.8b
+    umlsl     v24.8h, v22.8b, v31.8b
+
+    ld1       {v18.2s, v19.2s}, [x6], x2  // horz row 2, col 0
+    uaddl     v16.8h, v4.8b, v14.8b
+    umlal     v16.8h, v8.8b, v30.8b
+    umlal     v16.8h, v10.8b, v30.8b
+    umlsl     v16.8h, v6.8b, v31.8b
+    umlsl     v16.8h, v12.8b, v31.8b
+
+    sqrshrun  v29.8b, v24.8h, #5
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+    urhadd    v28.16b, v28.16b, v26.16b
+    urhadd    v29.16b, v29.16b, v27.16b
+    sqrshrun  v26.8b, v16.8h, #5
+
+    uaddl     v24.8h, v5.8b, v15.8b
+    umlal     v24.8h, v9.8b, v30.8b
+    umlal     v24.8h, v11.8b, v30.8b
+    umlsl     v24.8h, v7.8b, v31.8b
+    umlsl     v24.8h, v13.8b, v31.8b
+
+    st1       {v28.2s, v29.2s}, [x1], x3  // store row 1
+
+    uaddl     v28.8h, v18.8b, v23.8b
+    umlal     v28.8h, v20.8b, v30.8b
+    umlal     v28.8h, v21.8b, v30.8b
+    umlsl     v28.8h, v19.8b, v31.8b
+    umlsl     v28.8h, v22.8b, v31.8b
+
+    ld1       {v18.2s, v19.2s}, [x11], x2 // horz row 2, col 1
+    sqrshrun  v27.8b, v24.8h, #5
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+
+    sqrshrun  v28.8b, v28.8h, #5
+    ld1       {v16.2s, v17.2s}, [x7], x2  // src[8_0]
+    uaddl     v24.8h, v18.8b, v23.8b
+    umlal     v24.8h, v20.8b, v30.8b
+    umlal     v24.8h, v21.8b, v30.8b
+    umlsl     v24.8h, v19.8b, v31.8b
+    umlsl     v24.8h, v22.8b, v31.8b
+
+    ld1       {v18.2s, v19.2s}, [x6], x2  // horz row 3, col 0
+    uaddl     v0.8h, v6.8b, v16.8b
+    umlal     v0.8h, v10.8b, v30.8b
+    umlal     v0.8h, v12.8b, v30.8b
+    umlsl     v0.8h, v8.8b, v31.8b
+    umlsl     v0.8h, v14.8b, v31.8b
+
+    sqrshrun  v29.8b, v24.8h, #5
+
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    urhadd    v28.16b, v28.16b, v26.16b
+    urhadd    v29.16b, v29.16b, v27.16b
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+    sqrshrun  v26.8b, v0.8h, #5
+    st1       {v28.2s, v29.2s}, [x1], x3  // store row 2
+
+    uaddl     v24.8h, v18.8b, v23.8b
+    umlal     v24.8h, v20.8b, v30.8b
+    umlal     v24.8h, v21.8b, v30.8b
+    umlsl     v24.8h, v19.8b, v31.8b
+    umlsl     v24.8h, v22.8b, v31.8b
+
+    ld1       {v18.2s, v19.2s}, [x11], x2 // horz row 3, col 1
+
+    uaddl     v0.8h, v7.8b, v17.8b
+    umlal     v0.8h, v11.8b, v30.8b
+    umlal     v0.8h, v13.8b, v30.8b
+    umlsl     v0.8h, v9.8b, v31.8b
+    umlsl     v0.8h, v15.8b, v31.8b
+
+    sqrshrun  v28.8b, v24.8h, #5
+
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+
+    sqrshrun  v27.8b, v0.8h, #5
+
+    uaddl     v24.8h, v18.8b, v23.8b
+    umlal     v24.8h, v20.8b, v30.8b
+    umlal     v24.8h, v21.8b, v30.8b
+    umlsl     v24.8h, v19.8b, v31.8b
+    umlsl     v24.8h, v22.8b, v31.8b
+
+    // slide the vertical window down by four rows
+    mov       v0.16b, v8.16b
+    mov       v1.16b, v9.16b
+
+    mov       v2.16b, v10.16b
+    mov       v3.16b, v11.16b
+
+    mov       v4.16b, v12.16b
+    mov       v5.16b, v13.16b
+
+    mov       v6.16b, v14.16b
+    mov       v7.16b, v15.16b
+
+    mov       v8.16b, v16.16b
+    mov       v9.16b, v17.16b
+
+    sqrshrun  v29.8b, v24.8h, #5
+    urhadd    v28.16b, v28.16b, v26.16b
+    urhadd    v29.16b, v29.16b, v27.16b
+    st1       {v28.2s, v29.2s}, [x1], x3  // store row 3
+
+    ld1       {v10.2s, v11.2s}, [x7], x2  // Vector load from src[9_0]
+    ld1       {v18.2s, v19.2s}, [x6], x2  // horz row4, col 0
+    uaddl     v24.8h, v0.8b, v10.8b
+    umlal     v24.8h, v4.8b, v30.8b
+    umlal     v24.8h, v6.8b, v30.8b
+    umlsl     v24.8h, v2.8b, v31.8b
+    umlsl     v24.8h, v8.8b, v31.8b
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+    sqrshrun  v26.8b, v24.8h, #5
+    uaddl     v28.8h, v18.8b, v23.8b
+    umlal     v28.8h, v20.8b, v30.8b
+    umlal     v28.8h, v21.8b, v30.8b
+    umlsl     v28.8h, v19.8b, v31.8b
+    umlsl     v28.8h, v22.8b, v31.8b
+    ld1       {v18.2s, v19.2s}, [x11], x2 // horz row 4, col 1
+    uaddl     v24.8h, v1.8b, v11.8b
+    umlal     v24.8h, v5.8b, v30.8b
+    umlal     v24.8h, v7.8b, v30.8b
+    umlsl     v24.8h, v3.8b, v31.8b
+    umlsl     v24.8h, v9.8b, v31.8b
+    sqrshrun  v28.8b, v28.8h, #5
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+
+    sqrshrun  v27.8b, v24.8h, #5
+    ld1       {v12.2s, v13.2s}, [x7], x2  // src[10_0]
+    uaddl     v24.8h, v18.8b, v23.8b
+    umlal     v24.8h, v20.8b, v30.8b
+    umlal     v24.8h, v21.8b, v30.8b
+    umlsl     v24.8h, v19.8b, v31.8b
+    umlsl     v24.8h, v22.8b, v31.8b
+    uaddl     v16.8h, v2.8b, v12.8b
+    umlal     v16.8h, v6.8b, v30.8b
+    umlal     v16.8h, v8.8b, v30.8b
+    umlsl     v16.8h, v4.8b, v31.8b
+    umlsl     v16.8h, v10.8b, v31.8b
+    sqrshrun  v29.8b, v24.8h, #5
+    ld1       {v18.2s, v19.2s}, [x6], x2  // horz row 5, col 0
+    uaddl     v24.8h, v3.8b, v13.8b
+    umlal     v24.8h, v7.8b, v30.8b
+    umlal     v24.8h, v9.8b, v30.8b
+    umlsl     v24.8h, v5.8b, v31.8b
+    umlsl     v24.8h, v11.8b, v31.8b
+    urhadd    v28.16b, v28.16b, v26.16b
+    urhadd    v29.16b, v29.16b, v27.16b
+    sqrshrun  v26.8b, v16.8h, #5
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    st1       {v28.2s, v29.2s}, [x1], x3  // store row 4
+    ext       v19.8b, v18.8b, v19.8b, #1
+
+    sqrshrun  v27.8b, v24.8h, #5
+
+    uaddl     v28.8h, v18.8b, v23.8b
+    umlal     v28.8h, v20.8b, v30.8b
+    umlal     v28.8h, v21.8b, v30.8b
+    umlsl     v28.8h, v19.8b, v31.8b
+    umlsl     v28.8h, v22.8b, v31.8b
+
+    ld1       {v18.2s, v19.2s}, [x11], x2 // horz row 5, col 1
+    ld1       {v14.2s, v15.2s}, [x7], x2  // src[11_0]
+
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+
+    sqrshrun  v28.8b, v28.8h, #5
+    uaddl     v24.8h, v18.8b, v23.8b
+    umlal     v24.8h, v20.8b, v30.8b
+    umlal     v24.8h, v21.8b, v30.8b
+    umlsl     v24.8h, v19.8b, v31.8b
+    umlsl     v24.8h, v22.8b, v31.8b
+
+    ld1       {v18.2s, v19.2s}, [x6], x2  // horz row 6, col 0
+    uaddl     v16.8h, v4.8b, v14.8b
+    umlal     v16.8h, v8.8b, v30.8b
+    umlal     v16.8h, v10.8b, v30.8b
+    umlsl     v16.8h, v6.8b, v31.8b
+    umlsl     v16.8h, v12.8b, v31.8b
+
+    sqrshrun  v29.8b, v24.8h, #5
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+    urhadd    v28.16b, v28.16b, v26.16b
+    urhadd    v29.16b, v29.16b, v27.16b
+    sqrshrun  v26.8b, v16.8h, #5
+
+    uaddl     v24.8h, v5.8b, v15.8b
+    umlal     v24.8h, v9.8b, v30.8b
+    umlal     v24.8h, v11.8b, v30.8b
+    umlsl     v24.8h, v7.8b, v31.8b
+    umlsl     v24.8h, v13.8b, v31.8b
+
+    st1       {v28.2s, v29.2s}, [x1], x3  // store row 5
+
+    uaddl     v28.8h, v18.8b, v23.8b
+    umlal     v28.8h, v20.8b, v30.8b
+    umlal     v28.8h, v21.8b, v30.8b
+    umlsl     v28.8h, v19.8b, v31.8b
+    umlsl     v28.8h, v22.8b, v31.8b
+
+    ld1       {v18.2s, v19.2s}, [x11], x2 // horz row 6, col 1
+    sqrshrun  v27.8b, v24.8h, #5
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+
+    sqrshrun  v28.8b, v28.8h, #5
+    ld1       {v16.2s, v17.2s}, [x7], x2  // src[12_0]
+    uaddl     v24.8h, v18.8b, v23.8b
+    umlal     v24.8h, v20.8b, v30.8b
+    umlal     v24.8h, v21.8b, v30.8b
+    umlsl     v24.8h, v19.8b, v31.8b
+    umlsl     v24.8h, v22.8b, v31.8b
+
+    ld1       {v18.2s, v19.2s}, [x6], x2  // horz row 7, col 0
+    uaddl     v0.8h, v6.8b, v16.8b
+    umlal     v0.8h, v10.8b, v30.8b
+    umlal     v0.8h, v12.8b, v30.8b
+    umlsl     v0.8h, v8.8b, v31.8b
+    umlsl     v0.8h, v14.8b, v31.8b
+
+    sqrshrun  v29.8b, v24.8h, #5
+
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    urhadd    v28.16b, v28.16b, v26.16b
+    urhadd    v29.16b, v29.16b, v27.16b
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+    sqrshrun  v26.8b, v0.8h, #5
+    st1       {v28.2s, v29.2s}, [x1], x3  // store row 6
+
+    uaddl     v24.8h, v18.8b, v23.8b
+    umlal     v24.8h, v20.8b, v30.8b
+    umlal     v24.8h, v21.8b, v30.8b
+    umlsl     v24.8h, v19.8b, v31.8b
+    umlsl     v24.8h, v22.8b, v31.8b
+
+    ld1       {v18.2s, v19.2s}, [x11], x2 // horz row 7, col 1
+
+    uaddl     v0.8h, v7.8b, v17.8b
+    umlal     v0.8h, v11.8b, v30.8b
+    umlal     v0.8h, v13.8b, v30.8b
+    umlsl     v0.8h, v9.8b, v31.8b
+    umlsl     v0.8h, v15.8b, v31.8b
+
+    sqrshrun  v28.8b, v24.8h, #5
+
+    ext       v23.8b, v18.8b, v19.8b, #5
+    ext       v20.8b, v18.8b, v19.8b, #2
+    ext       v21.8b, v18.8b, v19.8b, #3
+    ext       v22.8b, v18.8b, v19.8b, #4
+    ext       v19.8b, v18.8b, v19.8b, #1
+
+    sqrshrun  v27.8b, v0.8h, #5
+
+    uaddl     v24.8h, v18.8b, v23.8b
+    umlal     v24.8h, v20.8b, v30.8b
+    umlal     v24.8h, v21.8b, v30.8b
+    umlsl     v24.8h, v19.8b, v31.8b
+    umlsl     v24.8h, v22.8b, v31.8b
+
+    // slide the vertical window down by four rows
+    mov       v0.16b, v8.16b
+    mov       v1.16b, v9.16b
+
+    mov       v2.16b, v10.16b
+    mov       v3.16b, v11.16b
+
+    mov       v4.16b, v12.16b
+    mov       v5.16b, v13.16b
+
+    mov       v6.16b, v14.16b
+    mov       v7.16b, v15.16b
+
+    mov       v8.16b, v16.16b
+    mov       v9.16b, v17.16b
+
+    sqrshrun  v29.8b, v24.8h, #5
+    subs      x4, x4, #8                  // 8 rows written in this iteration
+    urhadd    v28.16b, v28.16b, v26.16b
+    urhadd    v29.16b, v29.16b, v27.16b
+    st1       {v28.2s, v29.2s}, [x1], x3  // store row 7
+
+    beq       end_func                    // stop when no rows remain
+    b         loop_16
+
+
+    // wd == 8 path: v0-v4 hold vertical source rows 0..4
+loop_8_start:
+    ld1       {v0.2s}, [x7], x2           // Vector load from src[0_0]
+    ld1       {v1.2s}, [x7], x2           // Vector load from src[1_0]
+    ld1       {v2.2s}, [x7], x2           // Vector load from src[2_0]
+    ld1       {v3.2s}, [x7], x2           // Vector load from src[3_0]
+    ld1       {v4.2s}, [x7], x2           // Vector load from src[4_0]
+
+loop_8:
+    ld1       {v5.2s}, [x7], x2           // Vector load from src[5_0]
+    uaddl     v10.8h, v0.8b, v5.8b
+    umlal     v10.8h, v2.8b, v30.8b
+    umlal     v10.8h, v3.8b, v30.8b
+    umlsl     v10.8h, v1.8b, v31.8b
+    umlsl     v10.8h, v4.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  //horz row 0
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    sqrshrun  v26.8b, v10.8h, #5
+    ld1       {v6.2s}, [x7], x2           // src[6_0]
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  // horz row 1
+    uaddl     v18.8h, v1.8b, v6.8b
+    umlal     v18.8h, v3.8b, v30.8b
+    umlal     v18.8h, v4.8b, v30.8b
+    umlsl     v18.8h, v2.8b, v31.8b
+    umlsl     v18.8h, v5.8b, v31.8b
+    sqrshrun  v28.8b, v10.8h, #5
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    sqrshrun  v27.8b, v18.8h, #5
+    ld1       {v7.2s}, [x7], x2           // src[7_0]
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  // horz row 2
+    uaddl     v18.8h, v2.8b, v7.8b
+    umlal     v18.8h, v4.8b, v30.8b
+    umlal     v18.8h, v5.8b, v30.8b
+    umlsl     v18.8h, v3.8b, v31.8b
+    umlsl     v18.8h, v6.8b, v31.8b
+    sqrshrun  v29.8b, v10.8h, #5
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    urhadd    v26.16b, v26.16b, v28.16b
+    urhadd    v27.16b, v27.16b, v29.16b
+    sqrshrun  v28.8b, v18.8h, #5
+    ld1       {v8.2s}, [x7], x2           // src[8_0]
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  // horz row 3
+    uaddl     v18.8h, v3.8b, v8.8b
+    umlal     v18.8h, v5.8b, v30.8b
+    umlal     v18.8h, v6.8b, v30.8b
+    umlsl     v18.8h, v4.8b, v31.8b
+    umlsl     v18.8h, v7.8b, v31.8b
+    sqrshrun  v24.8b, v10.8h, #5
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    sqrshrun  v29.8b, v18.8h, #5
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    st1       {v26.2s}, [x1], x3          // store row 0
+
+    mov       v0.16b, v4.16b
+    mov       v1.16b, v5.16b
+
+    st1       {v27.2s}, [x1], x3          // store row 1
+
+    mov       v2.16b, v6.16b
+    mov       v3.16b, v7.16b
+
+    mov       v4.8b, v8.8b
+
+    sqrshrun  v25.8b, v10.8h, #5
+    subs      x9, x4, #4                  // x4 itself is only updated after all 8 rows
+    urhadd    v24.16b, v24.16b, v28.16b
+    urhadd    v25.16b, v25.16b, v29.16b
+    st1       {v24.2s}, [x1], x3          // store row 2
+    st1       {v25.2s}, [x1], x3          // store row 3
+    beq       end_func                    // Branch if height==4
+
+    ld1       {v5.2s}, [x7], x2           // Vector load from src[9_0]
+    uaddl     v10.8h, v0.8b, v5.8b
+    umlal     v10.8h, v2.8b, v30.8b
+    umlal     v10.8h, v3.8b, v30.8b
+    umlsl     v10.8h, v1.8b, v31.8b
+    umlsl     v10.8h, v4.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  //horz row 4
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    sqrshrun  v26.8b, v10.8h, #5
+    ld1       {v6.2s}, [x7], x2           // src[10_0]
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  // horz row 5
+    uaddl     v18.8h, v1.8b, v6.8b
+    umlal     v18.8h, v3.8b, v30.8b
+    umlal     v18.8h, v4.8b, v30.8b
+    umlsl     v18.8h, v2.8b, v31.8b
+    umlsl     v18.8h, v5.8b, v31.8b
+    sqrshrun  v28.8b, v10.8h, #5
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    sqrshrun  v27.8b, v18.8h, #5
+    ld1       {v7.2s}, [x7], x2           // src[11_0]
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  // horz row 6
+    uaddl     v18.8h, v2.8b, v7.8b
+    umlal     v18.8h, v4.8b, v30.8b
+    umlal     v18.8h, v5.8b, v30.8b
+    umlsl     v18.8h, v3.8b, v31.8b
+    umlsl     v18.8h, v6.8b, v31.8b
+    sqrshrun  v29.8b, v10.8h, #5
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    urhadd    v26.16b, v26.16b, v28.16b
+    urhadd    v27.16b, v27.16b, v29.16b
+    sqrshrun  v28.8b, v18.8h, #5
+    ld1       {v8.2s}, [x7], x2           // src[12_0]
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  // horz row 7
+    uaddl     v18.8h, v3.8b, v8.8b
+    umlal     v18.8h, v5.8b, v30.8b
+    umlal     v18.8h, v6.8b, v30.8b
+    umlsl     v18.8h, v4.8b, v31.8b
+    umlsl     v18.8h, v7.8b, v31.8b
+    sqrshrun  v24.8b, v10.8h, #5
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    sqrshrun  v29.8b, v18.8h, #5
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    st1       {v26.2s}, [x1], x3          // store row 4
+
+    mov       v0.16b, v4.16b
+    mov       v1.16b, v5.16b
+    st1       {v27.2s}, [x1], x3          // store row 5
+
+    mov       v2.16b, v6.16b
+    mov       v3.16b, v7.16b
+
+    mov       v4.8b, v8.8b
+    mov       v5.8b, v9.8b                // v5 is reloaded at the top of loop_8
+
+    sqrshrun  v25.8b, v10.8h, #5
+    subs      x4, x4, #8                  // 8 rows written in this iteration
+    urhadd    v24.16b, v24.16b, v28.16b
+    urhadd    v25.16b, v25.16b, v29.16b
+    st1       {v24.2s}, [x1], x3          // store row 6
+    st1       {v25.2s}, [x1], x3          // store row 7
+    bgt       loop_8                      // rows remain (ht > 8): process the next group
+    b         end_func
+
+    // wd == 4 path: straight-line code, 4 rows and then optionally 4 more
+loop_4_start:
+    ld1       {v0.s}[0], [x7], x2         // Vector load from src[0_0]
+    ld1       {v1.s}[0], [x7], x2         // Vector load from src[1_0]
+
+    ld1       {v2.s}[0], [x7], x2         // Vector load from src[2_0]
+    ld1       {v3.s}[0], [x7], x2         // Vector load from src[3_0]
+    ld1       {v4.s}[0], [x7], x2         // Vector load from src[4_0]
+
+    ld1       {v5.s}[0], [x7], x2         // Vector load from src[5_0]
+    uaddl     v10.8h, v0.8b, v5.8b
+    umlal     v10.8h, v2.8b, v30.8b
+    umlal     v10.8h, v3.8b, v30.8b
+    umlsl     v10.8h, v1.8b, v31.8b
+    umlsl     v10.8h, v4.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  //load for horz filter row 0
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    sqrshrun  v26.8b, v10.8h, #5
+    ld1       {v6.s}[0], [x7], x2         // Vector load from src[6_0]
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  //horz row 1
+    uaddl     v18.8h, v1.8b, v6.8b
+    umlal     v18.8h, v3.8b, v30.8b
+    umlal     v18.8h, v4.8b, v30.8b
+    umlsl     v18.8h, v2.8b, v31.8b
+    umlsl     v18.8h, v5.8b, v31.8b
+    sqrshrun  v28.8b, v10.8h, #5
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    sqrshrun  v27.8b, v18.8h, #5
+    ld1       {v7.s}[0], [x7], x2         // Vector load from src[7_0]
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  //horz row 2
+    uaddl     v18.8h, v2.8b, v7.8b
+    umlal     v18.8h, v4.8b, v30.8b
+    umlal     v18.8h, v5.8b, v30.8b
+    umlsl     v18.8h, v3.8b, v31.8b
+    umlsl     v18.8h, v6.8b, v31.8b
+    sqrshrun  v29.8b, v10.8h, #5
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    urhadd    v26.16b, v26.16b, v28.16b
+    urhadd    v27.16b, v27.16b, v29.16b
+    sqrshrun  v28.8b, v18.8h, #5
+    ld1       {v8.s}[0], [x7], x2         // Vector load from src[8_0]
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  //horz row 3
+    uaddl     v18.8h, v3.8b, v8.8b
+    umlal     v18.8h, v5.8b, v30.8b
+    umlal     v18.8h, v6.8b, v30.8b
+    umlsl     v18.8h, v4.8b, v31.8b
+    umlsl     v18.8h, v7.8b, v31.8b
+    sqrshrun  v24.8b, v10.8h, #5
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    sqrshrun  v29.8b, v18.8h, #5
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    st1       {v26.s}[0], [x1], x3        // store row 0
+
+    mov       v0.16b, v4.16b
+    mov       v1.16b, v5.16b
+
+    st1       {v27.s}[0], [x1], x3        // store row 1
+
+    mov       v2.16b, v6.16b
+    mov       v3.16b, v7.16b
+    mov       v4.8b, v8.8b
+
+    sqrshrun  v25.8b, v10.8h, #5
+    subs      x4, x4, #4
+    urhadd    v24.16b, v24.16b, v28.16b
+    urhadd    v25.16b, v25.16b, v29.16b
+    st1       {v24.s}[0], [x1], x3        // store row 2
+    st1       {v25.s}[0], [x1], x3        // store row 3
+    beq       end_func                    // Branch if height==4
+
+    ld1       {v5.s}[0], [x7], x2         // Vector load from src[9_0]
+    uaddl     v10.8h, v0.8b, v5.8b
+    umlal     v10.8h, v2.8b, v30.8b
+    umlal     v10.8h, v3.8b, v30.8b
+    umlsl     v10.8h, v1.8b, v31.8b
+    umlsl     v10.8h, v4.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  //load for horz filter row 4
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    sqrshrun  v26.8b, v10.8h, #5
+    ld1       {v6.s}[0], [x7], x2         // Vector load from src[10_0]
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  //horz row 5
+    uaddl     v18.8h, v1.8b, v6.8b
+    umlal     v18.8h, v3.8b, v30.8b
+    umlal     v18.8h, v4.8b, v30.8b
+    umlsl     v18.8h, v2.8b, v31.8b
+    umlsl     v18.8h, v5.8b, v31.8b
+    sqrshrun  v28.8b, v10.8h, #5
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    sqrshrun  v27.8b, v18.8h, #5
+    ld1       {v7.s}[0], [x7], x2         // Vector load from src[11_0]
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  //horz row 6
+    uaddl     v18.8h, v2.8b, v7.8b
+    umlal     v18.8h, v4.8b, v30.8b
+    umlal     v18.8h, v5.8b, v30.8b
+    umlsl     v18.8h, v3.8b, v31.8b
+    umlsl     v18.8h, v6.8b, v31.8b
+    sqrshrun  v29.8b, v10.8h, #5
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    urhadd    v26.16b, v26.16b, v28.16b
+    urhadd    v27.16b, v27.16b, v29.16b
+    sqrshrun  v28.8b, v18.8h, #5
+    ld1       {v8.s}[0], [x7], x2         // Vector load from src[12_0]
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    ld1       {v12.2s, v13.2s}, [x6], x2  //horz row 7
+    uaddl     v18.8h, v3.8b, v8.8b
+    umlal     v18.8h, v5.8b, v30.8b
+    umlal     v18.8h, v6.8b, v30.8b
+    umlsl     v18.8h, v4.8b, v31.8b
+    umlsl     v18.8h, v7.8b, v31.8b
+    sqrshrun  v24.8b, v10.8h, #5
+    ext       v17.8b, v12.8b, v13.8b, #5
+    ext       v14.8b, v12.8b, v13.8b, #2
+    ext       v15.8b, v12.8b, v13.8b, #3
+    ext       v16.8b, v12.8b, v13.8b, #4
+    ext       v13.8b, v12.8b, v13.8b, #1
+    sqrshrun  v29.8b, v18.8h, #5
+    uaddl     v10.8h, v12.8b, v17.8b
+    umlal     v10.8h, v14.8b, v30.8b
+    umlal     v10.8h, v15.8b, v30.8b
+    umlsl     v10.8h, v13.8b, v31.8b
+    umlsl     v10.8h, v16.8b, v31.8b
+    st1       {v26.s}[0], [x1], x3        // store row 4
+    st1       {v27.s}[0], [x1], x3        // store row 5
+    sqrshrun  v25.8b, v10.8h, #5
+    urhadd    v24.16b, v24.16b, v28.16b
+    urhadd    v25.16b, v25.16b, v29.16b
+    st1       {v24.s}[0], [x1], x3        // store row 6
+    st1       {v25.s}[0], [x1], x3        // store row 7
+
+end_func:
+    ldp       x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+
diff --git a/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s
new file mode 100755
index 0000000..9d19a2d
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s
@@ -0,0 +1,511 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_inter_pred_luma_vert_qpel_av8.s +//* +//* @brief +//* Contains function definitions for inter prediction vertical quarter pel interpolation. +//* +//* @author +//* Mohit +//* +//* @par List of Functions: +//* +//* - ih264_inter_pred_luma_vert_qpel_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +///* All the functions here are replicated from ih264_inter_pred_filters.c +// + +///** +///** +//******************************************************************************* +//* +//* @brief +//* Quarter pel inter prediction luma filter in the vertical direction +//* +//* @par Description: +//* Applies a 6 tap vertical filter. The output is clipped to 8 bits and then averaged (with rounding) with the integer-position row selected by dydx +//* sec 8.4.2.2.1 titled "Luma sample interpolation process" +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @param[in] pu1_tmp: temporary buffer: UNUSED in this function +//* +//* @param[in] dydx: x and y reference offset for qpel calculations. 
+//* @returns +//* +// @remarks +//* None +//* +//******************************************************************************* +//*/ + +//void ih264_inter_pred_luma_vert_qpel_av8 ( +// UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ht, +// WORD32 wd, +// UWORD8* pu1_tmp, +// UWORD32 dydx) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ht +// x5 => wd +// x7 => dydx (x6 => pu1_tmp, never read by this function) + +.text +.p2align 2 +.include "ih264_neon_macros.s" + + + + .global ih264_inter_pred_luma_vert_qpel_av8 + +ih264_inter_pred_luma_vert_qpel_av8: + + push_v_regs + stp x19, x20, [sp, #-16]! + + + and x7, x7, #12 //Keeps bits 2-3 of dydx (y-offset) + lsr x7, x7, #3 //(dydx & 12) >> 3 + mul x7, x2, x7 //scaled by src_strd + add x7, x0, x7 //pu1_src + (y_offset>>1)*src_strd + sub x14, x4, #16 //x14 = ht - 16, used for the loop check below + movi v22.8h, #20 // Filter coeff 20 (0x14) in every lane of v22 + sub x0, x0, x2, lsl #1 //pu1_src-2*src_strd + subs x12, x5, #8 //if wd=8 branch to loop_8 + movi v24.8h, #5 // Filter coeff 5 in every lane of v24 + beq loop_8_start + + subs x12, x5, #4 //if wd=4 branch to loop_4 + beq loop_4_start + + + ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[0_0] + ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[1_0] + ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[2_0] + ld1 {v6.2s, v7.2s}, [x0], x2 // Vector load from src[3_0] + add x14, x14, #1 //for checking loop + ld1 {v8.2s, v9.2s}, [x0], x2 // Vector load from src[4_0] + uaddl v12.8h, v4.8b, v6.8b // temp1 = src[2_0] + src[3_0] + ld1 {v10.2s, v11.2s}, [x0], x2 // Vector load from src[5_0] + +loop_16: //when wd=16 + + uaddl v14.8h, v0.8b, v10.8b // temp = src[0_0] + src[5_0] + uaddl v16.8h, v2.8b, v8.8b // temp2 = src[1_0] + src[4_0] + mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20 + uaddl v20.8h, v1.8b, v11.8b // temp4 = src[0_8] + src[5_8] + uaddl v18.8h, v5.8b, v7.8b // temp3 = src[2_8] + src[3_8] + mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20 + ld1 {v0.2s, 
v1.2s}, [x0], x2 + uaddl v26.8h, v3.8b, v9.8b // temp5 = src[1_8] + src[4_8] + uaddl v12.8h, v6.8b, v8.8b + mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5 + uaddl v16.8h, v2.8b, v0.8b + uaddl v18.8h, v4.8b, v10.8b + mla v16.8h, v12.8h , v22.8h + mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5 + uaddl v26.8h, v5.8b, v11.8b + uaddl v12.8h, v7.8b, v9.8b + sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5) + uaddl v14.8h, v3.8b, v1.8b + ld1 {v2.2s, v3.2s}, [x0], x2 + mla v14.8h, v12.8h , v22.8h + mls v16.8h, v18.8h , v24.8h + sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5) + ld1 {v20.2s, v21.2s}, [x7], x2 // Load for interpolation row 0 + urhadd v30.16b, v20.16b , v30.16b // Interpolation to obtain qpel value + urhadd v31.16b, v21.16b , v31.16b // Interpolation to obtain qpel value + uaddl v18.8h, v4.8b, v2.8b + uaddl v12.8h, v8.8b, v10.8b + st1 {v30.2s, v31.2s}, [x1], x3 // Vector store to dst[0_0] + mla v18.8h, v12.8h , v22.8h + uaddl v20.8h, v6.8b, v0.8b + mls v14.8h, v26.8h , v24.8h + sqrshrun v30.8b, v16.8h, #5 + uaddl v12.8h, v9.8b, v11.8b + uaddl v16.8h, v5.8b, v3.8b + uaddl v26.8h, v7.8b, v1.8b + mla v16.8h, v12.8h , v22.8h + mls v18.8h, v20.8h , v24.8h + ld1 {v4.2s, v5.2s}, [x0], x2 + sqrshrun v31.8b, v14.8h, #5 + ld1 {v14.2s, v15.2s}, [x7], x2 // Load for interpolation row 1 + uaddl v12.8h, v10.8b, v0.8b + urhadd v30.16b, v14.16b , v30.16b // Interpolation to obtain qpel value + urhadd v31.16b, v15.16b , v31.16b // Interpolation to obtain qpel value + uaddl v14.8h, v6.8b, v4.8b + uaddl v20.8h, v8.8b, v2.8b + mla v14.8h, v12.8h , v22.8h + mls v16.8h, v26.8h , v24.8h + st1 {v30.2s, v31.2s}, [x1], x3 //store row 1 + sqrshrun v30.8b, v18.8h, #5 + uaddl v18.8h, v7.8b, v5.8b + uaddl v12.8h, v11.8b, v1.8b + mla v18.8h, v12.8h , v22.8h + uaddl v26.8h, v9.8b, v3.8b + mls v14.8h, v20.8h , v24.8h + ld1 {v6.2s, v7.2s}, [x0], x2 + sqrshrun v31.8b, v16.8h, #5 + ld1 {v16.2s, v17.2s}, [x7], x2 // Load for interpolation row 2 + mls 
v18.8h, v26.8h , v24.8h + urhadd v30.16b, v16.16b , v30.16b // Interpolation to obtain qpel value + urhadd v31.16b, v17.16b , v31.16b // Interpolation to obtain qpel value + uaddl v12.8h, v0.8b, v2.8b // temp1 = src[2_0] + src[3_0] + st1 {v30.2s, v31.2s}, [x1], x3 //store row 2 + uaddl v16.8h, v10.8b, v4.8b // temp2 = src[1_0] + src[4_0] + uaddl v20.8h, v9.8b, v7.8b // temp4 = src[0_8] + src[5_8] + sqrshrun v30.8b, v14.8h, #5 + uaddl v26.8h, v5.8b, v11.8b // temp5 = src[1_8] + src[4_8] + uaddl v14.8h, v8.8b, v6.8b // temp = src[0_0] + src[5_0] + sqrshrun v31.8b, v18.8h, #5 + ld1 {v18.2s, v19.2s}, [x7], x2 // Load for interpolation row 3 + mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20 + urhadd v30.16b, v18.16b , v30.16b // Interpolation to obtain qpel value + urhadd v31.16b, v19.16b , v31.16b // Interpolation to obtain qpel value + uaddl v18.8h, v1.8b, v3.8b // temp3 = src[2_8] + src[3_8] + st1 {v30.2s, v31.2s}, [x1], x3 //store row 3 + // 4 rows processed + mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20 + ld1 {v8.2s, v9.2s}, [x0], x2 + uaddl v12.8h, v2.8b, v4.8b + uaddl v18.8h, v3.8b, v5.8b + mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5 + uaddl v28.8h, v9.8b, v11.8b + uaddl v16.8h, v6.8b, v0.8b + mla v28.8h, v18.8h , v22.8h // temp4 += temp3 * 20 + mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5 + uaddl v26.8h, v1.8b, v7.8b + uaddl v18.8h, v5.8b, v7.8b + sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5) + uaddl v14.8h, v8.8b, v10.8b + sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5) + ld1 {v20.2s, v21.2s}, [x7], x2 // Load for interpolation row 4 + ld1 {v10.2s, v11.2s}, [x0], x2 + urhadd v30.16b, v20.16b , v30.16b // Interpolation to obtain qpel value + urhadd v31.16b, v21.16b , v31.16b // Interpolation to obtain qpel value + mls v28.8h, v26.8h , v24.8h // temp4 -= temp5 * 5 + st1 {v30.2s, v31.2s}, [x1], x3 // store row 4 + mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20 + uaddl v20.8h, v11.8b, v1.8b + uaddl 
v26.8h, v3.8b, v9.8b + mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20 + uaddl v12.8h, v6.8b, v4.8b + uaddl v18.8h, v7.8b, v9.8b + sqrshrun v31.8b, v28.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5) + mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5 + uaddl v16.8h, v8.8b, v2.8b + sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5) + ld1 {v14.2s, v15.2s}, [x7], x2 // Load for interpolation row 5 + mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5 + urhadd v30.16b, v14.16b , v30.16b // Interpolation to obtain qpel value + urhadd v31.16b, v15.16b , v31.16b // Interpolation to obtain qpel value + uaddl v14.8h, v10.8b, v0.8b + st1 {v30.2s, v31.2s}, [x1], x3 // store row 5 + mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20 + ld1 {v0.2s, v1.2s}, [x0], x2 + uaddl v26.8h, v5.8b, v11.8b + uaddl v12.8h, v8.8b, v6.8b + uaddl v28.8h, v0.8b, v2.8b + sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5) + mla v28.8h, v12.8h , v22.8h // temp += temp1 * 20 + uaddl v20.8h, v1.8b, v3.8b + mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5 + mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20 + uaddl v16.8h, v10.8b, v4.8b + sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5) + ld1 {v14.2s, v15.2s}, [x7], x2 // Load for interpolation row 6 + mov v2.8b, v6.8b + mov v3.8b, v7.8b + urhadd v30.16b, v14.16b , v30.16b // Interpolation to obtain qpel value + urhadd v31.16b, v15.16b , v31.16b // Interpolation to obtain qpel value + + mls v28.8h, v16.8h , v24.8h // temp -= temp2 * 5 + st1 {v30.2s, v31.2s}, [x1], x3 // store row 6 + sqrshrun v30.8b, v28.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5) + swp v0.8b, v4.8b // swapping registers to put it in order + swp v1.8b, v5.8b // swapping registers to put it in order + + mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5 + mov v6.8b, v10.8b + mov v7.8b, v11.8b + subs x12, x14, #1 // if height==16 - looping + swp v4.8b, v8.8b + swp v5.8b, v9.8b + sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 
+16) >> 5) + ld1 {v20.2s, v21.2s}, [x7], x2 // Load for interpolation row 7 + urhadd v30.16b, v20.16b , v30.16b // Interpolation to obtain qpel value + urhadd v31.16b, v21.16b , v31.16b // Interpolation to obtain qpel value + st1 {v30.2s, v31.2s}, [x1], x3 // store row 7 + bne end_func //exit unless x14 == 1 (flags come from the subs above); x14 == 1 only after the first 8 rows when ht == 16 + add x14, x14, #1 //for checking loop + ld1 {v10.2s, v11.2s}, [x0], x2 + uaddl v12.8h, v4.8b, v6.8b // temp1 = src[2_0] + src[3_0] + + b loop_16 // looping if height =16 + +loop_8_start: +//// Processing row0 and row1 + + ld1 {v0.2s}, [x0], x2 // Vector load from src[0_0] + ld1 {v1.2s}, [x0], x2 // Vector load from src[1_0] + ld1 {v2.2s}, [x0], x2 // Vector load from src[2_0] + ld1 {v3.2s}, [x0], x2 // Vector load from src[3_0] + add x14, x14, #1 //for checking loop + ld1 {v4.2s}, [x0], x2 // Vector load from src[4_0] + ld1 {v5.2s}, [x0], x2 // Vector load from src[5_0] + +loop_8: + //for checking loop + uaddl v6.8h, v2.8b, v3.8b // temp1 = src[2_0] + src[3_0] + uaddl v8.8h, v0.8b, v5.8b // temp = src[0_0] + src[5_0] + uaddl v10.8h, v1.8b, v4.8b // temp2 = src[1_0] + src[4_0] + mla v8.8h, v6.8h , v22.8h // temp += temp1 * 20 + ld1 {v6.2s}, [x0], x2 + uaddl v14.8h, v3.8b, v4.8b + uaddl v16.8h, v1.8b, v6.8b + uaddl v18.8h, v2.8b, v5.8b + mls v8.8h, v10.8h , v24.8h // temp -= temp2 * 5 + mla v16.8h, v14.8h , v22.8h + ld1 {v7.2s}, [x0], x2 + uaddl v20.8h, v4.8b, v5.8b + uaddl v12.8h, v2.8b, v7.8b + uaddl v10.8h, v3.8b, v6.8b + mls v16.8h, v18.8h , v24.8h + sqrshrun v26.8b, v8.8h, #5 // dst[0_0] = CLIP_U8( (temp + 16) >> 5) + mla v12.8h, v20.8h , v22.8h + ld1 {v8.2s}, [x7], x2 //Load value for interpolation (row0) + ld1 {v9.2s}, [x7], x2 //Load value for interpolation (row1) + ld1 {v0.2s}, [x0], x2 + uaddl v14.8h, v5.8b, v6.8b + sqrshrun v27.8b, v16.8h, #5 + urhadd v26.16b, v8.16b , v26.16b // Interpolation step for qpel calculation + urhadd v27.16b, v9.16b , v27.16b // Interpolation step for qpel calculation + + uaddl v20.8h, v3.8b, v0.8b + mls v12.8h, v10.8h 
, v24.8h + st1 {v26.2s}, [x1], x3 // Vector store to dst[0_0] + uaddl v18.8h, v4.8b, v7.8b + mla v20.8h, v14.8h , v22.8h + st1 {v27.2s}, [x1], x3 // Vector store to dst[1_0] + sqrshrun v28.8b, v12.8h, #5 + mls v20.8h, v18.8h , v24.8h + ld1 {v12.2s}, [x7], x2 //Load value for interpolation (row2) + ld1 {v13.2s}, [x7], x2 //Load value for interpolation (row3) + ld1 {v1.2s}, [x0], x2 + sqrshrun v29.8b, v20.8h, #5 + subs x9, x4, #4 + urhadd v28.16b, v12.16b , v28.16b + urhadd v29.16b, v13.16b , v29.16b + st1 {v28.2s}, [x1], x3 //store row 2 + st1 {v29.2s}, [x1], x3 //store row 3 + beq end_func // Branch if height==4 + uaddl v14.8h, v6.8b, v7.8b // temp1 = src[2_0] + src[3_0] + uaddl v16.8h, v0.8b, v5.8b // temp = src[0_0] + src[5_0] + uaddl v18.8h, v1.8b, v4.8b // temp2 = src[1_0] + src[4_0] + mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20 + ld1 {v2.2s}, [x0], x2 + mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5 + uaddl v8.8h, v0.8b, v7.8b + uaddl v10.8h, v1.8b, v6.8b + uaddl v12.8h, v2.8b, v5.8b + sqrshrun v26.8b, v18.8h, #5 + mla v12.8h, v8.8h , v22.8h + ld1 {v18.2s}, [x7], x2 //Load value for interpolation (row4) + ld1 {v19.2s}, [x7], x2 //Load value for interpolation (row5) + ld1 {v3.2s}, [x0], x2 + mls v12.8h, v10.8h , v24.8h + sqrshrun v27.8b, v12.8h, #5 + urhadd v26.16b, v18.16b , v26.16b // Interpolation step for qpel calculation + urhadd v27.16b, v19.16b , v27.16b // Interpolation step for qpel calculation + + st1 {v26.2s}, [x1], x3 // store row 4 + st1 {v27.2s}, [x1], x3 // store row 5 + uaddl v14.8h, v0.8b, v1.8b // temp1 = src[2_0] + src[3_0] + uaddl v16.8h, v2.8b, v7.8b // temp = src[0_0] + src[5_0] + uaddl v18.8h, v3.8b, v6.8b // temp2 = src[1_0] + src[4_0] + mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20 + ld1 {v4.2s}, [x0], x2 + mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5 + uaddl v8.8h, v2.8b, v1.8b + uaddl v10.8h, v3.8b, v0.8b + uaddl v12.8h, v4.8b, v7.8b + sqrshrun v26.8b, v18.8h, #5 + mla v12.8h, v8.8h , v22.8h + ld1 {v18.2s}, [x7], x2 
//Load value for interpolation (row6) + ld1 {v19.2s}, [x7], x2 //Load value for interpolation (row7) + ld1 {v5.2s}, [x0], x2 + mls v12.8h, v10.8h , v24.8h + sqrshrun v27.8b, v12.8h, #5 + urhadd v26.16b, v18.16b , v26.16b // Interpolation step for qpel calculation + urhadd v27.16b, v19.16b , v27.16b // Interpolation step for qpel calculation + + subs x12, x14, #1 + st1 {v26.2s}, [x1], x3 // store row 6 + st1 {v27.2s}, [x1], x3 // store row 7 + add x14, x14, #1 + beq loop_8 //looping if height ==16 + + b end_func + + +loop_4_start: +//// Processing row0 and row1 + + + ld1 {v0.s}[0], [x0], x2 // Vector load from src[0_0] + ld1 {v1.s}[0], [x0], x2 // Vector load from src[1_0] + ld1 {v2.s}[0], [x0], x2 // Vector load from src[2_0] + ld1 {v3.s}[0], [x0], x2 // Vector load from src[3_0] + ld1 {v4.s}[0], [x0], x2 // Vector load from src[4_0] + ld1 {v5.s}[0], [x0], x2 // Vector load from src[5_0] + + uaddl v6.8h, v2.8b, v3.8b // temp1 = src[2_0] + src[3_0] + uaddl v8.8h, v0.8b, v5.8b // temp = src[0_0] + src[5_0] + uaddl v10.8h, v1.8b, v4.8b // temp2 = src[1_0] + src[4_0] + mla v8.8h, v6.8h , v22.8h // temp += temp1 * 20 + ld1 {v6.2s}, [x0], x2 // NOTE(review): 8-byte load, unlike the 4-byte .s[0] loads used for every other row of this wd=4 path - confirm the extra 4 bytes are always readable + uaddl v14.8h, v3.8b, v4.8b + uaddl v16.8h, v1.8b, v6.8b + uaddl v18.8h, v2.8b, v5.8b + mls v8.8h, v10.8h , v24.8h // temp -= temp2 * 5 + ld1 {v7.s}[0], [x0], x2 + mla v16.8h, v14.8h , v22.8h + uaddl v20.8h, v4.8b, v5.8b + uaddl v12.8h, v2.8b, v7.8b + uaddl v10.8h, v3.8b, v6.8b + mls v16.8h, v18.8h , v24.8h + sqrshrun v26.8b, v8.8h, #5 // dst[0_0] = CLIP_U8( (temp + 16) >> 5) + ld1 {v8.s}[0], [x7], x2 //Load value for interpolation - row 0 + ld1 {v9.s}[0], [x7], x2 //Load value for interpolation - row 1 + mla v12.8h, v20.8h , v22.8h + ld1 {v0.s}[0], [x0], x2 + uaddl v14.8h, v5.8b, v6.8b + sqrshrun v27.8b, v16.8h, #5 + uaddl v20.8h, v3.8b, v0.8b + urhadd v26.16b, v26.16b , v8.16b //Interpolation step for qpel calculation + urhadd v27.16b, v27.16b , v9.16b //Interpolation step for qpel calculation + + mls v12.8h, v10.8h , v24.8h + st1 
{v26.s}[0], [x1], x3 // Vector store to dst[0_0] + uaddl v18.8h, v4.8b, v7.8b + mla v20.8h, v14.8h , v22.8h + st1 {v27.s}[0], [x1], x3 // store row 1 + sqrshrun v28.8b, v12.8h, #5 + ld1 {v12.s}[0], [x7], x2 //Load value for interpolation - row 2 + ld1 {v13.s}[0], [x7], x2 //Load value for interpolation - row 3 + + mls v20.8h, v18.8h , v24.8h + ld1 {v1.s}[0], [x0], x2 + sqrshrun v29.8b, v20.8h, #5 + urhadd v28.16b, v12.16b , v28.16b //Interpolation step for qpel calculation + urhadd v29.16b, v13.16b , v29.16b //Interpolation step for qpel calculation + + st1 {v28.s}[0], [x1], x3 //store row 2 + st1 {v29.s}[0], [x1], x3 //store row 3 + + subs x9, x4, #4 + beq end_func // Branch if height==4 + + + uaddl v14.8h, v6.8b, v7.8b // temp1 = src[2_0] + src[3_0] + uaddl v16.8h, v0.8b, v5.8b // temp = src[0_0] + src[5_0] + uaddl v18.8h, v1.8b, v4.8b // temp2 = src[1_0] + src[4_0] + mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20 + ld1 {v2.s}[0], [x0], x2 + mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5 + uaddl v8.8h, v0.8b, v7.8b + uaddl v10.8h, v1.8b, v6.8b + uaddl v12.8h, v2.8b, v5.8b + sqrshrun v26.8b, v18.8h, #5 + ld1 {v18.s}[0], [x7], x2 //Load value for interpolation - row 4 + ld1 {v19.s}[0], [x7], x2 //Load value for interpolation - row 5 + mla v12.8h, v8.8h , v22.8h + ld1 {v3.s}[0], [x0], x2 + mls v12.8h, v10.8h , v24.8h + sqrshrun v27.8b, v12.8h, #5 + urhadd v26.16b, v18.16b , v26.16b //Interpolation step for qpel calculation + urhadd v27.16b, v27.16b , v19.16b //Interpolation step for qpel calculation + + st1 {v26.s}[0], [x1], x3 //store row 4 + st1 {v27.s}[0], [x1], x3 // store row 5 + uaddl v14.8h, v0.8b, v1.8b // temp1 = src[2_0] + src[3_0] + uaddl v16.8h, v2.8b, v7.8b // temp = src[0_0] + src[5_0] + uaddl v18.8h, v3.8b, v6.8b // temp2 = src[1_0] + src[4_0] + mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20 + ld1 {v4.s}[0], [x0], x2 + mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5 + uaddl v8.8h, v2.8b, v1.8b + uaddl v10.8h, v3.8b, v0.8b + uaddl v12.8h, 
v4.8b, v7.8b + sqrshrun v26.8b, v18.8h, #5 + ld1 {v18.s}[0], [x7], x2 //Load value for interpolation - row 6 + ld1 {v19.s}[0], [x7], x2 //Load value for interpolation - row 7 + mla v12.8h, v8.8h , v22.8h + ld1 {v5.s}[0], [x0], x2 + mls v12.8h, v10.8h , v24.8h + sqrshrun v27.8b, v12.8h, #5 + urhadd v26.16b, v18.16b , v26.16b //Interpolation step for qpel calculation + urhadd v27.16b, v19.16b , v27.16b //Interpolation step for qpel calculation + + st1 {v26.s}[0], [x1], x3 // store row 6 + st1 {v27.s}[0], [x1], x3 // store row 7 + + +end_func: + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + diff --git a/common/armv8/ih264_intra_pred_chroma_av8.s b/common/armv8/ih264_intra_pred_chroma_av8.s new file mode 100755 index 0000000..62edfdc --- /dev/null +++ b/common/armv8/ih264_intra_pred_chroma_av8.s @@ -0,0 +1,574 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_intra_pred_chroma.s +//* +//* @brief +//* Contains function definitions for intra chroma prediction . 
+//* +//* @author +//* Ittiam +//* +//* @par List of Functions: +//* +//* - ih264_intra_pred_chroma_8x8_mode_vert_av8() +//* - ih264_intra_pred_chroma_8x8_mode_horz_av8() +//* - ih264_intra_pred_chroma_8x8_mode_dc_av8() +//* - ih264_intra_pred_chroma_8x8_mode_plane_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +///* All the functions here are replicated from ih264_chroma_intra_pred_filters.c +// + +///** +///** +///** +// + + +.text +.p2align 2 +.include "ih264_neon_macros.s" + +.extern ih264_gai1_intrapred_chroma_plane_coeffs1 +.extern ih264_gai1_intrapred_chroma_plane_coeffs2 + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_chroma_8x8_mode_dc +//* +//* @brief +//* Perform Intra prediction for chroma_8x8 mode:DC +//* +//* @par Description: +//* Perform Intra prediction for chroma_8x8 mode:DC ,described in sec 8.3.4.1 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source containing alternate U and V samples; the left paths read the 16 bytes at pu1_src, the top paths read the 16 bytes at pu1_src + 18 +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination with alternate U and V samples +//* +//* @param[in] src_strd +//* integer source stride (x2 is not read by this function) +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels; masked with 5, then 0 takes the none_available path (every byte set to 128), 1 the left_only_available path, 4 the top_only_available path, and 5 the all_available path +//* +//* @returns +//* +//* @remarks +//* Writes 8 rows of 16 bytes each to pu1_dst, advancing by dst_strd per row: v20 for rows 0-3 and v21 for rows 4-7 +//* +//*******************************************************************************/ +//void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + + + .global ih264_intra_pred_chroma_8x8_mode_dc_av8 + +ih264_intra_pred_chroma_8x8_mode_dc_av8: + + + 
push_v_regs + stp x19, x20, [sp, #-16]! + + mov x19, #5 + ands x6, x4, x19 + beq none_available + cmp x6, #1 + beq left_only_available + cmp x6, #4 + beq top_only_available + +all_available: + ld1 {v0.8b, v1.8b}, [x0] + add x6, x0, #18 + ld1 {v2.8b, v3.8b}, [x6] + uxtl v0.8h, v0.8b + uxtl v1.8h, v1.8b + addp v0.4s, v0.4s , v0.4s + addp v1.4s, v1.4s , v1.4s + addp v0.4s, v0.4s , v0.4s + addp v1.4s, v1.4s , v1.4s + uxtl v2.8h, v2.8b + uxtl v3.8h, v3.8b + addp v2.4s, v2.4s , v2.4s + addp v3.4s, v3.4s , v3.4s + addp v2.4s, v2.4s , v2.4s + addp v3.4s, v3.4s , v3.4s + rshrn v5.8b, v0.8h, #2 + dup v21.8h, v5.h[0] + rshrn v6.8b, v3.8h, #2 + dup v20.8h, v6.h[0] + add v1.8h, v1.8h, v2.8h + rshrn v1.8b, v1.8h, #3 + dup v23.8h, v1.h[0] + mov v20.d[0], v23.d[0] + add v0.8h, v0.8h, v3.8h + rshrn v0.8b, v0.8h, #3 + dup v23.8h, v0.h[0] + mov v21.d[1], v23.d[0] + b store +left_only_available: + ld1 {v0.8b, v1.8b}, [x0] + uxtl v0.8h, v0.8b + uxtl v1.8h, v1.8b + addp v0.4s, v0.4s , v0.4s + addp v1.4s, v1.4s , v1.4s + addp v0.4s, v0.4s , v0.4s + addp v1.4s, v1.4s , v1.4s + rshrn v0.8b, v0.8h, #2 + rshrn v1.8b, v1.8h, #2 + dup v20.8h , v1.h[0] + dup v21.8h, v0.h[0] + b store + +top_only_available: + add x6, x0, #18 + ld1 {v0.8b, v1.8b}, [x6] + uxtl v0.8h, v0.8b + uxtl v1.8h, v1.8b + addp v0.4s, v0.4s , v0.4s + addp v1.4s, v1.4s , v1.4s + addp v0.4s, v0.4s , v0.4s + addp v1.4s, v1.4s , v1.4s + rshrn v0.8b, v0.8h, #2 + rshrn v1.8b, v1.8h, #2 + dup v20.8h , v0.h[0] + dup v21.8h, v1.h[0] + mov v20.d[1], v21.d[1] + mov v21.d[0], v20.d[0] + b store +none_available: + mov w15, #128 + dup v20.16b, w15 + dup v21.16b, w15 + + +store: + + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v21.16b}, [x1], x3 + st1 { v21.16b}, [x1], x3 + st1 { v21.16b}, [x1], x3 + st1 { v21.16b}, [x1], x3 +end_func: + + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + +///****************************************************************************** 
+ + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_chroma_8x8_mode_horz +//* +//* @brief +//* Perform Intra prediction for chroma_8x8 mode:Horizontal +//* +//* @par Description: +//* Perform Intra prediction for chroma_8x8 mode:Horizontal ,described in sec 8.3.4.2 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source containing alternate U and V samples; the first 8 halfwords (16 bytes) are read +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination with alternate U and V samples +//* +//* @param[in] src_strd +//* integer source stride (x2 is not read by this function) +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels(Not used in this function) +//* +//* @returns +//* +//* @remarks +//* Output row r (r = 0..7) is source halfword (7 - r) repeated 8 times, i.e. 16 bytes per row +//* +//******************************************************************************* +//*/ +//void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + + .global ih264_intra_pred_chroma_8x8_mode_horz_av8 + +ih264_intra_pred_chroma_8x8_mode_horz_av8: + + + + push_v_regs + ld1 {v0.8h}, [x0] // load 8 halfwords from pu1_src + + dup v10.8h, v0.h[7] // row 0 pattern: halfword 7 in all lanes + dup v11.8h, v0.h[6] // row 1 pattern + dup v12.8h, v0.h[5] // row 2 pattern + dup v13.8h, v0.h[4] // row 3 pattern + st1 {v10.8h}, [x1], x3 // store row 0, advance pu1_dst by dst_strd + dup v14.8h, v0.h[3] // row 4 pattern + st1 {v11.8h}, [x1], x3 // store row 1 + dup v15.8h, v0.h[2] // row 5 pattern + st1 {v12.8h}, [x1], x3 // store row 2 + dup v16.8h, v0.h[1] // row 6 pattern + st1 {v13.8h}, [x1], x3 // store row 3 + dup v17.8h, v0.h[0] // row 7 pattern: halfword 0 in all lanes + st1 {v14.8h}, [x1], x3 // store row 4 + st1 {v15.8h}, [x1], x3 // store row 5 + st1 {v16.8h}, [x1], x3 // store row 6 + st1 {v17.8h}, [x1], x3 // store row 7 + + + pop_v_regs + ret + + + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_chroma_8x8_mode_vert +//* +//* @brief +//* Perform Intra prediction 
for chroma_8x8 mode:vertical +//* +//* @par Description: +//*Perform Intra prediction for chroma_8x8 mode:vertical ,described in sec 8.3.4.3 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source containing alternate U and V samples; the 16 bytes starting at pu1_src + 18 are read +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination with alternate U and V samples +//* +//* @param[in] src_strd +//* integer source stride (x2 is not read by this function) +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels(Not used in this function) +//* +//* @returns +//* +//* @remarks +//* The same 16 source bytes are written to each of the 8 output rows +//* +//******************************************************************************* +//void ih264_intra_pred_chroma_8x8_mode_vert(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + + .global ih264_intra_pred_chroma_8x8_mode_vert_av8 + +ih264_intra_pred_chroma_8x8_mode_vert_av8: + + push_v_regs + + add x0, x0, #18 // x0 = pu1_src + 18 + ld1 {v0.8b, v1.8b}, [x0] // load 16 bytes into v0 (first 8) and v1 (next 8) + + st1 {v0.8b, v1.8b}, [x1], x3 // store row 0, advance pu1_dst by dst_strd + st1 {v0.8b, v1.8b}, [x1], x3 // store row 1 + st1 {v0.8b, v1.8b}, [x1], x3 // store row 2 + st1 {v0.8b, v1.8b}, [x1], x3 // store row 3 + st1 {v0.8b, v1.8b}, [x1], x3 // store row 4 + st1 {v0.8b, v1.8b}, [x1], x3 // store row 5 + st1 {v0.8b, v1.8b}, [x1], x3 // store row 6 + st1 {v0.8b, v1.8b}, [x1], x3 // store row 7 + + pop_v_regs + ret + + + + +///****************************************************************************** + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_chroma_8x8_mode_plane +//* +//* @brief +//* Perform Intra prediction for chroma_8x8 mode:PLANE +//* +//* @par Description: +//* Perform Intra prediction for chroma_8x8 mode:PLANE ,described in sec 8.3.4.4 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source containing alternate 
U and V samples +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination with alternate U and V samples +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* None +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd (not read by this function) +// x3 => dst_strd (sign-extended from w3 on entry) +// x4 => ui_neighboravailability (not read by this function) + + .global ih264_intra_pred_chroma_8x8_mode_plane_av8 +ih264_intra_pred_chroma_8x8_mode_plane_av8: + + push_v_regs + stp x19, x20, [sp, #-16]! + sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before using x3 as a 64-bit offset + ld1 {v0.2s}, [x0] // 8 bytes at pu1_src + add x10, x0, #10 + ld1 {v1.2s}, [x10] // 8 bytes at pu1_src + 10 + add x10, x10, #6 + rev64 v5.4h, v0.4h // reverse the order of the 16-bit (U,V) pairs + ld1 {v2.2s}, [x10], #8 // 8 bytes at pu1_src + 16 + add x10, x10, #2 + rev64 v7.4h, v2.4h + ld1 {v3.2s}, [x10] // 8 bytes at pu1_src + 26 + sub x5, x3, #8 + adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs1 + ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs1] + usubl v10.8h, v5.8b, v1.8b + ld1 {v8.8b, v9.8b}, [x12] // Load 16 bytes of multiplication factors into v8/v9 (merged into v8 below) + mov v8.d[1], v9.d[0] + usubl v12.8h, v3.8b, v7.8b + mul v14.8h, v10.8h , v8.8h + mul v16.8h, v12.8h , v8.8h + uzp1 v15.8h, v14.8h, v16.8h // de-interleave even/odd 16-bit lanes + uzp2 v16.8h, v14.8h, v16.8h + mov v14.16b, v15.16b + mov v15.d[0], v14.d[1] + mov v17.d[0], v16.d[1] + addp v14.4h, v14.4h, v14.4h + addp v15.4h, v15.4h, v15.4h + addp v16.4h, v16.4h, v16.4h + addp v17.4h, v17.4h, v17.4h + addp v14.4h, v14.4h, v14.4h + addp v15.4h, v15.4h, v15.4h + addp v16.4h, v16.4h, v16.4h + addp v17.4h, v17.4h, v17.4h + mov x6, #34 // the four sums are scaled as (34 * sum + 32) >> 6 below + dup v18.8h, w6 + smull v22.4s, v14.4h, v18.4h + smull v24.4s, v15.4h, v18.4h + smull v26.4s, v16.4h, 
v18.4h + smull v28.4s, v17.4h, v18.4h + rshrn v10.4h, v22.4s, #6 + rshrn v12.4h, v24.4s, #6 + rshrn v13.4h, v26.4s, #6 + rshrn v14.4h, v28.4s, #6 + ldrb w6, [x0], #1 + sxtw x6, w6 + add x10, x0, #31 + ldrb w8, [x0], #1 + sxtw x8, w8 + ldrb w7, [x10], #1 + sxtw x7, w7 + ldrb w9, [x10], #1 + sxtw x9, w9 + add x6, x6, x7 + add x8, x8, x9 + lsl x6, x6, #4 // x6 = 16 * (pu1_src[0] + pu1_src[32]) + lsl x8, x8, #4 // x8 = 16 * (pu1_src[1] + pu1_src[33]) + dup v0.8h, w6 + dup v2.8h, w8 + dup v4.8h, v12.h[0] + dup v6.8h, v10.h[0] + dup v24.8h, v14.h[0] + dup v26.8h, v13.h[0] + zip1 v5.8h, v4.8h, v24.8h + zip2 v24.8h, v4.8h, v24.8h + mov v4.16b, v5.16b + zip1 v7.8h, v6.8h, v26.8h + zip2 v26.8h, v6.8h, v26.8h + mov v6.16b, v7.16b + zip1 v1.8h, v0.8h, v2.8h + zip2 v2.8h, v0.8h, v2.8h + mov v0.16b, v1.16b + + adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs2 + ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs2] + + ld1 {v8.2s, v9.2s}, [x12] + mov v8.d[1], v9.d[0] + mov v10.16b, v8.16b + mov v22.16b, v8.16b // v22 keeps the 8 per-row factors, one 16-bit lane per row + zip1 v9.8h, v8.8h, v10.8h + zip2 v10.8h, v8.8h, v10.8h + mov v8.16b, v9.16b + mul v12.8h, v4.8h , v8.8h + mul v16.8h, v4.8h , v10.8h + add v12.8h, v0.8h , v12.8h + add v16.8h, v0.8h , v16.8h + dup v20.8h, v22.h[0] + mul v4.8h, v6.8h , v20.8h + dup v30.8h, v22.h[1] + mul v18.8h, v6.8h , v20.8h + mul v14.8h, v6.8h , v30.8h + mul v8.8h, v6.8h , v30.8h + add v24.8h, v12.8h , v4.8h + add v0.8h, v16.8h , v18.8h + add v2.8h, v12.8h , v14.8h + sqrshrun v28.8b, v24.8h, #5 + add v26.8h, v16.8h , v8.8h + sqrshrun v29.8b, v0.8h, #5 + dup v20.8h, v22.h[2] + st1 {v28.8b, v29.8b}, [x1], x3 // row 0 + sqrshrun v28.8b, v2.8h, #5 + sqrshrun v29.8b, v26.8h, #5 + mul v4.8h, v6.8h , v20.8h + mul v18.8h, v6.8h , v20.8h + st1 {v28.8b, v29.8b}, [x1], x3 // row 1 + add v24.8h, v12.8h , v4.8h + add v0.8h, v16.8h , v18.8h + dup v30.8h, v22.h[3] + sqrshrun v28.8b, v24.8h, #5 + sqrshrun v29.8b, v0.8h, #5 + mul v14.8h, v6.8h , v30.8h + mul v8.8h, v6.8h , v30.8h + st1 {v28.8b, v29.8b}, [x1], x3 // row 2 + add v2.8h, v12.8h , v14.8h + add v26.8h, v16.8h , v8.8h + dup v20.8h, v22.h[4] 
+ sqrshrun v28.8b, v2.8h, #5 + sqrshrun v29.8b, v26.8h, #5 + mul v4.8h, v6.8h , v20.8h + mul v18.8h, v6.8h , v20.8h + st1 {v28.8b, v29.8b}, [x1], x3 // row 3 + add v24.8h, v12.8h , v4.8h + add v0.8h, v16.8h , v18.8h + dup v30.8h, v22.h[5] + sqrshrun v28.8b, v24.8h, #5 + sqrshrun v29.8b, v0.8h, #5 + mul v14.8h, v6.8h , v30.8h + mul v8.8h, v6.8h , v30.8h + st1 {v28.8b, v29.8b}, [x1], x3 // row 4 + add v2.8h, v12.8h , v14.8h + add v26.8h, v16.8h , v8.8h + dup v20.8h, v22.h[6] + sqrshrun v28.8b, v2.8h, #5 + sqrshrun v29.8b, v26.8h, #5 + mul v4.8h, v6.8h , v20.8h + mul v18.8h, v6.8h , v20.8h + st1 {v28.8b, v29.8b}, [x1], x3 // row 5 + add v24.8h, v12.8h , v4.8h + add v0.8h, v16.8h , v18.8h + dup v30.8h, v22.h[7] + sqrshrun v28.8b, v24.8h, #5 + sqrshrun v29.8b, v0.8h, #5 + mul v14.8h, v6.8h , v30.8h + mul v8.8h, v6.8h , v30.8h + st1 {v28.8b, v29.8b}, [x1], x3 // row 6 + add v2.8h, v12.8h , v14.8h + add v26.8h, v16.8h , v8.8h + sqrshrun v28.8b, v2.8h, #5 + sqrshrun v29.8b, v26.8h, #5 + st1 {v28.8b, v29.8b}, [x1], x3 // row 7 + +end_func_plane: + + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + diff --git a/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/common/armv8/ih264_intra_pred_luma_16x16_av8.s new file mode 100755 index 0000000..a9eb165 --- /dev/null +++ b/common/armv8/ih264_intra_pred_luma_16x16_av8.s @@ -0,0 +1,606 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_intra_pred_luma_16x16_av8.s +//* +//* @brief +//* Contains function definitions for intra 16x16 Luma prediction . +//* +//* @author +//* Ittiam +//* +//* @par List of Functions: +//* +//* - ih264_intra_pred_luma_16x16_mode_vert_av8() +//* - ih264_intra_pred_luma_16x16_mode_horz_av8() +//* - ih264_intra_pred_luma_16x16_mode_dc_av8() +//* - ih264_intra_pred_luma_16x16_mode_plane_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +///* All the functions here are replicated from ih264_intra_pred_filters.c +// + +///** +///** +///** +// + + +.text +.p2align 2 +.include "ih264_neon_macros.s" +.extern ih264_gai1_intrapred_luma_plane_coeffs + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_16x16_mode_vert +//* +//* @brief +//* Perform Intra prediction for luma_16x16 mode:vertical +//* +//* @par Description: +//* Perform Intra prediction for luma_16x16 mode:Vertical ,described in sec 8.3.3.1 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels(Not used in this function) +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 
*pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd (not read by this function) +// x3 => dst_strd (sign-extended from w3 on entry) +// x4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_16x16_mode_vert_av8 + +ih264_intra_pred_luma_16x16_mode_vert_av8: + + push_v_regs + sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before using x3 as a 64-bit offset + + add x0, x0, #17 // x0 = pu1_src + 17 + ld1 {v0.8b, v1.8b}, [x0] // v0:v1 = the 16 bytes replicated into every row + + st1 {v0.8b, v1.8b}, [x1], x3 // row 0, then x1 += dst_strd + st1 {v0.8b, v1.8b}, [x1], x3 // row 1 + st1 {v0.8b, v1.8b}, [x1], x3 // row 2 + st1 {v0.8b, v1.8b}, [x1], x3 // row 3 + st1 {v0.8b, v1.8b}, [x1], x3 // row 4 + st1 {v0.8b, v1.8b}, [x1], x3 // row 5 + st1 {v0.8b, v1.8b}, [x1], x3 // row 6 + st1 {v0.8b, v1.8b}, [x1], x3 // row 7 + st1 {v0.8b, v1.8b}, [x1], x3 // row 8 + st1 {v0.8b, v1.8b}, [x1], x3 // row 9 + st1 {v0.8b, v1.8b}, [x1], x3 // row 10 + st1 {v0.8b, v1.8b}, [x1], x3 // row 11 + st1 {v0.8b, v1.8b}, [x1], x3 // row 12 + st1 {v0.8b, v1.8b}, [x1], x3 // row 13 + st1 {v0.8b, v1.8b}, [x1], x3 // row 14 + st1 {v0.8b, v1.8b}, [x1], x3 // row 15 + + pop_v_regs + ret + + + + + +///****************************************************************************** + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_16x16_mode_horz +//* +//* @brief +//* Perform Intra prediction for luma_16x16 mode:horizontal +//* +//* @par Description: +//* Perform Intra prediction for luma_16x16 mode:horizontal ,described in sec 8.3.3.2 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels(Not used in this function) +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ +//void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src, +// UWORD8 
*pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd (not read by this function) +// x3 => dst_strd (sign-extended from w3 on entry) +// x4 => ui_neighboravailability + + .global ih264_intra_pred_luma_16x16_mode_horz_av8 + +ih264_intra_pred_luma_16x16_mode_horz_av8: + + + + push_v_regs + sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before using x3 as a 64-bit offset + ld1 {v0.16b}, [x0] // v0 = 16 bytes at pu1_src; row r is filled with byte (15 - r) + + + + dup v10.16b, v0.b[15] + dup v11.16b, v0.b[14] + dup v12.16b, v0.b[13] + dup v13.16b, v0.b[12] + st1 {v10.16b}, [x1], x3 // row 0 + dup v14.16b, v0.b[11] + st1 {v11.16b}, [x1], x3 // row 1 + dup v15.16b, v0.b[10] + st1 {v12.16b}, [x1], x3 // row 2 + dup v16.16b, v0.b[9] + st1 {v13.16b}, [x1], x3 // row 3 + dup v17.16b, v0.b[8] + st1 {v14.16b}, [x1], x3 // row 4 + dup v18.16b, v0.b[7] + st1 {v15.16b}, [x1], x3 // row 5 + dup v19.16b, v0.b[6] + st1 {v16.16b}, [x1], x3 // row 6 + dup v20.16b, v0.b[5] + st1 {v17.16b}, [x1], x3 // row 7 + dup v21.16b, v0.b[4] + st1 {v18.16b}, [x1], x3 // row 8 + dup v22.16b, v0.b[3] + st1 {v19.16b}, [x1], x3 // row 9 + dup v23.16b, v0.b[2] + st1 {v20.16b}, [x1], x3 // row 10 + dup v24.16b, v0.b[1] + st1 {v21.16b}, [x1], x3 // row 11 + dup v25.16b, v0.b[0] + st1 {v22.16b}, [x1], x3 // row 12 + st1 {v23.16b}, [x1], x3 // row 13 + st1 {v24.16b}, [x1], x3 // row 14 + st1 {v25.16b}, [x1], x3 // row 15 + + pop_v_regs + ret + + + + + + + +///****************************************************************************** + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_16x16_mode_dc +//* +//* @brief +//* Perform Intra prediction for luma_16x16 mode:DC +//* +//* @par Description: +//* Perform Intra prediction for luma_16x16 mode:DC ,described in sec 8.3.3.3 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* 
@returns +//* None +//* @remarks +//* Bit 0x01 of ui_neighboravailability selects the 16 bytes at pu1_src, bit 0x04 the 16 bytes at pu1_src + 17; with neither set the block is filled with 128 +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd (not read by this function) +// x3 => dst_strd (sign-extended from w3 on entry) +// x4 => ui_neighboravailability (only bits 0x01 and 0x04 are examined) + + .global ih264_intra_pred_luma_16x16_mode_dc_av8 + +ih264_intra_pred_luma_16x16_mode_dc_av8: + + + + push_v_regs + stp x19, x20, [sp, #-16]! + sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before using x3 as a 64-bit offset + sub v0.16b, v0.16b, v0.16b // v0 = 0 (left samples, if loaded) + sub v1.16b, v1.16b, v1.16b // v1 = 0 (top samples, if loaded) + mov w10, #0 // w10 = rounding term, +8 per available side + mov w11 , #3 // w11 = shift count, +1 per available side + ands x6, x4, #0x01 + beq top_available //LEFT NOT AVAILABLE + ld1 {v0.16b}, [x0] + add w10, w10, #8 + add w11, w11, #1 +top_available: + ands x6, x4, #0x04 + beq none_available + add x6, x0, #17 + ld1 {v1.16b}, [x6] + add w10, w10, #8 + add w11, w11, #1 + b summation +none_available: + tst w4, #0x01 // top is known unavailable here: sum only if left was loaded (ignores unrelated flag bits and the upper half of x4) + bne summation + mov w15, #128 // neither side available: fill with 128 + dup v20.16b, w15 + b store +summation: + uaddl v2.8h, v0.8b, v1.8b + uaddl2 v3.8h, v0.16b, v1.16b + dup v10.8h, w10 + neg w11, w11 // negative count makes uqshl below shift right + dup v20.8h, w11 + add v0.8h, v2.8h, v3.8h + mov v1.d[0], v0.d[1] + add v0.4h, v0.4h, v1.4h + addp v0.4h, v0.4h , v0.4h + addp v0.4h, v0.4h , v0.4h // lane 0 = sum of all loaded samples + add v0.4h, v0.4h, v10.4h // add rounding term + uqshl v0.8h, v0.8h, v20.8h // (sum + round) >> shift + sqxtun v0.8b, v0.8h + dup v20.16b, v0.b[0] + +store: + + st1 { v20.16b}, [x1], x3 // row 0 + st1 { v20.16b}, [x1], x3 // row 1 + st1 { v20.16b}, [x1], x3 // row 2 + st1 { v20.16b}, [x1], x3 // row 3 + st1 { v20.16b}, [x1], x3 // row 4 + st1 { v20.16b}, [x1], x3 // row 5 + st1 { v20.16b}, [x1], x3 // row 6 + st1 { v20.16b}, [x1], x3 // row 7 + st1 { v20.16b}, [x1], x3 // row 8 + st1 { v20.16b}, [x1], x3 // row 9 + st1 { v20.16b}, [x1], x3 // row 10 + st1 { v20.16b}, [x1], x3 // row 11 + st1 { v20.16b}, [x1], x3 // row 12 + st1 { v20.16b}, [x1], x3 // row 13 + st1 { v20.16b}, [x1], x3 // row 14 + st1 { v20.16b}, [x1], x3 // row 15 + + + +end_func: + + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + 
+///****************************************************************************** + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_16x16_mode_plane +//* +//* @brief +//* Perform Intra prediction for luma_16x16 mode:PLANE +//* +//* @par Description: +//* Perform Intra prediction for luma_16x16 mode:PLANE ,described in sec 8.3.3.4 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* None +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst (moved to x2 on entry) +// x2 => src_strd (overwritten, not read) +// x3 => dst_strd (sign-extended from w3 before the first store) +// x4 => ui_neighboravailability (overwritten, not read) + + .global ih264_intra_pred_luma_16x16_mode_plane_av8 +ih264_intra_pred_luma_16x16_mode_plane_av8: + + push_v_regs + stp x19, x20, [sp, #-16]! 
+ mov x2, x1 + add x1, x0, #17 + add x0, x0, #15 + mov x8, #9 + sub x1, x1, #1 + mov x10, x1 //top_left + mov x4, #-1 + ld1 {v2.2s}, [x1], x8 + sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before using x3 as a 64-bit offset + adrp x7, :got:ih264_gai1_intrapred_luma_plane_coeffs + ldr x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs] + + ld1 {v0.2s}, [x1] + rev64 v2.8b, v2.8b + ld1 {v6.2s, v7.2s}, [x7] + usubl v0.8h, v0.8b, v2.8b + uxtl v16.8h, v6.8b + mul v0.8h, v0.8h , v16.8h + uxtl v18.8h, v7.8b + add x7, x0, x4, lsl #3 + sub x0, x7, x4, lsl #1 + sub x20, x4, #0x0 + neg x14, x20 + addp v0.8h, v0.8h, v1.8h + ldrb w8, [x7], #-1 + sxtw x8, w8 + ldrb w9, [x0], #1 + sxtw x9, w9 + saddlp v0.2s, v0.4h + sub x12, x8, x9 + ldrb w8, [x7], #-1 + sxtw x8, w8 + saddlp v0.1d, v0.2s + ldrb w9, [x0], #1 + sxtw x9, w9 + sub x8, x8, x9 + shl v2.2s, v0.2s, #2 + add x12, x12, x8, lsl #1 + add v0.2s, v0.2s , v2.2s + ldrb w8, [x7], #-1 + sxtw x8, w8 + ldrb w9, [x0], #1 + sxtw x9, w9 + srshr v0.2s, v0.2s, #6 // i_b = v0.s[0] + sub x8, x8, x9 + ldrb w5, [x7], #-1 + sxtw x5, w5 + add x8, x8, x8, lsl #1 + dup v4.8h, v0.h[0] + add x12, x12, x8 + ldrb w9, [x0], #1 + sxtw x9, w9 + mul v0.8h, v4.8h , v16.8h + sub x5, x5, x9 + mul v2.8h, v4.8h , v18.8h + add x12, x12, x5, lsl #2 + ldrb w8, [x7], #-1 + sxtw x8, w8 + ldrb w9, [x0], #1 + sxtw x9, w9 + sub x8, x8, x9 + ldrb w5, [x7], #-1 + sxtw x5, w5 + add x8, x8, x8, lsl #2 + ldrb w6, [x0], #1 + sxtw x6, w6 + add x12, x12, x8 + ldrb w8, [x7], #-1 + sxtw x8, w8 + ldrb w9, [x0], #1 + sxtw x9, w9 + sub x5, x5, x6 + sub x8, x8, x9 + add x5, x5, x5, lsl #1 + sub x20, x8, x8, lsl #3 + neg x8, x20 + add x12, x12, x5, lsl #1 + ldrb w5, [x7], #-1 + sxtw x5, w5 + ldrb w6, [x10] //top_left + sxtw x6, w6 + add x12, x12, x8 + sub x9, x5, x6 + ldrb w6, [x1, #7] + sxtw x6, w6 + add x12, x12, x9, lsl #3 // i_c = x12 + add x8, x5, x6 + add x12, x12, x12, lsl #2 + lsl x8, x8, #4 // i_a = x8 + add x12, x12, #0x20 + lsr x12, x12, #6 + shl v28.8h, v4.8h, #3 + dup v6.8h, w12 + dup v30.8h, w8 + shl v26.8h, v6.8h, #3 + sub v30.8h, v30.8h 
, v28.8h + sub v30.8h, v30.8h , v26.8h + add v28.8h, v30.8h , v6.8h + add v26.8h, v28.8h , v0.8h + add v28.8h, v28.8h , v2.8h + sqrshrun v20.8b, v26.8h, #5 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 // row 0 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 // row 1 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 // row 2 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 // row 3 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 // row 4 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 // row 5 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 // row 6 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 // row 7 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 // row 8 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 // row 9 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 // row 10 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 // row 11 + sqrshrun v21.8b, v28.8h, #5 +
add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 // row 12 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 // row 13 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 // row 14 + sqrshrun v23.8b, v28.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 // row 15 + +end_func_plane: + + ldp x19, x20, [sp], #16 + pop_v_regs + ret + diff --git a/common/armv8/ih264_intra_pred_luma_4x4_av8.s b/common/armv8/ih264_intra_pred_luma_4x4_av8.s new file mode 100755 index 0000000..62e8cee --- /dev/null +++ b/common/armv8/ih264_intra_pred_luma_4x4_av8.s @@ -0,0 +1,876 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_intra_pred_luma_4x4_av8.s +//* +//* @brief +//* Contains function definitions for intra 4x4 Luma prediction . 
+//* +//* @author +//* Ittiam +//* +//* @par List of Functions: +//* +//* -ih264_intra_pred_luma_4x4_mode_vert_av8 +//* -ih264_intra_pred_luma_4x4_mode_horz_av8 +//* -ih264_intra_pred_luma_4x4_mode_dc_av8 +//* -ih264_intra_pred_luma_4x4_mode_diag_dl_av8 +//* -ih264_intra_pred_luma_4x4_mode_diag_dr_av8 +//* -ih264_intra_pred_luma_4x4_mode_vert_r_av8 +//* -ih264_intra_pred_luma_4x4_mode_horz_d_av8 +//* -ih264_intra_pred_luma_4x4_mode_vert_l_av8 +//* -ih264_intra_pred_luma_4x4_mode_horz_u_av8 +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +///* All the functions here are replicated from ih264_intra_pred_filters.c +// + +///** +///** +///** +// + +.text +.p2align 2 +.include "ih264_neon_macros.s" + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_4x4_mode_vert +//* +//* @brief +//* Perform Intra prediction for luma_4x4 mode:vertical +//* +//* @par Description: +//* Perform Intra prediction for luma_4x4 mode:vertical ,described in sec 8.3.1.2.1 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels(Not used in this function) +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + .global 
ih264_intra_pred_luma_4x4_mode_vert_av8 + +ih264_intra_pred_luma_4x4_mode_vert_av8: + + push_v_regs + sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before using x3 as a 64-bit offset + add x0, x0, #5 // x0 = pu1_src + 5 + + ld1 {v0.s}[0], [x0] // the 4 bytes replicated into every row + st1 {v0.s}[0], [x1], x3 // row 0, then x1 += dst_strd + st1 {v0.s}[0], [x1], x3 // row 1 + st1 {v0.s}[0], [x1], x3 // row 2 + st1 {v0.s}[0], [x1], x3 // row 3 + + pop_v_regs + ret + + + + + +///****************************************************************************** + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_4x4_mode_horz +//* +//* @brief +//* Perform Intra prediction for luma_4x4 mode:horizontal +//* +//* @par Description: +//* Perform Intra prediction for luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2 +//* Row r of the output is filled with byte (3 - r) of the 4 bytes at pu1_src +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels(Not used in this function) +//* +//* @returns +//* None +//* @remarks +//* None +//* +//******************************************************************************* +//*/ +//void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd (not read by this function) +// x3 => dst_strd (sign-extended from w3 on entry) +// x4 => ui_neighboravailability + + + + .global ih264_intra_pred_luma_4x4_mode_horz_av8 + +ih264_intra_pred_luma_4x4_mode_horz_av8: + + push_v_regs + sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before using x3 as a 64-bit offset + ld1 {v1.s}[0], [x0] // 4 bytes at pu1_src + dup v0.8b, v1.b[3] + dup v2.8b, v1.b[2] + st1 {v0.s}[0], [x1], x3 // row 0 + dup v3.8b, v1.b[1] + st1 {v2.s}[0], [x1], x3 // row 1 + dup v4.8b, v1.b[0] + st1 {v3.s}[0], [x1], x3 // row 2 + st1 {v4.s}[0], [x1], x3 // row 3 + + pop_v_regs + ret + + + + + + + +///****************************************************************************** + + 
+///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_4x4_mode_dc +//* +//* @brief +//* Perform Intra prediction for luma_4x4 mode:DC +//* +//* @par Description: +//* Perform Intra prediction for luma_4x4 mode:DC ,described in sec 8.3.1.2.3 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* None +//* @remarks +//* Bit 0x01 of ui_neighboravailability selects the 4 bytes at pu1_src, bit 0x04 the 4 bytes at pu1_src + 5; with neither set the block is filled with 128 +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd (overwritten, not read) +// x3 => dst_strd (sign-extended from w3 on entry) +// x4 => ui_neighboravailability (only bits 0x01 and 0x04 are examined) + + + + .global ih264_intra_pred_luma_4x4_mode_dc_av8 + +ih264_intra_pred_luma_4x4_mode_dc_av8: + + + + + push_v_regs + stp x19, x20, [sp, #-16]! 
+ sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before using x3 as a 64-bit offset + ands x5, x4, #0x01 + beq top_available //LEFT NOT AVAILABLE + + add x10, x0, #3 + mov x2, #-1 + ldrb w5, [x10], #-1 + sxtw x5, w5 + ldrb w6, [x10], #-1 + sxtw x6, w6 + ldrb w7, [x10], #-1 + sxtw x7, w7 + add x5, x5, x6 + ldrb w8, [x10], #-1 + sxtw x8, w8 + add x5, x5, x7 + ands x11, x4, #0x04 // CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE + add x5, x5, x8 // x5 = sum of the 4 bytes at pu1_src (add does not alter the flags tested below) + beq left_available + add x10, x0, #5 + // BOTH LEFT AND TOP AVAILABLE + ldrb w6, [x10], #1 + sxtw x6, w6 + ldrb w7, [x10], #1 + sxtw x7, w7 + add x5, x5, x6 + ldrb w8, [x10], #1 + sxtw x8, w8 + add x5, x5, x7 + ldrb w9, [x10], #1 + sxtw x9, w9 + add x5, x5, x8 + add x5, x5, x9 + add x5, x5, #4 // (sum of 8 samples + 4) >> 3 + lsr x5, x5, #3 + dup v0.8b, w5 + st1 {v0.s}[0], [x1], x3 + st1 {v0.s}[0], [x1], x3 + st1 {v0.s}[0], [x1], x3 + st1 {v0.s}[0], [x1], x3 + b end_func + +top_available: // ONLY TOP AVAILABLE + ands x11, x4, #0x04 // CHECKING TOP AVAILABILITY OR ELSE BRANCH TO NONE AVAILABLE + beq none_available + + add x10, x0, #5 + ldrb w6, [x10], #1 + sxtw x6, w6 + ldrb w7, [x10], #1 + sxtw x7, w7 + ldrb w8, [x10], #1 + sxtw x8, w8 + add x5, x6, x7 + ldrb w9, [x10], #1 + sxtw x9, w9 + add x5, x5, x8 + add x5, x5, x9 + add x5, x5, #2 // (sum of 4 samples + 2) >> 2 + lsr x5, x5, #2 + dup v0.8b, w5 + st1 {v0.s}[0], [x1], x3 + st1 {v0.s}[0], [x1], x3 + st1 {v0.s}[0], [x1], x3 + st1 {v0.s}[0], [x1], x3 + b end_func + +left_available: //ONLY LEFT AVAILABLE + add x5, x5, #2 // (sum of 4 samples + 2) >> 2 + lsr x5, x5, #2 + dup v0.8b, w5 + st1 {v0.s}[0], [x1], x3 + st1 {v0.s}[0], [x1], x3 + st1 {v0.s}[0], [x1], x3 + st1 {v0.s}[0], [x1], x3 + b end_func + +none_available: //NONE AVAILABLE + mov x5, #128 + dup v0.8b, w5 + st1 {v0.s}[0], [x1], x3 + st1 {v0.s}[0], [x1], x3 + st1 {v0.s}[0], [x1], x3 + st1 {v0.s}[0], [x1], x3 + b end_func + + +end_func: + + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_4x4_mode_diag_dl +//* +//* @brief +//* Perform Intra 
prediction for luma_4x4 mode:Diagonal_Down_Left +//* +//* @par Description: +//* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* None +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd (not read by this function) +// x3 => dst_strd (sign-extended from w3 on entry) +// x4 => ui_neighboravailability (not read by this function) + + + .global ih264_intra_pred_luma_4x4_mode_diag_dl_av8 + +ih264_intra_pred_luma_4x4_mode_diag_dl_av8: + + + push_v_regs + stp x19, x20, [sp, #-16]! 
+ sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before it is used in 64-bit address arithmetic + add x0, x0, #5 + sub x5, x3, #2 // x5 = dst_strd - 2: moves to the next row after a 2-byte store + add x6, x0, #7 + ld1 {v0.8b}, [x0] // 8 bytes at pu1_src + 5 + ext v1.8b, v0.8b , v0.8b , #1 + ext v2.8b, v0.8b , v0.8b , #2 + ld1 {v2.b}[6], [x6] // lane 6 of v2 = byte at pu1_src + 12 (the last of the 8 bytes, repeated) + uaddl v20.8h, v0.8b, v1.8b + uaddl v22.8h, v1.8b, v2.8b + add v24.8h, v20.8h , v22.8h // a + 2*b + c for each 3-sample window + sqrshrun v3.8b, v24.8h, #2 // rounded >> 2 + st1 {v3.s}[0], [x1], x3 + ext v4.8b, v3.8b , v3.8b , #1 + st1 {v4.s}[0], [x1], x3 + st1 {v3.h}[1], [x1], #2 + st1 {v3.h}[2], [x1], x5 + st1 {v4.h}[1], [x1], #2 + st1 {v4.h}[2], [x1] + +end_func_diag_dl: + + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_4x4_mode_diag_dr +//* +//* @brief +//* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right +//* +//* @par Description: +//* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* None +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd (not read by this function) +// x3 => dst_strd (sign-extended from w3 on entry) +// x4 => ui_neighboravailability (not read by this function) + + + .global ih264_intra_pred_luma_4x4_mode_diag_dr_av8 + +ih264_intra_pred_luma_4x4_mode_diag_dr_av8: + + push_v_regs + stp x19, x20, [sp, #-16]! 
+ sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before it is used in 64-bit address arithmetic + + ld1 {v0.8b}, [x0] // 8 bytes at pu1_src + add x0, x0, #1 + ld1 {v1.8b}, [x0] // 8 bytes at pu1_src + 1 + ext v2.8b, v1.8b , v1.8b , #1 + uaddl v20.8h, v0.8b, v1.8b + uaddl v22.8h, v1.8b, v2.8b + add v24.8h, v20.8h , v22.8h // a + 2*b + c for each 3-sample window + sqrshrun v3.8b, v24.8h, #2 // rounded >> 2 + + ext v4.8b, v3.8b , v3.8b , #1 + sub x5, x3, #2 // x5 = dst_strd - 2: moves to the next row after a 2-byte store + st1 {v4.h}[1], [x1], #2 + st1 {v4.h}[2], [x1], x5 + st1 {v3.h}[1], [x1], #2 + st1 {v3.h}[2], [x1], x5 + st1 {v4.s}[0], [x1], x3 + st1 {v3.s}[0], [x1], x3 + +end_func_diag_dr: + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_4x4_mode_vert_r +//* +//* @brief +//* Perform Intra prediction for luma_4x4 mode:Vertical_Right +//* +//* @par Description: +//* Perform Intra prediction for luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* None +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd (not read by this function) +// x3 => dst_strd (sign-extended from w3 on entry) +// x4 => ui_neighboravailability (not read by this function) + + + .global ih264_intra_pred_luma_4x4_mode_vert_r_av8 + +ih264_intra_pred_luma_4x4_mode_vert_r_av8: + + push_v_regs + stp x19, x20, [sp, #-16]! 
+ sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before it is used in 64-bit address arithmetic + + ld1 {v0.8b}, [x0] // 8 bytes at pu1_src + add x0, x0, #1 + ld1 {v1.8b}, [x0] // 8 bytes at pu1_src + 1 + ext v2.8b, v1.8b , v1.8b , #1 + uaddl v20.8h, v0.8b, v1.8b + uaddl v22.8h, v1.8b, v2.8b + add v24.8h, v20.8h , v22.8h + sqrshrun v4.8b, v20.8h, #1 // 2-tap: (a + b + 1) >> 1 + sqrshrun v3.8b, v24.8h, #2 // 3-tap: (a + 2*b + c + 2) >> 2 + sub x5, x3, #2 + ext v5.8b, v3.8b , v3.8b , #3 + st1 {v4.s}[1], [x1], x3 + st1 {v5.s}[0], [x1], x3 + sub x8, x3, #3 // x8 = dst_strd - 3: moves to the next row after 1 + 2 bytes were stored + st1 {v3.b}[2], [x1], #1 + st1 {v4.h}[2], [x1], #2 + st1 {v4.b}[6], [x1], x8 + st1 {v3.b}[1], [x1], #1 + st1 {v5.h}[0], [x1], #2 + st1 {v5.b}[2], [x1] + + +end_func_vert_r: + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_4x4_mode_horz_d +//* +//* @brief +//* Perform Intra prediction for luma_4x4 mode:Horizontal_Down +//* +//* @par Description: +//* Perform Intra prediction for luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* None +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd (not read by this function) +// x3 => dst_strd (sign-extended from w3 on entry) +// x4 => ui_neighboravailability (not read by this function) + + + .global ih264_intra_pred_luma_4x4_mode_horz_d_av8 + +ih264_intra_pred_luma_4x4_mode_horz_d_av8: + + push_v_regs + stp x19, x20, [sp, #-16]! 
+ sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before it is used in 64-bit address arithmetic + ld1 {v0.8b}, [x0] // 8 bytes at pu1_src + add x0, x0, #1 + ld1 {v1.8b}, [x0] // 8 bytes at pu1_src + 1 + ext v2.8b, v1.8b , v0.8b , #1 + uaddl v20.8h, v0.8b, v1.8b + uaddl v22.8h, v1.8b, v2.8b + add v24.8h, v20.8h , v22.8h + sqrshrun v4.8b, v20.8h, #1 // 2-tap: (a + b + 1) >> 1 + sqrshrun v5.8b, v24.8h, #2 // 3-tap: (a + 2*b + c + 2) >> 2 + sub x5, x3, #2 // x5 = dst_strd - 2: moves to the next row after a 2-byte store + mov v6.8b, v5.8b + trn1 v10.8b, v4.8b, v5.8b + trn2 v5.8b, v4.8b, v5.8b // + mov v4.8b, v10.8b + st1 {v5.h}[1], [x1], #2 + st1 {v6.h}[2], [x1], x5 + st1 {v4.h}[1], [x1], #2 + st1 {v5.h}[1], [x1], x5 + st1 {v5.h}[0], [x1], #2 + st1 {v4.h}[1], [x1], x5 + st1 {v4.h}[0], [x1], #2 + st1 {v5.h}[0], [x1], x5 + +end_func_horz_d: + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_4x4_mode_vert_l +//* +//* @brief +//* Perform Intra prediction for luma_4x4 mode:Vertical_Left +//* +//* @par Description: +//* Perform Intra prediction for luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* None +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd (not read by this function) +// x3 => dst_strd (sign-extended from w3 on entry) +// x4 => ui_neighboravailability (not read by this function) + + + .global ih264_intra_pred_luma_4x4_mode_vert_l_av8 + +ih264_intra_pred_luma_4x4_mode_vert_l_av8: + sxtw x3, w3 // dst_strd is a 32-bit argument: sign-extend before using x3 as a 64-bit offset + push_v_regs + stp x19, x20, [sp, #-16]! 
+ add x0, x0, #4 + ld1 {v0.8b}, [x0] // 8 bytes at pu1_src + 4 + add x0, x0, #1 + ld1 {v1.8b}, [x0] // 8 bytes at pu1_src + 5 + ext v2.8b, v1.8b , v0.8b , #1 + uaddl v20.8h, v0.8b, v1.8b + uaddl v22.8h, v1.8b, v2.8b + add v24.8h, v20.8h , v22.8h + sqrshrun v4.8b, v20.8h, #1 // 2-tap: (a + b + 1) >> 1 + sqrshrun v5.8b, v24.8h, #2 // 3-tap: (a + 2*b + c + 2) >> 2 + ext v6.8b, v4.8b , v4.8b , #1 + ext v7.8b, v5.8b , v5.8b , #1 + st1 {v6.s}[0], [x1], x3 // row 0 + ext v8.8b, v4.8b , v4.8b , #2 + ext v9.8b, v5.8b , v5.8b , #2 + st1 {v7.s}[0], [x1], x3 // row 1 + st1 {v8.s}[0], [x1], x3 // row 2 + st1 {v9.s}[0], [x1], x3 // row 3 + +end_func_vert_l: + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_4x4_mode_horz_u +//* +//* @brief +//* Perform Intra prediction for luma_4x4 mode:Horizontal_Up +//* +//* @par Description: +//* Perform Intra prediction for luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_4x4_mode_horz_u_av8 + +ih264_intra_pred_luma_4x4_mode_horz_u_av8: + + push_v_regs + stp x19, x20, [sp, #-16]! 
+ mov x10, x0 + ld1 {v0.8b}, [x0] + ldrb w9, [x0], #1 + sxtw x9, w9 + ext v1.8b, v0.8b , v0.8b , #1 + ld1 {v0.b}[7], [x10] + ext v2.8b, v1.8b , v1.8b , #1 + uaddl v20.8h, v0.8b, v1.8b + uaddl v22.8h, v1.8b, v2.8b + add v24.8h, v20.8h , v22.8h + sqrshrun v4.8b, v20.8h, #1 + sqrshrun v5.8b, v24.8h, #2 + mov v6.8b, v4.8b + ext v6.8b, v5.8b , v4.8b , #1 + st1 {v4.b}[2], [x1], #1 + st1 {v6.b}[0], [x1], #1 + trn1 v10.8b, v6.8b, v5.8b + trn2 v5.8b, v6.8b, v5.8b // + mov v6.8b , v10.8b + sub x5, x3, #2 + trn1 v10.8b, v4.8b, v6.8b + trn2 v6.8b, v4.8b, v6.8b // + mov v4.8b , v10.8b + dup v7.8b, w9 + st1 {v6.h}[0], [x1], x5 + st1 {v6.h}[0], [x1], #2 + st1 {v5.h}[3], [x1], x5 + st1 {v5.h}[3], [x1], #2 + st1 {v7.h}[3], [x1], x5 + st1 {v7.s}[0], [x1], x3 + +end_func_horz_u: + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + diff --git a/common/armv8/ih264_intra_pred_luma_8x8_av8.s b/common/armv8/ih264_intra_pred_luma_8x8_av8.s new file mode 100755 index 0000000..2b972ca --- /dev/null +++ b/common/armv8/ih264_intra_pred_luma_8x8_av8.s @@ -0,0 +1,1084 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_intra_pred_luma_8x8_av8.s +//* +//* @brief +//* Contains function definitions for intra 8x8 Luma prediction . +//* +//* @author +//* Ittiam +//* +//* @par List of Functions: +//* +//* -ih264_intra_pred_luma_8x8_mode_vert_av8 +//* -ih264_intra_pred_luma_8x8_mode_horz_av8 +//* -ih264_intra_pred_luma_8x8_mode_dc_av8 +//* -ih264_intra_pred_luma_8x8_mode_diag_dl_av8 +//* -ih264_intra_pred_luma_8x8_mode_diag_dr_av8 +//* -ih264_intra_pred_luma_8x8_mode_vert_r_av8 +//* -ih264_intra_pred_luma_8x8_mode_horz_d_av8 +//* -ih264_intra_pred_luma_8x8_mode_vert_l_av8 +//* -ih264_intra_pred_luma_8x8_mode_horz_u_av8 +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +///* All the functions here are replicated from ih264_intra_pred_filters.c +// + +///** +///** +///** + +.text +.p2align 2 +.include "ih264_neon_macros.s" + +.extern ih264_gai1_intrapred_luma_8x8_horz_u + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_8x8_mode_vert +//* +//* @brief +//* Perform Intra prediction for luma_8x8 mode:vertical +//* +//* @par Description: +//* Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels(Not used in this function) +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, 
+// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_8x8_mode_vert_av8 + +ih264_intra_pred_luma_8x8_mode_vert_av8: + + // STMFD sp!, {x4-x12, x14} //store register values to stack + push_v_regs + //stp x19, x20,[sp,#-16]! + + add x0, x0, #9 //x0 = pu1_src + 9 + ld1 {v0.8b}, [x0] //v0 = the 8 bytes pu1_src[9..16]; x2 and x4 are never read in this function + + st1 {v0.8b}, [x1], x3 //row 0: every store writes the same 8 bytes, then advances x1 by dst_strd + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 //row 7 + + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + //ldp x19, x20,[sp],#16 + pop_v_regs + ret + + + + + +///****************************************************************************** + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_8x8_mode_horz +//* +//* @brief +//* Perform Intra prediction for luma_8x8 mode:horizontal +//* +//* @par Description: +//* Perform Intra prediction for luma_8x8 mode:horizontal ,described in sec 8.3.2.2.2 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels(Not used in this function) +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ +//void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => 
*pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_8x8_mode_horz_av8 + +ih264_intra_pred_luma_8x8_mode_horz_av8: + + + + // STMFD sp!, {x4-x12, x14} //store register values to stack + push_v_regs + stp x19, x20, [sp, #-16]! //x19/x20 are saved and restored but not otherwise used here + add x0, x0, #7 //x0 = &pu1_src[7]; the eight loads below walk down to pu1_src[0] + mov x2 , #-1 //x2 is not read again in this function + + ldrb w5, [x0], #-1 //load one byte, then post-decrement x0 + sxtw x5, w5 //ldrb already zero-extends, so the value is unchanged + ldrb w6, [x0], #-1 + sxtw x6, w6 + dup v0.8b, w5 //replicate the byte across an 8-byte row + st1 {v0.8b}, [x1], x3 //row 0 = pu1_src[7] repeated; x1 advances by dst_strd + ldrb w7, [x0], #-1 + sxtw x7, w7 + dup v1.8b, w6 + st1 {v1.8b}, [x1], x3 //row 1 = pu1_src[6] repeated + dup v2.8b, w7 + ldrb w8, [x0], #-1 + sxtw x8, w8 + dup v3.8b, w8 + st1 {v2.8b}, [x1], x3 //row 2 = pu1_src[5] repeated + ldrb w5, [x0], #-1 + sxtw x5, w5 + st1 {v3.8b}, [x1], x3 //row 3 = pu1_src[4] repeated + dup v0.8b, w5 + ldrb w6, [x0], #-1 + sxtw x6, w6 + st1 {v0.8b}, [x1], x3 //row 4 = pu1_src[3] repeated + ldrb w7, [x0], #-1 + sxtw x7, w7 + dup v1.8b, w6 + dup v2.8b, w7 + st1 {v1.8b}, [x1], x3 //row 5 = pu1_src[2] repeated + ldrb w8, [x0], #-1 + sxtw x8, w8 + dup v3.8b, w8 + st1 {v2.8b}, [x1], x3 //row 6 = pu1_src[1] repeated + st1 {v3.8b}, [x1], x3 //row 7 = pu1_src[0] repeated + + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + + + +///****************************************************************************** + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_8x8_mode_dc +//* +//* @brief +//* Perform Intra prediction for luma_8x8 mode:DC +//* +//* @par Description: +//* Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.3 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// 
WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_8x8_mode_dc_av8 + +ih264_intra_pred_luma_8x8_mode_dc_av8: + + + + // STMFD sp!, {x4-x12, x14} //store register values to stack + push_v_regs + stp x19, x20, [sp, #-16]! //x19/x20 are saved and restored but not otherwise used here + + ands x6, x4, #0x01 //bit 0 of ui_neighboravailability: left available + beq top_available //LEFT NOT AVAILABLE + + add x10, x0, #7 //x10 = &pu1_src[7]; the 8 left bytes are summed from here down to pu1_src[0] + mov x2, #-1 //x2 is not read again in this function + ldrb w5, [x10], #-1 + sxtw x5, w5 //ldrb already zero-extends, so the value is unchanged + ldrb w6, [x10], #-1 + sxtw x6, w6 + ldrb w7, [x10], #-1 + sxtw x7, w7 + add x5, x5, x6 //x5 accumulates the sum of the left bytes + ldrb w8, [x10], #-1 + sxtw x8, w8 + add x5, x5, x7 + ldrb w6, [x10], #-1 + sxtw x6, w6 + add x5, x5, x8 + ldrb w7, [x10], #-1 + sxtw x7, w7 + add x5, x5, x6 + ldrb w8, [x10], #-1 + sxtw x8, w8 + add x5, x5, x7 + ands x11, x4, #0x04 // CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE (flags are consumed by the beq below; the add/ldrb/sxtw in between do not alter them) + add x5, x5, x8 + ldrb w6, [x10], #-1 + sxtw x6, w6 + add x5, x5, x6 //x5 = sum of pu1_src[0..7] + beq left_available + add x10, x0, #9 //x10 = &pu1_src[9], the 8 top bytes + // BOTH LEFT AND TOP AVAILABLE + ld1 {v0.8b}, [x10] + uaddlp v1.4h, v0.8b //pairwise widening adds reduce the 8 top bytes ... + uaddlp v3.2s, v1.4h + uaddlp v2.1d, v3.2s //... to a single sum in v2.d[0] + dup v10.8h, w5 //left sum in every lane + dup v8.8h, v2.h[0] //top sum in every lane + add v12.8h, v8.8h , v10.8h + sqrshrun v31.8b, v12.8h, #4 //(left + top + 8) >> 4 + st1 {v31.8b}, [x1], x3 //8 identical rows; x1 advances by dst_strd per store + st1 {v31.8b}, [x1], x3 + st1 {v31.8b}, [x1], x3 + st1 {v31.8b}, [x1], x3 + st1 {v31.8b}, [x1], x3 + st1 {v31.8b}, [x1], x3 + st1 {v31.8b}, [x1], x3 + st1 {v31.8b}, [x1], x3 + b end_func + +top_available: // ONLY TOP AVAILABLE + ands x11, x4, #0x04 // CHECKING TOP AVAILABILITY OR ELSE BRANCH TO NONE AVAILABLE + beq none_available + + add x10, x0, #9 //x10 = &pu1_src[9], the 8 top bytes + ld1 {v10.8b}, [x10] + uaddlp v14.4h, v10.8b + uaddlp v13.2s, v14.4h + uaddlp v12.1d, v13.2s //v12.d[0] = sum of the 8 top bytes + rshrn v4.8b, v12.8h, #3 //(top + 4) >> 3 in lane 0 + dup v31.8b, v4.b[0] + st1 {v31.8b}, [x1], x3 + st1 {v31.8b}, [x1], x3 + st1 {v31.8b}, [x1], x3 + st1 {v31.8b}, [x1], x3 + st1 {v31.8b}, [x1], x3 + st1 {v31.8b}, [x1], x3 + st1 
{v31.8b}, [x1], x3 + st1 {v31.8b}, [x1], x3 + b end_func + + +left_available: //ONLY LEFT AVAILABLE + add x5, x5, #4 + lsr x5, x5, #3 //(left + 4) >> 3 + dup v0.8b, w5 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + b end_func + +none_available: //NONE AVAILABLE + mov x9, #128 //fixed fill value used when neither neighbour is available + dup v0.8b, w9 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + st1 {v0.8b}, [x1], x3 + + +end_func: + + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_8x8_mode_diag_dl +//* +//* @brief +//* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left +//* +//* @par Description: +//* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.4 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + .global ih264_intra_pred_luma_8x8_mode_diag_dl_av8 + 
+ih264_intra_pred_luma_8x8_mode_diag_dl_av8: + + // STMFD sp!, {x4-x12, x14} //store register values to stack + push_v_regs + stp x19, x20, [sp, #-16]! //x19/x20 are saved and restored but not otherwise used here + + add x0, x0, #9 //x0 = pu1_src + 9 + sub x5, x3, #4 //x5 = dst_strd - 4: row step after the first half of a 4+4 byte split store + add x6, x0, #15 //x6 = address of the last of the 16 bytes loaded below + ld1 { v0.16b}, [x0] + mov v1.d[0], v0.d[1] //v1 = upper 8 bytes of v0 + ext v4.16b, v0.16b , v0.16b , #2 //v4[i] = v0[i+2], wrapping at the end + mov v5.d[0], v4.d[1] + ext v2.16b, v0.16b , v0.16b , #1 //v2[i] = v0[i+1], wrapping at the end + mov v3.d[0], v2.d[1] + ld1 {v5.b}[6], [x6] //lane 14 of the two-byte-shifted data: use the last loaded byte instead of the wrapped-around v0[0] + // v2 = v0 shifted by one byte + // v4 = v0 shifted by two bytes + uaddl v20.8h, v0.8b, v2.8b //Adding for FILT121 + uaddl v22.8h, v1.8b, v3.8b + uaddl v24.8h, v2.8b, v4.8b + uaddl v26.8h, v3.8b, v5.8b + add v24.8h, v20.8h , v24.8h //a + 2b + c per lane + add v26.8h, v22.8h , v26.8h + + sqrshrun v4.8b, v24.8h, #2 //(a + 2b + c + 2) >> 2 + sqrshrun v5.8b, v26.8h, #2 + mov v4.d[1], v5.d[0] + //v4 (all 16 lanes) has the FILT121 values + st1 {v4.8b}, [x1], x3 //row 0 = FILT121[0..7] + ext v18.16b, v4.16b , v4.16b , #1 + ext v16.16b, v18.16b , v18.16b , #1 + st1 {v18.8b}, [x1], x3 //row 1 = FILT121[1..8] + ext v14.16b, v16.16b , v16.16b , #1 + st1 {v16.8b}, [x1], x3 //row 2 = FILT121[2..9] + st1 {v14.8b}, [x1], x3 //row 3 = FILT121[3..10] + st1 {v4.s}[1], [x1], #4 //row 4 = FILT121[4..11], written as two 4-byte halves + st1 {v5.s}[0], [x1], x5 + st1 {v18.s}[1], [x1], #4 //row 5 = FILT121[5..12] + st1 {v18.s}[2], [x1], x5 + st1 {v16.s}[1], [x1], #4 //row 6 = FILT121[6..13] + st1 {v16.s}[2], [x1], x5 + st1 {v14.s}[1], [x1], #4 //row 7 = FILT121[7..14] + st1 {v14.s}[2], [x1], x5 + + +end_func_diag_dl: + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_8x8_mode_diag_dr +//* +//* @brief +//* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right +//* +//* @par Description: +//* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.5 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of 
neighbouring pixels +//* +//* @returns +//* +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_8x8_mode_diag_dr_av8 + +ih264_intra_pred_luma_8x8_mode_diag_dr_av8: + + // STMFD sp!, {x4-x12, x14} //store register values to stack + push_v_regs + stp x19, x20, [sp, #-16]! //x19/x20 are saved and restored but not otherwise used here + + + ld1 { v0.16b}, [x0] //v0 = pu1_src[0..15] + mov v1.d[0], v0.d[1] //v1 = upper 8 bytes of v0 + add x0, x0, #1 + ld1 { v2.16b}, [x0] //v2 = pu1_src[1..16] + mov v3.d[0], v2.d[1] + ext v4.16b, v2.16b , v2.16b , #1 //v4[i] = v2[i+1], wrapping at the end + mov v5.d[0], v4.d[1] + // v2 = source one byte further on than v0 + // v4 = v2 shifted by one more byte + uaddl v20.8h, v0.8b, v2.8b //Adding for FILT121 + uaddl v22.8h, v1.8b, v3.8b + uaddl v24.8h, v2.8b, v4.8b + uaddl v26.8h, v3.8b, v5.8b + add v24.8h, v20.8h , v24.8h //a + 2b + c per lane + add v26.8h, v22.8h , v26.8h + sqrshrun v4.8b, v24.8h, #2 //(a + 2b + c + 2) >> 2 + sqrshrun v5.8b, v26.8h, #2 + mov v4.d[1], v5.d[0] + //v4 (all 16 lanes) has the FILT121 values + sub x5, x3, #4 //x5 = dst_strd - 4: row step after the first half of a 4+4 byte split store + ext v18.16b, v4.16b , v4.16b , #15 //v18[i] = v4[i-1] + st1 {v18.d}[1], [x1], x3 //row 0 = FILT121[7..14] + ext v16.16b, v18.16b , v18.16b , #15 + st1 {v16.d}[1], [x1], x3 //row 1 = FILT121[6..13] + ext v14.16b, v16.16b , v16.16b , #15 + st1 {v14.d}[1], [x1], x3 //row 2 = FILT121[5..12] + st1 {v4.s}[1], [x1], #4 //row 3 = FILT121[4..11], written as two 4-byte halves + st1 {v5.s}[0], [x1], x5 + st1 {v18.s}[1], [x1], #4 //row 4 = FILT121[3..10] + st1 {v18.s}[2], [x1], x5 + st1 {v16.s}[1], [x1], #4 //row 5 = FILT121[2..9] + st1 {v16.s}[2], [x1], x5 + st1 {v14.s}[1], [x1], #4 //row 6 = FILT121[1..8] + st1 {v14.s}[2], [x1], x5 + st1 {v4.8b}, [x1], x3 //row 7 = FILT121[0..7] + +end_func_diag_dr: + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_8x8_mode_vert_r +//* 
+//* @brief +//* Perform Intra prediction for luma_8x8 mode:Vertical_Right +//* +//* @par Description: +//* Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.6 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_8x8_mode_vert_r_av8 + +ih264_intra_pred_luma_8x8_mode_vert_r_av8: + + // STMFD sp!, {x4-x12, x14} //store register values to stack + push_v_regs + stp x19, x20, [sp, #-16]! //x19/x20 are saved and restored but not otherwise used here 
+ + ld1 { v0.16b}, [x0] //v0 = pu1_src[0..15] + mov v1.d[0], v0.d[1] //v1 = upper 8 bytes of v0 + add x0, x0, #1 + ld1 { v2.16b}, [x0] //v2 = pu1_src[1..16] + mov v3.d[0], v2.d[1] + ext v4.16b, v2.16b , v2.16b , #1 //v4[i] = v2[i+1], wrapping at the end + mov v5.d[0], v4.d[1] + // v2 = source one byte further on than v0 + // v4 = v2 shifted by one more byte + uaddl v20.8h, v0.8b, v2.8b //a + b (kept for FILT11) + uaddl v22.8h, v1.8b, v3.8b + uaddl v24.8h, v2.8b, v4.8b + uaddl v26.8h, v3.8b, v5.8b + add v24.8h, v20.8h , v24.8h //a + 2b + c (for FILT121) + add v26.8h, v22.8h , v26.8h + + sqrshrun v4.8b, v20.8h, #1 //(a + b + 1) >> 1 + sqrshrun v5.8b, v22.8h, #1 + mov v4.d[1], v5.d[0] + sqrshrun v6.8b, v24.8h, #2 //(a + 2b + c + 2) >> 2 + sqrshrun v7.8b, v26.8h, #2 + mov v6.d[1], v7.d[0] + //v4 has all FILT11 values + //v6 has all FILT121 values + sub x5, x3, #6 //x5 = dst_strd - 6: row step after 2+4 bytes of a split store + sub x6, x3, #4 //x6 = dst_strd - 4: row step after 4 bytes of a split store + st1 {v5.8b}, [x1], x3 // row 0 + ext v18.16b, v6.16b , v6.16b , #15 + mov v22.16b , v18.16b //v20/v22 are reused as byte vectors from here on + ext v16.16b, v4.16b , v4.16b , #1 + st1 {v18.d}[1], [x1], x3 //row 1 + mov v14.16b , v16.16b + ext v20.16b, v4.16b , v4.16b , #15 + uzp1 v17.16b, v16.16b, v18.16b + uzp2 v18.16b, v16.16b, v18.16b + mov v16.16b , v17.16b + //row 2 + ext v12.16b, v16.16b , v16.16b , #1 + st1 {v20.d}[1], [x1] //8 bytes, x1 not advanced ... + st1 {v6.b}[6], [x1], x3 //... then byte 0 of the row is overwritten and x1 steps to the next row + //row 3 + + st1 {v12.h}[5], [x1], #2 + st1 {v6.s}[2], [x1], #4 + st1 {v6.h}[6], [x1], x5 + //row 4 + st1 {v18.h}[5], [x1], #2 + st1 {v4.s}[2], [x1], #4 + st1 {v4.h}[6], [x1], x5 + //row 5 + ext v26.16b, v18.16b , v18.16b , #1 + st1 {v16.h}[5], [x1], #2 + st1 {v22.s}[2], [x1], #4 + st1 {v22.h}[6], [x1], x5 + //row 6 + st1 {v26.h}[4], [x1], #2 + st1 {v26.b}[10], [x1], #1 + st1 {v4.b}[8], [x1], #1 + st1 {v14.s}[2], [x1], x6 + //row 7 + st1 {v12.s}[2], [x1], #4 + st1 {v6.s}[2], [x1], #4 + +end_func_vert_r: + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_8x8_mode_horz_d +//* +//* @brief +//* Perform Intra prediction for luma_8x8 mode:Horizontal_Down +//* +//* @par Description: +//* Perform Intra prediction for luma_8x8 
mode:Horizontal_Down ,described in sec 8.3.2.2.7 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + .global ih264_intra_pred_luma_8x8_mode_horz_d_av8 + +ih264_intra_pred_luma_8x8_mode_horz_d_av8: + + // STMFD sp!, {x4-x12, x14} //store register values to stack + push_v_regs + stp x19, x20, [sp, #-16]! //x19/x20 are saved and restored but not otherwise used here 
+ + ld1 { v0.16b}, [x0] //v0 = pu1_src[0..15] + mov v1.d[0], v0.d[1] //v1 = upper 8 bytes of v0 + add x0, x0, #1 + ld1 { v2.16b}, [x0] //v2 = pu1_src[1..16] + mov v3.d[0], v2.d[1] + ext v4.16b, v2.16b , v2.16b , #1 //v4[i] = v2[i+1], wrapping at the end + mov v5.d[0], v4.d[1] + // v2 = source one byte further on than v0 + // v4 = v2 shifted by one more byte + uaddl v20.8h, v0.8b, v2.8b //a + b (kept for FILT11) + uaddl v22.8h, v1.8b, v3.8b + uaddl v24.8h, v2.8b, v4.8b + uaddl v26.8h, v3.8b, v5.8b + add v24.8h, v20.8h , v24.8h //a + 2b + c (for FILT121) + add v26.8h, v22.8h , v26.8h + + sqrshrun v4.8b, v20.8h, #1 //(a + b + 1) >> 1 + sqrshrun v5.8b, v22.8h, #1 + mov v4.d[1], v5.d[0] + sqrshrun v6.8b, v24.8h, #2 //(a + 2b + c + 2) >> 2 + sqrshrun v7.8b, v26.8h, #2 + mov v6.d[1], v7.d[0] + //v4 has all FILT11 values + //v6 has all FILT121 values + mov v8.16b, v4.16b + mov v10.16b, v6.16b + sub x6, x3, #6 //x6 = dst_strd - 6: row step after 6 bytes of a split store + trn1 v9.16b, v8.16b, v10.16b //byte-interleave the even lanes of FILT11 and FILT121 + trn2 v10.16b, v8.16b, v10.16b // + mov v8.16b, v9.16b + mov v12.16b, v8.16b + mov v14.16b, v10.16b + sub x5, x3, #4 //x5 = dst_strd - 4: row step after 4 bytes of a split store + trn1 v13.8h, v12.8h, v14.8h + trn2 v14.8h, v12.8h, v14.8h + mov v12.16b, v13.16b + ext v16.16b, v6.16b , v6.16b , #14 + //ROW 0 + st1 {v16.d}[1], [x1] //8 bytes, x1 not advanced ... + st1 {v10.h}[3], [x1], x3 //... then the first 2 bytes of the row are overwritten and x1 steps to the next row + + //ROW 1 + st1 {v14.s}[1], [x1], #4 + st1 {v6.s}[2], [x1], x5 + //ROW 2 + st1 {v10.h}[2], [x1], #2 + st1 {v14.s}[1], [x1], #4 + st1 {v7.h}[0], [x1], x6 + //ROW 3 + st1 {v12.s}[1], [x1], #4 + st1 {v14.s}[1], [x1], x5 + //ROW 4 + st1 {v14.h}[1], [x1], #2 + st1 {v12.s}[1], [x1], #4 + st1 {v14.h}[2], [x1], x6 + //ROW 5 + st1 {v14.s}[0], [x1], #4 + st1 {v12.s}[1], [x1], x5 + //ROW 6 + st1 {v10.h}[0], [x1], #2 + st1 {v8.h}[1], [x1], #2 + st1 {v14.h}[1], [x1], #2 + st1 {v12.h}[2], [x1], x6 + //ROW 7 + st1 {v12.s}[0], [x1], #4 + st1 {v14.s}[0], [x1], x5 + +end_func_horz_d: + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_8x8_mode_vert_l +//* +//* @brief +//* Perform Intra prediction for luma_8x8 mode:Vertical_Left +//* +//* @par Description: +//* Perform Intra prediction for luma_8x8 
mode:Vertical_Left ,described in sec 8.3.2.2.8 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_8x8_mode_vert_l_av8 + +ih264_intra_pred_luma_8x8_mode_vert_l_av8: + + // STMFD sp!, {x4-x12, x14} //store register values to stack + push_v_regs + stp x19, x20, [sp, #-16]! //x19/x20 are saved and restored but not otherwise used here 
+ add x0, x0, #9 //x0 = pu1_src + 9 + ld1 { v0.16b}, [x0] //v0 = pu1_src[9..24] + mov v1.d[0], v0.d[1] //v1 = upper 8 bytes of v0 + add x0, x0, #1 + ld1 { v2.16b}, [x0] //v2 = pu1_src[10..25] + mov v3.d[0], v2.d[1] + ext v4.16b, v2.16b , v2.16b , #1 //v4[i] = v2[i+1], wrapping at the end + mov v5.d[0], v4.d[1] + uaddl v20.8h, v0.8b, v2.8b //a + b (kept for FILT11) + uaddl v22.8h, v1.8b, v3.8b + uaddl v24.8h, v2.8b, v4.8b + uaddl v26.8h, v3.8b, v5.8b + add v24.8h, v20.8h , v24.8h //a + 2b + c (for FILT121) + add v26.8h, v22.8h , v26.8h + + sqrshrun v4.8b, v20.8h, #1 //(a + b + 1) >> 1 + sqrshrun v5.8b, v22.8h, #1 + mov v4.d[1], v5.d[0] + sqrshrun v6.8b, v24.8h, #2 //(a + 2b + c + 2) >> 2 + ext v8.16b, v4.16b , v4.16b , #1 //v8 = FILT11 shifted by one byte + sqrshrun v7.8b, v26.8h, #2 + mov v6.d[1], v7.d[0] + //v4 has all FILT11 values + //v6 has all FILT121 values + + ext v10.16b, v6.16b , v6.16b , #1 //v10 = FILT121 shifted by one byte + //ROW 0,1 + st1 {v4.8b}, [x1], x3 //FILT11[0..7] + st1 {v6.8b}, [x1], x3 //FILT121[0..7] + + ext v12.16b, v8.16b , v8.16b , #1 + ext v14.16b, v10.16b , v10.16b , #1 + //ROW 2,3 + st1 {v8.8b}, [x1], x3 //FILT11[1..8] + st1 {v10.8b}, [x1], x3 //FILT121[1..8] + + ext v16.16b, v12.16b , v12.16b , #1 + ext v18.16b, v14.16b , v14.16b , #1 + //ROW 4,5 + st1 {v12.8b}, [x1], x3 //FILT11[2..9] + st1 {v14.8b}, [x1], x3 //FILT121[2..9] + //ROW 6,7 + st1 {v16.8b}, [x1], x3 //FILT11[3..10] + st1 {v18.8b}, [x1], x3 //FILT121[3..10] + +end_func_vert_l: + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_8x8_mode_horz_u +//* +//* @brief +//* Perform Intra prediction for luma_8x8 mode:Horizontal_Up +//* +//* @par Description: +//* Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.9 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* +//* @remarks +//* None +//* +//*******************************************************************************/ 
+//void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + .global ih264_intra_pred_luma_8x8_mode_horz_u_av8 + +ih264_intra_pred_luma_8x8_mode_horz_u_av8: + + // STMFD sp!, {x4-x12, x14} //store register values to stack + push_v_regs + stp x19, x20, [sp, #-16]! //x19/x20 are saved and restored but not otherwise used here + + ld1 {v0.8b}, [x0] //v0[0..7] = pu1_src[0..7] + ld1 {v1.b}[7], [x0] //v1[7] = pu1_src[0]; lanes 0-6 of v1 are not written here + mov v0.d[1], v1.d[0] //v0[15] = pu1_src[0]; v0[8..14] hold stale register contents - NOTE(review): presumably the lookup table never selects results derived from them, confirm against ih264_gai1_intrapred_luma_8x8_horz_u + ext v2.16b, v0.16b , v0.16b , #1 //v2[i] = v0[i+1], wrapping at the end + mov v3.d[0], v2.d[1] + ext v4.16b, v2.16b , v2.16b , #1 //v4[i] = v0[i+2], wrapping at the end + mov v5.d[0], v4.d[1] + + adrp x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u //x12 = address of the index table, fetched through the GOT + ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u] + uaddl v20.8h, v0.8b, v2.8b //a + b (kept for FILT11) + uaddl v22.8h, v1.8b, v3.8b + uaddl v24.8h, v2.8b, v4.8b + uaddl v26.8h, v3.8b, v5.8b + add v24.8h, v20.8h , v24.8h //a + 2b + c (for FILT121) + add v26.8h, v22.8h , v26.8h + ld1 { v10.16b}, [x12] //16 table bytes, used below as tbl indices + mov v11.d[0], v10.d[1] + sqrshrun v4.8b, v20.8h, #1 //(a + b + 1) >> 1 + sqrshrun v5.8b, v22.8h, #1 + mov v4.d[1], v5.d[0] + sqrshrun v6.8b, v24.8h, #2 //(a + 2b + c + 2) >> 2 + sqrshrun v7.8b, v26.8h, #2 + mov v6.d[1], v7.d[0] + //v4 has all FILT11 values + //v6 has all FILT121 values + mov v30.16b, v4.16b //tbl needs consecutive registers: {v30, v31} = {FILT11, FILT121} + mov v31.16b, v6.16b + tbl v12.8b, {v30.16b, v31.16b}, v10.8b + dup v14.16b, v5.b[7] //v14 = last FILT11 byte in every lane; shifted in by the ext instructions below + tbl v13.8b, {v30.16b, v31.16b}, v11.8b + mov v12.d[1], v13.d[0] + ext v16.16b, v12.16b , v14.16b , #2 //each later row is the previous one advanced by 2 bytes + ext v18.16b, v16.16b , v14.16b , #2 + st1 {v12.8b}, [x1], x3 //0 + ext v20.16b, v18.16b , v14.16b , #2 + st1 {v16.8b}, [x1], x3 //1 + st1 {v18.8b}, [x1], x3 //2 + st1 {v20.8b}, [x1], x3 //3 + st1 {v13.8b}, [x1], x3 //4 + st1 {v16.d}[1], [x1], x3 //5 + st1 {v18.d}[1], [x1], x3 //6 + st1 {v20.d}[1], [x1], x3 //7 + + +end_func_horz_u: + // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + diff --git 
a/common/armv8/ih264_iquant_itrans_recon_av8.s b/common/armv8/ih264_iquant_itrans_recon_av8.s new file mode 100755 index 0000000..4c83036 --- /dev/null +++ b/common/armv8/ih264_iquant_itrans_recon_av8.s @@ -0,0 +1,778 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +//*/ +///** +///******************************************************************************* +// * //file +// * ih264_iquant_itrans_recon_av8.s +// * +// * //brief +// * Contains function definitions for single stage inverse transform +// * +// * //author +// * Parthiban V +// * Mohit +// * Harinarayanaan +// * +// * //par List of Functions: +// * - ih264_iquant_itrans_recon_4x4_av8() +// * - ih264_iquant_itrans_recon_8x8_av8() +// * - ih264_iquant_itrans_recon_chroma_4x4_av8() +// * +// * //remarks +// * None +// * +// ******************************************************************************* + +.text +.p2align 2 +.include "ih264_neon_macros.s" + +///* +// ******************************************************************************* +// * +// * //brief +// * This function performs inverse quant and Inverse transform type Ci4 for 4*4 block +// * +// * //par Description: +// * Performs inverse transform Ci4 and adds the residue to get the +// * reconstructed block +// * +// * //param[in] pi2_src +// * Input 4x4 coefficients +// * +// * //param[in] pu1_pred +// * Prediction 4x4 block +// * +// * //param[out] pu1_out +// * Output 4x4 block +// * +// * //param[in] u4_qp_div_6 +// * QP / 6, used as the left-shift amount during dequantization +// * +// * //param[in] pu2_weigh_mat +// * Pointer to weight matrix +// * +// * //param[in] pred_strd, +// * Prediction stride +// * +// * //param[in] out_strd +// * Output Stride +// * +// * //param[in] pi4_tmp +// * temporary buffer of size 1*16 (not read by this implementation) +// * +// * //param[in] pu2_iscal_mat +// * Pointer to the inverse quantization matrix +// * +// * //returns Void +// * +// * //remarks +// * If iq_start_idx == 1 the DC coefficient is taken from pi2_dc_ld_addr[0] instead of the dequantized pi2_src[0] +// * +// ******************************************************************************* +// */ +//void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src, +// UWORD8 *pu1_pred, +// UWORD8 *pu1_out, +// WORD32 pred_strd, +// WORD32 out_strd, +// const UWORD16 *pu2_iscal_mat, +// const UWORD16 *pu2_weigh_mat, +// UWORD32 u4_qp_div_6, +// WORD32 *pi4_tmp, +// WORD32 iq_start_idx, +// 
WORD16 *pi2_dc_ld_addr) +//**************Variables Vs Registers***************************************** +//x0 => *pi2_src +//x1 => *pu1_pred +//x2 => *pu1_out +//x3 => pred_strd +//x4 => out_strd +//x5 => *pu2_iscal_mat +//x6 => *pu2_weigh_mat +//x7 => u4_qp_div_6 +//stack => pi4_tmp (not read) +//stack => iq_start_idx (read from [sp, #72] after push_v_regs) +//stack => pi2_dc_ld_addr (read from [sp, #80] after push_v_regs) +//Only one shift is done in horizontal inverse because, +//if u4_qp_div_6 is less than 4 then shift value will be negative and do negative left shift, in this case rnd_factor has value +//if u4_qp_div_6 is greater than 4 then shift value will be positive and do left shift, here rnd_factor is 0 + + .global ih264_iquant_itrans_recon_4x4_av8 +ih264_iquant_itrans_recon_4x4_av8: + + push_v_regs + + dup v30.4s, w7 //Populate the u4_qp_div_6 in all four lanes of v30 + + ldr w8, [sp, #72] //Loads iq_start_idx + sxtw x8, w8 + + ldr x10, [sp, #80] //Load alternate dc address + + subs x8, x8, #1 // if iq_start_idx == 1 => intra case , so result of subtraction is zero and z flag is set (tested by the bne after dequant) + + +//=======================DEQUANT FROM HERE=================================== + + ld4 {v20.4h - v23.4h}, [x5] // load pu2_iscal_mat[i], i =0..15 + ld4 {v26.4h - v29.4h}, [x6] // pu2_weigh_mat[i], i =0..15 + ld4 {v16.4h - v19.4h}, [x0] // pi2_src_tmp[i], i =0..15; ld4 de-interleaves, so v16 = elements 0,4,8,12, v17 = 1,5,9,13, ... (the "i = a..b" ranges below only identify the register) + + + mul v20.4h, v20.4h, v26.4h // x[i]=(scale[i] * dequant[i]) where i = 0..3 + mul v21.4h, v21.4h, v27.4h // x[i]=(scale[i] * dequant[i]) where i = 4..7 + mul v22.4h, v22.4h, v28.4h // x[i]=(scale[i] * dequant[i]) where i = 8..11 + mul v23.4h, v23.4h, v29.4h // x[i]=(scale[i] * dequant[i]) where i = 12..15 + + smull v0.4s, v16.4h, v20.4h // q0 = p[i] = (x[i] * trns_coeff[i]) where i = 0..3 + smull v2.4s, v17.4h, v21.4h // q1 = p[i] = (x[i] * trns_coeff[i]) where i = 4..7 + smull v4.4s, v18.4h, v22.4h // q2 = p[i] = (x[i] * trns_coeff[i]) where i = 8..11 + smull v6.4s, v19.4h, v23.4h // q3 = p[i] = (x[i] * trns_coeff[i]) where i = 12..15 + + sshl v0.4s, v0.4s, v30.4s // q0 = q[i] = (p[i] << (qp/6)) where i = 0..3 + sshl v2.4s, v2.4s, 
v30.4s // q1 = q[i] = (p[i] << (qp/6)) where i = 4..7 + sshl v4.4s, v4.4s, v30.4s // q2 = q[i] = (p[i] << (qp/6)) where i = 8..11 + sshl v6.4s, v6.4s, v30.4s // q3 = q[i] = (p[i] << (qp/6)) where i = 12..15 + + sqrshrn v0.4h, v0.4s, #0x4 // d0 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, where i = 0..3 + sqrshrn v1.4h, v2.4s, #0x4 // d1 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, where i = 4..7 + sqrshrn v2.4h, v4.4s, #0x4 // d2 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, where i = 8..11 + sqrshrn v3.4h, v6.4s, #0x4 // d3 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, where i = 12..15 + + bne skip_loading_luma_dc_src + ld1 {v0.h}[0], [x10] // loads signed halfword pi2_dc_ld_addr[0], if iq_start_idx == 1 +skip_loading_luma_dc_src: + + //========= PROCESS IDCT FROM HERE ======= + //Steps for Stage 1: + //------------------ + ld1 {v30.s}[0], [x1], x3 // i row load pu1_pred buffer + + sshr v8.4h, v1.4h, #1 // d1>>1 + sshr v9.4h, v3.4h, #1 // d3>>1 + + add v4.4h, v0.4h, v2.4h // x0 = d0 + d2// + sub v5.4h, v0.4h, v2.4h // x1 = d0 - d2// + sub v6.4h, v8.4h, v3.4h // x2 = (d1 >> 1) - d3// + add v7.4h, v1.4h, v9.4h // x3 = d1 + (d3 >> 1)// + + ld1 {v30.s}[1], [x1], x3 // ii row load pu1_pred buffer + + add v10.4h, v4.4h , v7.4h // x0+x3 + add v11.4h, v5.4h , v6.4h // x1+x2 + sub v12.4h, v5.4h , v6.4h // x1-x2 + sub v13.4h, v4.4h , v7.4h // x0-x3 + + ld1 {v31.s}[0], [x1], x3 // iii row load pu1_pred buf + + + //Steps for Stage 2: + //transpose + trn1 v4.4h, v10.4h, v11.4h + trn2 v5.4h, v10.4h, v11.4h + trn1 v6.4h, v12.4h, v13.4h + trn2 v7.4h, v12.4h, v13.4h + + trn1 v10.2s, v4.2s, v6.2s // q0 + trn1 v11.2s, v5.2s, v7.2s // q1 + trn2 v12.2s, v4.2s, v6.2s // q2 + trn2 v13.2s, v5.2s, v7.2s // q3 + //end transpose + + sshr v18.4h, v11.4h, #1 // q1>>1 + sshr v19.4h, v13.4h, #1 // q3>>1 + + add v14.4h, v10.4h, v12.4h // x0 = q0 + q2// + sub v15.4h, v10.4h, v12.4h // x1 = q0 - q2// + sub v16.4h, v18.4h, v13.4h // x2 = (q1 >> 1) - q3// + add v17.4h, v11.4h, v19.4h // x3 = q1+ (q3 >> 1)// + + + ld1 {v31.s}[1], [x1], x3 // iv row load pu1_pred buffer + + add v20.4h, v14.4h, 
v17.4h // x0 + x3 + add v21.4h, v15.4h, v16.4h // x1 + x2 + sub v22.4h, v15.4h, v16.4h // x1 - x2 + sub v23.4h, v14.4h, v17.4h // x0 - x3 + + mov v20.d[1], v21.d[0] // v20 = output rows i and ii + mov v22.d[1], v23.d[0] // v22 = output rows iii and iv + + srshr v20.8h, v20.8h, #6 // (x + 32) >> 6 + srshr v22.8h, v22.8h, #6 + + uaddw v20.8h, v20.8h , v30.8b // add prediction rows i and ii + uaddw v22.8h, v22.8h , v31.8b // add prediction rows iii and iv + + sqxtun v0.8b, v20.8h // saturate to unsigned 8 bit + sqxtun v1.8b, v22.8h + + st1 {v0.s}[0], [x2], x4 //i row store the value + st1 {v0.s}[1], [x2], x4 //ii row store the value + st1 {v1.s}[0], [x2], x4 //iii row store the value + st1 {v1.s}[1], [x2] //iv row store the value + + pop_v_regs + ret + + +///** +// ******************************************************************************* +// * +// * @brief +// * This function performs inverse quant and Inverse transform type Ci4 for 4*4 block +// * +// * @par Description: +// * Performs inverse transform Ci4 and adds the residue to get the +// * reconstructed block +// * +// * @param[in] pi2_src +// * Input 4x4 coefficients +// * +// * @param[in] pu1_pred +// * Prediction 4x4 block (8 bytes are loaded per row and every second byte is used) +// * +// * @param[out] pu1_out +// * Output 4x4 block (only every second byte of each 8-byte row is overwritten) +// * +// * @param[in] u4_qp_div_6 +// * QP / 6, used as the left-shift amount during dequantization +// * +// * @param[in] pu2_weigh_mat +// * Pointer to weight matrix +// * +// * @param[in] pred_strd, +// * Prediction stride +// * +// * @param[in] out_strd +// * Output Stride +// * +// * @param[in] pi4_tmp +// * temporary buffer of size 1*16 (not read by this implementation) +// * +// * @param[in] pu2_iscal_mat +// * Pointer to the inverse quantization matrix +// * +// * @returns Void +// * +// * @remarks +// * The DC coefficient is always taken from pi2_dc_src[0] +// * +// ******************************************************************************* +// */ +//void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src, +// UWORD8 *pu1_pred, +// UWORD8 *pu1_out, +// WORD32 pred_strd, +// WORD32 out_strd, +// const UWORD16 *pu2_iscal_mat, +// const UWORD16 *pu2_weigh_mat, +// UWORD32 u4_qp_div_6, +// WORD32 *pi4_tmp, +// WORD16 *pi2_dc_src) +//**************Variables Vs Registers***************************************** +//x0 => *pi2_src 
+//x1 => *pu1_pred +//x2 => *pu1_out +//x3 => pred_strd +//x4 => out_strd +//x5 => *pu2_iscal_mat +//x6 => *pu2_weigh_mat +//x7 => u4_qp_div_6 +//sp => pi4_tmp (not read) +//sp#8 => *pi2_dc_src + + .global ih264_iquant_itrans_recon_chroma_4x4_av8 +ih264_iquant_itrans_recon_chroma_4x4_av8: + +//ld4 (VLD4.S16 in the ARMv7 version) is used because the pointer is incremented by SUB_BLK_WIDTH_4x4 +//If the macro value changes need to change the instruction according to it. +//Only one shift is done in horizontal inverse because, +//if u4_qp_div_6 is less than 4 then shift value will be negative and do negative left shift, in this case rnd_factor has value +//if u4_qp_div_6 is greater than 4 then shift value will be positive and do left shift, here rnd_factor is 0 + +//at the end of the function, we could have moved 64 bits into the higher 64 bits of the register and done further processing +//but it seems to reduce the number of instructions by only 1. [Since a15 we saw add and sub to be very high throughput +//all instructions were taken as equal] + + //reduce sp by 64 + push_v_regs + + dup v30.4s, w7 //Populate the u4_qp_div_6 in all four lanes of v30 + + //was at sp + 8, hence now at sp+64+8 = sp+72 + ldr x10, [sp, #72] //Load alternate dc address + +//=======================DEQUANT FROM HERE=================================== + + ld4 {v20.4h - v23.4h}, [x5] // load pu2_iscal_mat[i], i =0..15 + ld4 {v26.4h - v29.4h}, [x6] // pu2_weigh_mat[i], i =0..15 + ld4 {v16.4h - v19.4h}, [x0] // pi2_src_tmp[i], i =0..15; ld4 de-interleaves, so v16 = elements 0,4,8,12, v17 = 1,5,9,13, ... (the "i = a..b" ranges below only identify the register) + + + mul v20.4h, v20.4h, v26.4h // x[i]=(scale[i] * dequant[i]) where i = 0..3 + mul v21.4h, v21.4h, v27.4h // x[i]=(scale[i] * dequant[i]) where i = 4..7 + mul v22.4h, v22.4h, v28.4h // x[i]=(scale[i] * dequant[i]) where i = 8..11 + mul v23.4h, v23.4h, v29.4h // x[i]=(scale[i] * dequant[i]) where i = 12..15 + + smull v0.4s, v16.4h, v20.4h // q0 = p[i] = (x[i] * trns_coeff[i]) where i = 0..3 + smull v2.4s, v17.4h, v21.4h // q1 = p[i] = (x[i] * trns_coeff[i]) where i = 4..7 + smull v4.4s, v18.4h, v22.4h // q2 = p[i] = 
(x[i] * trns_coeff[i]) where i = 8..11 + smull v6.4s, v19.4h, v23.4h // q3 = p[i] = (x[i] * trns_coeff[i]) where i = 12..15 + + sshl v0.4s, v0.4s, v30.4s // q0 = q[i] = (p[i] << (qp/6)) where i = 0..3 + sshl v2.4s, v2.4s, v30.4s // q1 = q[i] = (p[i] << (qp/6)) where i = 4..7 + sshl v4.4s, v4.4s, v30.4s // q2 = q[i] = (p[i] << (qp/6)) where i = 8..11 + sshl v6.4s, v6.4s, v30.4s // q3 = q[i] = (p[i] << (qp/6)) where i = 12..15 + + sqrshrn v0.4h, v0.4s, #0x4 // d0 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, where i = 0..3 + sqrshrn v1.4h, v2.4s, #0x4 // d1 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, where i = 4..7 + sqrshrn v2.4h, v4.4s, #0x4 // d2 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, where i = 8..11 + sqrshrn v3.4h, v6.4s, #0x4 // d3 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, where i = 12..15 + + ld1 {v0.h}[0], [x10] // loads signed halfword pi2_dc_src[0] (unconditionally replaces the dequantized DC) + + //========= PROCESS IDCT FROM HERE ======= + //Steps for Stage 1: + //------------------ + + sshr v8.4h, v1.4h, #1 // d1>>1 + sshr v9.4h, v3.4h, #1 // d3>>1 + + add v4.4h, v0.4h, v2.4h // x0 = d0 + d2// + sub v5.4h, v0.4h, v2.4h // x1 = d0 - d2// + sub v6.4h, v8.4h, v3.4h // x2 = (d1 >> 1) - d3// + add v7.4h, v1.4h, v9.4h // x3 = d1 + (d3 >> 1)// + + + add v10.4h, v4.4h , v7.4h // x0+x3 + add v11.4h, v5.4h , v6.4h // x1+x2 + sub v12.4h, v5.4h , v6.4h // x1-x2 + sub v13.4h, v4.4h , v7.4h // x0-x3 + + ld1 {v26.8b}, [x1], x3 // i row load pu1_pred buffer + ld1 {v27.8b}, [x1], x3 // ii row load pu1_pred buffer + ld1 {v28.8b}, [x1], x3 // iii row load pu1_pred buf + ld1 {v29.8b}, [x1], x3 // iv row load pu1_pred buffer + + //Steps for Stage 2: + //transpose + trn1 v4.4h, v10.4h, v11.4h + trn2 v5.4h, v10.4h, v11.4h + trn1 v6.4h, v12.4h, v13.4h + trn2 v7.4h, v12.4h, v13.4h + + trn1 v10.2s, v4.2s, v6.2s // q0 + trn1 v11.2s, v5.2s, v7.2s // q1 + trn2 v12.2s, v4.2s, v6.2s // q2 + trn2 v13.2s, v5.2s, v7.2s // q3 + //end transpose + + sshr v18.4h, v11.4h, #1 // q1>>1 + sshr v19.4h, v13.4h, #1 // q3>>1 + + add v14.4h, v10.4h, v12.4h // x0 = q0 + q2// + sub v15.4h, v10.4h, v12.4h // x1 = q0 - q2// 
+ sub v16.4h, v18.4h, v13.4h // x2 = (q1 >> 1) - q3// + add v17.4h, v11.4h, v19.4h // x3 = q1+ (q3 >> 1)// + + //Backup the output addr + mov x0, x2 + + //load output buffer for interleaving + ld1 {v10.8b}, [x2], x4 + ld1 {v11.8b}, [x2], x4 + ld1 {v12.8b}, [x2], x4 + ld1 {v13.8b}, [x2] + + add v20.4h, v14.4h, v17.4h // x0 + x3 + add v21.4h, v15.4h, v16.4h // x1 + x2 + sub v22.4h, v15.4h, v16.4h // x1 - x2 + sub v23.4h, v14.4h, v17.4h // x0 - x3 + + srshr v20.4h, v20.4h, #6 // (x + 32) >> 6 + srshr v21.4h, v21.4h, #6 + srshr v22.4h, v22.4h, #6 + srshr v23.4h, v23.4h, #6 + + //nop v30.8b //dummy for deinterleaving + movi v31.4h, #0x00ff //mask for interleaving [copy lower 8 bits] + + //Extract u/v plane from interleaved data (even-indexed bytes of each prediction row) + uzp1 v26.8b, v26.8b, v30.8b + uzp1 v27.8b, v27.8b, v30.8b + uzp1 v28.8b, v28.8b, v30.8b + uzp1 v29.8b, v29.8b, v30.8b + + uaddw v20.8h, v20.8h, v26.8b // add prediction; only the low four lanes are consumed below + uaddw v21.8h, v21.8h, v27.8b + uaddw v22.8h, v22.8h, v28.8b + uaddw v23.8h, v23.8h, v29.8b + + sqxtun v0.8b, v20.8h // saturate to unsigned 8 bit + sqxtun v1.8b, v21.8h + sqxtun v2.8b, v22.8h + sqxtun v3.8b, v23.8h + + //widen the output so that we have 0 at msb and value at lsb + uxtl v6.8h, v0.8b + uxtl v7.8h, v1.8b + uxtl v8.8h, v2.8b + uxtl v9.8h, v3.8b + + //select lsbs from processed data and msbs from pu1_out loaded data + bit v10.8b, v6.8b, v31.8b + bit v11.8b, v7.8b, v31.8b + bit v12.8b, v8.8b, v31.8b + bit v13.8b, v9.8b, v31.8b + + //store the interleaved result + st1 {v10.8b}, [x0], x4 + st1 {v11.8b}, [x0], x4 + st1 {v12.8b}, [x0], x4 + st1 {v13.8b}, [x0] + + pop_v_regs + ret + +///* +// ******************************************************************************* +// * +// * //brief +// * This function performs inverse quant and Inverse transform type Ci8 for 8*8 block +// * +// * //par Description: +// * Performs inverse transform Ci8 and adds the residue to get the +// * reconstructed block +// * +// * //param[in] pi2_src +// * Input 8x8 coefficients +// * +// * //param[in] pu1_pred +// * Prediction 8x8 block +// * +// * 
//param[out] pu1_out +// * Output 8x8 block +// * +// * //param[in] u4_qp_div_6 +// * QP / 6, used as the left-shift amount during dequantization +// * +// * //param[in] pu2_weigh_mat +// * Pointer to weight matrix +// * +// * //param[in] pred_strd, +// * Prediction stride +// * +// * //param[in] out_strd +// * Output Stride +// * +// * //param[in] pi4_tmp +// * temporary buffer of size 1*64 (not used by this implementation) +// * +// * //param[in] pu2_iscal_mat +// * Pointer to the inverse quantization matrix +// * +// * //returns Void +// * +// * //remarks +// * iq_start_idx and pi2_dc_ld_addr are not used by this implementation +// * +// ******************************************************************************* +// */ +//void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src, +// UWORD8 *pu1_pred, +// UWORD8 *pu1_out, +// WORD32 pred_strd, +// WORD32 out_strd, +// const UWORD16 *pu2_iscal_mat, +// const UWORD16 *pu2_weigh_mat, +// UWORD32 u4_qp_div_6, +// WORD32 *pi4_tmp, +// WORD32 iq_start_idx, +// WORD16 *pi2_dc_ld_addr) +//**************Variables Vs Registers***************************************** +//x0 => *pi2_src +//x1 => *pu1_pred +//x2 => *pu1_out +//x3 => pred_strd +//x4 => out_strd +//x5 => *pu2_iscal_mat +//x6 => *pu2_weigh_mat +//x7 => u4_qp_div_6 +//NOT USED => pi4_tmp +//NOT USED => iq_start_idx +//NOT USED => pi2_dc_ld_addr + + .global ih264_iquant_itrans_recon_8x8_av8 +ih264_iquant_itrans_recon_8x8_av8: + + push_v_regs + + ld1 {v8.8h -v11.8h}, [x5], #64 // pu2_iscal_mat[0..31] + ld1 {v12.8h-v15.8h}, [x5] // pu2_iscal_mat[32..63] + + ld1 {v16.8h -v19.8h}, [x6], #64 // pu2_weigh_mat[0..31] + ld1 {v20.8h -v23.8h}, [x6] // pu2_weigh_mat[32..63] + + mov x8, #16 // 16 bytes = 8 halfwords between successive loads from pi2_src + ld1 {v0.8h}, [x0], x8 + ld1 {v1.8h}, [x0], x8 + ld1 {v2.8h}, [x0], x8 + ld1 {v3.8h}, [x0], x8 + ld1 {v4.8h}, [x0], x8 + ld1 {v5.8h}, [x0], x8 + ld1 {v6.8h}, [x0], x8 + ld1 {v7.8h}, [x0] + + mul v8.8h, v8.8h, v16.8h // iscal * weight, element-wise + mul v9.8h, v9.8h, v17.8h + mul v10.8h, v10.8h, v18.8h + mul v11.8h, v11.8h, v19.8h + mul v12.8h, v12.8h, v20.8h + mul v13.8h, v13.8h, v21.8h + mul v14.8h, v14.8h, v22.8h + mul v15.8h, v15.8h, v23.8h + + smull v16.4s, v0.4h, v8.4h + smull2 v17.4s, v0.8h, v8.8h + smull v18.4s, v1.4h, v9.4h + smull2 v19.4s, v1.8h, v9.8h + smull 
v20.4s, v2.4h, v10.4h + smull2 v21.4s, v2.8h, v10.8h + smull v22.4s, v3.4h, v11.4h + smull2 v23.4s, v3.8h, v11.8h + smull v24.4s, v4.4h, v12.4h + smull2 v25.4s, v4.8h, v12.8h + smull v26.4s, v5.4h, v13.4h + smull2 v27.4s, v5.8h, v13.8h + smull v28.4s, v6.4h, v14.4h + smull2 v29.4s, v6.8h, v14.8h + smull v30.4s, v7.4h, v15.4h + smull2 v31.4s, v7.8h, v15.8h + + dup v0.4s, w7 + + sshl v16.4s, v16.4s, v0.4s + sshl v17.4s, v17.4s, v0.4s + sshl v18.4s, v18.4s, v0.4s + sshl v19.4s, v19.4s, v0.4s + sshl v20.4s, v20.4s, v0.4s + sshl v21.4s, v21.4s, v0.4s + sshl v22.4s, v22.4s, v0.4s + sshl v23.4s, v23.4s, v0.4s + sshl v24.4s, v24.4s, v0.4s + sshl v25.4s, v25.4s, v0.4s + sshl v26.4s, v26.4s, v0.4s + sshl v27.4s, v27.4s, v0.4s + sshl v28.4s, v28.4s, v0.4s + sshl v29.4s, v29.4s, v0.4s + sshl v30.4s, v30.4s, v0.4s + sshl v31.4s, v31.4s, v0.4s + + sqrshrn v0.4h, v16.4s, #6 + sqrshrn2 v0.8h, v17.4s, #6 + sqrshrn v1.4h, v18.4s, #6 + sqrshrn2 v1.8h, v19.4s, #6 + sqrshrn v2.4h, v20.4s, #6 + sqrshrn2 v2.8h, v21.4s, #6 + sqrshrn v3.4h, v22.4s, #6 + sqrshrn2 v3.8h, v23.4s, #6 + sqrshrn v4.4h, v24.4s, #6 + sqrshrn2 v4.8h, v25.4s, #6 + sqrshrn v5.4h, v26.4s, #6 + sqrshrn2 v5.8h, v27.4s, #6 + sqrshrn v6.4h, v28.4s, #6 + sqrshrn2 v6.8h, v29.4s, #6 + sqrshrn v7.4h, v30.4s, #6 + sqrshrn2 v7.8h, v31.4s, #6 + + //loop counter: the transpose + 1-D transform below is executed twice + mov x8, #2 +//1x8 transform +trans_1x8_1d: + + //transpose 8x8 + trn1 v8.8h, v0.8h, v1.8h + trn2 v9.8h, v0.8h, v1.8h + trn1 v10.8h, v2.8h, v3.8h + trn2 v11.8h, v2.8h, v3.8h + trn1 v12.8h, v4.8h, v5.8h + trn2 v13.8h, v4.8h, v5.8h + trn1 v14.8h, v6.8h, v7.8h + trn2 v15.8h, v6.8h, v7.8h + + trn1 v0.4s, v8.4s, v10.4s + trn2 v2.4s, v8.4s, v10.4s + trn1 v1.4s, v9.4s, v11.4s + trn2 v3.4s, v9.4s, v11.4s + trn1 v4.4s, v12.4s, v14.4s + trn2 v6.4s, v12.4s, v14.4s + trn1 v5.4s, v13.4s, v15.4s + trn2 v7.4s, v13.4s, v15.4s + + trn1 v8.2d, v0.2d, v4.2d //pi2_tmp_ptr[0] + trn2 v12.2d, v0.2d, v4.2d //pi2_tmp_ptr[4] + trn1 v9.2d, v1.2d, v5.2d //pi2_tmp_ptr[1] + trn2 v13.2d, v1.2d, v5.2d //pi2_tmp_ptr[5] + trn1 v10.2d, v2.2d, v6.2d //pi2_tmp_ptr[2] + 
trn2 v14.2d, v2.2d, v6.2d //pi2_tmp_ptr[6] + trn1 v11.2d, v3.2d, v7.2d //pi2_tmp_ptr[3] + trn2 v15.2d, v3.2d, v7.2d //pi2_tmp_ptr[7] + + // halves of pi2_tmp_ptr[1], [2], [3], [5], [6], [7] + sshr v16.8h, v9.8h, #1 //(pi2_tmp_ptr[1] >> 1) + sshr v17.8h, v10.8h, #1 //(pi2_tmp_ptr[2] >> 1) + sshr v18.8h, v11.8h, #1 //(pi2_tmp_ptr[3] >> 1) + sshr v19.8h, v13.8h, #1 //(pi2_tmp_ptr[5] >> 1) + sshr v20.8h, v14.8h, #1 //(pi2_tmp_ptr[6] >> 1) + sshr v21.8h, v15.8h, #1 //(pi2_tmp_ptr[7] >> 1) + + add v0.8h, v8.8h, v12.8h // i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4] ); + sub v2.8h, v8.8h, v12.8h // i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4] ); + + sub v4.8h, v17.8h, v14.8h //i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6] ); + add v6.8h, v10.8h, v20.8h //i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1)); + + //-w3 + w5 + ssubl v22.4s, v13.4h, v11.4h + ssubl2 v23.4s, v13.8h, v11.8h + //w3 + w5 + saddl v24.4s, v13.4h, v11.4h + saddl2 v25.4s, v13.8h, v11.8h + //-w1 + w7 + ssubl v26.4s, v15.4h, v9.4h + ssubl2 v27.4s, v15.8h, v9.8h + //w1 + w7 + saddl v28.4s, v15.4h, v9.4h + saddl2 v29.4s, v15.8h, v9.8h + + //-w3 + w5 - w7 + ssubw v22.4s, v22.4s, v15.4h + ssubw2 v23.4s, v23.4s, v15.8h + //w3 + w5 + w1 + saddw v24.4s, v24.4s, v9.4h + saddw2 v25.4s, v25.4s, v9.8h + //-w1 + w7 + w5 + saddw v26.4s, v26.4s, v13.4h + saddw2 v27.4s, v27.4s, v13.8h + //w1 + w7 - w3 + ssubw v28.4s, v28.4s, v11.4h + ssubw2 v29.4s, v29.4s, v11.8h + + //-w3 + w5 - w7 - (w7 >> 1) + ssubw v22.4s, v22.4s, v21.4h + ssubw2 v23.4s, v23.4s, v21.8h + //w3 + w5 + w1 + (w1 >> 1) + saddw v24.4s, v24.4s, v16.4h + saddw2 v25.4s, v25.4s, v16.8h + //-w1 + w7 + w5 + (w5 >> 1) + saddw v26.4s, v26.4s, v19.4h + saddw2 v27.4s, v27.4s, v19.8h + //w1 + w7 - w3 - (w3 >> 1) + ssubw v28.4s, v28.4s, v18.4h + ssubw2 v29.4s, v29.4s, v18.8h + + xtn v1.4h, v22.4s + xtn2 v1.8h, v23.4s //y1 = -w3 + w5 - w7 - (w7 >> 1), narrowed to 16 bit + xtn v3.4h, v28.4s + xtn2 v3.8h, v29.4s //y3 = w1 + w7 - w3 - (w3 >> 1) + xtn v5.4h, v26.4s + xtn2 v5.8h, v27.4s //y5 = -w1 + w7 + w5 + (w5 >> 1) + xtn v7.4h, v24.4s + xtn2 v7.8h, v25.4s //y7 = w3 + w5 + w1 + (w1 >> 1) + + sshr v16.8h, v1.8h, #2 //(y1 >> 2) + sshr v17.8h, v3.8h, #2 //(y3 >> 2) + sshr v18.8h, v5.8h, #2 //(y5 >> 2) + sshr 
v19.8h, v7.8h, #2 //(y7 >> 2) + + add v8.8h, v0.8h, v6.8h // y0 + y6 + add v9.8h, v1.8h, v19.8h // y1 + (y7 >> 2) + add v10.8h, v2.8h, v4.8h // y2 + y4 + add v11.8h, v3.8h, v18.8h // y3 + (y5 >> 2) + sub v12.8h, v2.8h, v4.8h // y2 - y4 + sub v13.8h, v17.8h, v5.8h // (y3 >> 2) - y5 + sub v14.8h, v0.8h, v6.8h // y0 - y6 + sub v15.8h, v7.8h, v16.8h // y7 - (y1 >> 2) + + add v0.8h, v8.8h, v15.8h // output vector 0 + add v1.8h, v10.8h, v13.8h // output vector 1 + add v2.8h, v12.8h, v11.8h // output vector 2 + add v3.8h, v14.8h, v9.8h // output vector 3 + sub v4.8h, v14.8h, v9.8h // output vector 4 + sub v5.8h, v12.8h, v11.8h // output vector 5 + sub v6.8h, v10.8h, v13.8h // output vector 6 + sub v7.8h, v8.8h, v15.8h // output vector 7 + + subs x8, x8, #1 + bne trans_1x8_1d // second pass re-transposes v0..v7 and repeats the 1-D transform + + ld1 {v22.8b}, [x1], x3 // load 8 rows of prediction + ld1 {v23.8b}, [x1], x3 + ld1 {v24.8b}, [x1], x3 + ld1 {v25.8b}, [x1], x3 + ld1 {v26.8b}, [x1], x3 + ld1 {v27.8b}, [x1], x3 + ld1 {v28.8b}, [x1], x3 + ld1 {v29.8b}, [x1] + + srshr v0.8h, v0.8h, #6 // (x + 32) >> 6 + srshr v1.8h, v1.8h, #6 + srshr v2.8h, v2.8h, #6 + srshr v3.8h, v3.8h, #6 + srshr v4.8h, v4.8h, #6 + srshr v5.8h, v5.8h, #6 + srshr v6.8h, v6.8h, #6 + srshr v7.8h, v7.8h, #6 + + uaddw v0.8h, v0.8h, v22.8b // add prediction + uaddw v1.8h, v1.8h, v23.8b + uaddw v2.8h, v2.8h, v24.8b + uaddw v3.8h, v3.8h, v25.8b + uaddw v4.8h, v4.8h, v26.8b + uaddw v5.8h, v5.8h, v27.8b + uaddw v6.8h, v6.8h, v28.8b + uaddw v7.8h, v7.8h, v29.8b + + sqxtun v0.8b, v0.8h // saturate to unsigned 8 bit + sqxtun v1.8b, v1.8h + sqxtun v2.8b, v2.8h + sqxtun v3.8b, v3.8h + sqxtun v4.8b, v4.8h + sqxtun v5.8b, v5.8h + sqxtun v6.8b, v6.8h + sqxtun v7.8b, v7.8h + + st1 {v0.8b}, [x2], x4 // store 8 reconstructed rows + st1 {v1.8b}, [x2], x4 + st1 {v2.8b}, [x2], x4 + st1 {v3.8b}, [x2], x4 + st1 {v4.8b}, [x2], x4 + st1 {v5.8b}, [x2], x4 + st1 {v6.8b}, [x2], x4 + st1 {v7.8b}, [x2] + + pop_v_regs + ret + + + + diff --git a/common/armv8/ih264_iquant_itrans_recon_dc_av8.s b/common/armv8/ih264_iquant_itrans_recon_dc_av8.s new file mode 100755 index 0000000..8bb9c32 --- /dev/null +++ b/common/armv8/ih264_iquant_itrans_recon_dc_av8.s @@ -0,0 +1,397 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); 
+//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///** +// ******************************************************************************* +// * @file +// * ih264_iquant_itrans_recon_dc_av8.s +// * +// * @brief +// * Contains function definitions for single stage inverse transform +// * +// * @author +// * Mohit +// * +// * @par List of Functions: +// * - ih264_iquant_itrans_recon_4x4_dc_av8() +// * - ih264_iquant_itrans_recon_8x8_dc_av8() +// * - ih264_iquant_itrans_recon_chroma_4x4_dc_av8() +// * +// * @remarks +// * None +// * +// ******************************************************************************* +//*/ + + +.include "ih264_neon_macros.s" + + +///** +// ******************************************************************************* +// * +// * @brief +// * This function performs inverse quant and Inverse transform type Ci4 for 4*4 block +// * for dc input pattern only, i.e. only the (0,0) element of the input 4x4 block is +// * non-zero. 
For complete function, refer ih264_iquant_itrans_recon_a9.s +// * +// * @par Description: +// * Performs inverse transform Ci4 and adds the residue to get the +// * reconstructed block +// * +// * @param[in] pi2_src +// * Input 4x4 coefficients (only element 0 is read) +// * +// * @param[in] pu1_pred +// * Prediction 4x4 block +// * +// * @param[out] pu1_out +// * Output 4x4 block +// * +// * @param[in] u4_qp_div_6 +// * QP / 6, used as the left-shift amount during dequantization +// * +// * @param[in] pu2_weigh_mat +// * Pointer to weight matrix (only element 0 is read) +// * +// * @param[in] pred_strd, +// * Prediction stride +// * +// * @param[in] out_strd +// * Output Stride +// * +// * @param[in] pi4_tmp +// * temporary buffer of size 1*16 (not read by this implementation) +// * +// * @param[in] pu2_iscal_mat +// * Pointer to the inverse quantization matrix (only element 0 is read) +// * +// * @returns Void +// * +// * @remarks +// * If iq_start_idx == 1 the DC value is taken from pi2_dc_ld_addr[0] and the dequantization is skipped +// * +// ******************************************************************************* +// */ +//void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src, +// UWORD8 *pu1_pred, +// UWORD8 *pu1_out, +// WORD32 pred_strd, +// WORD32 out_strd, +// const UWORD16 *pu2_iscal_mat, +// const UWORD16 *pu2_weigh_mat, +// UWORD32 u4_qp_div_6, +// WORD32 *pi4_tmp, +// WORD32 iq_start_idx, +// WORD16 *pi2_dc_ld_addr) +//**************Variables Vs Registers***************************************** +//x0 => *pi2_src +//x1 => *pu1_pred +//x2 => *pu1_out +//x3 => pred_strd +//x4 => out_strd +//x5 => *pu2_iscal_mat +//x6 => *pu2_weigh_mat +//x7 => u4_qp_div_6 +//stack => pi4_tmp (not read) +//[sp, #8] => iq_start_idx +//[sp, #16] => pi2_dc_ld_addr + +.text +.p2align 2 + + .global ih264_iquant_itrans_recon_4x4_dc_av8 +ih264_iquant_itrans_recon_4x4_dc_av8: + + ldr w8, [sp, #8] //Loads iq_start_idx (before push_v_regs moves sp) + subs w8, w8, #1 // if iq_start_idx == 1 => intra case , so result of subtraction is zero and z flag is set + + ldr x10, [sp, #16] //Load alternate dc address + push_v_regs + dup v30.4s, w7 //Populate the u4_qp_div_6 in all four lanes of v30 + + + bne donot_use_pi2_dc_ld_addr_luma_dc + ld1 {v0.h}[0], [x10] // intra case: DC value comes from pi2_dc_ld_addr[0] +donot_use_pi2_dc_ld_addr_luma_dc: + + beq donot_use_pi2_src_luma_dc + 
ld1 {v0.h}[0], [x5] + ld1 {v1.h}[0], [x6] + ld1 {v2.h}[0], [x0] + mul v0.4h, v1.4h, v0.4h + smull v0.4s, v0.4h, v2.4h + sshl v0.4s, v0.4s, v30.4s + sqrshrn v0.4h, v0.4s, #4 +donot_use_pi2_src_luma_dc: + + + dup v0.8h, v0.h[0] + srshr v0.8h, v0.8h, #6 + + ld1 {v1.s}[0], [x1], x3 + ld1 {v1.s}[1], [x1], x3 + ld1 {v2.s}[0], [x1], x3 + ld1 {v2.s}[1], [x1] + + uxtl v1.8h, v1.8b + uxtl v2.8h, v2.8b + + add v1.8h, v0.8h, v1.8h + add v2.8h, v0.8h, v2.8h + + sqxtun v1.8b, v1.8h + sqxtun v2.8b, v2.8h + + st1 {v1.s}[0], [x2], x4 + st1 {v1.s}[1], [x2], x4 + st1 {v2.s}[0], [x2], x4 + st1 {v2.s}[1], [x2] + pop_v_regs + ret + +// /* +// ******************************************************************************** +// * +// * @brief This function reconstructs a 4x4 sub block from quantized residue and +// * prediction buffer if only dc value is present for residue +// * +// * @par Description: +// * The DC value read from pi2_dc_src[0] is rounded with (dc + 32) >> 6. +// * The result is added to the prediction buffer to recon- +// * struct the end output +// * +// * @param[in] pi2_src +// * quantized dc coefficient (not read by this implementation) +// * +// * @param[in] pu1_pred +// * prediction 4x4 block in interleaved format +// * +// * @param[in] pred_strd, +// * Prediction buffer stride in interleaved format +// * +// * @param[in] out_strd +// * recon buffer Stride +// * +// * @returns none +// * +// * @remarks none +// * +// ******************************************************************************* +// */ +// void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src, +// UWORD8 *pu1_pred, +// UWORD8 *pu1_out, +// WORD32 pred_strd, +// WORD32 out_strd, +// const UWORD16 *pu2_iscal_mat, +// const UWORD16 *pu2_weigh_mat, +// UWORD32 u4_qp_div_6, +// WORD16 *pi2_tmp, +// WORD16 *pi2_dc_src) +// Register Usage +// x0 : pi2_src +// x1 : pu1_pred +// x2 : pu1_out +// x3 : pred_strd +// x4 : out_strd +// x5 : pu2_iscal_mat +// x6 : pu2_weigh_mat +// x7 : u4_qp_div_6 +// [sp] : pi2_tmp +// [sp, #8] : pi2_dc_src +// Neon 
registers d0-d7, d16-d30 are used +// (note carried over from the ARMv7 version) this version uses v0-v3, v11, v12 and v31 and brackets the body with push_v_regs/pop_v_regs + + + .global ih264_iquant_itrans_recon_chroma_4x4_dc_av8 +ih264_iquant_itrans_recon_chroma_4x4_dc_av8: + + ldr x0, [sp, #8] // pi2_dc_src (read before push_v_regs moves sp) + push_v_regs + ld1 {v0.h}[0], [x0] // DC value pi2_dc_src[0] + dup v0.8h, v0.h[0] + srshr v0.8h, v0.8h, #6 // (dc + 32) >> 6 in every lane + + + //backup pu1_out + mov x0, x2 + + //nop v3.16b //dummy for deinterleaving + movi v31.8h, #0x00ff //mask for interleaving [copy lower 8 bits] + + ld1 {v1.d}[0], [x1], x3 // prediction rows 0 and 1 (8 bytes each) + ld1 {v1.d}[1], [x1], x3 + ld1 {v2.d}[0], [x1], x3 // prediction rows 2 and 3 + ld1 {v2.d}[1], [x1], x3 + + ld1 {v11.d}[0], [x2], x4 //load pu1_out for interleaving + ld1 {v11.d}[1], [x2], x4 + ld1 {v12.d}[0], [x2], x4 + ld1 {v12.d}[1], [x2] + + uzp1 v1.16b, v1.16b, v3.16b // keep the even-indexed bytes; only the low 8 result bytes are used below + uzp1 v2.16b, v2.16b, v3.16b + + uaddw v1.8h, v0.8h, v1.8b // add prediction + uaddw v2.8h, v0.8h, v2.8b + + sqxtun v1.8b, v1.8h // saturate to unsigned 8 bit + sqxtun v2.8b, v2.8h + + uxtl v1.8h, v1.8b // widen so the value sits in the low byte of each halfword + uxtl v2.8h, v2.8b + + bit v11.16b, v1.16b, v31.16b // insert low bytes into the loaded pu1_out data, high bytes are kept + bit v12.16b, v2.16b, v31.16b + + st1 {v11.d}[0], [x0], x4 + st1 {v11.d}[1], [x0], x4 + st1 {v12.d}[0], [x0], x4 + st1 {v12.d}[1], [x0] + pop_v_regs + ret + +///* +// ******************************************************************************* +// * +// * //brief +// * This function performs inverse quant and Inverse transform type Ci8 for 8*8 block +// * [Only for Dc coeff] +// * //par Description: +// * Performs inverse transform Ci8 and adds the residue to get the +// * reconstructed block +// * +// * //param[in] pi2_src +// * Input 8x8 coefficients (only element 0 is read) +// * +// * //param[in] pu1_pred +// * Prediction 8x8 block +// * +// * //param[out] pu1_out +// * Output 8x8 block +// * +// * //param[in] u4_qp_div_6 +// * QP / 6, used as the left-shift amount during dequantization +// * +// * //param[in] pu2_weigh_mat +// * Pointer to weight matrix (only element 0 is read) +// * +// * //param[in] pred_strd, +// * Prediction stride +// * +// * //param[in] out_strd +// * Output Stride +// * +// * //param[in] pi4_tmp +// * temporary buffer of size 1*64 (not used by this implementation) +// * +// * //param[in] pu2_iscal_mat +// * Pointer to the inverse quantization matrix (only element 0 is read) +// * +// * 
//returns Void
+// *
+// * //remarks
+// * None
+// *
+// *******************************************************************************
+// */
+//void ih264_iquant_itrans_recon_dc_8x8(WORD16 *pi2_src,
+//                                      UWORD8 *pu1_pred,
+//                                      UWORD8 *pu1_out,
+//                                      WORD32 pred_strd,
+//                                      WORD32 out_strd,
+//                                      const UWORD16 *pu2_iscal_mat,
+//                                      const UWORD16 *pu2_weigh_mat,
+//                                      UWORD32 u4_qp_div_6,
+//                                      WORD32 *pi4_tmp,
+//                                      WORD32 iq_start_idx,
+//                                      WORD16 *pi2_dc_ld_addr)
+//**************Variables Vs Registers*****************************************
+//x0 => *pi2_src
+//x1 => *pu1_pred
+//x2 => *pu1_out
+//x3 => pred_strd
+//x4 => out_strd
+//x5 => *pu2_iscal_mat
+//x6 => *pu2_weigh_mat
+//x7 => u4_qp_div_6
+//NOT USED => pi4_tmp
+//NOT USED => iq_start_idx
+//NOT USED => pi2_dc_ld_addr
+
+    .global ih264_iquant_itrans_recon_8x8_dc_av8
+ih264_iquant_itrans_recon_8x8_dc_av8:
+
+    // v8-v12 are used below, so d8-d15 are saved here and restored at exit
+    push_v_regs
+
+    // only element 0 of each table and of the source is loaded; the other
+    // lanes hold stale data, but only lane 0 is broadcast further down
+    ld1       {v1.h}[0], [x5]
+    ld1       {v2.h}[0], [x6]
+    ld1       {v0.h}[0], [x0]
+    dup       v3.4s, w7                 // per-lane shift count = u4_qp_div_6
+
+
+    mul       v1.8h, v1.8h, v2.8h       // iscal[0] * weigh[0]
+    smull     v0.4s, v0.4h, v1.4h       // dc * scale, widened to 32 bit
+    sshl      v0.4s, v0.4s, v3.4s       // << u4_qp_div_6
+
+    sqrshrn   v0.4h, v0.4s, #6          // rounding >> 6, saturating narrow to 16 bit
+    srshr     v0.8h, v0.8h, #6          // rounding >> 6 again: (x + 32) >> 6
+    dup       v0.8h, v0.h[0]            // broadcast the residue to all 8 lanes
+
+    // eight rows of eight prediction bytes
+    ld1       {v22.8b}, [x1], x3
+    ld1       {v23.8b}, [x1], x3
+    ld1       {v24.8b}, [x1], x3
+    ld1       {v25.8b}, [x1], x3
+    ld1       {v26.8b}, [x1], x3
+    ld1       {v27.8b}, [x1], x3
+    ld1       {v28.8b}, [x1], x3
+    ld1       {v29.8b}, [x1]
+
+    // residue + prediction, widened to 16 bit
+    uaddw     v1.8h, v0.8h, v22.8b
+    uaddw     v2.8h, v0.8h, v23.8b
+    uaddw     v3.8h, v0.8h, v24.8b
+    uaddw     v8.8h, v0.8h, v25.8b
+    uaddw     v9.8h, v0.8h, v26.8b
+    uaddw     v10.8h, v0.8h, v27.8b
+    uaddw     v11.8h, v0.8h, v28.8b
+    uaddw     v12.8h, v0.8h, v29.8b
+
+    // saturate every row to 0..255
+    sqxtun    v1.8b, v1.8h
+    sqxtun    v2.8b, v2.8h
+    sqxtun    v3.8b, v3.8h
+    sqxtun    v8.8b, v8.8h
+    sqxtun    v9.8b, v9.8h
+    sqxtun    v10.8b, v10.8h
+    sqxtun    v11.8b, v11.8h
+    sqxtun    v12.8b, v12.8h
+
+    st1       {v1.8b}, [x2], x4
+    st1       {v2.8b}, [x2], x4
+    st1       {v3.8b}, [x2], x4
+    st1       {v8.8b}, [x2], x4
+    st1       {v9.8b}, [x2], x4
+    st1       {v10.8b}, [x2], x4
+    st1       {v11.8b}, [x2], x4
+    st1       {v12.8b}, [x2]
+
+    pop_v_regs
+    ret
+
+
diff --git
a/common/armv8/ih264_mem_fns_neon_av8.s b/common/armv8/ih264_mem_fns_neon_av8.s new file mode 100755 index 0000000..f5c2e29 --- /dev/null +++ b/common/armv8/ih264_mem_fns_neon_av8.s @@ -0,0 +1,274 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +//*/ +///** +// ******************************************************************************* +// * @file +// * ih264_mem_fns_neon.s +// * +// * @brief +// * Contains function definitions for memory manipulation +// * +// * @author +// * Naveen SR +// * +// * @par List of Functions: +// * - ih264_memcpy_av8() +// * - ih264_memcpy_mul_8_av8() +// * - ih264_memset_mul_8_av8() +// * - ih264_memset_16bit_mul_8_av8() +// * - ih264_memset_16bit_av8() +// * +// * @remarks +// * None +// * +// ******************************************************************************* +//*/ + +.text +.p2align 2 +.include "ih264_neon_macros.s" +///** +//******************************************************************************* +//* +//* @brief +//* memcpy of a 1d array +//* +//* @par Description: +//* Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes +//* +//* @param[in] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[in] num_bytes +//* number of bytes to copy +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ +//void ih264_memcpy_mul_8(UWORD8 *pu1_dst, +// UWORD8 *pu1_src, +// UWORD8 num_bytes) +//**************Variables Vs Registers************************* +// x0 => *pu1_dst +// x1 => *pu1_src +// x2 => num_bytes + + + + + + .global ih264_memcpy_mul_8_av8 + +ih264_memcpy_mul_8_av8: + +loop_neon_memcpy_mul_8: + // Memcpy 8 bytes + ld1 {v0.8b}, [x1], #8 + st1 {v0.8b}, [x0], #8 + + subs x2, x2, #8 + bne loop_neon_memcpy_mul_8 + ret + + + +//******************************************************************************* +//*/ +//void ih264_memcpy(UWORD8 *pu1_dst, +// UWORD8 *pu1_src, +// UWORD8 num_bytes) +//**************Variables Vs Registers************************* +// x0 => *pu1_dst +// x1 => *pu1_src +// x2 => num_bytes + + + + .global ih264_memcpy_av8 + 
+ih264_memcpy_av8:
+    // Copies num_bytes (x2) bytes from x1 to x0: 8 bytes at a time with NEON,
+    // then any remaining 1..7 bytes one at a time.
+    // NOTE(review): x2 is consumed as a full 64-bit count; assumes the caller
+    // passes num_bytes zero-extended - confirm against the C prototype.
+
+    // Zero length: nothing to do. Without this guard the byte loop below
+    // would decrement 0 to -1 and never reach its exit condition.
+    cbz       x2, end_func1
+    subs      x2, x2, #8
+    blt       arm_memcpy                // fewer than 8 bytes: byte loop only
+loop_neon_memcpy:
+    // Memcpy 8 bytes
+    ld1       {v0.8b}, [x1], #8
+    st1       {v0.8b}, [x0], #8
+
+    subs      x2, x2, #8
+    bge       loop_neon_memcpy
+    cmp       x2, #-8                   // -8 here means the length was a multiple of 8
+    beq       end_func1
+
+arm_memcpy:
+    add       x2, x2, #8                // undo the bias: x2 = remaining bytes (1..7)
+
+loop_arm_memcpy:
+    ldrb      w3, [x1], #1
+    strb      w3, [x0], #1
+    subs      x2, x2, #1
+    bne       loop_arm_memcpy
+    ret
+end_func1:
+    ret
+
+
+//void ih264_memset_mul_8(UWORD8 *pu1_dst,
+//                        UWORD8 value,
+//                        UWORD8 num_bytes)
+//**************Variables Vs Registers*************************
+//    x0 => *pu1_dst
+//    x1 => value
+//    x2 => num_bytes
+
+
+    .global ih264_memset_mul_8_av8
+
+ih264_memset_mul_8_av8:
+
+// Assumptions: numbytes is either 8, 16 or 32
+// (the loop only exits when x2 reaches exactly 0, so the count must be a
+//  non-zero multiple of 8)
+    dup       v0.8b, w1
+loop_memset_mul_8:
+    // Memset 8 bytes
+    st1       {v0.8b}, [x0], #8
+
+    subs      x2, x2, #8
+    bne       loop_memset_mul_8
+
+    ret
+
+
+//void ih264_memset(UWORD8 *pu1_dst,
+//                  UWORD8 value,
+//                  UWORD8 num_bytes)
+//**************Variables Vs Registers*************************
+//    x0 => *pu1_dst
+//    x1 => value
+//    x2 => num_bytes
+
+
+
+    .global ih264_memset_av8
+
+ih264_memset_av8:
+    // Zero length: nothing to do (same runaway-loop hazard as in memcpy).
+    cbz       x2, end_func2
+    subs      x2, x2, #8
+    blt       arm_memset                // fewer than 8 bytes: byte loop only
+    dup       v0.8b, w1
+loop_neon_memset:
+    // Memset 8 bytes
+    st1       {v0.8b}, [x0], #8
+
+    subs      x2, x2, #8
+    bge       loop_neon_memset
+    cmp       x2, #-8                   // -8 here means the length was a multiple of 8
+    beq       end_func2
+
+arm_memset:
+    add       x2, x2, #8                // undo the bias: x2 = remaining bytes (1..7)
+
+loop_arm_memset:
+    strb      w1, [x0], #1
+    subs      x2, x2, #1
+    bne       loop_arm_memset
+    ret
+end_func2:
+    ret
+
+
+
+
+
+//void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
+//                              UWORD16 value,
+//                              UWORD8 num_words)
+//**************Variables Vs Registers*************************
+//    x0 => *pu2_dst
+//    x1 => value
+//    x2 => num_words
+
+
+    .global ih264_memset_16bit_mul_8_av8
+
+ih264_memset_16bit_mul_8_av8:
+
+// Assumptions: num_words is either 8, 16 or 32
+// (the loop only exits when x2 reaches exactly 0, so the count must be a
+//  non-zero multiple of 8)
+
+    // Memset 8 words: two stores of four 16-bit values each
+    dup       v0.4h, w1
+loop_memset_16bit_mul_8:
+    st1       {v0.4h}, [x0], #8
+    st1       {v0.4h}, [x0], #8
+
+    subs      x2, x2, #8
+    bne       loop_memset_16bit_mul_8
+
+    ret
+
+
+
+//void
ih264_memset_16bit(UWORD16 *pu2_dst,
+//                        UWORD16 value,
+//                        UWORD8 num_words)
+//**************Variables Vs Registers*************************
+//    x0 => *pu2_dst
+//    x1 => value
+//    x2 => num_words
+
+
+
+    .global ih264_memset_16bit_av8
+
+ih264_memset_16bit_av8:
+    // Fills num_words (x2) 16-bit words at x0 with the low 16 bits of w1:
+    // 8 words at a time with NEON, then any remaining 1..7 words one by one.
+    // NOTE(review): x2 is consumed as a full 64-bit count; assumes the caller
+    // passes num_words zero-extended - confirm against the C prototype.
+
+    // Zero length: nothing to do. Without this guard the word loop below
+    // would decrement 0 to -1 and never reach its exit condition.
+    cbz       x2, end_func3
+    subs      x2, x2, #8
+    blt       arm_memset_16bit          // fewer than 8 words: word loop only
+    dup       v0.4h, w1
+loop_neon_memset_16bit:
+    // Memset 8 words
+    st1       {v0.4h}, [x0], #8
+    st1       {v0.4h}, [x0], #8
+
+    subs      x2, x2, #8
+    bge       loop_neon_memset_16bit
+    cmp       x2, #-8                   // -8 here means the count was a multiple of 8
+    beq       end_func3
+
+arm_memset_16bit:
+    add       x2, x2, #8                // undo the bias: x2 = remaining words (1..7)
+
+loop_arm_memset_16bit:
+    strh      w1, [x0], #2
+    subs      x2, x2, #1
+    bne       loop_arm_memset_16bit
+    ret
+
+end_func3:
+    ret
+
+
+
diff --git a/common/armv8/ih264_neon_macros.s b/common/armv8/ih264_neon_macros.s
new file mode 100755
index 0000000..6ff5b91
--- /dev/null
+++ b/common/armv8/ih264_neon_macros.s
@@ -0,0 +1,41 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+//*******************************************************************************
+
+
+.macro push_v_regs
+    stp       d8, d9, [sp, #-16]!
+    stp       d10, d11, [sp, #-16]!
+    stp       d12, d13, [sp, #-16]!
+    stp       d14, d15, [sp, #-16]!
+.endm +.macro pop_v_regs + ldp d14, d15, [sp], #16 + ldp d12, d13, [sp], #16 + ldp d10, d11, [sp], #16 + ldp d8, d9, [sp], #16 +.endm + +.macro swp reg1, reg2 + eor \reg1, \reg1, \reg2 + eor \reg2, \reg1, \reg2 + eor \reg1, \reg1, \reg2 +.endm + diff --git a/common/armv8/ih264_padding_neon_av8.s b/common/armv8/ih264_padding_neon_av8.s new file mode 100755 index 0000000..35d9c8a --- /dev/null +++ b/common/armv8/ih264_padding_neon_av8.s @@ -0,0 +1,784 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +//*/ +///** +// ******************************************************************************* +// * @file +// * ih264_padding_neon.s +// * +// * @brief +// * Contains function definitions padding +// * +// * @author +// * Ittiam +// * +// * @par List of Functions: +// * - ih264_pad_top_av8() +// * - ih264_pad_left_luma_av8() +// * - ih264_pad_left_chroma_av8() +// * - ih264_pad_right_luma_av8() +// * - ih264_pad_right_chroma_av8() +// * +// * @remarks +// * None +// * +// ******************************************************************************* +//*/ + +.text +.p2align 2 +.include "ih264_neon_macros.s" +///** +//******************************************************************************* +//* +//* @brief pad at the top of a 2d array +//* +//* @par Description: +//* The top row of a 2d array is replicated for pad_size times at the top +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] wd +//* integer width of the array +//* +//* @param[in] pad_size +//* integer -padding size of the array +//* +//* @returns none +//* +//* @remarks none +//* +//******************************************************************************* +//*/ +//void ih264_pad_top(UWORD8 *pu1_src, +// WORD32 src_strd, +// WORD32 wd, +// WORD32 pad_size) +//**************Variables Vs Registers************************* +// x0 => *pu1_src +// x1 => src_strd +// x2 => wd +// x3 => pad_size + + .global ih264_pad_top_av8 + +ih264_pad_top_av8: + + // STMFD sp!, {x4-x11,x14} //stack stores the values of the arguments + push_v_regs + stp x19, x20, [sp, #-16]! 
+ + sub x5, x0, x1 + sub x20, x1, #0 + neg x6, x20 + +loop_neon_memcpy_mul_16: + // Load 16 bytes + ld1 {v0.8b, v1.8b}, [x0], #16 + mov x4, x5 + mov x7, x3 + add x5, x5, #16 + +loop_neon_pad_top: + st1 {v0.8b, v1.8b}, [x4], x6 + subs x7, x7, #1 + bne loop_neon_pad_top + + subs x2, x2, #16 + bne loop_neon_memcpy_mul_16 + + // LDMFD sp!,{x4-x11,pc} //Reload the registers from SP + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + +///** +//******************************************************************************* +//* +//* @brief +//* Padding (luma block) at the left of a 2d array +//* +//* @par Description: +//* The left column of a 2d array is replicated for pad_size times at the left +//* +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @param[in] pad_size +//* integer -padding size of the array +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ +//#if PAD_LEFT_LUMA == C +//void ih264_pad_left_luma(UWORD8 *pu1_src, +// WORD32 src_strd, +// WORD32 ht, +// WORD32 pad_size) +//**************Variables Vs Registers************************* +// x0 => *pu1_src +// x1 => src_strd +// x2 => ht +// x3 => pad_size + + + + .global ih264_pad_left_luma_av8 + +ih264_pad_left_luma_av8: + + // STMFD sp!, {x4-x11,x14} //stack stores the values of the arguments + push_v_regs + stp x19, x20, [sp, #-16]! 
+ + + sub x4, x0, x3 + sub x6, x1, #16 + subs x5, x3, #16 + bne loop_32 +loop_16: // /*hard coded for width=16 ,height =8,16*/ + ldrb w8, [x0] + add x0, x0, x1 + sxtw x8, w8 + ldrb w9, [x0] + add x0, x0, x1 + sxtw x9, w9 + dup v0.16b, w8 + ldrb w10, [x0] + add x0, x0, x1 + sxtw x10, w10 + st1 {v0.16b}, [x4], x1 // 16 bytes store + dup v2.16b, w9 + st1 {v2.16b}, [x4], x1 // 16 bytes store + ldrb w11, [x0] + add x0, x0, x1 + sxtw x11, w11 + dup v4.16b, w10 + dup v6.16b, w11 + st1 {v4.16b}, [x4], x1 // 16 bytes store + ldrb w8, [x0] + add x0, x0, x1 + sxtw x8, w8 + st1 {v6.16b}, [x4], x1 // 16 bytes store + ldrb w9, [x0] + add x0, x0, x1 + sxtw x9, w9 + dup v0.16b, w8 + ldrb w10, [x0] + add x0, x0, x1 + sxtw x10, w10 + st1 {v0.16b}, [x4], x1 // 16 bytes store + dup v2.16b, w9 + ldrb w11, [x0] + add x0, x0, x1 + sxtw x11, w11 + st1 {v2.16b}, [x4], x1 // 16 bytes store + dup v4.16b, w10 + dup v6.16b, w11 + subs x2, x2, #8 + st1 {v4.16b}, [x4], x1 // 16 bytes store + st1 {v6.16b}, [x4], x1 // 16 bytes store + bne loop_16 + b end_func + +loop_32: // /*hard coded for width=32 ,height =8,16*/ + ldrb w8, [x0] + add x0, x0, x1 + sxtw x8, w8 + ldrb w9, [x0] + add x0, x0, x1 + sxtw x9, w9 + dup v0.16b, w8 + ldrb w10, [x0] + add x0, x0, x1 + sxtw x10, w10 + st1 {v0.16b}, [x4], #16 // 16 bytes store + dup v2.16b, w9 + st1 {v0.16b}, [x4], x6 + st1 {v2.16b}, [x4], #16 // 16 bytes store + dup v4.16b, w10 + st1 {v2.16b}, [x4], x6 // 16 bytes store + ldrb w11, [x0] + add x0, x0, x1 + sxtw x11, w11 + st1 {v4.16b}, [x4], #16 // 16 bytes store + dup v6.16b, w11 + st1 {v4.16b}, [x4], x6 // 16 bytes store + ldrb w8, [x0] + add x0, x0, x1 + sxtw x8, w8 + st1 {v6.16b}, [x4], #16 // 16 bytes store + dup v0.16b, w8 + ldrb w9, [x0] + add x0, x0, x1 + sxtw x9, w9 + st1 {v6.16b}, [x4], x6 // 16 bytes store + ldrb w10, [x0] + add x0, x0, x1 + sxtw x10, w10 + st1 {v0.16b}, [x4], #16 // 16 bytes store + dup v2.16b, w9 + st1 {v0.16b}, [x4], x6 // 16 bytes store + ldrb w11, [x0] + add x0, x0, x1 + 
sxtw x11, w11 + st1 {v2.16b}, [x4], #16 // 16 bytes store + dup v4.16b, w10 + st1 {v2.16b}, [x4], x6 // 16 bytes store + st1 {v4.16b}, [x4], #16 // 16 bytes store + dup v6.16b, w11 + st1 {v4.16b}, [x4], x6 // 16 bytes store + subs x2, x2, #8 + st1 {v6.16b}, [x4], #16 // 16 bytes store + st1 {v6.16b}, [x4], x6 // 16 bytes store + bne loop_32 + + + +end_func: + // LDMFD sp!,{x4-x11,pc} //Reload the registers from SP + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + +///** +//******************************************************************************* +//* +//* @brief +//* Padding (chroma block) at the left of a 2d array +//* +//* @par Description: +//* The left column of a 2d array is replicated for pad_size times at the left +//* +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array (each colour component) +//* +//* @param[in] pad_size +//* integer -padding size of the array +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ +//#if PAD_LEFT_CHROMA == C +//void ih264_pad_left_chroma(UWORD8 *pu1_src, +// WORD32 src_strd, +// WORD32 ht, +// WORD32 pad_size) +//{ +// x0 => *pu1_src +// x1 => src_strd +// x2 => ht +// x3 => pad_size + + + + .global ih264_pad_left_chroma_av8 + +ih264_pad_left_chroma_av8: + + // STMFD sp!, {x4-x11, x14} //stack stores the values of the arguments + push_v_regs + stp x19, x20, [sp, #-16]! 
+ + sub x4, x0, x3 + sub x6, x1, #16 + + +loop_32_l_c: // /*hard coded for width=32 ,height =4,8,12*/ + ldrh w8, [x0] + add x0, x0, x1 + sxtw x8, w8 + ldrh w9, [x0] + add x0, x0, x1 + sxtw x9, w9 + dup v0.8h, w8 + ldrh w10, [x0] + add x0, x0, x1 + sxtw x10, w10 + st1 {v0.16b}, [x4], #16 // 16 bytes store + dup v2.8h, w9 + st1 {v0.16b}, [x4], x6 // 16 bytes store + ldrh w11, [x0] + add x0, x0, x1 + sxtw x11, w11 + st1 {v2.16b}, [x4], #16 // 16 bytes store + dup v4.8h, w10 + st1 {v2.16b}, [x4], x6 // 16 bytes store + dup v6.8h, w11 + st1 {v4.16b}, [x4], #16 // 16 bytes store + st1 {v4.16b}, [x4], x6 // 16 bytes store + subs x2, x2, #4 + st1 {v6.16b}, [x4], #16 // 16 bytes store + st1 {v6.16b}, [x4], x6 // 16 bytes store + + + beq end_func_l_c ///* Branching when ht=4*/ + + ldrh w8, [x0] + add x0, x0, x1 + sxtw x8, w8 + ldrh w9, [x0] + add x0, x0, x1 + sxtw x9, w9 + dup v0.8h, w8 + ldrh w10, [x0] + add x0, x0, x1 + sxtw x10, w10 + st1 {v0.16b}, [x4], #16 // 16 bytes store + dup v2.8h, w9 + st1 {v0.16b}, [x4], x6 + ldrh w11, [x0] + add x0, x0, x1 + sxtw x11, w11 + st1 {v2.16b}, [x4], #16 // 16 bytes store + dup v4.8h, w10 + st1 {v2.16b}, [x4], x6 // 16 bytes store + dup v6.8h, w11 + st1 {v4.16b}, [x4], #16 // 16 bytes store + st1 {v4.16b}, [x4], x6 // 16 bytes store + subs x2, x2, #4 + st1 {v6.16b}, [x4], #16 // 16 bytes store + st1 {v6.16b}, [x4], x6 // 16 bytes store + + beq end_func_l_c ///* Branching when ht=8*/ + bne loop_32_l_c + + ldrh w8, [x0] + add x0, x0, x1 + sxtw x8, w8 + ldrh w9, [x0] + add x0, x0, x1 + sxtw x9, w9 + dup v0.8h, w8 + ldrh w10, [x0] + add x0, x0, x1 + sxtw x10, w10 + st1 {v0.16b}, [x4], #16 // 16 bytes store + dup v2.8h, w9 + st1 {v0.16b}, [x4], x6 + ldrh w11, [x0] + add x0, x0, x1 + sxtw x11, w11 + st1 {v2.16b}, [x4], #16 // 16 bytes store + dup v4.8h, w10 + st1 {v2.16b}, [x4], x6 // 16 bytes store + dup v6.8h, w11 + st1 {v4.16b}, [x4], #16 // 16 bytes store + st1 {v4.16b}, [x4], x6 // 16 bytes store + st1 {v6.16b}, [x4], #16 // 16 bytes 
store + st1 {v6.16b}, [x4], x6 // 16 bytes store + +end_func_l_c: + // LDMFD sp!,{x4-x11,pc} //Reload the registers from SP + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + +///** +//******************************************************************************* +//* +//* @brief +//* Padding (luma block) at the right of a 2d array +//* +//* @par Description: +//* The right column of a 2d array is replicated for pad_size times at the right +//* +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @param[in] pad_size +//* integer -padding size of the array +//* +//* @param[in] ht +//* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ +//#if PAD_RIGHT_LUMA == C +//void ih264_pad_right_luma(UWORD8 *pu1_src, +// WORD32 src_strd, +// WORD32 ht, +// WORD32 pad_size) +//{ +// WORD32 row; +// +// for(row = 0; row < ht; row++) +// { +// memset(pu1_src, *(pu1_src -1), pad_size); +// +// pu1_src += src_strd; +// } +//} +// +// x0 => *pu1_src +// x1 => src_strd +// x2 => ht +// x3 => pad_size + + + + .global ih264_pad_right_luma_av8 + +ih264_pad_right_luma_av8: + + // STMFD sp!, {x4-x11, x14} //stack stores the values of the arguments + push_v_regs + stp x19, x20, [sp, #-16]! 
+
+    mov       x4, x0                    // x4 = write pointer (first padded byte)
+    sub       x6, x1, #16               // x6 = stride - 16: row advance after a post-incremented 16-byte store
+    sub       x0, x0, #1                // x0 = read pointer: byte just left of pu1_src
+    subs      x5, x3, #16               // pad_size == 16 ?
+    // Branch to this function's own 32-wide loop. This used to target
+    // loop_32, a label inside ih264_pad_left_luma_av8, which only worked
+    // because that loop and its epilogue happen to be equivalent.
+    bne       loop_32_r
+loop_16_r: // /*hard coded for width=16 ,height =8,16*/
+    ldrb      w8, [x0]
+    add       x0, x0, x1
+    sxtw      x8, w8
+    ldrb      w9, [x0]
+    add       x0, x0, x1
+    sxtw      x9, w9
+    dup       v0.16b, w8
+    ldrb      w10, [x0]
+    add       x0, x0, x1
+    sxtw      x10, w10
+    st1       {v0.16b}, [x4], x1        // 16 bytes store
+    dup       v2.16b, w9
+    st1       {v2.16b}, [x4], x1        // 16 bytes store
+    ldrb      w11, [x0]
+    add       x0, x0, x1
+    sxtw      x11, w11
+    dup       v4.16b, w10
+    dup       v6.16b, w11
+    st1       {v4.16b}, [x4], x1        // 16 bytes store
+    ldrb      w8, [x0]
+    add       x0, x0, x1
+    sxtw      x8, w8
+    st1       {v6.16b}, [x4], x1        // 16 bytes store
+    ldrb      w9, [x0]
+    add       x0, x0, x1
+    sxtw      x9, w9
+    dup       v0.16b, w8
+    ldrb      w10, [x0]
+    add       x0, x0, x1
+    sxtw      x10, w10
+    st1       {v0.16b}, [x4], x1        // 16 bytes store
+    dup       v2.16b, w9
+    ldrb      w11, [x0]
+    add       x0, x0, x1
+    sxtw      x11, w11
+    st1       {v2.16b}, [x4], x1        // 16 bytes store
+    dup       v4.16b, w10
+    dup       v6.16b, w11
+    subs      x2, x2, #8                // eight rows done per iteration
+    st1       {v4.16b}, [x4], x1        // 16 bytes store
+    st1       {v6.16b}, [x4], x1        // 16 bytes store
+    bne       loop_16_r
+    b         end_func_r
+
+loop_32_r: // /*hard coded for width=32 ,height =8,16*/
+    ldrb      w8, [x0]
+    add       x0, x0, x1
+    sxtw      x8, w8
+    ldrb      w9, [x0]
+    add       x0, x0, x1
+    sxtw      x9, w9
+    dup       v0.16b, w8
+    ldrb      w10, [x0]
+    add       x0, x0, x1
+    sxtw      x10, w10
+    st1       {v0.16b}, [x4], #16       // 16 bytes store
+    dup       v2.16b, w9
+    st1       {v0.16b}, [x4], x6
+    st1       {v2.16b}, [x4], #16       // 16 bytes store
+    dup       v4.16b, w10
+    st1       {v2.16b}, [x4], x6        // 16 bytes store
+    ldrb      w11, [x0]
+    add       x0, x0, x1
+    sxtw      x11, w11
+    st1       {v4.16b}, [x4], #16       // 16 bytes store
+    dup       v6.16b, w11
+    st1       {v4.16b}, [x4], x6        // 16 bytes store
+    ldrb      w8, [x0]
+    add       x0, x0, x1
+    sxtw      x8, w8
+    st1       {v6.16b}, [x4], #16       // 16 bytes store
+    ldrb      w9, [x0]
+    add       x0, x0, x1
+    sxtw      x9, w9
+    dup       v0.16b, w8
+    st1       {v6.16b}, [x4], x6        // 16 bytes store
+    ldrb      w10, [x0]
+    add       x0, x0, x1
+    sxtw      x10, w10
+    st1       {v0.16b}, [x4], #16       // 16 bytes store
+    dup       v2.16b, w9
+    st1       {v0.16b}, [x4], x6        // 16 bytes store
+    ldrb      w11, [x0]
+
add x0, x0, x1 + sxtw x11, w11 + st1 {v2.16b}, [x4], #16 // 16 bytes store + dup v4.16b, w10 + st1 {v2.16b}, [x4], x6 // 16 bytes store + st1 {v4.16b}, [x4], #16 // 16 bytes store + dup v6.16b, w11 + st1 {v4.16b}, [x4], x6 // 16 bytes store + subs x2, x2, #8 + st1 {v6.16b}, [x4], #16 // 16 bytes store + st1 {v6.16b}, [x4], x6 // 16 bytes store + bne loop_32_r + + + +end_func_r: + // LDMFD sp!,{x4-x11,pc} //Reload the registers from SP + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + +///** +//******************************************************************************* +//* +//* @brief +//;* Padding (chroma block) at the right of a 2d array +//* +//* @par Description: +//* The right column of a 2d array is replicated for pad_size times at the right +//* +//* +//* @param[in] pu1_src +//;* UWORD8 pointer to the source +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] ht +//;* integer height of the array +//* +//* @param[in] wd +//* integer width of the array (each colour component) +//* +//* @param[in] pad_size +//* integer -padding size of the array +//* +//* @param[in] ht +//;* integer height of the array +//* +//* @param[in] wd +//* integer width of the array +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ +//#if PAD_RIGHT_CHROMA == C +//void ih264_pad_right_chroma(UWORD8 *pu1_src, +// WORD32 src_strd, +// WORD32 ht, +// WORD32 pad_size) +// x0 => *pu1_src +// x1 => src_strd +// x2 => ht +// x3 => pad_size + + + + .global ih264_pad_right_chroma_av8 + +ih264_pad_right_chroma_av8: + + // STMFD sp!, {x4-x11, x14} //stack stores the values of the arguments + push_v_regs + stp x19, x20, [sp, #-16]! 
+ + mov x4, x0 + sub x6, x1, #16 + sub x0, x0, #2 +loop_32_r_c: // /*hard coded for width=32 ,height =8,4*/ + ldrh w8, [x0] + add x0, x0, x1 + sxtw x8, w8 + ldrh w9, [x0] + add x0, x0, x1 + sxtw x9, w9 + dup v0.8h, w8 + ldrh w10, [x0] + add x0, x0, x1 + sxtw x10, w10 + st1 {v0.16b}, [x4], #16 // 16 bytes store + dup v2.8h, w9 + st1 {v0.16b}, [x4], x6 + st1 {v2.16b}, [x4], #16 // 16 bytes store + dup v4.8h, w10 + st1 {v2.16b}, [x4], x6 // 16 bytes store + subs x2, x2, #4 + ldrh w11, [x0] + add x0, x0, x1 + sxtw x11, w11 + st1 {v4.16b}, [x4], #16 // 16 bytes store + dup v6.8h, w11 + st1 {v4.16b}, [x4], x6 // 16 bytes store + st1 {v6.16b}, [x4], #16 // 16 bytes store + st1 {v6.16b}, [x4], x6 // 16 bytes store + + beq end_func_r_c ///* Branching when ht=4*/ + + ldrh w8, [x0] + add x0, x0, x1 + sxtw x8, w8 + dup v0.8h, w8 + ldrh w9, [x0] + add x0, x0, x1 + sxtw x9, w9 + ldrh w10, [x0] + add x0, x0, x1 + sxtw x10, w10 + st1 {v0.16b}, [x4], #16 // 16 bytes store + dup v2.8h, w9 + st1 {v0.16b}, [x4], x6 // 16 bytes store + ldrh w11, [x0] + add x0, x0, x1 + sxtw x11, w11 + st1 {v2.16b}, [x4], #16 // 16 bytes store + dup v4.8h, w10 + st1 {v2.16b}, [x4], x6 // 16 bytes store + st1 {v4.16b}, [x4], #16 // 16 bytes store + dup v6.8h, w11 + st1 {v4.16b}, [x4], x6 // 16 bytes store + subs x2, x2, #4 + st1 {v6.16b}, [x4], #16 // 16 bytes store + st1 {v6.16b}, [x4], x6 // 16 bytes store + + beq end_func_r_c ///* Branching when ht=8*/ + bne loop_32_r_c + ldrh w8, [x0] + add x0, x0, x1 + sxtw x8, w8 + dup v0.8h, w8 + ldrh w9, [x0] + add x0, x0, x1 + sxtw x9, w9 + ldrh w10, [x0] + add x0, x0, x1 + sxtw x10, w10 + st1 {v0.16b}, [x4], #16 // 16 bytes store + dup v2.8h, w9 + st1 {v0.16b}, [x4], x6 // 16 bytes store + ldrh w11, [x0] + add x0, x0, x1 + sxtw x11, w11 + st1 {v2.16b}, [x4], #16 // 16 bytes store + dup v4.8h, w10 + st1 {v2.16b}, [x4], x6 // 16 bytes store + st1 {v4.16b}, [x4], #16 // 16 bytes store + dup v6.8h, w11 + st1 {v4.16b}, [x4], x6 // 16 bytes store + st1 {v6.16b}, 
[x4], #16 // 16 bytes store + st1 {v6.16b}, [x4], x6 // 16 bytes store + +end_func_r_c: + // LDMFD sp!,{x4-x11,pc} //Reload the registers from SP + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + + diff --git a/common/armv8/ih264_platform_macros.h b/common/armv8/ih264_platform_macros.h new file mode 100755 index 0000000..1f67403 --- /dev/null +++ b/common/armv8/ih264_platform_macros.h @@ -0,0 +1,152 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_platform_macros.h +* +* @brief +* Platform specific Macro definitions used in the codec +* +* @author +* Ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _IHEVC_PLATFORM_MACROS_H_ +#define _IHEVC_PLATFORM_MACROS_H_ + +#ifndef ARMV8 +void ih264_arm_dsb(void); + +#define DATA_SYNC() ih264_arm_dsb() +static __inline WORD32 CLIP_U8(WORD32 x) +{ + asm("usat %0, #8, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_S8(WORD32 x) +{ + asm("ssat %0, #8, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_U10(WORD32 x) +{ + asm("usat %0, #10, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_S10(WORD32 x) +{ + asm("ssat %0, #10, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_U12(WORD32 x) +{ + asm("usat %0, #12, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_S12(WORD32 x) +{ + asm("ssat %0, #12, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_U16(WORD32 x) +{ + asm("usat %0, #16, %1" : "=r"(x) : "r"(x)); + return x; +} +static __inline WORD32 CLIP_S16(WORD32 x) +{ + asm("ssat %0, #16, %1" : "=r"(x) : "r"(x)); + return x; +} + + +static __inline UWORD32 ITT_BIG_ENDIAN(UWORD32 x) +{ + asm("rev %0, %1" : "=r"(x) : "r"(x)); + return x; +} +#else +#define DATA_SYNC() ; + +#define CLIP_U8(x) CLIP3(0, 255, (x)) +#define CLIP_S8(x) CLIP3(-128, 127, (x)) + +#define CLIP_U10(x) CLIP3(0, 1023, (x)) +#define CLIP_S10(x) CLIP3(-512, 511, (x)) + +#define CLIP_U12(x) CLIP3(0, 4095, (x)) +#define CLIP_S12(x) CLIP3(-2048, 2047, (x)) + +#define CLIP_U16(x) CLIP3(0, 65535, (x)) +#define CLIP_S16(x) CLIP3(-32768, 32767, (x)) + +#define ITT_BIG_ENDIAN(x) ((x & 0x000000ff) << 24) | \ + ((x & 0x0000ff00) << 8) | \ + ((x & 0x00ff0000) >> 8) | \ + ((UWORD32)x >> 24); 
+#endif + +#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0) +#define SHR(x,y) (((y) < 32) ? ((x) >> (y)) : 0) + +#define SHR_NEG(val,shift) ((shift>0)?(val>>shift):(val<<(-shift))) +#define SHL_NEG(val,shift) ((shift<0)?(val>>(-shift)):(val<> 1; + shrn2 v0.8h, v23.4s, #1 //i4_value = (x3 + x2) >> 1; + shrn v1.4h, v24.4s, #1 //i4_value = (x0 - x1) >> 1; + shrn2 v1.8h, v25.4s, #1 //i4_value = (x3 - x2) >> 1; + + abs v2.8h, v0.8h + abs v3.8h, v1.8h + + cmgt v4.8h, v0.8h, #0 //get the sign row 1,2 + cmgt v5.8h, v1.8h, #0 + + neg w4, w4 //-u4_qbits + dup v22.4s, w4 //load -u4_qbits + + umlal v14.4s, v2.4h, v30.4h + umlal2 v15.4s, v2.8h, v30.8h + umlal v16.4s, v3.4h, v30.4h + umlal2 v17.4s, v3.8h, v30.8h + + ushl v14.4s, v14.4s, v22.4s + ushl v15.4s, v15.4s, v22.4s + ushl v16.4s, v16.4s, v22.4s + ushl v17.4s, v17.4s, v22.4s + + uqxtn v14.4h, v14.4s + uqxtn2 v14.8h, v15.4s + uqxtn v16.4h, v16.4s + uqxtn2 v16.8h, v17.4s + + neg v15.8h, v14.8h + neg v17.8h, v16.8h + + bsl v4.16b, v14.16b, v15.16b + bsl v5.16b, v16.16b, v17.16b + + cmeq v0.8h, v14.8h, #0 + cmeq v1.8h, v16.8h, #0 + + st1 {v4.8h-v5.8h}, [x1] + + movi v20.8b, #16 + + xtn v2.8b, v0.8h + xtn v3.8b, v1.8h + + ushr v2.8b, v2.8b, #7 + ushr v3.8b, v3.8b, #7 + + add v2.8b, v2.8b, v3.8b + addp v2.8b, v2.8b, v2.8b + addp v2.8b, v2.8b, v2.8b + addp v2.8b, v2.8b, v2.8b + sub v20.8b, v20.8b, v2.8b + st1 {v20.b}[0], [x6] + + pop_v_regs + ret + + +//***************************************************************************** +//* +//* function name : ih264_hadamard_quant_2x2_uv +//* description : this function does forward hadamard transform and +//* quantization for dc block of chroma for both planes +//* +//* arguments : x0 :pointer to src buffer +// x1 :pointer to dst buffer +// x2 :pu2_scale_matrix +// x2 :pu2_threshold_matrix +// x3 :u4_qbits +// x4 :u4_round_factor +// x5 :pu1_nnz +// values returned : none +// +// register usage : +// stack usage : 0 bytes +// cycles : around +// interruptiaility : interruptable +// 
+// known limitations
+// \assumptions :
+//
+// revision history :
+// dd mm yyyy author(s) changes
+// 20 2 2015 100633 first version
+//
+//*****************************************************************************
+// ih264_hadamard_quant_2x2_uv_av8(word16 *pi2_src, word16 *pi2_dst,
+//                                 const uword16 *pu2_scale_matrix,
+//                                 const uword16 *pu2_threshold_matrix, uword32 u4_qbits,
+//                                 uword32 u4_round_factor,uword8 *pu1_nnz
+//                                 )
+//
+// Registers as actually used by the code below:
+//    x0 => pi2_src
+//    x1 => pi2_dst
+//    x2 => pu2_scale_matrix (only element 0 is read)
+//    x3 => pu2_threshold_matrix (not used)
+//    w4 => u4_qbits
+//    w5 => u4_round_factor
+//    x6 => pu1_nnz (two bytes are written)
+
+    .global ih264_hadamard_quant_2x2_uv_av8
+ih264_hadamard_quant_2x2_uv_av8:
+
+    push_v_regs
+
+    ld2       {v0.4h-v1.4h}, [x0]       //load src
+
+    ld1       {v30.h}[0], [x2]          //load pu2_scale_matrix[0]
+    dup       v30.4h, v30.h[0]          //pu2_scale_matrix
+    uxtl      v30.4s, v30.4h            //pu2_scale_matrix
+
+    neg       w4, w4                    //negative count: ushl then shifts right
+    dup       v24.4s, w4                //u4_qbits
+
+    dup       v25.4s, w5                //round fact
+    dup       v26.4s, v25.s[0]
+
+    saddl     v2.4s, v0.4h, v1.4h       //x0 = x4 + x5;, x2 = x6 + x7;
+    ssubl     v3.4s, v0.4h, v1.4h       //x1 = x4 - x5; x3 = x6 - x7;
+
+    trn1      v4.4s, v2.4s, v3.4s
+    trn2      v5.4s, v2.4s, v3.4s       //q1 -> x0 x1, q2 -> x2 x3
+
+    add       v0.4s, v4.4s , v5.4s      // (x0 + x2) (x1 + x3) (y0 + y2); (y1 + y3);
+    sub       v1.4s, v4.4s , v5.4s      // (x0 - x2) (x1 - x3) (y0 - y2); (y1 - y3);
+
+    abs       v2.4s, v0.4s
+    abs       v3.4s, v1.4s
+
+    cmgt      v4.4s, v0.4s, #0          //get the sign row 1,2
+    cmgt      v5.4s, v1.4s, #0
+
+    // narrow the all-ones / all-zeros sign masks to 16 bit
+    uqxtn     v4.4h, v4.4s
+    sqxtn2    v4.8h, v5.4s
+
+    mla       v25.4s, v2.4s, v30.4s     //round + |coef| * scale
+    mla       v26.4s, v3.4s, v30.4s
+
+    ushl      v2.4s, v25.4s, v24.4s     //>>qbit
+    ushl      v3.4s, v26.4s, v24.4s     //>>qbit
+
+    uqxtn     v2.4h, v2.4s
+    uqxtn2    v2.8h, v3.4s
+
+    neg       v5.8h, v2.8h
+
+    bsl       v4.16b, v2.16b, v5.16b    //*sign
+
+    //rearrange such that we get each plane coeffs as continuous
+    mov       v5.s[0], v4.s[1]
+    mov       v4.s[1], v4.s[2]
+    mov       v4.s[2], v5.s[0]
+
+    cmeq      v5.8h, v4.8h, #0          //compute nnz
+    xtn       v5.8b, v5.8h              //reduce nnz comparison to 1 bit
+    ushr      v5.8b, v5.8b, #7          //reduce nnz comparison to 1 bit
+    movi      v20.8b, #4                //since we add zeros, we need to subtract from 4 to get nnz
+    addp      v5.8b, v5.8b, v5.8b       //sum up nnz
+    addp      v5.8b, v5.8b, v5.8b       //sum up nnz
+
+    // (this store used to be issued twice in a row; once is enough)
+    st1       {v4.8h}, [x1]             //store the block
+
+    sub       v20.8b, v20.8b, v5.8b     //4- numzeros
+
+    st1       {v20.h}[0], [x6]          //store nnz: one byte per plane
+
+    pop_v_regs
+    ret
+
+
+
diff --git a/common/armv8/ih264_weighted_bi_pred_av8.s b/common/armv8/ih264_weighted_bi_pred_av8.s
new file mode 100755
index 0000000..f7d0846
--- /dev/null
+++ b/common/armv8/ih264_weighted_bi_pred_av8.s
@@ -0,0 +1,574 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_weighted_bi_pred_av8.s
+//*
+//* @brief
+//* Contains function definitions for weighted biprediction.
+//* Functions are hand-written in ARMv8 (AArch64) NEON assembly, GNU as syntax
+//*
+//* @author
+//* Kaushik Senthoor R
+//*
+//* @par List of Functions:
+//*
+//* - ih264_weighted_bi_pred_luma_av8()
+//* - ih264_weighted_bi_pred_chroma_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//*******************************************************************************
+//* @function
+//* ih264_weighted_bi_pred_luma_av8()
+//*
+//* @brief
+//* This routine performs the weighted bi-prediction of two blocks as described
+//* in sec 8.4.2.3.2 titled "Weighted sample prediction process" for luma.
+//*
+//* @par Description:
+//* This function gets two ht x wd blocks, calculates the weighted samples,
+//* rounds off, adds offset and stores it in the destination block.
+//*
+//* @param[in] puc_src1
+//* UWORD8 Pointer to the buffer containing the input block 1.
+//*
+//* @param[in] puc_src2
+//* UWORD8 Pointer to the buffer containing the input block 2.
+//*
+//* @param[out] puc_dst
+//* UWORD8 pointer to the destination where the output block is stored.
+//*
+//* @param[in] src_strd1
+//* Stride of the input buffer 1
+//*
+//* @param[in] src_strd2
+//* Stride of the input buffer 2
+//*
+//* @param[in] dst_strd
+//* Stride of the destination buffer
+//*
+//* @param[in] log_WD
+//* number of bits to be rounded off (the code shifts by log_WD + 1)
+//*
+//* @param[in] wt1
+//* weight applied to input block 1 (low 16 bits are used)
+//*
+//* @param[in] wt2
+//* weight applied to input block 2 (low 16 bits are used)
+//*
+//* @param[in] ofst1
+//* offset 1; (ofst1 + ofst2 + 1) >> 1 is added after rounding off
+//*
+//* @param[in] ofst2
+//* offset 2; (ofst1 + ofst2 + 1) >> 1 is added after rounding off
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
+//*
+//*******************************************************************************
+//*/
+//void ih264_weighted_bi_pred_luma_av8(UWORD8 *puc_src1,
+//                                     UWORD8 *puc_src2,
+//                                     UWORD8 *puc_dst,
+//                                     WORD32 src_strd1,
+//                                     WORD32 src_strd2,
+//                                     WORD32 dst_strd,
+//                                     UWORD16 log_WD,
+//                                     UWORD32 wt1,
+//                                     UWORD32 wt2,
+//                                     UWORD16 ofst1,
+//                                     UWORD16 ofst2,
+//                                     UWORD8 ht,
+//                                     UWORD8 wd)
+//
+//**************Variables Vs Registers*****************************************
+// x0              => puc_src1
+// x1              => puc_src2
+// x2              => puc_dst
+// w3              => src_strd1 (sign-extended into x3)
+// w4              => src_strd2 (sign-extended into x4)
+// w5              => dst_strd  (sign-extended into x5)
+// w6              => log_WD
+// w7              => wt1
+// entry [sp]      => wt2   (x8,  read at [sp, #80] after the register pushes)
+// entry [sp+8]    => ofst1 (x9,  read at [sp, #88])
+// entry [sp+16]   => ofst2 (x10, read at [sp, #96])
+// entry [sp+24]   => ht    (x11, read at [sp, #104])
+// entry [sp+32]   => wd    (x12, read at [sp, #112])
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+    .global ih264_weighted_bi_pred_luma_av8
+
+ih264_weighted_bi_pred_luma_av8:
+
+    push_v_regs
+    stp       x19, x20, [sp, #-16]!
+    sxtw      x3, w3                    //WORD32 stride: upper half of x3 is unspecified on entry, extend before 64-bit use
+    sxtw      x4, w4                    //same for src_strd2
+    sxtw      x5, w5                    //same for dst_strd
+    ldr       x8, [sp, #80]             //Load wt2 in x8
+    ldr       x9, [sp, #88]             //Load ofst1 in x9
+    add       x6, x6, #1                //x6 = log_WD + 1
+    sub       x20, x6, #0               //x20 = log_WD + 1
+    neg       x10, x20                  //x10 = -(log_WD + 1)
+    dup       v0.8h, w10                //Q0 = -(log_WD + 1) (16-bit lanes), negative => srshl shifts right
+    ldr       x10, [sp, #96]            //Load ofst2 in x10
+    ldr       x11, [sp, #104]           //Load ht in x11
+    ldr       x12, [sp, #112]           //Load wd in x12
+    add       x9, x9, #1                //x9 = ofst1 + 1
+    add       x9, x9, x10               //x9 = ofst1 + ofst2 + 1
+    mov       v2.s[0], w7
+    mov       v2.s[1], w8               //D2 = {wt1(32-bit), wt2(32-bit)}; used below as h[0] and h[2]
+    asr       x9, x9, #1                //x9 = ofst = (ofst1 + ofst2 + 1) >> 1
+    dup       v3.8b, w9                 //D3 = ofst (8-bit)
+    cmp       w12, #16
+    beq       loop_16                   //branch if wd is 16
+    cmp       w12, #8                   //check if wd is 8
+    beq       loop_8                    //branch if wd is 8
+
+loop_4:                                 //each iteration processes four rows
+
+    ld1       {v4.s}[0], [x0], x3       //load row 1 in source 1
+    ld1       {v4.s}[1], [x0], x3       //load row 2 in source 1
+    ld1       {v6.s}[0], [x1], x4       //load row 1 in source 2
+    ld1       {v6.s}[1], [x1], x4       //load row 2 in source 2
+    uxtl      v4.8h, v4.8b              //converting rows 1,2 in source 1 to 16-bit
+    ld1       {v8.s}[0], [x0], x3       //load row 3 in source 1
+    ld1       {v8.s}[1], [x0], x3       //load row 4 in source 1
+    uxtl      v6.8h, v6.8b              //converting rows 1,2 in source 2 to 16-bit
+    ld1       {v10.s}[0], [x1], x4      //load row 3 in source 2
+    ld1       {v10.s}[1], [x1], x4      //load row 4 in source 2
+    uxtl      v8.8h, v8.8b              //converting rows 3,4 in source 1 to 16-bit
+    uxtl      v10.8h, v10.8b            //converting rows 3,4 in source 2 to 16-bit
+    mul       v4.8h, v4.8h, v2.4h[0]    //weight 1 mult. for rows 1,2
+    mla       v4.8h, v6.8h, v2.4h[2]    //weight 2 mult. for rows 1,2
+    mul       v8.8h, v8.8h, v2.4h[0]    //weight 1 mult. for rows 3,4
+    mla       v8.8h, v10.8h, v2.4h[2]   //weight 2 mult. for rows 3,4
+    subs      w11, w11, #4              //decrement ht by 4
+    srshl     v4.8h, v4.8h, v0.8h       //rounds off the weighted samples from rows 1,2
+    srshl     v8.8h, v8.8h, v0.8h       //rounds off the weighted samples from rows 3,4
+    saddw     v4.8h, v4.8h, v3.8b       //adding offset for rows 1,2
+    saddw     v8.8h, v8.8h, v3.8b       //adding offset for rows 3,4
+    sqxtun    v4.8b, v4.8h              //saturating rows 1,2 to unsigned 8-bit
+    sqxtun    v8.8b, v8.8h              //saturating rows 3,4 to unsigned 8-bit
+    st1       {v4.s}[0], [x2], x5       //store row 1 in destination
+    st1       {v4.s}[1], [x2], x5       //store row 2 in destination
+    st1       {v8.s}[0], [x2], x5       //store row 3 in destination
+    st1       {v8.s}[1], [x2], x5       //store row 4 in destination
+    bgt       loop_4                    //if greater than 0 repeat the loop again
+    b         end_loops
+
+loop_8:                                 //each iteration processes four rows
+
+    ld1       {v4.8b}, [x0], x3         //load row 1 in source 1
+    ld1       {v6.8b}, [x1], x4         //load row 1 in source 2
+    ld1       {v8.8b}, [x0], x3         //load row 2 in source 1
+    ld1       {v10.8b}, [x1], x4        //load row 2 in source 2
+    uxtl      v4.8h, v4.8b              //converting row 1 in source 1 to 16-bit
+    ld1       {v12.8b}, [x0], x3        //load row 3 in source 1
+    ld1       {v14.8b}, [x1], x4        //load row 3 in source 2
+    uxtl      v6.8h, v6.8b              //converting row 1 in source 2 to 16-bit
+    ld1       {v16.8b}, [x0], x3        //load row 4 in source 1
+    ld1       {v18.8b}, [x1], x4        //load row 4 in source 2
+    uxtl      v8.8h, v8.8b              //converting row 2 in source 1 to 16-bit
+    uxtl      v10.8h, v10.8b            //converting row 2 in source 2 to 16-bit
+    mul       v4.8h, v4.8h, v2.4h[0]    //weight 1 mult. for row 1
+    mla       v4.8h, v6.8h, v2.4h[2]    //weight 2 mult. for row 1
+    uxtl      v12.8h, v12.8b            //converting row 3 in source 1 to 16-bit
+    uxtl      v14.8h, v14.8b            //converting row 3 in source 2 to 16-bit
+    mul       v8.8h, v8.8h, v2.4h[0]    //weight 1 mult. for row 2
+    mla       v8.8h, v10.8h, v2.4h[2]   //weight 2 mult. for row 2
+    uxtl      v16.8h, v16.8b            //converting row 4 in source 1 to 16-bit
+    uxtl      v18.8h, v18.8b            //converting row 4 in source 2 to 16-bit
+    mul       v12.8h, v12.8h, v2.4h[0]  //weight 1 mult. for row 3
+    mla       v12.8h, v14.8h, v2.4h[2]  //weight 2 mult. for row 3
+    mul       v16.8h, v16.8h, v2.4h[0]  //weight 1 mult. for row 4
+    mla       v16.8h, v18.8h, v2.4h[2]  //weight 2 mult. for row 4
+    srshl     v4.8h, v4.8h, v0.8h       //rounds off the weighted samples from row 1
+    srshl     v8.8h, v8.8h, v0.8h       //rounds off the weighted samples from row 2
+    srshl     v12.8h, v12.8h, v0.8h     //rounds off the weighted samples from row 3
+    saddw     v4.8h, v4.8h, v3.8b       //adding offset for row 1
+    srshl     v16.8h, v16.8h, v0.8h     //rounds off the weighted samples from row 4
+    saddw     v8.8h, v8.8h, v3.8b       //adding offset for row 2
+    saddw     v12.8h, v12.8h, v3.8b     //adding offset for row 3
+    sqxtun    v4.8b, v4.8h              //saturating row 1 to unsigned 8-bit
+    saddw     v16.8h, v16.8h, v3.8b     //adding offset for row 4
+    sqxtun    v8.8b, v8.8h              //saturating row 2 to unsigned 8-bit
+    sqxtun    v12.8b, v12.8h            //saturating row 3 to unsigned 8-bit
+    sqxtun    v16.8b, v16.8h            //saturating row 4 to unsigned 8-bit
+    st1       {v4.8b}, [x2], x5         //store row 1 in destination
+    st1       {v8.8b}, [x2], x5         //store row 2 in destination
+    subs      w11, w11, #4              //decrement ht by 4
+    st1       {v12.8b}, [x2], x5        //store row 3 in destination
+    st1       {v16.8b}, [x2], x5        //store row 4 in destination
+    bgt       loop_8                    //if greater than 0 repeat the loop again
+    b         end_loops
+
+loop_16:                                //each iteration processes four rows
+
+    ld1       {v4.8b, v5.8b}, [x0], x3  //load row 1 in source 1
+    ld1       {v6.8b, v7.8b}, [x1], x4  //load row 1 in source 2
+    ld1       {v8.8b, v9.8b}, [x0], x3  //load row 2 in source 1
+    ld1       {v10.8b, v11.8b}, [x1], x4 //load row 2 in source 2
+    uxtl      v20.8h, v4.8b             //converting row 1L in source 1 to 16-bit
+    ld1       {v12.8b, v13.8b}, [x0], x3 //load row 3 in source 1
+    ld1       {v14.8b, v15.8b}, [x1], x4 //load row 3 in source 2
+    uxtl      v22.8h, v6.8b             //converting row 1L in source 2 to 16-bit
+    ld1       {v16.8b, v17.8b}, [x0], x3 //load row 4 in source 1
+    ld1       {v18.8b, v19.8b}, [x1], x4 //load row 4 in source 2
+    uxtl      v4.8h, v5.8b              //converting row 1H in source 1 to 16-bit
+    uxtl      v6.8h, v7.8b              //converting row 1H in source 2 to 16-bit
+    mul       v20.8h, v20.8h, v2.4h[0]  //weight 1 mult. for row 1L
+    mla       v20.8h, v22.8h, v2.4h[2]  //weight 2 mult. for row 1L
+    uxtl      v24.8h, v8.8b             //converting row 2L in source 1 to 16-bit
+    uxtl      v26.8h, v10.8b            //converting row 2L in source 2 to 16-bit
+    mul       v4.8h, v4.8h, v2.4h[0]    //weight 1 mult. for row 1H
+    mla       v4.8h, v6.8h, v2.4h[2]    //weight 2 mult. for row 1H
+    uxtl      v8.8h, v9.8b              //converting row 2H in source 1 to 16-bit
+    uxtl      v10.8h, v11.8b            //converting row 2H in source 2 to 16-bit
+    mul       v24.8h, v24.8h, v2.4h[0]  //weight 1 mult. for row 2L
+    mla       v24.8h, v26.8h, v2.4h[2]  //weight 2 mult. for row 2L
+    uxtl      v28.8h, v12.8b            //converting row 3L in source 1 to 16-bit
+    uxtl      v30.8h, v14.8b            //converting row 3L in source 2 to 16-bit
+    mul       v8.8h, v8.8h, v2.4h[0]    //weight 1 mult. for row 2H
+    mla       v8.8h, v10.8h, v2.4h[2]   //weight 2 mult. for row 2H
+    uxtl      v12.8h, v13.8b            //converting row 3H in source 1 to 16-bit
+    uxtl      v14.8h, v15.8b            //converting row 3H in source 2 to 16-bit
+    mul       v28.8h, v28.8h, v2.4h[0]  //weight 1 mult. for row 3L
+    mla       v28.8h, v30.8h, v2.4h[2]  //weight 2 mult. for row 3L
+    uxtl      v22.8h, v16.8b            //converting row 4L in source 1 to 16-bit
+    uxtl      v6.8h, v18.8b             //converting row 4L in source 2 to 16-bit
+    mul       v12.8h, v12.8h, v2.4h[0]  //weight 1 mult. for row 3H
+    mla       v12.8h, v14.8h, v2.4h[2]  //weight 2 mult. for row 3H
+    uxtl      v16.8h, v17.8b            //converting row 4H in source 1 to 16-bit
+    uxtl      v18.8h, v19.8b            //converting row 4H in source 2 to 16-bit
+    mul       v22.8h, v22.8h, v2.4h[0]  //weight 1 mult. for row 4L
+    mla       v22.8h, v6.8h, v2.4h[2]   //weight 2 mult. for row 4L
+    srshl     v20.8h, v20.8h, v0.8h     //rounds off the weighted samples from row 1L
+    mul       v16.8h, v16.8h, v2.4h[0]  //weight 1 mult. for row 4H
+    mla       v16.8h, v18.8h, v2.4h[2]  //weight 2 mult. for row 4H
+    srshl     v4.8h, v4.8h, v0.8h       //rounds off the weighted samples from row 1H
+    srshl     v24.8h, v24.8h, v0.8h     //rounds off the weighted samples from row 2L
+    saddw     v20.8h, v20.8h, v3.8b     //adding offset for row 1L
+    srshl     v8.8h, v8.8h, v0.8h       //rounds off the weighted samples from row 2H
+    saddw     v4.8h, v4.8h, v3.8b       //adding offset for row 1H
+    srshl     v28.8h, v28.8h, v0.8h     //rounds off the weighted samples from row 3L
+    saddw     v24.8h, v24.8h, v3.8b     //adding offset for row 2L
+    srshl     v12.8h, v12.8h, v0.8h     //rounds off the weighted samples from row 3H
+    saddw     v8.8h, v8.8h, v3.8b       //adding offset for row 2H
+    srshl     v22.8h, v22.8h, v0.8h     //rounds off the weighted samples from row 4L
+    saddw     v28.8h, v28.8h, v3.8b     //adding offset for row 3L
+    srshl     v16.8h, v16.8h, v0.8h     //rounds off the weighted samples from row 4H
+    saddw     v12.8h, v12.8h, v3.8b     //adding offset for row 3H
+    sqxtun    v26.8b, v20.8h            //saturating row 1L to unsigned 8-bit
+    saddw     v22.8h, v22.8h, v3.8b     //adding offset for row 4L
+    sqxtun    v27.8b, v4.8h             //saturating row 1H to unsigned 8-bit
+    saddw     v16.8h, v16.8h, v3.8b     //adding offset for row 4H
+    sqxtun    v10.8b, v24.8h            //saturating row 2L to unsigned 8-bit
+    sqxtun    v11.8b, v8.8h             //saturating row 2H to unsigned 8-bit
+    sqxtun    v30.8b, v28.8h            //saturating row 3L to unsigned 8-bit
+    sqxtun    v31.8b, v12.8h            //saturating row 3H to unsigned 8-bit
+    st1       {v26.8b, v27.8b}, [x2], x5 //store row 1 in destination
+    sqxtun    v14.8b, v22.8h            //saturating row 4L to unsigned 8-bit
+    sqxtun    v15.8b, v16.8h            //saturating row 4H to unsigned 8-bit
+    st1       {v10.8b, v11.8b}, [x2], x5 //store row 2 in destination
+    subs      w11, w11, #4              //decrement ht by 4
+    st1       {v30.8b, v31.8b}, [x2], x5 //store row 3 in destination
+    st1       {v14.8b, v15.8b}, [x2], x5 //store row 4 in destination
+    bgt       loop_16                   //if greater than 0 repeat the loop again
+
+end_loops:
+
+    ldp       x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+//*******************************************************************************
+//* @function
+//* ih264_weighted_bi_pred_chroma_av8()
+//*
+//* @brief
+//* This routine performs the weighted bi-prediction of two blocks as described
+//* in sec 8.4.2.3.2 titled "Weighted sample prediction process" for chroma.
+//*
+//* @par Description:
+//* This function gets two ht x wd blocks, calculates the weighted samples,
+//* rounds off, adds offset and stores it in the destination block for U and V.
+//*
+//* @param[in] puc_src1
+//* UWORD8 Pointer to the buffer containing the input block 1.
+//*
+//* @param[in] puc_src2
+//* UWORD8 Pointer to the buffer containing the input block 2.
+//*
+//* @param[out] puc_dst
+//* UWORD8 pointer to the destination where the output block is stored.
+//*
+//* @param[in] src_strd1
+//* Stride of the input buffer 1
+//*
+//* @param[in] src_strd2
+//* Stride of the input buffer 2
+//*
+//* @param[in] dst_strd
+//* Stride of the destination buffer
+//*
+//* @param[in] log_WD
+//* number of bits to be rounded off (the code shifts by log_WD + 1)
+//*
+//* @param[in] wt1
+//* weights for the weighted prediction in U and V
+//*
+//* @param[in] wt2
+//* weights for the weighted prediction in U and V
+//*
+//* @param[in] ofst1
+//* offset 1 used after rounding off for U and V
+//*
+//* @param[in] ofst2
+//* offset 2 used after rounding off for U and V
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
+//*
+//*******************************************************************************
+//*/
+//void ih264_weighted_bi_pred_chroma_av8(UWORD8 *puc_src1,
+//                                       UWORD8 *puc_src2,
+//                                       UWORD8 *puc_dst,
+//                                       WORD32 src_strd1,
+//                                       WORD32 src_strd2,
+//                                       WORD32 dst_strd,
+//                                       UWORD16 log_WD,
+//                                       UWORD32 wt1,
+//                                       UWORD32 wt2,
+//                                       UWORD16 ofst1,
+//                                       UWORD16 ofst2,
+//                                       UWORD8 ht,
+//                                       UWORD8 wd)
+//
+//**************Variables Vs Registers*****************************************
+// x0              => puc_src1
+// x1              => puc_src2
+// x2              => puc_dst
+// w3              => src_strd1 (sign-extended into x3)
+// w4              => src_strd2 (sign-extended into x4)
+// w5              => dst_strd  (sign-extended into x5)
+// w6              => log_WD
+// w7              => wt1
+// entry [sp]      => wt2   (x8,  read at [sp, #80] after the register pushes)
+// entry [sp+8]    => ofst1 (x9,  read at [sp, #88])
+// entry [sp+16]   => ofst2 (x10, read at [sp, #96])
+// entry [sp+24]   => ht    (x11, read at [sp, #104])
+// entry [sp+32]   => wd    (x12, read at [sp, #112])
+//
+
+
+
+
+    .global ih264_weighted_bi_pred_chroma_av8
+
+ih264_weighted_bi_pred_chroma_av8:
+
+    push_v_regs
+    stp       x19, x20, [sp, #-16]!
+    sxtw      x3, w3                    //WORD32 stride: upper half of x3 is unspecified on entry, extend before 64-bit use
+    sxtw      x4, w4                    //same for src_strd2
+    sxtw      x5, w5                    //same for dst_strd
+
+
+    ldr       x8, [sp, #80]             //Load wt2 in x8
+    dup       v4.4s, w8                 //Q4 = (wt2_u, wt2_v) repeated in every 32-bit lane
+    dup       v2.4s, w7                 //Q2 = (wt1_u, wt1_v) repeated in every 32-bit lane
+    add       x6, x6, #1                //x6 = log_WD + 1
+    ldr       w9, [sp, #88]             //Load ofst1 in x9
+    sxtw      x9, w9
+    ldr       w10, [sp, #96]            //Load ofst2 in x10
+    sxtw      x10, w10
+    sub       x20, x6, #0               //x20 = log_WD + 1
+    neg       x20, x20                  //x20 = -(log_WD + 1)
+    dup       v0.8h, w20                //Q0 = -(log_WD + 1) (16-bit), negative => srshl shifts right
+    ldr       w11, [sp, #104]           //Load ht in x11
+    ldr       w12, [sp, #112]           //Load wd in x12
+    sxtw      x11, w11
+    sxtw      x12, w12
+    dup       v20.8h, w9                //offset1
+    dup       v21.8h, w10               //offset2
+    srhadd    v6.8b, v20.8b, v21.8b     //per-byte (ofst1 + ofst2 + 1) >> 1
+    sxtl      v6.8h, v6.8b              //widen the combined offsets to 16-bit lanes
+    cmp       w12, #8                   //check if wd is 8
+    beq       loop_8_uv                 //branch if wd is 8
+    cmp       w12, #4                   //check if wd is 4
+    beq       loop_4_uv                 //branch if wd is 4
+
+loop_2_uv:                              //each iteration processes two rows
+
+    ld1       {v8.s}[0], [x0], x3       //load row 1 in source 1
+    ld1       {v8.s}[1], [x0], x3       //load row 2 in source 1
+    ld1       {v10.s}[0], [x1], x4      //load row 1 in source 2
+    ld1       {v10.s}[1], [x1], x4      //load row 2 in source 2
+    uxtl      v8.8h, v8.8b              //converting rows 1,2 in source 1 to 16-bit
+    uxtl      v10.8h, v10.8b            //converting rows 1,2 in source 2 to 16-bit
+    mul       v8.8h, v8.8h, v2.8h       //weight 1 mult. for rows 1,2
+    mla       v8.8h, v10.8h, v4.8h      //weight 2 mult. for rows 1,2
+    srshl     v8.8h, v8.8h, v0.8h       //rounds off the weighted samples from rows 1,2
+    add       v8.8h, v8.8h, v6.8h       //adding offset for rows 1,2
+    sqxtun    v8.8b, v8.8h              //saturating rows 1,2 to unsigned 8-bit
+    st1       {v8.s}[0], [x2], x5       //store row 1 in destination
+    st1       {v8.s}[1], [x2], x5       //store row 2 in destination
+    subs      w11, w11, #2              //decrement ht by 2
+    bgt       loop_2_uv                 //if greater than 0 repeat the loop again
+    b         end_loops_uv
+
+loop_4_uv:                              //each iteration processes two rows
+
+    ld1       {v8.8b}, [x0], x3         //load row 1 in source 1
+    ld1       {v10.8b}, [x1], x4        //load row 1 in source 2
+    uxtl      v8.8h, v8.8b              //converting row 1 in source 1 to 16-bit
+    ld1       {v12.8b}, [x0], x3        //load row 2 in source 1
+    uxtl      v10.8h, v10.8b            //converting row 1 in source 2 to 16-bit
+    ld1       {v14.8b}, [x1], x4        //load row 2 in source 2
+    uxtl      v12.8h, v12.8b            //converting row 2 in source 1 to 16-bit
+    mul       v8.8h, v8.8h, v2.8h       //weight 1 mult. for row 1
+    mla       v8.8h, v10.8h, v4.8h      //weight 2 mult. for row 1
+    uxtl      v14.8h, v14.8b            //converting row 2 in source 2 to 16-bit
+    mul       v12.8h, v12.8h, v2.8h     //weight 1 mult. for row 2
+    mla       v12.8h, v14.8h, v4.8h     //weight 2 mult. for row 2
+    subs      w11, w11, #2              //decrement ht by 2
+    srshl     v8.8h, v8.8h, v0.8h       //rounds off the weighted samples from row 1
+    srshl     v12.8h, v12.8h, v0.8h     //rounds off the weighted samples from row 2
+    add       v8.8h, v8.8h, v6.8h       //adding offset for row 1
+    add       v12.8h, v12.8h, v6.8h     //adding offset for row 2
+    sqxtun    v8.8b, v8.8h              //saturating row 1 to unsigned 8-bit
+    sqxtun    v12.8b, v12.8h            //saturating row 2 to unsigned 8-bit
+    st1       {v8.8b}, [x2], x5         //store row 1 in destination
+    st1       {v12.8b}, [x2], x5        //store row 2 in destination
+    bgt       loop_4_uv                 //if greater than 0 repeat the loop again
+    b         end_loops_uv
+
+loop_8_uv:                              //each iteration processes four rows
+
+    ld1       {v8.8b, v9.8b}, [x0], x3  //load row 1 in source 1
+    ld1       {v10.8b, v11.8b}, [x1], x4 //load row 1 in source 2
+    ld1       {v12.8b, v13.8b}, [x0], x3 //load row 2 in source 1
+    ld1       {v14.8b, v15.8b}, [x1], x4 //load row 2 in source 2
+    uxtl      v24.8h, v8.8b             //converting row 1L in source 1 to 16-bit
+    ld1       {v16.8b, v17.8b}, [x0], x3 //load row 3 in source 1
+    ld1       {v18.8b, v19.8b}, [x1], x4 //load row 3 in source 2
+    uxtl      v26.8h, v10.8b            //converting row 1L in source 2 to 16-bit
+    ld1       {v20.8b, v21.8b}, [x0], x3 //load row 4 in source 1
+    ld1       {v22.8b, v23.8b}, [x1], x4 //load row 4 in source 2
+    uxtl      v8.8h, v9.8b              //converting row 1H in source 1 to 16-bit
+    uxtl      v10.8h, v11.8b            //converting row 1H in source 2 to 16-bit
+    mul       v24.8h, v24.8h, v2.8h     //weight 1 mult. for row 1L
+    mla       v24.8h, v26.8h, v4.8h     //weight 2 mult. for row 1L
+    uxtl      v28.8h, v12.8b            //converting row 2L in source 1 to 16-bit
+    uxtl      v30.8h, v14.8b            //converting row 2L in source 2 to 16-bit
+    mul       v8.8h, v8.8h, v2.8h       //weight 1 mult. for row 1H
+    mla       v8.8h, v10.8h, v4.8h      //weight 2 mult. for row 1H
+    uxtl      v12.8h, v13.8b            //converting row 2H in source 1 to 16-bit
+    uxtl      v14.8h, v15.8b            //converting row 2H in source 2 to 16-bit
+    mul       v28.8h, v28.8h, v2.8h     //weight 1 mult. for row 2L
+    mla       v28.8h, v30.8h, v4.8h     //weight 2 mult. for row 2L
+    uxtl      v26.8h, v16.8b            //converting row 3L in source 1 to 16-bit
+    uxtl      v10.8h, v18.8b            //converting row 3L in source 2 to 16-bit
+    mul       v12.8h, v12.8h, v2.8h     //weight 1 mult. for row 2H
+    mla       v12.8h, v14.8h, v4.8h     //weight 2 mult. for row 2H
+    uxtl      v16.8h, v17.8b            //converting row 3H in source 1 to 16-bit
+    uxtl      v18.8h, v19.8b            //converting row 3H in source 2 to 16-bit
+    mul       v26.8h, v26.8h, v2.8h     //weight 1 mult. for row 3L
+    mla       v26.8h, v10.8h, v4.8h     //weight 2 mult. for row 3L
+    uxtl      v30.8h, v20.8b            //converting row 4L in source 1 to 16-bit
+    uxtl      v14.8h, v22.8b            //converting row 4L in source 2 to 16-bit
+    mul       v16.8h, v16.8h, v2.8h     //weight 1 mult. for row 3H
+    mla       v16.8h, v18.8h, v4.8h     //weight 2 mult. for row 3H
+    uxtl      v20.8h, v21.8b            //converting row 4H in source 1 to 16-bit
+    uxtl      v22.8h, v23.8b            //converting row 4H in source 2 to 16-bit
+    mul       v30.8h, v30.8h, v2.8h     //weight 1 mult. for row 4L
+    mla       v30.8h, v14.8h, v4.8h     //weight 2 mult. for row 4L
+    srshl     v24.8h, v24.8h, v0.8h     //rounds off the weighted samples from row 1L
+    mul       v20.8h, v20.8h, v2.8h     //weight 1 mult. for row 4H
+    mla       v20.8h, v22.8h, v4.8h     //weight 2 mult. for row 4H
+    srshl     v8.8h, v8.8h, v0.8h       //rounds off the weighted samples from row 1H
+    srshl     v28.8h, v28.8h, v0.8h     //rounds off the weighted samples from row 2L
+    add       v24.8h, v24.8h, v6.8h     //adding offset for row 1L
+    srshl     v12.8h, v12.8h, v0.8h     //rounds off the weighted samples from row 2H
+    add       v8.8h, v8.8h, v6.8h       //adding offset for row 1H
+    srshl     v26.8h, v26.8h, v0.8h     //rounds off the weighted samples from row 3L
+    add       v28.8h, v28.8h, v6.8h     //adding offset for row 2L
+    srshl     v16.8h, v16.8h, v0.8h     //rounds off the weighted samples from row 3H
+    add       v12.8h, v12.8h, v6.8h     //adding offset for row 2H
+    srshl     v30.8h, v30.8h, v0.8h     //rounds off the weighted samples from row 4L
+    add       v26.8h, v26.8h, v6.8h     //adding offset for row 3L
+    srshl     v20.8h, v20.8h, v0.8h     //rounds off the weighted samples from row 4H
+    add       v16.8h, v16.8h, v6.8h     //adding offset for row 3H
+    sqxtun    v10.8b, v24.8h            //saturating row 1L to unsigned 8-bit
+    add       v30.8h, v30.8h, v6.8h     //adding offset for row 4L
+    sqxtun    v11.8b, v8.8h             //saturating row 1H to unsigned 8-bit
+    add       v20.8h, v20.8h, v6.8h     //adding offset for row 4H
+    sqxtun    v18.8b, v28.8h            //saturating row 2L to unsigned 8-bit
+    sqxtun    v19.8b, v12.8h            //saturating row 2H to unsigned 8-bit
+    sqxtun    v14.8b, v26.8h            //saturating row 3L to unsigned 8-bit
+    sqxtun    v15.8b, v16.8h            //saturating row 3H to unsigned 8-bit
+    st1       {v10.8b, v11.8b}, [x2], x5 //store row 1 in destination
+    sqxtun    v22.8b, v30.8h            //saturating row 4L to unsigned 8-bit
+    sqxtun    v23.8b, v20.8h            //saturating row 4H to unsigned 8-bit
+    st1       {v18.8b, v19.8b}, [x2], x5 //store row 2 in destination
+    subs      w11, w11, #4              //decrement ht by 4
+    st1       {v14.8b, v15.8b}, [x2], x5 //store row 3 in destination
+    st1       {v22.8b, v23.8b}, [x2], x5 //store row 4 in destination
+    bgt       loop_8_uv                 //if greater than 0 repeat the loop again
+
+end_loops_uv:
+
+    ldp       x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+
diff --git
a/common/armv8/ih264_weighted_pred_av8.s b/common/armv8/ih264_weighted_pred_av8.s
new file mode 100755
index 0000000..6a03875
--- /dev/null
+++ b/common/armv8/ih264_weighted_pred_av8.s
@@ -0,0 +1,471 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_weighted_pred_av8.s
+//*
+//* @brief
+//* Contains function definitions for weighted prediction.
+//* Functions are hand-written in ARMv8 (AArch64) NEON assembly, GNU as syntax
+//*
+//* @author
+//* Kaushik Senthoor R
+//*
+//* @par List of Functions:
+//*
+//* - ih264_weighted_pred_luma_av8()
+//* - ih264_weighted_pred_chroma_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//*******************************************************************************
+//* @function
+//* ih264_weighted_pred_luma_av8()
+//*
+//* @brief
+//* This routine performs the weighted prediction of a single block as described
+//* in sec 8.4.2.3.2 titled "Weighted sample prediction process" for luma.
+//*
+//* @par Description:
+//* This function gets a ht x wd block, calculates the weighted sample, rounds
+//* off, adds offset and stores it in the destination block.
+//*
+//* @param[in] puc_src:
+//* UWORD8 Pointer to the buffer containing the input block.
+//*
+//* @param[out] puc_dst
+//* UWORD8 pointer to the destination where the output block is stored.
+//*
+//* @param[in] src_strd
+//* Stride of the input buffer
+//*
+//* @param[in] dst_strd
+//* Stride of the destination buffer
+//*
+//* @param[in] log_WD
+//* number of bits to be rounded off
+//*
+//* @param[in] wt
+//* weight for the weighted prediction (low 16 bits are used)
+//*
+//* @param[in] ofst
+//* offset used after rounding off (low 8 bits are used, as a signed value)
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
+//*
+//*******************************************************************************
+//*/
+//void ih264_weighted_pred_luma_av8(UWORD8 *puc_src,
+//                                  UWORD8 *puc_dst,
+//                                  WORD32 src_strd,
+//                                  WORD32 dst_strd,
+//                                  UWORD8 log_WD,
+//                                  UWORD32 wt,
+//                                  UWORD16 ofst,
+//                                  UWORD8 ht,
+//                                  UWORD8 wd)
+//
+//**************Variables Vs Registers*****************************************
+// x0              => puc_src
+// x1              => puc_dst
+// w2              => src_strd (sign-extended into x2)
+// w3              => dst_strd (sign-extended into x3)
+// w4              => log_WD
+// w5              => wt
+// w6              => ofst
+// w7              => ht
+// entry [sp]      => wd (w8, read at [sp, #80] after the register pushes)
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+    .global ih264_weighted_pred_luma_av8
+
+ih264_weighted_pred_luma_av8:
+
+    push_v_regs
+    stp       x19, x20, [sp, #-16]!
+    sxtw      x2, w2                    //WORD32 stride: upper half of x2 is unspecified on entry, extend before 64-bit use
+    sxtw      x3, w3                    //same for dst_strd
+    ldr       w8, [sp, #80]             //Load wd
+    sxtw      x8, w8
+
+    dup       v2.4h, w5                 //D2 = wt (16-bit)
+    sub       x20, x4, #0               //x20 = log_WD
+    neg       x9, x20                   //x9 = -log_WD
+    dup       v3.8b, w6                 //D3 = ofst (8-bit)
+    cmp       w8, #16                   //check if wd is 16
+    dup       v0.8h, w9                 //Q0 = -log_WD (16-bit), negative => srshl shifts right
+    beq       loop_16                   //branch if wd is 16
+
+    cmp       w8, #8                    //check if wd is 8
+    beq       loop_8                    //branch if wd is 8
+
+loop_4:                                 //each iteration processes four rows
+
+    ld1       {v4.s}[0], [x0], x2       //load row 1 in source
+    ld1       {v4.s}[1], [x0], x2       //load row 2 in source
+    ld1       {v6.s}[0], [x0], x2       //load row 3 in source
+    ld1       {v6.s}[1], [x0], x2       //load row 4 in source
+
+    uxtl      v4.8h, v4.8b              //converting rows 1,2 to 16-bit
+    uxtl      v6.8h, v6.8b              //converting rows 3,4 to 16-bit
+
+    mul       v4.8h, v4.8h, v2.4h[0]    //weight mult. for rows 1,2
+    mul       v6.8h, v6.8h, v2.4h[0]    //weight mult. for rows 3,4
+
+    subs      w7, w7, #4                //decrement ht by 4
+    srshl     v4.8h, v4.8h, v0.8h       //rounds off the weighted samples from rows 1,2
+    srshl     v6.8h, v6.8h, v0.8h       //rounds off the weighted samples from rows 3,4
+
+    saddw     v4.8h, v4.8h, v3.8b       //adding offset for rows 1,2
+    saddw     v6.8h, v6.8h, v3.8b       //adding offset for rows 3,4
+
+    sqxtun    v4.8b, v4.8h              //saturating rows 1,2 to unsigned 8-bit
+    sqxtun    v6.8b, v6.8h              //saturating rows 3,4 to unsigned 8-bit
+
+    st1       {v4.s}[0], [x1], x3       //store row 1 in destination
+    st1       {v4.s}[1], [x1], x3       //store row 2 in destination
+    st1       {v6.s}[0], [x1], x3       //store row 3 in destination
+    st1       {v6.s}[1], [x1], x3       //store row 4 in destination
+
+    bgt       loop_4                    //if greater than 0 repeat the loop again
+
+    b         end_loops
+
+loop_8:                                 //each iteration processes four rows
+
+    ld1       {v4.8b}, [x0], x2         //load row 1 in source
+    ld1       {v6.8b}, [x0], x2         //load row 2 in source
+    ld1       {v8.8b}, [x0], x2         //load row 3 in source
+    uxtl      v4.8h, v4.8b              //converting row 1 to 16-bit
+    ld1       {v10.8b}, [x0], x2        //load row 4 in source
+    uxtl      v6.8h, v6.8b              //converting row 2 to 16-bit
+
+    uxtl      v8.8h, v8.8b              //converting row 3 to 16-bit
+    mul       v4.8h, v4.8h, v2.4h[0]    //weight mult. for row 1
+    uxtl      v10.8h, v10.8b            //converting row 4 to 16-bit
+    mul       v6.8h, v6.8h, v2.4h[0]    //weight mult. for row 2
+    mul       v8.8h, v8.8h, v2.4h[0]    //weight mult. for row 3
+    mul       v10.8h, v10.8h, v2.4h[0]  //weight mult. for row 4
+
+    srshl     v4.8h, v4.8h, v0.8h       //rounds off the weighted samples from row 1
+    srshl     v6.8h, v6.8h, v0.8h       //rounds off the weighted samples from row 2
+    srshl     v8.8h, v8.8h, v0.8h       //rounds off the weighted samples from row 3
+    saddw     v4.8h, v4.8h, v3.8b       //adding offset for row 1
+    srshl     v10.8h, v10.8h, v0.8h     //rounds off the weighted samples from row 4
+    saddw     v6.8h, v6.8h, v3.8b       //adding offset for row 2
+
+    saddw     v8.8h, v8.8h, v3.8b       //adding offset for row 3
+    sqxtun    v4.8b, v4.8h              //saturating row 1 to unsigned 8-bit
+    saddw     v10.8h, v10.8h, v3.8b     //adding offset for row 4
+    sqxtun    v6.8b, v6.8h              //saturating row 2 to unsigned 8-bit
+    sqxtun    v8.8b, v8.8h              //saturating row 3 to unsigned 8-bit
+    sqxtun    v10.8b, v10.8h            //saturating row 4 to unsigned 8-bit
+
+    st1       {v4.8b}, [x1], x3         //store row 1 in destination
+    st1       {v6.8b}, [x1], x3         //store row 2 in destination
+    subs      w7, w7, #4                //decrement ht by 4
+    st1       {v8.8b}, [x1], x3         //store row 3 in destination
+    st1       {v10.8b}, [x1], x3        //store row 4 in destination
+
+    bgt       loop_8                    //if greater than 0 repeat the loop again
+
+    b         end_loops
+
+loop_16:                                //each iteration processes four rows
+
+    ld1       {v4.8b, v5.8b}, [x0], x2  //load row 1 in source
+    ld1       {v6.8b, v7.8b}, [x0], x2  //load row 2 in source
+    uxtl      v12.8h, v4.8b             //converting row 1L to 16-bit
+    ld1       {v8.8b, v9.8b}, [x0], x2  //load row 3 in source
+    uxtl      v14.8h, v5.8b             //converting row 1H to 16-bit
+    ld1       {v10.8b, v11.8b}, [x0], x2 //load row 4 in source
+    uxtl      v16.8h, v6.8b             //converting row 2L to 16-bit
+    mul       v12.8h, v12.8h, v2.4h[0]  //weight mult. for row 1L
+    uxtl      v18.8h, v7.8b             //converting row 2H to 16-bit
+    mul       v14.8h, v14.8h, v2.4h[0]  //weight mult. for row 1H
+    uxtl      v20.8h, v8.8b             //converting row 3L to 16-bit
+    mul       v16.8h, v16.8h, v2.4h[0]  //weight mult. for row 2L
+    uxtl      v22.8h, v9.8b             //converting row 3H to 16-bit
+    mul       v18.8h, v18.8h, v2.4h[0]  //weight mult. for row 2H
+    uxtl      v24.8h, v10.8b            //converting row 4L to 16-bit
+    mul       v20.8h, v20.8h, v2.4h[0]  //weight mult. for row 3L
+    uxtl      v26.8h, v11.8b            //converting row 4H to 16-bit
+    mul       v22.8h, v22.8h, v2.4h[0]  //weight mult. for row 3H
+    mul       v24.8h, v24.8h, v2.4h[0]  //weight mult. for row 4L
+    srshl     v12.8h, v12.8h, v0.8h     //rounds off the weighted samples from row 1L
+    mul       v26.8h, v26.8h, v2.4h[0]  //weight mult. for row 4H
+    srshl     v14.8h, v14.8h, v0.8h     //rounds off the weighted samples from row 1H
+    srshl     v16.8h, v16.8h, v0.8h     //rounds off the weighted samples from row 2L
+    saddw     v12.8h, v12.8h, v3.8b     //adding offset for row 1L
+    srshl     v18.8h, v18.8h, v0.8h     //rounds off the weighted samples from row 2H
+    saddw     v14.8h, v14.8h, v3.8b     //adding offset for row 1H
+    sqxtun    v4.8b, v12.8h             //saturating row 1L to unsigned 8-bit
+    srshl     v20.8h, v20.8h, v0.8h     //rounds off the weighted samples from row 3L
+    saddw     v16.8h, v16.8h, v3.8b     //adding offset for row 2L
+    sqxtun    v5.8b, v14.8h             //saturating row 1H to unsigned 8-bit
+    srshl     v22.8h, v22.8h, v0.8h     //rounds off the weighted samples from row 3H
+    saddw     v18.8h, v18.8h, v3.8b     //adding offset for row 2H
+    sqxtun    v6.8b, v16.8h             //saturating row 2L to unsigned 8-bit
+    srshl     v24.8h, v24.8h, v0.8h     //rounds off the weighted samples from row 4L
+    saddw     v20.8h, v20.8h, v3.8b     //adding offset for row 3L
+    sqxtun    v7.8b, v18.8h             //saturating row 2H to unsigned 8-bit
+    srshl     v26.8h, v26.8h, v0.8h     //rounds off the weighted samples from row 4H
+    saddw     v22.8h, v22.8h, v3.8b     //adding offset for row 3H
+    sqxtun    v8.8b, v20.8h             //saturating row 3L to unsigned 8-bit
+    saddw     v24.8h, v24.8h, v3.8b     //adding offset for row 4L
+    sqxtun    v9.8b, v22.8h             //saturating row 3H to unsigned 8-bit
+    saddw     v26.8h, v26.8h, v3.8b     //adding offset for row 4H
+    sqxtun    v10.8b, v24.8h            //saturating row 4L to unsigned 8-bit
+    st1       {v4.8b, v5.8b}, [x1], x3  //store row 1 in destination
+    sqxtun    v11.8b, v26.8h            //saturating row 4H to unsigned 8-bit
+    st1       {v6.8b, v7.8b}, [x1], x3  //store row 2 in destination
+    subs      w7, w7, #4                //decrement ht by 4
+    st1       {v8.8b, v9.8b}, [x1], x3  //store row 3 in destination
+    st1       {v10.8b, v11.8b}, [x1], x3 //store row 4 in destination
+
+    bgt       loop_16                   //if greater than 0 repeat the loop again
+
+end_loops:
+
+    ldp       x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+//*******************************************************************************
+//* @function
+//* ih264_weighted_pred_chroma_av8()
+//*
+//* @brief
+//* This routine performs the default weighted prediction as described in sec
+//* 8.4.2.3.2 titled "Weighted sample prediction process" for chroma.
+//*
+//* @par Description:
+//* This function gets a ht x wd block, calculates the weighted sample, rounds
+//* off, adds offset and stores it in the destination block for U and V.
+//*
+//* @param[in] puc_src:
+//* UWORD8 Pointer to the buffer containing the input block.
+//*
+//* @param[out] puc_dst
+//* UWORD8 pointer to the destination where the output block is stored.
+//*
+//* @param[in] src_strd
+//* Stride of the input buffer
+//*
+//* @param[in] dst_strd
+//* Stride of the destination buffer
+//*
+//* @param[in] log_WD
+//* number of bits to be rounded off
+//*
+//* @param[in] wt
+//* weights for the weighted prediction for U and V
+//*
+//* @param[in] ofst
+//* offsets used after rounding off for U and V
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
+//*
+//*******************************************************************************
+//*/
+//void ih264_weighted_pred_chroma_av8(UWORD8 *puc_src,
+//                                    UWORD8 *puc_dst,
+//                                    WORD32 src_strd,
+//                                    WORD32 dst_strd,
+//                                    UWORD8 log_WD,
+//                                    UWORD32 wt,
+//                                    UWORD16 ofst,
+//                                    UWORD8 ht,
+//                                    UWORD8 wd)
+//
+//**************Variables Vs Registers*****************************************
+//    x0      => puc_src
+//    x1      => puc_dst
+//    x2      => src_strd
+//    x3      => dst_strd
+//    x4      => log_WD (copied to x20, negated into x9)
+//    w5      => wt     (broadcast into v2)
+//    w6      => ofst   (broadcast into v4)
+//    w7      => ht     (row counter, decremented in every loop)
+//    stack   => wd     (read into w8 from [sp, #80] after the register saves)
+//
+
+
+
+
+    .global ih264_weighted_pred_chroma_av8
+
+ih264_weighted_pred_chroma_av8:
+
+    // ARMv7 original: STMFD sp!, {x4-x9,x14}; the two saves below replace it
+    push_v_regs                             //macro defined outside this file - presumably saves the callee-saved NEON registers (TODO confirm)
+    stp       x19, x20, [sp, #-16]!         //save x19, x20 (x20 is used as scratch below)
+
+    ldr       w8, [sp, #80]                 //Load wd; offset 80 presumably = 16 (x19/x20) + size pushed by push_v_regs (TODO confirm)
+    sxtw      x8, w8
+
+    sub       x20, x4, #0                   //x20 = log_WD
+    neg       x9, x20                       //x9 = -log_WD
+    dup       v2.4s, w5                     //v2 = {wt_u (16-bit), wt_v (16-bit)} repeated in every 32-bit lane
+
+
+    dup       v4.4h, w6                     //v4 = {ofst_u (8-bit), ofst_v (8-bit)} repeated in every 16-bit lane
+    cmp       w8, #8                        //check if wd is 8
+    dup       v0.8h, w9                     //v0 = -log_WD (16-bit); srshl by a negative count is a rounding right shift
+    beq       loop_8_uv                     //branch if wd is 8
+
+    cmp       w8, #4                        //check if wd is 4
+    beq       loop_4_uv                     //branch if wd is 4
+
+loop_2_uv:                                  //each iteration processes two rows
+
+    ld1       {v6.s}[0], [x0], x2           //load row 1 in source
+    ld1       {v6.s}[1], [x0], x2           //load row 2 in source
+    uxtl      v6.8h, v6.8b                  //converting rows 1,2 to 16-bit
+    mul       v6.8h, v6.8h , v2.8h          //weight mult. for rows 1,2
+    srshl     v6.8h, v6.8h , v0.8h          //rounds off the weighted samples from rows 1,2
+    saddw     v6.8h, v6.8h , v4.8b          //adding offset for rows 1,2
+    sqxtun    v6.8b, v6.8h                  //saturating rows 1,2 to unsigned 8-bit
+    subs      w7, w7, #2                    //decrement ht by 2
+    st1       {v6.s}[0], [x1], x3           //store row 1 in destination
+    st1       {v6.s}[1], [x1], x3           //store row 2 in destination
+    bgt       loop_2_uv                     //if greater than 0 repeat the loop again
+    b         end_loops_uv
+
+loop_4_uv:                                  //each iteration processes two rows
+
+    ld1       {v6.8b}, [x0], x2             //load row 1 in source
+    ld1       {v8.8b}, [x0], x2             //load row 2 in source
+    uxtl      v6.8h, v6.8b                  //converting row 1 to 16-bit
+    uxtl      v8.8h, v8.8b                  //converting row 2 to 16-bit
+    mul       v6.8h, v6.8h , v2.8h          //weight mult. for row 1
+    mul       v8.8h, v8.8h , v2.8h          //weight mult. for row 2
+    subs      w7, w7, #2                    //decrement ht by 2 (flags are consumed by the bgt after the stores)
+    srshl     v6.8h, v6.8h , v0.8h          //rounds off the weighted samples from row 1
+    srshl     v8.8h, v8.8h , v0.8h          //rounds off the weighted samples from row 2
+    saddw     v6.8h, v6.8h , v4.8b          //adding offset for row 1
+    saddw     v8.8h, v8.8h , v4.8b          //adding offset for row 2
+    sqxtun    v6.8b, v6.8h                  //saturating row 1 to unsigned 8-bit
+    sqxtun    v8.8b, v8.8h                  //saturating row 2 to unsigned 8-bit
+    st1       {v6.8b}, [x1], x3             //store row 1 in destination
+    st1       {v8.8b}, [x1], x3             //store row 2 in destination
+
+    bgt       loop_4_uv                     //if greater than 0 repeat the loop again
+
+    b         end_loops_uv
+
+loop_8_uv:                                  //each iteration processes four rows (ht is decremented by 4 below)
+
+    ld1       {v6.8b, v7.8b}, [x0], x2      //load row 1 in source
+    ld1       {v8.8b, v9.8b}, [x0], x2      //load row 2 in source
+    uxtl      v14.8h, v6.8b                 //converting row 1L to 16-bit
+    ld1       {v10.8b, v11.8b}, [x0], x2    //load row 3 in source
+    uxtl      v16.8h, v7.8b                 //converting row 1H to 16-bit
+    ld1       {v12.8b, v13.8b}, [x0], x2    //load row 4 in source
+
+    mul       v14.8h, v14.8h , v2.8h        //weight mult. for row 1L
+    uxtl      v18.8h, v8.8b                 //converting row 2L to 16-bit
+    mul       v16.8h, v16.8h , v2.8h        //weight mult. for row 1H
+    uxtl      v20.8h, v9.8b                 //converting row 2H to 16-bit
+    mul       v18.8h, v18.8h , v2.8h        //weight mult. for row 2L
+    uxtl      v22.8h, v10.8b                //converting row 3L to 16-bit
+    mul       v20.8h, v20.8h , v2.8h        //weight mult. for row 2H
+    uxtl      v24.8h, v11.8b                //converting row 3H to 16-bit
+    mul       v22.8h, v22.8h , v2.8h        //weight mult. for row 3L
+    uxtl      v26.8h, v12.8b                //converting row 4L to 16-bit
+    mul       v24.8h, v24.8h , v2.8h        //weight mult. for row 3H
+    uxtl      v28.8h, v13.8b                //converting row 4H to 16-bit
+
+    mul       v26.8h, v26.8h , v2.8h        //weight mult. for row 4L
+    srshl     v14.8h, v14.8h , v0.8h        //rounds off the weighted samples from row 1L
+    mul       v28.8h, v28.8h , v2.8h        //weight mult. for row 4H
+
+    srshl     v16.8h, v16.8h , v0.8h        //rounds off the weighted samples from row 1H
+    srshl     v18.8h, v18.8h , v0.8h        //rounds off the weighted samples from row 2L
+    saddw     v14.8h, v14.8h , v4.8b        //adding offset for row 1L
+    srshl     v20.8h, v20.8h , v0.8h        //rounds off the weighted samples from row 2H
+    saddw     v16.8h, v16.8h , v4.8b        //adding offset for row 1H
+    sqxtun    v6.8b, v14.8h                 //saturating row 1L to unsigned 8-bit
+    srshl     v22.8h, v22.8h , v0.8h        //rounds off the weighted samples from row 3L
+    saddw     v18.8h, v18.8h , v4.8b        //adding offset for row 2L
+    sqxtun    v7.8b, v16.8h                 //saturating row 1H to unsigned 8-bit
+    srshl     v24.8h, v24.8h , v0.8h        //rounds off the weighted samples from row 3H
+    saddw     v20.8h, v20.8h , v4.8b        //adding offset for row 2H
+    sqxtun    v8.8b, v18.8h                 //saturating row 2L to unsigned 8-bit
+    srshl     v26.8h, v26.8h , v0.8h        //rounds off the weighted samples from row 4L
+    saddw     v22.8h, v22.8h , v4.8b        //adding offset for row 3L
+    sqxtun    v9.8b, v20.8h                 //saturating row 2H to unsigned 8-bit
+    srshl     v28.8h, v28.8h , v0.8h        //rounds off the weighted samples from row 4H
+    saddw     v24.8h, v24.8h , v4.8b        //adding offset for row 3H
+
+    sqxtun    v10.8b, v22.8h                //saturating row 3L to unsigned 8-bit
+    saddw     v26.8h, v26.8h , v4.8b        //adding offset for row 4L
+    sqxtun    v11.8b, v24.8h                //saturating row 3H to unsigned 8-bit
+    saddw     v28.8h, v28.8h , v4.8b        //adding offset for row 4H
+
+    sqxtun    v12.8b, v26.8h                //saturating row 4L to unsigned 8-bit
+    st1       {v6.8b, v7.8b}, [x1], x3      //store row 1 in destination
+    sqxtun    v13.8b, v28.8h                //saturating row 4H to unsigned 8-bit
+    st1       {v8.8b, v9.8b}, [x1], x3      //store row 2 in destination
+    subs      w7, w7, #4                    //decrement ht by 4
+    st1       {v10.8b, v11.8b}, [x1], x3    //store row 3 in destination
+    st1       {v12.8b, v13.8b}, [x1], x3    //store row 4 in destination
+
+    bgt       loop_8_uv                     //if greater than 0 repeat the loop again
+
+end_loops_uv:
+
+    // ARMv7 original: LDMFD sp!,{x4-x9,x15}; the restores below mirror the saves at entry
+    ldp       x19, x20, [sp], #16           //restore x19, x20
+    pop_v_regs                              //counterpart of push_v_regs
+    ret
+
+
+
diff --git a/common/ih264_buf_mgr.c b/common/ih264_buf_mgr.c
new file mode 100755
index 0000000..ea4333e
--- /dev/null
+++ b/common/ih264_buf_mgr.c
@@ -0,0 +1,696 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_buf_mgr.c +* +* @brief +* Contains function definitions for buffer management +* +* @author +* Srinivas T +* +* @par List of Functions: +* - ih264_buf_mgr_size() +* - ih264_buf_mgr_lock() +* - ih264_buf_mgr_unlock() +* - ih264_buf_mgr_yield() +* - ih264_buf_mgr_free() +* - ih264_buf_mgr_init() +* - ih264_buf_mgr_add() +* - ih264_buf_mgr_get_next_free() +* - ih264_buf_mgr_check_free() +* - ih264_buf_mgr_set_status() +* - ih264_buf_mgr_get_status() +* - ih264_buf_mgr_get_buf() +* - ih264_buf_mgr_get_bufid() +* - ih264_buf_mgr_get_num_active_buf() +* +* @remarks +* None +* +******************************************************************************* +*/ +#include +#include +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_buf_mgr.h" + +#include "ithread.h" + +/** +******************************************************************************* +* +* @brief Returns size for buf queue context. Does not include buf queue buffer +* requirements +* +* @par Description +* Returns size for buf queue context. Does not include buf queue buffer +* requirements. Buffer size required to store the bufs should be allocated in +* addition to the value returned here. 
+* +* @returns Size of the buf queue context +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264_buf_mgr_size(void) +{ + WORD32 size; + + size = sizeof(buf_mgr_t); + size += ithread_get_mutex_lock_size(); + + return size; +} + +/** +******************************************************************************* +* +* @brief +* Locks the buf_mgr context +* +* @par Description +* Locks the buf_mgr context by calling ithread_mutex_lock() +* +* @param[in] ps_buf_mgr +* Job Queue context +* +* @returns IH264_FAIL if mutex lock fails else IH264_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_buf_mgr_lock(buf_mgr_t *ps_buf_mgr) +{ + WORD32 retval; + retval = ithread_mutex_lock(ps_buf_mgr->pv_mutex); + if(retval) + { + return IH264_FAIL; + } + return IH264_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Unlocks the buf_mgr context +* +* @par Description +* Unlocks the buf_mgr context by calling ithread_mutex_unlock() +* +* @param[in] ps_buf_mgr +* Job Queue context +* +* @returns IH264_FAIL if mutex unlock fails else IH264_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ + +IH264_ERROR_T ih264_buf_mgr_unlock(buf_mgr_t *ps_buf_mgr) +{ + WORD32 retval; + retval = ithread_mutex_unlock(ps_buf_mgr->pv_mutex); + if(retval) + { + return IH264_FAIL; + } + return IH264_SUCCESS; + +} +/** +******************************************************************************* +* +* @brief +* Yeilds the thread +* +* @par Description +* Unlocks the buf_mgr context by calling +* ih264_buf_mgr_unlock(), ithread_yield() and then ih264_buf_mgr_lock() +* buf_mgr is unlocked before to ensure the buf_mgr can be accessed by other threads +* If unlock is not done before calling yield then no other thread can access +* the 
buf_mgr functions and update buf_mgr. +* +* @param[in] ps_buf_mgr +* Job Queue context +* +* @returns IH264_FAIL if mutex lock unlock or yield fails else IH264_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_buf_mgr_yield(buf_mgr_t *ps_buf_mgr) +{ + + IH264_ERROR_T ret = IH264_SUCCESS; + + IH264_ERROR_T rettmp; + rettmp = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((rettmp != IH264_SUCCESS), rettmp); + + //ithread_usleep(10); + ithread_yield(); + + rettmp = ih264_buf_mgr_lock(ps_buf_mgr); + RETURN_IF((rettmp != IH264_SUCCESS), rettmp); + return ret; +} + + +/** +******************************************************************************* +* +* @brief free the buf queue pointers +* +* @par Description +* Frees the buf_mgr context +* +* @param[in] pv_buf +* Memoy for buf queue buffer and buf queue context +* +* @returns Pointer to buf queue context +* +* @remarks +* Since it will be called only once by master thread this is not thread safe. +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_buf_mgr_free(buf_mgr_t *ps_buf_mgr) +{ + WORD32 ret; + ret = ithread_mutex_destroy(ps_buf_mgr->pv_mutex); + + if(0 == ret) + return IH264_SUCCESS; + else + return IH264_FAIL; +} +/** +******************************************************************************* +* +* @brief +* Buffer manager initialization function. 
+* +* @par Description: +* Initializes the buffer manager structure +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ + + +void *ih264_buf_mgr_init(void *pv_buf) +{ + WORD32 id; + UWORD8 *pu1_buf; + buf_mgr_t *ps_buf_mgr; + pu1_buf = (UWORD8 *)pv_buf; + + ps_buf_mgr = (buf_mgr_t *)pu1_buf; + pu1_buf += sizeof(buf_mgr_t); + + ps_buf_mgr->pv_mutex = pu1_buf; + pu1_buf += ithread_get_mutex_lock_size(); + + ithread_mutex_init(ps_buf_mgr->pv_mutex); + + ps_buf_mgr->i4_max_buf_cnt = BUF_MGR_MAX_CNT; + ps_buf_mgr->i4_active_buf_cnt = 0; + + for(id = 0; id < BUF_MGR_MAX_CNT; id++) + { + ps_buf_mgr->au4_status[id] = 0; + ps_buf_mgr->apv_ptr[id] = NULL; + } + + return ps_buf_mgr; +} + + +/** +******************************************************************************* +* +* @brief +* Adds and increments the buffer and buffer count. +* +* @par Description: +* Adds a buffer to the buffer manager if it is not already present and +* increments the active buffer count +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] pv_ptr +* Pointer to the buffer to be added +* +* @returns Returns 0 on success, -1 otherwise +* +* @remarks +* None +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_buf_mgr_add(buf_mgr_t *ps_buf_mgr, + void *pv_ptr, + WORD32 buf_id) +{ + + IH264_ERROR_T ret = IH264_SUCCESS; + ret = ih264_buf_mgr_lock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + /* Check if buffer ID is within allowed range */ + if(buf_id >= ps_buf_mgr->i4_max_buf_cnt) + { + ret = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + return IH264_FAIL; + } + + /* Check if the current ID is being used to hold some other buffer */ + if((ps_buf_mgr->apv_ptr[buf_id] != NULL) && + (ps_buf_mgr->apv_ptr[buf_id] !=pv_ptr)) + { + ret = 
ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + return IH264_FAIL; + } + ps_buf_mgr->apv_ptr[buf_id] = pv_ptr; + ps_buf_mgr->i4_active_buf_cnt++; + + ret = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + return ret; +} + +/** +******************************************************************************* +* +* @brief +* Gets the next free buffer. +* +* @par Description: +* Returns the next free buffer available and sets the corresponding status +* to DEC +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] pi4_buf_id +* Pointer to the id of the free buffer +* +* @returns Pointer to the free buffer +* +* @remarks +* None +* +******************************************************************************* +*/ +void* ih264_buf_mgr_get_next_free(buf_mgr_t *ps_buf_mgr, WORD32 *pi4_buf_id) +{ + WORD32 id; + void *pv_ret_ptr; + IH264_ERROR_T ret = IH264_SUCCESS; + ret = ih264_buf_mgr_lock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), NULL); + + pv_ret_ptr = NULL; + for(id = 0; id < ps_buf_mgr->i4_active_buf_cnt; id++) + { + /* Check if the buffer is non-null and status is zero */ + if((ps_buf_mgr->au4_status[id] == 0) && (ps_buf_mgr->apv_ptr[id])) + { + *pi4_buf_id = id; + /* DEC is set to 1 */ + ps_buf_mgr->au4_status[id] = 1; + pv_ret_ptr = ps_buf_mgr->apv_ptr[id]; + break; + } + } + ret = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), NULL); + + return pv_ret_ptr; +} + + +/** +******************************************************************************* +* +* @brief +* Checks the buffer manager for free buffers available. 
+* +* @par Description: +* Checks if there are any free buffers available +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @returns Returns 0 if available, -1 otherwise +* +* @remarks +* None +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_buf_mgr_check_free(buf_mgr_t *ps_buf_mgr) +{ + WORD32 id; + IH264_ERROR_T ret = IH264_SUCCESS; + IH264_ERROR_T rettmp = IH264_SUCCESS; + rettmp = ih264_buf_mgr_lock(ps_buf_mgr); + RETURN_IF((rettmp != IH264_SUCCESS), ret); + + ret = IH264_FAIL; + for(id = 0; id < ps_buf_mgr->i4_active_buf_cnt; id++) + { + if((ps_buf_mgr->au4_status[id] == 0) && + (ps_buf_mgr->apv_ptr[id])) + { + ret = IH264_SUCCESS; + break; + } + } + rettmp = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((rettmp != IH264_SUCCESS), ret); + + return ret; + +} + + +/** +******************************************************************************* +* +* @brief +* Resets the status bits. +* +* @par Description: +* resets the status bits that the mask contains (status corresponding to +* the id) +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] buf_id +* ID of the buffer status to be released +* +* @param[in] mask +* Contains the bits that are to be reset +* +* @returns 0 if success, -1 otherwise +* +* @remarks +* None +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_buf_mgr_release(buf_mgr_t *ps_buf_mgr, + WORD32 buf_id, + UWORD32 mask) +{ + IH264_ERROR_T ret = IH264_SUCCESS; + ret = ih264_buf_mgr_lock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + + /* If the given id is pointing to an id which is not yet added */ + if(buf_id >= ps_buf_mgr->i4_active_buf_cnt) + { + ret = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + return IH264_FAIL; + } + + ps_buf_mgr->au4_status[buf_id] &= ~mask; + + +/* If both the REF and DISP are zero, DEC is set to zero */ + 
if(ps_buf_mgr->au4_status[buf_id] == 1) + { + ps_buf_mgr->au4_status[buf_id] = 0; + } + + + ret = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + return ret; +} + + +/** +******************************************************************************* +* +* @brief +* Sets the status bit. +* +* @par Description: +* sets the status bits that the mask contains (status corresponding to the +* id) +* +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] buf_id +* ID of the buffer whose status needs to be modified +* +* +* @param[in] mask +* Contains the bits that are to be set +* +* @returns 0 if success, -1 otherwise +* +* @remarks +* None +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_buf_mgr_set_status(buf_mgr_t *ps_buf_mgr, + WORD32 buf_id, + UWORD32 mask) +{ + IH264_ERROR_T ret = IH264_SUCCESS; + ret = ih264_buf_mgr_lock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + if(buf_id >= ps_buf_mgr->i4_active_buf_cnt) + { + ret = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + return IH264_FAIL; + } + + + if((ps_buf_mgr->au4_status[buf_id] & mask) != 0) + { + ret = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + return IH264_FAIL; + } + + ps_buf_mgr->au4_status[buf_id] |= mask; + ret = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + return ret; +} + + +/** +******************************************************************************* +* +* @brief +* Returns the status of the buffer. 
+* +* @par Description: +* Returns the status of the buffer corresponding to the id +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] buf_id +* ID of the buffer status required +* +* @returns Status of the buffer corresponding to the id +* +* @remarks +* None +* +******************************************************************************* +*/ +WORD32 ih264_buf_mgr_get_status( buf_mgr_t *ps_buf_mgr, WORD32 buf_id ) +{ + IH264_ERROR_T ret = IH264_SUCCESS; + UWORD32 status; + + ret = ih264_buf_mgr_lock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + status = ps_buf_mgr->au4_status[buf_id]; + + ret = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + return status; +} + + +/** +******************************************************************************* +* +* @brief +* Gets the buffer from the buffer manager +* +* @par Description: +* Returns the pointer to the buffer corresponding to the id +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] buf_id +* ID of the buffer required +* +* @returns Pointer to the buffer required +* +* @remarks +* None +* +******************************************************************************* +*/ +void* ih264_buf_mgr_get_buf(buf_mgr_t *ps_buf_mgr, WORD32 buf_id) +{ + IH264_ERROR_T ret = IH264_SUCCESS; + void *pv_buf; + ret = ih264_buf_mgr_lock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), NULL); + + pv_buf = ps_buf_mgr->apv_ptr[buf_id]; + + ret = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), NULL); + + return pv_buf; +} + + +/** +******************************************************************************* +* +* @brief +* Gets the buffer id from the buffer manager if the buffer is added to the +* buffer manager +* +* @par Description: +* Returns the buffer id corresponding to the given buffer if it exists +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] pv_buf +* Pointer to the 
buffer +* +* @returns Buffer id if exists, else -1 +* +* @remarks +* None +* +******************************************************************************* +*/ +WORD32 ih264_buf_mgr_get_bufid(buf_mgr_t *ps_buf_mgr, void *pv_buf) +{ + WORD32 id; + WORD32 buf_id = -1; + IH264_ERROR_T ret = IH264_SUCCESS; + ret = ih264_buf_mgr_lock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + for(id = 0; id < ps_buf_mgr->i4_active_buf_cnt; id++) + { + if(ps_buf_mgr->apv_ptr[id] == pv_buf) + { + buf_id = id; + break; + } + } + ret = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + return buf_id; +} + + +/** +******************************************************************************* +* +* @brief +* Gets the no.of active buffer +* +* @par Description: +* Return the number of active buffers in the buffer manager +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @returns number of active buffers +* +* @remarks +* None +* +******************************************************************************* +*/ +UWORD32 ih264_buf_mgr_get_num_active_buf(buf_mgr_t *ps_buf_mgr) +{ + UWORD32 u4_buf_cnt; + IH264_ERROR_T ret = IH264_SUCCESS; + + u4_buf_cnt = 0; + + ret = ih264_buf_mgr_lock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + u4_buf_cnt = ps_buf_mgr->i4_active_buf_cnt; + + ret = ih264_buf_mgr_unlock(ps_buf_mgr); + RETURN_IF((ret != IH264_SUCCESS), ret); + + return u4_buf_cnt; +} diff --git a/common/ih264_buf_mgr.h b/common/ih264_buf_mgr.h new file mode 100755 index 0000000..52efa70 --- /dev/null +++ b/common/ih264_buf_mgr.h @@ -0,0 +1,122 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ih264_buf_mgr.h
+*
+* @brief
+*  Buffer manager context, status flags and function declarations used for
+*  buffer management
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef _IH264_BUF_MGR_H_
+#define _IH264_BUF_MGR_H_
+
+/** Number of buffer slots held by one buffer manager context */
+#define BUF_MGR_MAX_CNT 64
+
+/** Flag for codec usage (buffer used by the current encode / decode) */
+#define BUF_MGR_CODEC        (1 << 1)
+
+/** Flag for reference status */
+#define BUF_MGR_REF          (1 << 2)
+
+/** Flag for I/O - Display/output in case of decoder, capture/input in case of encoder */
+#define BUF_MGR_IO         (1 << 3)
+
+typedef struct
+{
+    /**
+     * Mutex used to keep the functions thread-safe
+     */
+    void *pv_mutex;
+
+    /**
+     * Exclusive upper bound of the buffer ids accepted by
+     * ih264_buf_mgr_add(); set to BUF_MGR_MAX_CNT by ih264_buf_mgr_init()
+     */
+    WORD32 i4_max_buf_cnt;
+
+    /**
+     * Count of buffers added through ih264_buf_mgr_add(); also used as the
+     * exclusive upper bound of the ids that are scanned for free buffers
+     */
+    WORD32 i4_active_buf_cnt;
+
+    /**
+     * Status word of every buffer id; 0 means the buffer is free
+     */
+    UWORD32 au4_status[BUF_MGR_MAX_CNT];
+
+    /* The lowest four bits of status are: */
+
+    /* Bit 0 - IN USE (set by ih264_buf_mgr_get_next_free()) */
+    /* Bit 1 - CODEC (BUF_MGR_CODEC) */
+    /* Bit 2 - REF (BUF_MGR_REF) */
+    /* Bit 3 - DISP/IO/RECON (BUF_MGR_IO) */
+    void *apv_ptr[BUF_MGR_MAX_CNT];
+
+}buf_mgr_t;
+
+// Returns size of the buffer manager context (context structure plus mutex)
+WORD32 ih264_buf_mgr_size(void);
+
+// Free buffer manager (destroys the mutex of the context)
+IH264_ERROR_T ih264_buf_mgr_free(buf_mgr_t *ps_buf_mgr);
+
+// Initializes the buffer manager context in the given memory and returns it
+void *ih264_buf_mgr_init(void *pv_buf);
+
+// Add buffer to buffer manager. Fails if buf_id is not below i4_max_buf_cnt or the id already holds a different buffer
+IH264_ERROR_T ih264_buf_mgr_add(buf_mgr_t *ps_buf_mgr,
+                                void *pv_ptr,
+                                WORD32 buf_id);
+
+// Returns the first free buffer, writes its id to *pi4_id and sets its status to 1 (bit 0); NULL if none is free
+void* ih264_buf_mgr_get_next_free(buf_mgr_t *ps_buf_mgr, WORD32 *pi4_id);
+
+// this function will check if there are any free buffers
+IH264_ERROR_T ih264_buf_mgr_check_free(buf_mgr_t *ps_buf_mgr);
+
+// mask holds the status bits to clear, i.e. who released it: IO:REF:CODEC
+IH264_ERROR_T ih264_buf_mgr_release(buf_mgr_t *ps_buf_mgr,
+                                    WORD32 id,
+                                    UWORD32 mask);
+
+// sets the status bits given in mask (one or more of IO:REF:CODEC); fails if any of them is already set
+IH264_ERROR_T ih264_buf_mgr_set_status(buf_mgr_t *ps_buf_mgr,
+                                       WORD32 id,
+                                       UWORD32 mask);
+
+// Gets status of the buffer
+WORD32 ih264_buf_mgr_get_status(buf_mgr_t *ps_buf_mgr, WORD32 id);
+
+// pass the ID - buffer will be returned
+void* ih264_buf_mgr_get_buf(buf_mgr_t *ps_buf_mgr, WORD32 id);
+//Pass buffer to get ID (-1 if the buffer is not found)
+WORD32 ih264_buf_mgr_get_bufid(buf_mgr_t *ps_buf_mgr, void *pv_buf);
+
+// will return number of active buffers
+UWORD32 ih264_buf_mgr_get_num_active_buf(buf_mgr_t *ps_buf_mgr);
+
+
+
+#endif  /* _IH264_BUF_MGR_H_ */
diff --git a/common/ih264_cabac_tables.c b/common/ih264_cabac_tables.c
new file mode 100755
index 0000000..118ca12
--- /dev/null
+++ b/common/ih264_cabac_tables.c
@@ -0,0 +1,10869 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+
+/**
+******************************************************************************
+* @file
+*  ih264_cabac_tables.c
+*
+* @brief
+*  This file contains H264 cabac tables for init contexts, rlps and
+*  cabac state transitions
+*
+* @author
+*  Ittiam
+*
+* @par List of Tables
+*  - gau1_ih264_cabac_rlps[][]
+*  - gau1_ih264_next_state[]
+*  - gau1_ih264_cab_ctxts[][][]
+*
+******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_cabac_tables.h"
+
+
+/*****************************************************************************/
+/* Extern global definitions                                                 */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief  Table for rangeTabLPS depending on pStateIdx and qCodIRangeIdx
+ * input   : pStateIdx(0-63) and qCodIRangeIdx(0-3) [(Range >> 6) & 0x3]
+ * output  : RLPS
+ *
+ * @remarks See Table 9-35 of H264 spec for rangeTabLPS.
+ *          One row per pStateIdx, one column per qCodIRangeIdx; the row for
+ *          pStateIdx 63 holds 2 in all four columns.
+ ******************************************************************************
+ */
+const UWORD8 gau1_ih264_cabac_rlps[64][4] =
+{
+    { 128, 176, 208, 240},
+    { 128, 167, 197, 227},
+    { 128, 158, 187, 216},
+    { 123, 150, 178, 205},
+    { 116, 142, 169, 195},
+    { 111, 135, 160, 185},
+    { 105, 128, 152, 175},
+    { 100, 122, 144, 166},
+    { 95, 116, 137, 158},
+    { 90, 110, 130, 150},
+    { 85, 104, 123, 142},
+    { 81, 99, 117, 135},
+    { 77, 94, 111, 128},
+    { 73, 89, 105, 122},
+    { 69, 85, 100, 116},
+    { 66, 80, 95, 110},
+    { 62, 76, 90, 104},
+    { 59, 72, 86, 99},
+    { 56, 69, 81, 94},
+    { 53, 65, 77, 89},
+    { 51, 62, 73, 85},
+    { 48, 59, 69, 80},
+    { 46, 56, 66, 76},
+    { 43, 53, 63, 72},
+    { 41, 50, 59, 69},
+    { 39, 48, 56, 65},
+    { 37, 45, 54, 62},
+    { 35, 43, 51, 59},
+    { 33, 41, 48, 56},
+    { 32, 39, 46, 53},
+    { 30, 37, 43, 50},
+    { 29, 35, 41, 48},
+    { 27, 33, 39, 45},
+    { 26, 31, 37, 43},
+    { 24, 30, 35, 41},
+    { 23, 28, 33, 39},
+    { 22, 27, 32, 37},
+    { 21, 26, 30, 35},
+    { 20, 24, 29, 33},
+    { 19, 23, 27, 31},
+    { 18, 22, 26, 30},
+    { 17, 21, 25, 28},
+    { 16, 20, 23, 27},
+    { 15, 19, 22, 25},
+    { 14, 18, 21, 24},
+    { 14, 17, 20, 23},
+    { 13, 16, 19, 22},
+    { 12, 15, 18, 21},
+    { 12, 14, 17, 20},
+    { 11, 14, 16, 19},
+    { 11, 13, 15, 18},
+    { 10, 12, 15, 17},
+    { 10, 12, 14, 16},
+    { 9, 11, 13, 15},
+    { 9, 11, 12, 14},
+    { 8, 10, 12, 14},
+    { 8, 9, 11, 13},
+    { 7, 9, 11, 12},
+    { 7, 9, 10, 12},
+    { 7, 8, 10, 11},
+    { 6, 8, 9, 11},
+    { 6, 7, 9, 10},
+    { 6, 7, 8, 9},
+    { 2, 2, 2, 2}
+};
+
+/**
+ ******************************************************************************
+ * @brief  probability+MPS state transition tables based on cur State and bin
+ * input  : curpState[bits7-2]  | curMPS[bit1]  | decodedBin[bit0]
+ * output : nextpState[bits6-1] | nextMPS[bit0]
+ * @remarks Modified form of Table-9-36 State Transition table in H264 spec.
+ *          Each row below holds the four entries of one curpState, in the
+ *          order given by the column header.
+ ******************************************************************************
+ */
+const UWORD8 gau1_ih264_next_state[64 * 2 * 2] =
+{
+/*****************************************************************************/
+/*    m=0,b=0 | m=0,b=1 | m=1,b=0 | m=1,b=1                                  */
+/*****************************************************************************/
+    2, 1, 0, 3,/* mps reversal for m=0,b=1 / m=1,b=0 */
+    4, 0, 1, 5,
+    6, 2, 3, 7,
+    8, 4, 5, 9,
+    10, 4, 5, 11,
+    12, 8, 9, 13,
+    14, 8, 9, 15,
+    16, 10, 11, 17,
+    18, 12, 13, 19,
+    20, 14, 15, 21,
+    22, 16, 17, 23,
+    24, 18, 19, 25,
+    26, 18, 19, 27,
+    28, 22, 23, 29,
+    30, 22, 23, 31,
+    32, 24, 25, 33,
+    34, 26, 27, 35,
+    36, 26, 27, 37,
+    38, 30, 31, 39,
+    40, 30, 31, 41,
+    42, 32, 33, 43,
+    44, 32, 33, 45,
+    46, 36, 37, 47,
+    48, 36, 37, 49,
+    50, 38, 39, 51,
+    52, 38, 39, 53,
+    54, 42, 43, 55,
+    56, 42, 43, 57,
+    58, 44, 45, 59,
+    60, 44, 45, 61,
+    62, 46, 47, 63,
+    64, 48, 49, 65,
+    66, 48, 49, 67,
+    68, 50, 51, 69,
+    70, 52, 53, 71,
+    72, 52, 53, 73,
+    74, 54, 55, 75,
+    76, 54, 55, 77,
+    78, 56, 57, 79,
+    80, 58, 59, 81,
+    82, 58, 59, 83,
+    84, 60, 61, 85,
+    86, 60, 61, 87,
+    88, 60, 61, 89,
+    90, 62, 63, 91,
+    92, 64, 65, 93,
+    94, 64, 65, 95,
+    96, 66, 67, 97,
+    98, 66, 67, 99,
+    100, 66, 67, 101,
+    102, 68, 69, 103,
+    104, 68, 69, 105,
+    106, 70, 71, 107,
+    108, 70, 71, 109,
+    110, 70, 71, 111,
+    112, 72, 73, 113,
+    114, 72, 73, 115,
+    116, 72, 73, 117,
+    118, 74, 75, 119,
+    120, 74, 75, 121,
+    122, 74, 75, 123,
+    124, 76, 77, 125,
+    124, 76, 77, 125,
+    126, 126, 127, 127
+};
+
+
+/*
+******************************************************************************
+* As per H264 standard the cabac initialization of context variables
+* are generated using following logic
+* (ref: section 9.3.1.1 of ITU-T Rec.
H.264 (03/2005)) +* +* The two values assigned to pStateIdx and valMPS during this initialization +* are derived from SliceQPY +* +* Given the two table entries [m, n] (for a given slice type, context index and +* cabac_init_idc), the initialization is specified by the following pseudo-code process +* +* preCtxState = Clip3( 1, 126, ( ( m * Clip3( 0, 51, SliceQPY ) ) >> 4 ) + n ) +* if( preCtxState <= 63 ) { +* pStateIdx = 63 - preCtxState +* valMPS = 0 +* } else { +* pStateIdx = preCtxState - 64 +* valMPS = 1 +* } +****************************************************************************** +*/ + +/** + ****************************************************************************** + * @brief Init context tables for all combinations of qp and cabac_init_idc + * @remarks Packing format MPS in lsb and pState in bits[1-6] + ****************************************************************************** + */ +const UWORD8 gau1_ih264_cab_ctxts[IH264_NUM_CABAC_INIT_IDC_PLUS_ONE][IH264_MAX_QP][IH264_NUM_CABAC_CTXTS] = +{ + + { + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 0 */ + + 124, 18, 21, 124, 18, 21, 125, 81, 20, 18, + 24, 60, 122, 124, 108, 28, 109, 12, 29, 3, + 2, 28, 19, 26, 1, 40, 124, 7, 53, 81, + 125, 81, 7, 29, 3, 2, 45, 63, 4, 36, + 11, 35, 65, 16, 7, 45, 49, 10, 25, 61, + 18, 11, 35, 49, 7, 21, 21, 33, 17, 10, + 44, 0, 0, 0, 39, 45, 67, 17, 44, 2, + 36, 29, 65, 125, 69, 75, 7, 37, 61, 39, + 93, 55, 77, 59, 125, 57, 51, 65, 89, 34, + 3, 12, 59, 21, 57, 47, 125, 18, 6, 8, + 11, 30, 9, 11, 49, 43, 29, 23, 27, 18, + 26, 9, 26, 42, 35, 0, 13, 7, 12, 25, + 56, 1, 4, 56, 76, 78, 68, 54, 59, 19, + 19, 34, 28, 73, 20, 20, 20, 4, 14, 14, + 0, 6, 2, 12, 11, 12, 48, 24, 9, 1, + 4, 0, 26, 48, 38, 22, 30, 6, 8, 8, + 60, 38, 40, 29, 6, 11, 70, 46, 38, 28, + 34, 38, 24, 32, 48, 2, 34, 18, 18, 10, + 0, 24, 12, 20, 22, 16, 36, 54, 20, 37, + 16, 29, 34, 64, 41, 112, 124, 120, 118, 124, + 124, 114, 114, 108, 88, 72, 66, 86, 58, 13, + 7, 8, 7, 66, 
62, 56, 68, 64, 50, 40, + 44, 0, 8, 1, 61, 51, 89, 25, 38, 36, + 22, 1, 8, 13, 23, 37, 77, 27, 78, 42, + 30, 16, 8, 15, 39, 47, 111, 10, 68, 54, + 50, 40, 16, 10, 1, 21, 53, 13, 68, 64, + 42, 8, 10, 17, 35, 67, 10, 116, 98, 90, + 72, 46, 10, 13, 31, 43, 124, 85, 85, 47, + 101, 93, 69, 93, 85, 79, 87, 89, 97, 65, + 63, 55, 59, 61, 45, 7, 33, 43, 13, 6, + 10, 4, 26, 26, 28, 18, 44, 34, 24, 28, + 22, 44, 32, 16, 44, 38, 26, 20, 28, 0, + 1, 11, 8, 13, 38, 64, 40, 20, 58, 50, + 22, 46, 62, 38, 50, 26, 12, 40, 104, 98, + 104, 104, 108, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 68, 124, 124, 124, 124, 124, 124, 108, + 74, 72, 12, 37, 23, 67, 123, 124, 124, 124, + 114, 110, 106, 82, 88, 62, 64, 44, 38, 32, + 3, 15, 6, 0, 3, 78, 86, 80, 62, 80, + 78, 46, 62, 68, 42, 12, 20, 4, 45, 46, + 24, 8, 31, 15, 11, 13, 5, 9, 19, 11, + 13, 7, 2, 13, 5, 3, 0, 124, 124, 124, + 124, 124, 120, 108, 72, 8, 5, 56, 42, 36, + 30, 14, 6, 2, 5, 25, 43, 35, 27, 35, + 33, 19, 21, 39, 15, 7, 4, 5, 5, 8, + 8, 124, 124, 124, 124, 122, 114, 92, 58, 2, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 1 */ + + 124, 18, 21, 124, 18, 21, 123, 77, 22, 20, + 24, 58, 120, 124, 108, 28, 103, 12, 27, 1, + 2, 28, 17, 24, 3, 40, 124, 9, 55, 81, + 121, 77, 7, 27, 1, 2, 43, 59, 6, 36, + 9, 33, 63, 16, 7, 43, 49, 10, 23, 59, + 18, 11, 33, 49, 5, 19, 19, 31, 15, 10, + 44, 0, 0, 0, 37, 45, 67, 15, 44, 2, + 36, 27, 63, 121, 65, 71, 3, 33, 57, 37, + 89, 51, 73, 57, 123, 55, 49, 63, 87, 36, + 1, 14, 57, 19, 55, 45, 121, 18, 6, 8, + 11, 32, 9, 9, 47, 41, 27, 21, 25, 18, + 26, 7, 26, 42, 33, 0, 11, 7, 12, 23, + 56, 1, 4, 56, 74, 78, 68, 54, 57, 17, + 17, 34, 28, 71, 20, 20, 20, 6, 14, 14, + 2, 8, 4, 12, 9, 12, 48, 24, 9, 1, + 4, 0, 26, 46, 38, 22, 30, 8, 10, 8, + 58, 38, 40, 27, 6, 11, 70, 46, 38, 28, + 34, 38, 24, 32, 48, 2, 34, 18, 18, 10, + 0, 24, 12, 20, 22, 16, 36, 54, 20, 35, + 16, 27, 34, 62, 39, 110, 124, 118, 116, 122, + 124, 112, 112, 104, 86, 70, 64, 82, 56, 15, + 7, 8, 7, 
64, 60, 54, 66, 62, 48, 38, + 42, 0, 8, 1, 59, 49, 87, 23, 40, 36, + 22, 0, 10, 11, 21, 35, 73, 25, 78, 42, + 30, 16, 10, 13, 37, 45, 107, 10, 70, 56, + 50, 40, 18, 10, 1, 19, 51, 13, 70, 64, + 42, 8, 12, 15, 33, 65, 10, 116, 98, 90, + 72, 46, 10, 11, 29, 41, 124, 83, 83, 45, + 97, 89, 67, 89, 81, 75, 83, 85, 93, 63, + 61, 53, 57, 57, 43, 7, 31, 41, 11, 6, + 10, 4, 26, 26, 26, 16, 44, 34, 26, 28, + 22, 44, 32, 16, 44, 38, 26, 20, 28, 0, + 1, 9, 10, 13, 38, 64, 40, 20, 58, 50, + 24, 46, 60, 38, 50, 26, 12, 38, 104, 98, + 104, 102, 106, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 66, 124, 124, 124, 124, 124, 124, 106, + 72, 70, 12, 35, 21, 63, 117, 124, 124, 124, + 112, 106, 104, 80, 84, 60, 62, 42, 36, 30, + 5, 15, 6, 0, 5, 76, 84, 78, 60, 78, + 76, 44, 60, 66, 40, 10, 18, 2, 45, 46, + 24, 8, 29, 13, 9, 11, 3, 7, 15, 9, + 11, 5, 6, 9, 3, 0, 4, 124, 124, 124, + 124, 124, 116, 102, 68, 4, 3, 58, 44, 38, + 32, 16, 8, 4, 3, 23, 41, 33, 25, 33, + 29, 15, 19, 37, 13, 5, 6, 3, 3, 8, + 8, 124, 124, 124, 124, 116, 108, 86, 52, 1, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 2 */ + + 124, 18, 21, 124, 18, 21, 119, 75, 22, 20, + 24, 56, 118, 122, 108, 28, 99, 12, 25, 0, + 2, 26, 17, 22, 5, 38, 120, 13, 57, 83, + 115, 75, 7, 25, 0, 2, 43, 57, 6, 34, + 9, 33, 61, 16, 7, 43, 49, 10, 23, 57, + 18, 11, 33, 49, 5, 19, 19, 31, 15, 10, + 44, 0, 0, 0, 35, 45, 67, 15, 42, 2, + 36, 27, 63, 117, 61, 67, 1, 29, 55, 35, + 87, 49, 71, 55, 119, 55, 49, 63, 85, 36, + 1, 14, 55, 19, 53, 45, 119, 18, 6, 8, + 11, 32, 9, 9, 47, 41, 27, 21, 25, 18, + 26, 7, 26, 42, 33, 0, 11, 7, 12, 23, + 54, 1, 4, 54, 72, 76, 66, 52, 55, 17, + 17, 32, 26, 71, 18, 20, 20, 6, 14, 14, + 4, 8, 4, 12, 9, 12, 46, 24, 11, 1, + 4, 1, 26, 44, 38, 22, 28, 8, 10, 8, + 56, 38, 38, 27, 6, 13, 68, 46, 38, 28, + 34, 38, 24, 32, 48, 2, 34, 18, 18, 10, + 0, 24, 12, 20, 22, 16, 34, 52, 18, 35, + 16, 27, 32, 60, 39, 106, 124, 114, 112, 118, + 120, 108, 108, 100, 82, 66, 60, 78, 52, 17, + 7, 8, 9, 
62, 58, 52, 64, 58, 46, 36, + 40, 1, 6, 3, 59, 49, 85, 23, 40, 36, + 22, 0, 10, 11, 21, 35, 71, 23, 78, 42, + 30, 16, 10, 13, 35, 43, 103, 10, 70, 56, + 50, 40, 18, 10, 1, 19, 49, 13, 70, 64, + 42, 8, 12, 15, 33, 63, 10, 114, 96, 88, + 70, 46, 10, 11, 29, 41, 124, 81, 81, 43, + 95, 87, 65, 87, 79, 73, 81, 83, 89, 61, + 59, 53, 55, 55, 43, 9, 31, 39, 11, 6, + 8, 4, 24, 24, 24, 14, 42, 34, 26, 28, + 20, 42, 32, 16, 42, 36, 26, 20, 26, 0, + 1, 9, 10, 13, 36, 62, 38, 20, 56, 48, + 24, 44, 58, 38, 50, 24, 10, 34, 102, 96, + 102, 100, 104, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 64, 124, 124, 124, 124, 124, 124, 102, + 70, 68, 12, 33, 21, 61, 113, 120, 120, 124, + 108, 102, 100, 76, 80, 58, 58, 40, 32, 28, + 7, 17, 4, 0, 7, 74, 82, 74, 56, 74, + 72, 42, 56, 62, 38, 8, 16, 0, 47, 44, + 22, 6, 29, 13, 9, 9, 3, 5, 13, 7, + 9, 3, 8, 7, 1, 2, 6, 124, 124, 124, + 124, 120, 110, 96, 62, 0, 3, 58, 44, 38, + 32, 18, 8, 4, 3, 23, 41, 33, 23, 33, + 27, 13, 19, 35, 11, 3, 6, 3, 1, 8, + 8, 124, 124, 124, 120, 110, 100, 78, 46, 7, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 3 */ + + 124, 18, 21, 124, 18, 21, 115, 71, 24, 20, + 22, 52, 114, 120, 108, 28, 95, 12, 23, 2, + 2, 24, 17, 20, 7, 38, 116, 15, 59, 83, + 109, 73, 7, 23, 2, 2, 41, 55, 8, 34, + 9, 31, 59, 14, 9, 43, 49, 10, 23, 57, + 18, 11, 33, 49, 3, 19, 19, 31, 13, 10, + 44, 0, 0, 0, 35, 45, 67, 13, 40, 2, + 36, 27, 63, 113, 57, 65, 2, 25, 53, 33, + 83, 47, 69, 53, 115, 53, 49, 61, 83, 36, + 1, 14, 55, 19, 53, 43, 115, 18, 4, 6, + 13, 32, 9, 9, 45, 41, 25, 21, 23, 18, + 26, 7, 26, 40, 33, 0, 11, 7, 12, 23, + 52, 1, 4, 52, 70, 74, 64, 50, 55, 15, + 17, 30, 26, 69, 18, 20, 20, 6, 14, 14, + 6, 8, 4, 12, 7, 12, 44, 24, 13, 1, + 4, 1, 24, 42, 38, 22, 26, 8, 10, 8, + 52, 38, 36, 27, 6, 13, 66, 46, 38, 28, + 34, 38, 24, 32, 48, 2, 32, 18, 18, 10, + 0, 22, 10, 18, 20, 14, 32, 50, 18, 35, + 14, 27, 30, 56, 39, 104, 124, 110, 108, 114, + 116, 104, 104, 96, 78, 64, 58, 74, 48, 19, + 7, 8, 9, 60, 
56, 50, 60, 56, 42, 34, + 38, 3, 6, 3, 59, 49, 85, 21, 40, 36, + 22, 0, 10, 11, 21, 33, 69, 23, 78, 42, + 30, 16, 12, 11, 33, 41, 99, 10, 70, 56, + 50, 40, 20, 10, 1, 19, 49, 13, 70, 64, + 40, 8, 12, 15, 33, 61, 10, 114, 96, 86, + 68, 46, 10, 11, 27, 39, 124, 79, 79, 43, + 93, 85, 63, 83, 77, 71, 79, 79, 87, 61, + 57, 53, 55, 51, 43, 9, 31, 39, 11, 4, + 8, 4, 22, 22, 22, 12, 42, 32, 26, 26, + 20, 42, 30, 16, 40, 36, 24, 20, 24, 0, + 3, 9, 10, 15, 36, 62, 36, 20, 54, 48, + 24, 42, 56, 36, 48, 22, 10, 32, 100, 94, + 102, 98, 102, 122, 124, 124, 124, 124, 124, 124, + 124, 124, 62, 124, 124, 124, 124, 124, 124, 98, + 68, 66, 12, 31, 21, 59, 109, 116, 116, 124, + 104, 98, 96, 74, 76, 54, 56, 38, 30, 24, + 9, 19, 4, 1, 9, 72, 78, 72, 52, 70, + 68, 38, 54, 58, 34, 6, 12, 3, 49, 42, + 20, 4, 29, 11, 9, 9, 1, 5, 11, 5, + 7, 1, 10, 5, 0, 6, 8, 124, 124, 124, + 124, 116, 104, 90, 56, 3, 1, 60, 46, 40, + 32, 20, 10, 4, 1, 21, 41, 31, 23, 31, + 25, 11, 19, 35, 11, 3, 6, 1, 0, 8, + 8, 124, 124, 124, 114, 104, 92, 70, 38, 11, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 4 */ + + 124, 18, 21, 124, 18, 21, 113, 69, 24, 20, + 22, 50, 112, 116, 108, 28, 89, 10, 21, 2, + 2, 22, 17, 18, 9, 36, 112, 19, 61, 85, + 103, 71, 7, 21, 2, 2, 41, 53, 8, 32, + 9, 31, 59, 14, 9, 41, 49, 10, 23, 55, + 16, 13, 33, 49, 3, 17, 19, 29, 13, 10, + 44, 0, 0, 0, 33, 47, 67, 13, 38, 2, + 36, 27, 63, 111, 55, 61, 4, 23, 51, 31, + 81, 43, 67, 51, 111, 53, 47, 61, 81, 36, + 1, 14, 53, 19, 51, 43, 113, 16, 4, 6, + 13, 32, 9, 9, 45, 41, 25, 21, 23, 18, + 24, 7, 26, 40, 33, 0, 11, 7, 12, 23, + 52, 3, 4, 52, 68, 72, 62, 48, 53, 15, + 17, 28, 24, 69, 16, 20, 18, 6, 14, 14, + 8, 10, 4, 10, 7, 10, 42, 22, 15, 1, + 4, 3, 24, 40, 36, 20, 26, 10, 10, 8, + 50, 36, 34, 27, 6, 15, 66, 46, 38, 28, + 34, 38, 24, 32, 46, 2, 32, 18, 18, 10, + 1, 22, 10, 18, 20, 14, 32, 48, 16, 35, + 14, 27, 28, 54, 39, 100, 124, 106, 104, 110, + 112, 100, 100, 92, 74, 60, 54, 68, 44, 21, + 7, 6, 11, 58, 54, 
48, 58, 52, 40, 32, + 34, 3, 4, 5, 59, 49, 83, 21, 40, 36, + 22, 0, 10, 11, 21, 33, 67, 21, 78, 42, + 30, 16, 12, 11, 33, 41, 95, 10, 70, 56, + 50, 40, 20, 10, 1, 19, 47, 13, 70, 62, + 40, 8, 12, 15, 33, 61, 10, 112, 94, 84, + 66, 46, 10, 11, 27, 39, 124, 77, 77, 41, + 89, 83, 61, 81, 73, 69, 75, 77, 83, 59, + 57, 51, 53, 49, 41, 11, 31, 37, 11, 4, + 6, 2, 20, 20, 20, 10, 40, 32, 26, 26, + 18, 40, 30, 16, 38, 34, 24, 18, 22, 1, + 3, 9, 10, 15, 34, 60, 34, 20, 52, 46, + 24, 40, 54, 36, 48, 20, 8, 28, 98, 94, + 100, 96, 98, 120, 124, 124, 124, 124, 124, 124, + 124, 124, 58, 124, 124, 124, 124, 124, 124, 94, + 66, 62, 12, 29, 19, 57, 105, 114, 112, 120, + 102, 94, 92, 70, 72, 52, 52, 34, 26, 22, + 11, 21, 2, 1, 11, 68, 76, 68, 50, 66, + 64, 36, 50, 54, 32, 4, 10, 5, 49, 40, + 20, 2, 29, 11, 7, 7, 1, 3, 9, 5, + 5, 0, 12, 3, 2, 8, 10, 124, 124, 124, + 122, 110, 98, 84, 50, 9, 1, 60, 46, 40, + 34, 20, 10, 6, 1, 21, 39, 31, 21, 31, + 23, 9, 19, 33, 9, 1, 6, 1, 2, 8, + 8, 124, 124, 122, 108, 98, 84, 62, 32, 17, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 5 */ + + 124, 18, 21, 124, 18, 21, 109, 65, 24, 20, + 20, 46, 108, 114, 108, 28, 85, 10, 19, 4, + 2, 22, 15, 16, 11, 36, 108, 23, 63, 85, + 97, 67, 7, 19, 4, 2, 41, 51, 8, 32, + 9, 31, 57, 14, 11, 41, 49, 10, 23, 53, + 16, 13, 33, 49, 1, 17, 17, 29, 11, 10, + 44, 0, 0, 0, 33, 47, 67, 11, 36, 2, + 36, 25, 63, 107, 51, 59, 8, 19, 47, 29, + 79, 41, 65, 49, 107, 51, 47, 59, 79, 36, + 1, 14, 53, 19, 51, 41, 109, 16, 4, 6, + 13, 32, 9, 7, 43, 41, 25, 21, 21, 18, + 24, 7, 26, 40, 31, 0, 9, 7, 12, 23, + 50, 3, 4, 50, 66, 72, 60, 46, 51, 13, + 17, 26, 24, 67, 16, 20, 18, 6, 14, 14, + 10, 10, 4, 10, 7, 10, 40, 22, 17, 1, + 4, 3, 22, 38, 36, 20, 24, 10, 10, 8, + 48, 36, 32, 27, 6, 15, 64, 46, 38, 28, + 34, 38, 24, 32, 46, 2, 32, 18, 18, 10, + 1, 22, 10, 16, 20, 14, 30, 46, 16, 35, + 12, 27, 26, 52, 39, 98, 122, 104, 102, 106, + 108, 96, 96, 88, 70, 56, 50, 64, 42, 23, + 7, 6, 11, 56, 52, 46, 56, 50, 
36, 30, + 32, 5, 4, 5, 59, 49, 83, 21, 40, 36, + 22, 0, 10, 9, 19, 31, 65, 21, 78, 42, + 30, 16, 12, 9, 31, 39, 91, 10, 70, 56, + 50, 40, 20, 10, 1, 19, 45, 13, 72, 62, + 38, 8, 12, 15, 33, 59, 10, 112, 92, 82, + 64, 46, 10, 11, 27, 37, 124, 75, 75, 39, + 87, 81, 59, 79, 71, 67, 73, 73, 79, 57, + 55, 51, 53, 47, 41, 11, 29, 35, 11, 2, + 6, 2, 20, 18, 18, 8, 38, 30, 26, 24, + 18, 40, 30, 16, 36, 32, 24, 18, 20, 1, + 3, 9, 10, 15, 32, 60, 34, 20, 50, 44, + 24, 38, 52, 34, 46, 18, 6, 24, 96, 92, + 100, 94, 96, 116, 124, 124, 124, 124, 124, 124, + 124, 124, 56, 124, 124, 124, 124, 124, 122, 90, + 64, 60, 12, 27, 19, 55, 101, 110, 110, 116, + 98, 90, 88, 68, 68, 50, 48, 32, 22, 18, + 13, 23, 2, 1, 13, 66, 72, 64, 46, 64, + 62, 32, 48, 52, 28, 2, 8, 7, 51, 40, + 18, 0, 27, 9, 7, 7, 0, 1, 7, 3, + 3, 2, 16, 1, 4, 10, 14, 124, 124, 124, + 116, 106, 92, 78, 44, 13, 1, 62, 48, 42, + 34, 22, 10, 6, 0, 19, 39, 31, 19, 29, + 21, 7, 17, 31, 9, 1, 6, 0, 4, 8, + 8, 124, 124, 116, 102, 92, 78, 54, 24, 23, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 6 */ + + 124, 18, 23, 124, 18, 23, 105, 63, 26, 20, + 20, 44, 106, 112, 108, 28, 81, 10, 19, 6, + 2, 20, 15, 14, 13, 34, 106, 25, 65, 87, + 91, 65, 7, 19, 6, 2, 39, 49, 10, 30, + 7, 29, 55, 12, 11, 41, 49, 10, 21, 53, + 16, 13, 31, 49, 1, 17, 17, 29, 11, 10, + 44, 0, 0, 0, 31, 47, 67, 11, 36, 0, + 36, 25, 61, 103, 47, 55, 10, 15, 45, 27, + 75, 39, 63, 49, 105, 51, 47, 59, 79, 38, + 1, 14, 51, 17, 49, 41, 107, 16, 2, 4, + 15, 32, 9, 7, 43, 41, 23, 21, 21, 18, + 24, 5, 26, 38, 31, 0, 9, 7, 12, 23, + 48, 3, 4, 48, 64, 70, 60, 46, 51, 13, + 17, 26, 22, 67, 14, 20, 18, 6, 14, 14, + 10, 10, 4, 10, 5, 10, 38, 22, 17, 3, + 4, 5, 22, 36, 36, 20, 22, 10, 10, 8, + 44, 36, 30, 27, 6, 17, 62, 46, 36, 28, + 34, 38, 24, 32, 46, 2, 30, 18, 16, 10, + 1, 20, 8, 16, 18, 12, 28, 44, 14, 35, + 12, 25, 24, 48, 39, 94, 118, 100, 98, 102, + 104, 92, 92, 84, 66, 54, 48, 60, 38, 25, + 7, 6, 13, 54, 50, 44, 52, 46, 34, 28, + 30, 
7, 2, 7, 59, 49, 81, 19, 40, 36, + 22, 2, 10, 9, 19, 31, 63, 19, 76, 42, + 30, 16, 14, 9, 29, 37, 87, 10, 72, 56, + 50, 40, 22, 10, 1, 17, 45, 13, 72, 62, + 38, 8, 12, 13, 31, 57, 10, 110, 92, 80, + 64, 46, 10, 9, 25, 37, 124, 75, 73, 39, + 85, 79, 57, 75, 69, 65, 71, 71, 77, 57, + 53, 51, 51, 43, 41, 13, 29, 35, 11, 2, + 4, 2, 18, 16, 16, 6, 38, 30, 26, 24, + 16, 38, 28, 16, 36, 32, 22, 18, 20, 1, + 5, 9, 10, 17, 32, 58, 32, 18, 48, 44, + 26, 38, 50, 34, 46, 18, 6, 22, 94, 90, + 98, 92, 94, 114, 124, 124, 124, 124, 124, 124, + 124, 122, 54, 124, 124, 124, 124, 124, 118, 86, + 62, 58, 12, 25, 19, 51, 95, 106, 106, 112, + 94, 86, 84, 64, 64, 46, 46, 30, 20, 16, + 15, 25, 0, 3, 15, 64, 70, 62, 42, 60, + 58, 30, 44, 48, 26, 1, 4, 11, 53, 38, + 16, 1, 27, 9, 7, 5, 0, 1, 3, 1, + 1, 4, 18, 2, 6, 14, 16, 124, 124, 120, + 112, 100, 88, 72, 40, 17, 0, 62, 48, 42, + 34, 24, 12, 6, 0, 19, 39, 29, 19, 29, + 19, 5, 17, 31, 7, 0, 6, 0, 6, 8, + 8, 124, 124, 112, 96, 84, 70, 48, 18, 27, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 7 */ + + 124, 18, 23, 124, 18, 23, 101, 59, 26, 20, + 18, 40, 102, 108, 108, 28, 75, 8, 17, 6, + 2, 18, 15, 12, 15, 34, 102, 29, 67, 87, + 85, 63, 7, 17, 6, 2, 39, 47, 10, 30, + 7, 29, 55, 12, 13, 39, 49, 10, 21, 51, + 14, 13, 31, 49, 0, 15, 17, 27, 9, 10, + 44, 0, 0, 0, 31, 47, 67, 9, 34, 0, + 36, 25, 61, 101, 43, 53, 14, 11, 43, 25, + 73, 35, 61, 47, 101, 49, 45, 57, 77, 38, + 1, 14, 51, 17, 49, 39, 103, 14, 2, 4, + 15, 32, 9, 7, 41, 41, 23, 21, 19, 18, + 22, 5, 26, 38, 31, 0, 9, 7, 12, 23, + 48, 3, 4, 48, 62, 68, 58, 44, 49, 11, + 17, 24, 22, 65, 14, 20, 16, 6, 14, 14, + 12, 12, 4, 10, 5, 10, 36, 22, 19, 3, + 4, 5, 20, 34, 34, 20, 22, 12, 10, 8, + 42, 34, 28, 27, 6, 17, 62, 46, 36, 28, + 34, 38, 24, 32, 46, 2, 30, 18, 16, 10, + 1, 20, 8, 14, 18, 12, 28, 42, 14, 35, + 10, 25, 22, 46, 39, 92, 114, 96, 94, 98, + 100, 88, 88, 80, 62, 50, 44, 54, 34, 27, + 7, 4, 13, 52, 48, 42, 50, 44, 30, 26, + 28, 7, 2, 7, 59, 49, 81, 
19, 40, 36, + 22, 2, 10, 9, 19, 29, 61, 19, 76, 42, + 30, 16, 14, 7, 27, 37, 83, 10, 72, 56, + 50, 40, 22, 10, 1, 17, 43, 13, 72, 60, + 36, 8, 12, 13, 31, 57, 10, 110, 90, 78, + 62, 46, 10, 9, 25, 35, 124, 73, 71, 37, + 81, 77, 55, 73, 65, 63, 67, 67, 73, 55, + 51, 49, 51, 41, 39, 13, 29, 33, 11, 0, + 4, 0, 16, 14, 14, 4, 36, 28, 26, 22, + 16, 38, 28, 16, 34, 30, 22, 16, 18, 1, + 5, 9, 10, 17, 30, 58, 30, 18, 46, 42, + 26, 36, 48, 32, 44, 16, 4, 18, 92, 90, + 98, 90, 90, 110, 124, 124, 124, 124, 124, 124, + 124, 118, 50, 124, 124, 124, 124, 124, 112, 82, + 60, 56, 12, 23, 17, 49, 91, 104, 102, 108, + 92, 82, 80, 62, 60, 44, 42, 26, 16, 12, + 17, 27, 0, 3, 17, 60, 66, 58, 40, 56, + 54, 26, 42, 44, 22, 3, 2, 13, 53, 36, + 16, 3, 27, 7, 5, 5, 2, 0, 1, 0, + 0, 6, 20, 4, 8, 16, 18, 124, 122, 116, + 106, 96, 82, 66, 34, 21, 0, 64, 50, 44, + 36, 26, 12, 8, 2, 17, 37, 29, 17, 27, + 17, 3, 17, 29, 7, 0, 6, 2, 8, 8, + 8, 124, 124, 106, 90, 78, 62, 40, 10, 33, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 8 */ + + 124, 16, 23, 124, 16, 23, 99, 57, 26, 20, + 18, 38, 100, 106, 108, 28, 71, 8, 15, 8, + 2, 16, 15, 10, 19, 32, 98, 33, 69, 89, + 81, 61, 7, 15, 8, 2, 39, 45, 10, 28, + 7, 29, 53, 10, 13, 39, 51, 10, 21, 51, + 14, 15, 31, 49, 0, 15, 17, 27, 9, 10, + 44, 0, 0, 0, 29, 49, 67, 9, 32, 0, + 36, 25, 61, 97, 41, 49, 16, 9, 41, 23, + 71, 33, 59, 45, 97, 49, 45, 57, 75, 38, + 1, 14, 49, 17, 47, 39, 101, 14, 0, 2, + 17, 32, 9, 7, 41, 41, 23, 21, 19, 16, + 22, 5, 26, 36, 31, 0, 9, 7, 10, 23, + 46, 5, 4, 46, 58, 66, 56, 42, 49, 11, + 17, 22, 20, 65, 12, 18, 16, 6, 14, 14, + 14, 12, 4, 8, 5, 8, 34, 20, 21, 3, + 4, 7, 20, 32, 34, 18, 20, 12, 10, 8, + 38, 34, 26, 27, 6, 19, 60, 44, 36, 28, + 34, 36, 22, 32, 44, 0, 28, 18, 16, 8, + 3, 18, 6, 14, 16, 10, 26, 40, 12, 35, + 10, 25, 20, 42, 39, 88, 110, 92, 90, 94, + 94, 84, 84, 76, 58, 46, 40, 50, 30, 29, + 7, 4, 15, 50, 44, 38, 46, 40, 28, 22, + 24, 9, 0, 9, 59, 49, 79, 19, 40, 36, + 22, 2, 10, 9, 19, 
29, 59, 17, 76, 42, + 30, 16, 14, 7, 27, 35, 81, 10, 72, 56, + 50, 38, 22, 10, 1, 17, 43, 13, 72, 60, + 36, 8, 12, 13, 31, 55, 10, 108, 88, 76, + 60, 44, 10, 9, 25, 35, 124, 71, 69, 37, + 79, 75, 55, 71, 63, 61, 65, 65, 71, 55, + 51, 49, 49, 39, 39, 15, 29, 33, 11, 0, + 2, 0, 14, 12, 10, 2, 34, 28, 26, 22, + 14, 36, 26, 14, 32, 28, 20, 16, 16, 3, + 7, 9, 10, 19, 28, 56, 28, 18, 44, 40, + 26, 34, 46, 32, 44, 14, 2, 14, 90, 88, + 96, 86, 88, 108, 124, 124, 124, 124, 124, 124, + 124, 112, 48, 124, 124, 124, 124, 122, 108, 78, + 56, 52, 12, 23, 17, 47, 87, 100, 98, 104, + 88, 76, 76, 58, 56, 40, 38, 24, 12, 10, + 19, 29, 1, 5, 19, 58, 64, 54, 36, 52, + 50, 24, 38, 40, 20, 5, 1, 17, 55, 34, + 14, 5, 27, 7, 5, 3, 2, 0, 0, 0, + 2, 8, 22, 6, 10, 18, 20, 122, 118, 112, + 102, 90, 76, 60, 28, 27, 0, 64, 50, 44, + 36, 26, 12, 8, 2, 17, 37, 29, 17, 27, + 15, 1, 17, 29, 5, 2, 6, 2, 8, 8, + 6, 124, 122, 102, 84, 72, 54, 32, 4, 39, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 9 */ + + 124, 16, 23, 124, 16, 23, 95, 55, 28, 20, + 18, 36, 98, 104, 108, 28, 67, 8, 13, 10, + 2, 16, 13, 8, 21, 30, 94, 35, 71, 91, + 75, 57, 7, 13, 10, 2, 37, 43, 12, 26, + 7, 27, 51, 10, 13, 39, 51, 10, 21, 49, + 14, 15, 31, 49, 0, 15, 15, 27, 9, 10, + 44, 0, 0, 0, 27, 49, 67, 9, 30, 0, + 36, 23, 61, 93, 37, 45, 18, 5, 37, 21, + 67, 31, 55, 43, 93, 49, 45, 57, 73, 38, + 1, 14, 47, 17, 45, 37, 99, 14, 0, 2, + 17, 32, 9, 5, 39, 39, 21, 21, 19, 16, + 22, 5, 26, 36, 29, 0, 7, 7, 10, 21, + 44, 5, 4, 44, 56, 66, 54, 40, 47, 11, + 15, 20, 18, 65, 10, 18, 16, 8, 14, 14, + 16, 12, 4, 8, 3, 8, 34, 20, 23, 3, + 4, 9, 20, 30, 34, 18, 18, 12, 10, 8, + 36, 34, 26, 27, 6, 21, 58, 44, 36, 28, + 34, 36, 22, 32, 44, 0, 28, 18, 16, 8, + 3, 18, 6, 14, 16, 10, 24, 40, 12, 35, + 10, 25, 18, 40, 39, 84, 108, 90, 88, 90, + 90, 82, 82, 72, 54, 44, 38, 46, 28, 31, + 7, 4, 17, 48, 42, 36, 44, 38, 26, 20, + 22, 11, 1, 11, 59, 47, 77, 17, 42, 36, + 22, 2, 12, 7, 17, 27, 57, 15, 76, 42, + 30, 16, 16, 
7, 25, 33, 77, 10, 72, 56, + 50, 38, 24, 10, 1, 17, 41, 13, 74, 60, + 36, 8, 14, 13, 31, 53, 10, 108, 88, 76, + 58, 44, 10, 9, 23, 33, 124, 69, 67, 35, + 77, 71, 53, 67, 61, 57, 63, 63, 67, 53, + 49, 49, 47, 35, 39, 17, 27, 31, 11, 0, + 0, 0, 14, 10, 8, 0, 34, 28, 26, 22, + 14, 34, 26, 14, 30, 28, 20, 16, 14, 3, + 7, 7, 12, 19, 28, 54, 28, 18, 44, 40, + 26, 32, 44, 32, 44, 12, 2, 12, 90, 86, + 94, 84, 86, 106, 120, 120, 124, 124, 124, 124, + 124, 108, 46, 124, 124, 124, 124, 116, 104, 76, + 54, 50, 12, 21, 17, 45, 83, 96, 96, 100, + 84, 72, 74, 56, 52, 38, 36, 22, 10, 8, + 21, 29, 1, 5, 21, 56, 62, 52, 32, 50, + 48, 22, 36, 38, 18, 7, 3, 19, 57, 34, + 12, 5, 25, 7, 5, 1, 4, 2, 2, 2, + 4, 10, 26, 8, 12, 22, 24, 120, 116, 108, + 98, 84, 70, 54, 22, 31, 2, 64, 50, 46, + 36, 28, 14, 8, 4, 15, 37, 27, 15, 27, + 13, 2, 15, 27, 3, 4, 6, 4, 10, 8, + 6, 124, 118, 98, 80, 66, 48, 24, 1, 43, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 10 */ + + 124, 16, 23, 124, 16, 23, 91, 51, 28, 20, + 16, 32, 94, 100, 108, 28, 61, 6, 11, 10, + 2, 14, 13, 6, 23, 30, 90, 39, 73, 91, + 69, 55, 7, 11, 10, 2, 37, 41, 12, 26, + 7, 27, 51, 10, 15, 37, 51, 10, 21, 47, + 12, 15, 31, 49, 2, 13, 15, 25, 7, 10, + 44, 0, 0, 0, 27, 49, 67, 7, 28, 0, + 36, 23, 61, 91, 33, 43, 22, 1, 35, 19, + 65, 27, 53, 41, 89, 47, 43, 55, 71, 38, + 1, 14, 47, 17, 45, 37, 95, 12, 0, 2, + 17, 32, 9, 5, 39, 39, 21, 21, 17, 16, + 20, 5, 26, 36, 29, 0, 7, 7, 10, 21, + 44, 5, 4, 44, 54, 64, 52, 38, 45, 9, + 15, 18, 18, 63, 10, 18, 14, 8, 14, 14, + 18, 14, 4, 8, 3, 8, 32, 20, 25, 3, + 4, 9, 18, 28, 32, 18, 18, 14, 10, 8, + 34, 32, 24, 27, 6, 21, 58, 44, 36, 28, + 34, 36, 22, 32, 44, 0, 28, 18, 16, 8, + 3, 18, 6, 12, 16, 10, 24, 38, 10, 35, + 8, 25, 16, 38, 39, 82, 104, 86, 84, 86, + 86, 78, 78, 68, 50, 40, 34, 40, 24, 33, + 7, 2, 17, 46, 40, 34, 42, 34, 22, 18, + 20, 11, 1, 11, 59, 47, 77, 17, 42, 36, + 22, 2, 12, 7, 17, 27, 55, 15, 76, 42, + 30, 16, 16, 5, 23, 33, 73, 10, 72, 56, + 50, 38, 
24, 10, 1, 17, 39, 13, 74, 58, + 34, 8, 14, 13, 31, 53, 10, 106, 86, 74, + 56, 44, 10, 9, 23, 33, 124, 67, 65, 33, + 73, 69, 51, 65, 57, 55, 59, 59, 63, 51, + 47, 47, 47, 33, 37, 17, 27, 29, 11, 1, + 0, 1, 12, 8, 6, 1, 32, 26, 26, 20, + 12, 34, 26, 14, 28, 26, 20, 14, 12, 3, + 7, 7, 12, 19, 26, 54, 26, 18, 42, 38, + 26, 30, 42, 30, 42, 10, 0, 8, 88, 86, + 94, 82, 82, 102, 116, 116, 124, 124, 124, 124, + 124, 104, 42, 118, 124, 118, 124, 112, 98, 72, + 52, 48, 12, 19, 15, 43, 79, 94, 92, 96, + 82, 68, 70, 52, 48, 36, 32, 18, 6, 4, + 23, 31, 3, 5, 23, 52, 58, 48, 30, 46, + 44, 18, 32, 34, 14, 9, 5, 21, 57, 32, + 12, 7, 25, 5, 3, 1, 4, 4, 4, 4, + 6, 12, 28, 10, 14, 24, 26, 120, 112, 104, + 92, 80, 64, 48, 16, 35, 2, 66, 52, 46, + 38, 30, 14, 10, 4, 15, 35, 27, 13, 25, + 11, 4, 15, 25, 3, 4, 6, 4, 12, 8, + 6, 124, 114, 92, 74, 60, 40, 16, 9, 49, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 11 */ + + 124, 16, 25, 124, 16, 25, 87, 49, 30, 20, + 16, 30, 92, 98, 108, 28, 57, 6, 11, 12, + 2, 12, 13, 4, 25, 28, 88, 41, 75, 93, + 63, 53, 7, 11, 12, 2, 35, 39, 14, 24, + 5, 25, 49, 8, 15, 37, 51, 10, 19, 47, + 12, 15, 29, 49, 2, 13, 15, 25, 7, 10, + 44, 0, 0, 0, 25, 49, 67, 7, 28, 1, + 36, 23, 59, 87, 29, 39, 24, 2, 33, 17, + 61, 25, 51, 41, 87, 47, 43, 55, 71, 40, + 1, 14, 45, 15, 43, 35, 93, 12, 1, 0, + 19, 32, 9, 5, 37, 39, 19, 21, 17, 16, + 20, 3, 26, 34, 29, 0, 7, 7, 10, 21, + 42, 5, 4, 42, 52, 62, 52, 38, 45, 9, + 15, 18, 16, 63, 8, 18, 14, 8, 14, 14, + 18, 14, 4, 8, 1, 8, 30, 20, 25, 5, + 4, 11, 18, 26, 32, 18, 16, 14, 10, 8, + 30, 32, 22, 27, 6, 23, 56, 44, 34, 28, + 34, 36, 22, 32, 44, 0, 26, 18, 14, 8, + 3, 16, 4, 12, 14, 8, 22, 36, 10, 35, + 8, 23, 14, 34, 39, 78, 100, 82, 80, 82, + 82, 74, 74, 64, 46, 38, 32, 36, 20, 35, + 7, 2, 19, 44, 38, 32, 38, 32, 20, 16, + 18, 13, 3, 13, 59, 47, 75, 15, 42, 36, + 22, 4, 12, 7, 17, 25, 53, 13, 74, 42, + 30, 16, 18, 5, 21, 31, 69, 10, 74, 56, + 50, 38, 26, 10, 1, 15, 39, 13, 74, 58, + 34, 8, 14, 
11, 29, 51, 10, 106, 86, 72, + 56, 44, 10, 7, 21, 31, 124, 67, 63, 33, + 71, 67, 49, 61, 55, 53, 57, 57, 61, 51, + 45, 47, 45, 29, 37, 19, 27, 29, 11, 1, + 1, 1, 10, 6, 4, 3, 32, 26, 26, 20, + 12, 32, 24, 14, 28, 26, 18, 14, 12, 3, + 9, 7, 12, 21, 26, 52, 24, 16, 40, 38, + 28, 30, 40, 30, 42, 10, 0, 6, 86, 84, + 92, 80, 80, 100, 112, 112, 122, 120, 124, 124, + 120, 98, 40, 114, 124, 112, 124, 106, 94, 68, + 50, 46, 12, 17, 15, 39, 73, 90, 88, 92, + 78, 64, 66, 50, 44, 32, 30, 16, 4, 2, + 25, 33, 3, 7, 25, 50, 56, 46, 26, 42, + 40, 16, 30, 30, 12, 13, 9, 25, 59, 30, + 10, 9, 25, 5, 3, 0, 6, 4, 8, 6, + 8, 14, 30, 14, 16, 28, 28, 118, 110, 100, + 88, 74, 60, 42, 12, 39, 4, 66, 52, 48, + 38, 32, 16, 10, 6, 13, 35, 25, 13, 25, + 9, 6, 15, 25, 1, 6, 6, 6, 14, 8, + 6, 124, 110, 88, 68, 52, 32, 10, 15, 53, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 12 */ + + 124, 16, 25, 124, 16, 25, 85, 45, 30, 20, + 14, 26, 88, 96, 108, 28, 53, 6, 9, 14, + 2, 10, 13, 2, 27, 28, 84, 45, 77, 93, + 57, 51, 7, 9, 14, 2, 35, 37, 14, 24, + 5, 25, 47, 8, 17, 37, 51, 10, 19, 45, + 12, 17, 29, 49, 4, 13, 15, 25, 5, 10, + 44, 0, 0, 0, 25, 51, 67, 5, 26, 1, + 36, 23, 59, 83, 27, 37, 28, 4, 31, 15, + 59, 23, 49, 39, 83, 45, 43, 53, 69, 40, + 1, 14, 45, 15, 43, 35, 89, 12, 1, 0, + 19, 32, 9, 5, 37, 39, 19, 21, 15, 16, + 20, 3, 26, 34, 29, 0, 7, 7, 10, 21, + 40, 7, 4, 40, 50, 60, 50, 36, 43, 7, + 15, 16, 16, 61, 8, 18, 14, 8, 14, 14, + 20, 14, 4, 6, 1, 6, 28, 18, 27, 5, + 4, 11, 16, 24, 32, 16, 14, 14, 10, 8, + 28, 32, 20, 27, 6, 23, 54, 44, 34, 28, + 34, 36, 22, 32, 42, 0, 26, 18, 14, 8, + 5, 16, 4, 10, 14, 8, 20, 34, 8, 35, + 6, 23, 12, 32, 39, 76, 96, 78, 76, 78, + 78, 70, 70, 60, 42, 34, 28, 32, 16, 37, + 7, 2, 19, 42, 36, 30, 36, 28, 16, 14, + 14, 15, 3, 13, 59, 47, 75, 15, 42, 36, + 22, 4, 12, 7, 17, 25, 51, 13, 74, 42, + 30, 16, 18, 3, 21, 29, 65, 10, 74, 56, + 50, 38, 26, 10, 1, 15, 37, 13, 74, 58, + 32, 8, 14, 11, 29, 49, 10, 104, 84, 70, + 54, 44, 10, 7, 
21, 31, 124, 65, 61, 31, + 69, 65, 47, 59, 53, 51, 55, 53, 57, 49, + 45, 47, 45, 27, 37, 19, 27, 27, 11, 3, + 1, 1, 8, 4, 2, 5, 30, 24, 26, 18, + 10, 32, 24, 14, 26, 24, 18, 14, 10, 5, + 9, 7, 12, 21, 24, 52, 22, 16, 38, 36, + 28, 28, 38, 28, 40, 8, 1, 2, 84, 82, + 92, 78, 78, 96, 108, 108, 118, 114, 124, 124, + 114, 94, 38, 108, 124, 106, 116, 100, 88, 64, + 48, 42, 12, 15, 15, 37, 69, 86, 84, 88, + 74, 60, 62, 46, 40, 30, 26, 14, 0, 1, + 27, 35, 5, 7, 27, 48, 52, 42, 22, 38, + 36, 12, 26, 26, 8, 15, 11, 27, 61, 28, + 8, 11, 25, 3, 3, 0, 6, 6, 10, 6, + 10, 16, 32, 16, 18, 30, 30, 118, 106, 96, + 82, 70, 54, 36, 6, 45, 4, 68, 54, 48, + 38, 32, 16, 10, 6, 13, 35, 25, 11, 23, + 7, 8, 15, 23, 1, 6, 6, 6, 16, 8, + 6, 122, 106, 82, 62, 46, 24, 2, 23, 59, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 13 */ + + 124, 16, 25, 124, 16, 25, 81, 43, 30, 20, + 14, 24, 86, 92, 108, 28, 47, 4, 7, 14, + 2, 10, 11, 0, 29, 26, 80, 49, 79, 95, + 51, 47, 7, 7, 14, 2, 35, 35, 14, 22, + 5, 25, 47, 8, 17, 35, 51, 10, 19, 43, + 10, 17, 29, 49, 4, 11, 13, 23, 5, 10, + 44, 0, 0, 0, 23, 51, 67, 5, 24, 1, + 36, 21, 59, 81, 23, 33, 30, 8, 27, 13, + 57, 19, 47, 37, 79, 45, 41, 53, 67, 40, + 1, 14, 43, 15, 41, 33, 87, 10, 1, 0, + 19, 32, 9, 3, 35, 39, 19, 21, 15, 16, + 18, 3, 26, 34, 27, 0, 5, 7, 10, 21, + 40, 7, 4, 40, 48, 60, 48, 34, 41, 7, + 15, 14, 14, 61, 6, 18, 12, 8, 14, 14, + 22, 16, 4, 6, 1, 6, 26, 18, 29, 5, + 4, 13, 16, 22, 30, 16, 14, 16, 10, 8, + 26, 30, 18, 27, 6, 25, 54, 44, 34, 28, + 34, 36, 22, 32, 42, 0, 26, 18, 14, 8, + 5, 16, 4, 10, 14, 8, 20, 32, 8, 35, + 6, 23, 10, 30, 39, 72, 92, 76, 74, 74, + 74, 66, 66, 56, 38, 30, 24, 26, 14, 39, + 7, 0, 21, 40, 34, 28, 34, 26, 14, 12, + 12, 15, 5, 15, 59, 47, 73, 15, 42, 36, + 22, 4, 12, 5, 15, 23, 49, 11, 74, 42, + 30, 16, 18, 3, 19, 29, 61, 10, 74, 56, + 50, 38, 26, 10, 1, 15, 35, 13, 76, 56, + 32, 8, 14, 11, 29, 49, 10, 104, 82, 68, + 52, 44, 10, 7, 21, 29, 124, 63, 59, 29, + 65, 63, 45, 57, 49, 49, 
51, 51, 53, 47, + 43, 45, 43, 25, 35, 21, 25, 25, 11, 3, + 3, 3, 8, 2, 0, 7, 28, 24, 26, 18, + 10, 30, 24, 14, 24, 22, 18, 12, 8, 5, + 9, 7, 12, 21, 22, 50, 22, 16, 36, 34, + 28, 26, 36, 28, 40, 6, 3, 1, 82, 82, + 90, 76, 74, 94, 104, 104, 114, 110, 124, 122, + 108, 90, 34, 102, 124, 100, 108, 96, 84, 60, + 46, 40, 12, 13, 13, 35, 65, 84, 82, 84, + 72, 56, 58, 44, 36, 28, 22, 10, 3, 3, + 29, 37, 5, 7, 29, 44, 50, 38, 20, 36, + 34, 10, 24, 24, 6, 17, 13, 29, 61, 28, + 8, 13, 23, 3, 1, 2, 8, 8, 12, 8, + 12, 18, 36, 18, 20, 32, 34, 116, 102, 92, + 78, 64, 48, 30, 0, 49, 4, 68, 54, 50, + 40, 34, 16, 12, 8, 11, 33, 25, 9, 23, + 5, 10, 13, 21, 0, 8, 6, 8, 18, 8, + 6, 118, 102, 78, 56, 40, 18, 5, 29, 65, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 14 */ + + 122, 16, 25, 122, 16, 25, 77, 39, 32, 20, + 12, 20, 82, 90, 108, 28, 43, 4, 5, 16, + 2, 8, 11, 1, 31, 26, 76, 51, 81, 95, + 45, 45, 7, 5, 16, 2, 33, 33, 16, 22, + 5, 23, 45, 6, 19, 35, 51, 10, 19, 43, + 10, 17, 29, 49, 6, 11, 13, 23, 3, 10, + 44, 0, 0, 0, 23, 51, 67, 3, 22, 1, + 36, 21, 59, 77, 19, 31, 34, 12, 25, 11, + 53, 17, 45, 35, 75, 43, 41, 51, 65, 40, + 1, 14, 43, 15, 41, 33, 83, 10, 3, 1, + 21, 32, 9, 3, 35, 39, 17, 21, 13, 16, + 18, 3, 26, 32, 27, 0, 5, 7, 10, 21, + 38, 7, 4, 38, 46, 58, 46, 32, 41, 5, + 15, 12, 14, 59, 6, 18, 12, 8, 14, 14, + 24, 16, 4, 6, 0, 6, 24, 18, 31, 5, + 4, 13, 14, 20, 30, 16, 12, 16, 10, 8, + 22, 30, 16, 27, 6, 25, 52, 44, 34, 28, + 34, 36, 22, 32, 42, 0, 24, 18, 14, 8, + 5, 14, 2, 8, 12, 6, 18, 30, 6, 35, + 4, 23, 8, 26, 39, 70, 88, 72, 70, 70, + 70, 62, 62, 52, 34, 28, 22, 22, 10, 41, + 7, 0, 21, 38, 32, 26, 30, 22, 10, 10, + 10, 17, 5, 15, 59, 47, 73, 13, 42, 36, + 22, 4, 12, 5, 15, 23, 47, 11, 74, 42, + 30, 16, 20, 1, 17, 27, 57, 10, 74, 56, + 50, 38, 28, 10, 1, 15, 35, 13, 76, 56, + 30, 8, 14, 11, 29, 47, 10, 102, 82, 66, + 50, 44, 10, 7, 19, 29, 124, 61, 57, 29, + 63, 61, 43, 53, 47, 47, 49, 47, 51, 47, + 41, 45, 43, 21, 35, 21, 25, 25, 11, 
5, + 3, 3, 6, 0, 1, 9, 28, 22, 26, 16, + 8, 30, 22, 14, 22, 22, 16, 12, 6, 5, + 11, 7, 12, 23, 22, 50, 20, 16, 34, 34, + 28, 24, 34, 26, 38, 4, 3, 3, 80, 80, + 90, 74, 72, 90, 100, 100, 110, 104, 120, 118, + 102, 84, 32, 96, 124, 94, 100, 90, 78, 56, + 44, 38, 12, 11, 13, 33, 61, 80, 78, 80, + 68, 52, 54, 40, 32, 24, 20, 8, 5, 7, + 31, 39, 7, 9, 31, 42, 46, 36, 16, 32, + 30, 6, 20, 20, 2, 19, 17, 33, 63, 26, + 6, 15, 23, 1, 1, 2, 8, 8, 14, 10, + 14, 20, 38, 20, 22, 36, 36, 116, 100, 88, + 72, 60, 42, 24, 5, 53, 6, 70, 56, 50, + 40, 36, 18, 12, 8, 11, 33, 23, 9, 21, + 3, 12, 13, 21, 0, 8, 6, 8, 20, 8, + 6, 116, 98, 72, 50, 34, 10, 13, 37, 69, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 15 */ + + 120, 16, 25, 120, 16, 25, 73, 37, 32, 20, + 12, 18, 80, 88, 108, 28, 39, 4, 3, 18, + 2, 6, 11, 3, 33, 24, 72, 55, 83, 97, + 39, 43, 7, 3, 18, 2, 33, 31, 16, 20, + 5, 23, 43, 6, 19, 35, 51, 10, 19, 41, + 10, 17, 29, 49, 6, 11, 13, 23, 3, 10, + 44, 0, 0, 0, 21, 51, 67, 3, 20, 1, + 36, 21, 59, 73, 15, 27, 36, 16, 23, 9, + 51, 15, 43, 33, 71, 43, 41, 51, 63, 40, + 1, 14, 41, 15, 39, 31, 81, 10, 3, 1, + 21, 32, 9, 3, 33, 39, 17, 21, 13, 16, + 18, 3, 26, 32, 27, 0, 5, 7, 10, 21, + 36, 7, 4, 36, 44, 56, 44, 30, 39, 5, + 15, 10, 12, 59, 4, 18, 12, 8, 14, 14, + 26, 16, 4, 6, 0, 6, 22, 18, 33, 5, + 4, 15, 14, 18, 30, 16, 10, 16, 10, 8, + 20, 30, 14, 27, 6, 27, 50, 44, 34, 28, + 34, 36, 22, 32, 42, 0, 24, 18, 14, 8, + 5, 14, 2, 8, 12, 6, 16, 28, 6, 35, + 4, 23, 6, 24, 39, 66, 84, 68, 66, 66, + 66, 58, 58, 48, 30, 24, 18, 18, 6, 43, + 7, 0, 23, 36, 30, 24, 28, 20, 8, 8, + 8, 19, 7, 17, 59, 47, 71, 13, 42, 36, + 22, 4, 12, 5, 15, 21, 45, 9, 74, 42, + 30, 16, 20, 1, 15, 25, 53, 10, 74, 56, + 50, 38, 28, 10, 1, 15, 33, 13, 76, 56, + 30, 8, 14, 11, 29, 45, 10, 102, 80, 64, + 48, 44, 10, 7, 19, 27, 124, 59, 55, 27, + 61, 59, 41, 51, 45, 45, 47, 45, 47, 45, + 39, 45, 41, 19, 35, 23, 25, 23, 11, 5, + 5, 3, 4, 1, 3, 11, 26, 22, 26, 16, + 8, 28, 22, 14, 20, 20, 
16, 12, 4, 5, + 11, 7, 12, 23, 20, 48, 18, 16, 32, 32, + 28, 22, 32, 26, 38, 2, 5, 7, 78, 78, + 88, 72, 70, 88, 96, 96, 106, 100, 114, 112, + 96, 80, 30, 90, 118, 88, 92, 84, 74, 52, + 42, 36, 12, 9, 13, 31, 57, 76, 74, 76, + 64, 48, 50, 38, 28, 22, 16, 6, 9, 9, + 33, 41, 7, 9, 33, 40, 44, 32, 12, 28, + 26, 4, 18, 16, 0, 21, 19, 35, 65, 24, + 4, 17, 23, 1, 1, 4, 10, 10, 16, 12, + 16, 22, 40, 22, 24, 38, 38, 114, 96, 84, + 68, 54, 36, 18, 11, 57, 6, 70, 56, 52, + 40, 38, 18, 12, 10, 9, 33, 23, 7, 21, + 1, 14, 13, 19, 2, 10, 6, 10, 22, 8, + 6, 114, 94, 68, 44, 28, 2, 21, 43, 75, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 16 */ + + 116, 14, 27, 116, 14, 27, 71, 35, 32, 20, + 10, 14, 76, 84, 106, 28, 35, 2, 3, 18, + 0, 4, 11, 7, 37, 22, 68, 59, 85, 99, + 35, 41, 9, 3, 18, 0, 33, 29, 16, 18, + 5, 23, 43, 4, 21, 35, 53, 10, 19, 41, + 8, 19, 29, 49, 6, 11, 13, 23, 3, 8, + 44, 0, 0, 0, 21, 53, 67, 3, 18, 3, + 36, 21, 59, 71, 13, 25, 38, 18, 21, 7, + 49, 13, 41, 33, 69, 43, 41, 51, 63, 40, + 1, 14, 41, 15, 39, 31, 79, 8, 5, 3, + 23, 32, 9, 3, 33, 39, 17, 21, 13, 14, + 16, 3, 24, 30, 27, 1, 5, 7, 8, 21, + 34, 9, 2, 34, 40, 54, 42, 28, 39, 5, + 15, 8, 10, 59, 2, 16, 10, 8, 14, 14, + 26, 16, 4, 4, 0, 4, 20, 16, 35, 7, + 2, 17, 12, 16, 28, 14, 8, 16, 10, 8, + 16, 28, 12, 27, 6, 29, 48, 42, 32, 28, + 34, 34, 20, 32, 40, 1, 22, 18, 12, 6, + 7, 12, 0, 6, 10, 4, 14, 26, 4, 35, + 2, 23, 4, 20, 39, 62, 80, 64, 62, 62, + 60, 54, 54, 44, 26, 20, 14, 12, 2, 47, + 9, 1, 25, 34, 26, 20, 24, 16, 4, 4, + 4, 21, 9, 19, 59, 47, 71, 13, 42, 36, + 22, 4, 12, 5, 15, 21, 43, 9, 72, 42, + 30, 16, 20, 1, 15, 25, 51, 8, 74, 56, + 48, 36, 28, 10, 1, 15, 33, 13, 76, 54, + 28, 6, 14, 11, 29, 45, 10, 100, 78, 62, + 46, 42, 10, 7, 19, 27, 124, 59, 53, 27, + 59, 57, 41, 49, 43, 43, 45, 43, 45, 45, + 39, 45, 41, 17, 35, 25, 25, 23, 11, 7, + 7, 5, 2, 3, 7, 15, 24, 20, 26, 14, + 6, 26, 20, 12, 18, 18, 14, 10, 2, 7, + 13, 7, 12, 25, 18, 46, 16, 14, 30, 30, + 28, 20, 28, 24, 
36, 0, 7, 11, 76, 76, + 86, 68, 66, 84, 92, 92, 100, 94, 108, 106, + 90, 74, 26, 84, 110, 82, 82, 78, 68, 48, + 38, 32, 12, 9, 13, 29, 53, 72, 70, 72, + 60, 42, 46, 34, 22, 18, 12, 2, 13, 13, + 35, 43, 9, 11, 37, 36, 40, 28, 8, 24, + 22, 0, 14, 12, 3, 25, 23, 39, 67, 22, + 2, 19, 23, 1, 1, 4, 10, 10, 18, 12, + 18, 22, 42, 24, 26, 40, 40, 112, 92, 78, + 62, 48, 30, 10, 17, 63, 6, 70, 56, 52, + 40, 38, 18, 12, 10, 9, 33, 23, 7, 21, + 0, 16, 13, 19, 2, 10, 6, 10, 22, 8, + 4, 110, 88, 62, 38, 20, 5, 29, 51, 81, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 17 */ + + 114, 14, 27, 114, 14, 27, 67, 31, 34, 22, + 10, 12, 74, 82, 106, 28, 29, 2, 1, 20, + 0, 4, 9, 9, 39, 22, 66, 61, 87, 99, + 29, 37, 9, 1, 20, 0, 31, 25, 18, 18, + 3, 21, 41, 4, 21, 33, 53, 10, 17, 39, + 8, 19, 27, 49, 8, 9, 11, 21, 1, 8, + 44, 0, 0, 0, 19, 53, 67, 1, 18, 3, + 36, 19, 57, 67, 9, 21, 42, 22, 17, 5, + 45, 9, 37, 31, 65, 41, 39, 49, 61, 42, + 0, 16, 39, 13, 37, 29, 75, 8, 5, 3, + 23, 34, 9, 1, 31, 37, 15, 19, 11, 14, + 16, 1, 24, 30, 25, 1, 3, 7, 8, 19, + 34, 9, 2, 34, 38, 54, 42, 28, 37, 3, + 13, 8, 10, 57, 2, 16, 10, 10, 14, 14, + 28, 18, 6, 4, 2, 4, 20, 16, 35, 7, + 2, 17, 12, 14, 28, 14, 8, 18, 12, 8, + 14, 28, 12, 25, 6, 29, 48, 42, 32, 28, + 34, 34, 20, 32, 40, 1, 22, 18, 12, 6, + 7, 12, 0, 6, 10, 4, 14, 26, 4, 33, + 2, 21, 4, 18, 37, 60, 78, 62, 60, 58, + 56, 52, 52, 40, 24, 18, 12, 8, 0, 49, + 9, 1, 25, 32, 24, 18, 22, 14, 2, 2, + 2, 21, 9, 19, 57, 45, 69, 11, 44, 36, + 22, 6, 14, 3, 13, 19, 39, 7, 72, 42, + 30, 16, 22, 0, 13, 23, 47, 8, 76, 58, + 48, 36, 30, 10, 1, 13, 31, 13, 78, 54, + 28, 6, 16, 9, 27, 43, 10, 100, 78, 62, + 46, 42, 10, 5, 17, 25, 124, 57, 51, 25, + 55, 53, 39, 45, 39, 39, 41, 39, 41, 43, + 37, 43, 39, 13, 33, 25, 23, 21, 9, 7, + 7, 5, 2, 3, 9, 17, 24, 20, 28, 14, + 6, 26, 20, 12, 18, 18, 14, 10, 2, 7, + 13, 5, 14, 25, 18, 46, 16, 14, 30, 30, + 30, 20, 26, 24, 36, 0, 7, 13, 76, 76, + 86, 66, 64, 82, 88, 88, 96, 90, 104, 102, + 86, 70, 
24, 80, 104, 76, 74, 74, 64, 46, + 36, 30, 12, 7, 11, 25, 47, 70, 68, 70, + 58, 38, 44, 32, 18, 16, 10, 0, 15, 15, + 37, 43, 9, 11, 39, 34, 38, 26, 6, 22, + 20, 1, 12, 10, 5, 27, 25, 41, 67, 22, + 2, 19, 21, 0, 0, 6, 12, 12, 22, 14, + 20, 24, 46, 28, 28, 44, 44, 112, 90, 74, + 58, 44, 26, 4, 21, 67, 8, 72, 58, 54, + 42, 40, 20, 14, 12, 7, 31, 21, 5, 19, + 4, 20, 11, 17, 4, 12, 8, 12, 24, 8, + 4, 108, 84, 58, 34, 14, 11, 35, 57, 85, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 18 */ + + 112, 14, 27, 112, 14, 27, 63, 29, 34, 22, + 10, 10, 72, 80, 106, 28, 25, 2, 0, 22, + 0, 2, 9, 11, 41, 20, 62, 65, 89, 101, + 23, 35, 9, 0, 22, 0, 31, 23, 18, 16, + 3, 21, 39, 4, 21, 33, 53, 10, 17, 37, + 8, 19, 27, 49, 8, 9, 11, 21, 1, 8, + 44, 0, 0, 0, 17, 53, 67, 1, 16, 3, + 36, 19, 57, 63, 5, 17, 44, 26, 15, 3, + 43, 7, 35, 29, 61, 41, 39, 49, 59, 42, + 0, 16, 37, 13, 35, 29, 73, 8, 5, 3, + 23, 34, 9, 1, 31, 37, 15, 19, 11, 14, + 16, 1, 24, 30, 25, 1, 3, 7, 8, 19, + 32, 9, 2, 32, 36, 52, 40, 26, 35, 3, + 13, 6, 8, 57, 0, 16, 10, 10, 14, 14, + 30, 18, 6, 4, 2, 4, 18, 16, 37, 7, + 2, 19, 12, 12, 28, 14, 6, 18, 12, 8, + 12, 28, 10, 25, 6, 31, 46, 42, 32, 28, + 34, 34, 20, 32, 40, 1, 22, 18, 12, 6, + 7, 12, 0, 6, 10, 4, 12, 24, 2, 33, + 2, 21, 2, 16, 37, 56, 74, 58, 56, 54, + 52, 48, 48, 36, 20, 14, 8, 4, 3, 51, + 9, 1, 27, 30, 22, 16, 20, 10, 0, 0, + 0, 23, 11, 21, 57, 45, 67, 11, 44, 36, + 22, 6, 14, 3, 13, 19, 37, 5, 72, 42, + 30, 16, 22, 0, 11, 21, 43, 8, 76, 58, + 48, 36, 30, 10, 1, 13, 29, 13, 78, 54, + 28, 6, 16, 9, 27, 41, 10, 98, 76, 60, + 44, 42, 10, 5, 17, 25, 124, 55, 49, 23, + 53, 51, 37, 43, 37, 37, 39, 37, 37, 41, + 35, 43, 37, 11, 33, 27, 23, 19, 9, 7, + 9, 5, 0, 5, 11, 19, 22, 20, 28, 14, + 4, 24, 20, 12, 16, 16, 14, 10, 0, 7, + 13, 5, 14, 25, 16, 44, 14, 14, 28, 28, + 30, 18, 24, 24, 36, 1, 9, 17, 74, 74, + 84, 64, 62, 80, 84, 84, 92, 86, 98, 96, + 80, 66, 22, 74, 98, 70, 66, 68, 60, 42, + 34, 28, 12, 5, 11, 23, 43, 66, 64, 66, + 54, 
34, 40, 28, 14, 14, 6, 1, 19, 17, + 39, 45, 11, 11, 41, 32, 36, 22, 2, 18, + 16, 3, 8, 6, 7, 29, 27, 43, 69, 20, + 0, 21, 21, 0, 0, 8, 12, 14, 24, 16, + 22, 26, 48, 30, 30, 46, 46, 110, 86, 70, + 54, 38, 20, 1, 27, 71, 8, 72, 58, 54, + 42, 42, 20, 14, 12, 7, 31, 21, 3, 19, + 6, 22, 11, 15, 6, 14, 8, 12, 26, 8, + 4, 106, 80, 54, 28, 8, 19, 43, 63, 91, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 19 */ + + 110, 14, 27, 110, 14, 27, 59, 25, 36, 22, + 8, 6, 68, 78, 106, 28, 21, 2, 2, 24, + 0, 0, 9, 13, 43, 20, 58, 67, 91, 101, + 17, 33, 9, 2, 24, 0, 29, 21, 20, 16, + 3, 19, 37, 2, 23, 33, 53, 10, 17, 37, + 8, 19, 27, 49, 10, 9, 11, 21, 0, 8, + 44, 0, 0, 0, 17, 53, 67, 0, 14, 3, + 36, 19, 57, 59, 1, 15, 48, 30, 13, 1, + 39, 5, 33, 27, 57, 39, 39, 47, 57, 42, + 0, 16, 37, 13, 35, 27, 69, 8, 7, 5, + 25, 34, 9, 1, 29, 37, 13, 19, 9, 14, + 16, 1, 24, 28, 25, 1, 3, 7, 8, 19, + 30, 9, 2, 30, 34, 50, 38, 24, 35, 1, + 13, 4, 8, 55, 0, 16, 10, 10, 14, 14, + 32, 18, 6, 4, 4, 4, 16, 16, 39, 7, + 2, 19, 10, 10, 28, 14, 4, 18, 12, 8, + 8, 28, 8, 25, 6, 31, 44, 42, 32, 28, + 34, 34, 20, 32, 40, 1, 20, 18, 12, 6, + 7, 10, 1, 4, 8, 2, 10, 22, 2, 33, + 0, 21, 0, 12, 37, 54, 70, 54, 52, 50, + 48, 44, 44, 32, 16, 12, 6, 0, 7, 53, + 9, 1, 27, 28, 20, 14, 16, 8, 3, 1, + 1, 25, 11, 21, 57, 45, 67, 9, 44, 36, + 22, 6, 14, 3, 13, 17, 35, 5, 72, 42, + 30, 16, 24, 2, 9, 19, 39, 8, 76, 58, + 48, 36, 32, 10, 1, 13, 29, 13, 78, 54, + 26, 6, 16, 9, 27, 39, 10, 98, 76, 58, + 42, 42, 10, 5, 15, 23, 124, 53, 47, 23, + 51, 49, 35, 39, 35, 35, 37, 33, 35, 41, + 33, 43, 37, 7, 33, 27, 23, 19, 9, 9, + 9, 5, 1, 7, 13, 21, 22, 18, 28, 12, + 4, 24, 18, 12, 14, 16, 12, 10, 1, 7, + 15, 5, 14, 27, 16, 44, 12, 14, 26, 28, + 30, 16, 22, 22, 34, 3, 9, 19, 72, 72, + 84, 62, 60, 76, 80, 80, 88, 80, 94, 92, + 74, 60, 20, 68, 92, 64, 58, 62, 54, 38, + 32, 26, 12, 3, 11, 21, 39, 62, 60, 62, + 50, 30, 36, 26, 10, 10, 4, 3, 21, 21, + 41, 47, 11, 13, 43, 30, 32, 20, 1, 14, + 12, 7, 6, 2, 
11, 31, 31, 47, 71, 18, + 1, 23, 21, 2, 0, 8, 14, 14, 26, 18, + 24, 28, 50, 32, 32, 50, 48, 110, 84, 66, + 48, 34, 14, 7, 33, 75, 10, 74, 60, 56, + 42, 44, 22, 14, 14, 5, 31, 19, 3, 17, + 8, 24, 11, 15, 6, 14, 8, 14, 28, 8, + 4, 104, 76, 48, 22, 2, 27, 51, 71, 95, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 20 */ + + 106, 14, 27, 106, 14, 27, 57, 23, 36, 22, + 8, 4, 66, 74, 106, 28, 15, 0, 4, 24, + 0, 1, 9, 15, 45, 18, 54, 71, 93, 103, + 11, 31, 9, 4, 24, 0, 29, 19, 20, 14, + 3, 19, 37, 2, 23, 31, 53, 10, 17, 35, + 6, 21, 27, 49, 10, 7, 11, 19, 0, 8, + 44, 0, 0, 0, 15, 55, 67, 0, 12, 3, + 36, 19, 57, 57, 0, 11, 50, 32, 11, 0, + 37, 1, 31, 25, 53, 39, 37, 47, 55, 42, + 0, 16, 35, 13, 33, 27, 67, 6, 7, 5, + 25, 34, 9, 1, 29, 37, 13, 19, 9, 14, + 14, 1, 24, 28, 25, 1, 3, 7, 8, 19, + 30, 11, 2, 30, 32, 48, 36, 22, 33, 1, + 13, 2, 6, 55, 1, 16, 8, 10, 14, 14, + 34, 20, 6, 2, 4, 2, 14, 14, 41, 7, + 2, 21, 10, 8, 26, 12, 4, 20, 12, 8, + 6, 26, 6, 25, 6, 33, 44, 42, 32, 28, + 34, 34, 20, 32, 38, 1, 20, 18, 12, 6, + 9, 10, 1, 4, 8, 2, 10, 20, 0, 33, + 0, 21, 1, 10, 37, 50, 66, 50, 48, 46, + 44, 40, 40, 28, 12, 8, 2, 5, 11, 55, + 9, 3, 29, 26, 18, 12, 14, 4, 5, 3, + 5, 25, 13, 23, 57, 45, 65, 9, 44, 36, + 22, 6, 14, 3, 13, 17, 33, 3, 72, 42, + 30, 16, 24, 2, 9, 19, 35, 8, 76, 58, + 48, 36, 32, 10, 1, 13, 27, 13, 78, 52, + 26, 6, 16, 9, 27, 39, 10, 96, 74, 56, + 40, 42, 10, 5, 15, 23, 124, 51, 45, 21, + 47, 47, 33, 37, 31, 33, 33, 31, 31, 39, + 33, 41, 35, 5, 31, 29, 23, 17, 9, 9, + 11, 7, 3, 9, 15, 23, 20, 18, 28, 12, + 2, 22, 18, 12, 12, 14, 12, 8, 3, 9, + 15, 5, 14, 27, 14, 42, 10, 14, 24, 26, + 30, 14, 20, 22, 34, 5, 11, 23, 70, 72, + 82, 60, 56, 74, 76, 76, 84, 76, 88, 86, + 68, 56, 16, 62, 84, 58, 50, 58, 50, 34, + 30, 22, 12, 1, 9, 19, 35, 60, 56, 58, + 48, 26, 32, 22, 6, 8, 0, 7, 25, 23, + 43, 49, 13, 13, 45, 26, 30, 16, 3, 10, + 8, 9, 2, 1, 13, 33, 33, 49, 71, 16, + 1, 25, 21, 2, 2, 10, 14, 16, 28, 18, + 26, 30, 52, 34, 34, 52, 50, 
108, 80, 62, + 44, 28, 8, 13, 39, 81, 10, 74, 60, 56, + 44, 44, 22, 16, 14, 5, 29, 19, 1, 17, + 10, 26, 11, 13, 8, 16, 8, 14, 30, 8, + 4, 100, 72, 44, 16, 3, 35, 59, 77, 101, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 21 */ + + 104, 14, 27, 104, 14, 27, 53, 19, 36, 22, + 6, 0, 62, 72, 106, 28, 11, 0, 6, 26, + 0, 1, 7, 17, 47, 18, 50, 75, 95, 103, + 5, 27, 9, 6, 26, 0, 29, 17, 20, 14, + 3, 19, 35, 2, 25, 31, 53, 10, 17, 33, + 6, 21, 27, 49, 12, 7, 9, 19, 2, 8, + 44, 0, 0, 0, 15, 55, 67, 2, 10, 3, + 36, 17, 57, 53, 4, 9, 54, 36, 7, 2, + 35, 0, 29, 23, 49, 37, 37, 45, 53, 42, + 0, 16, 35, 13, 33, 25, 63, 6, 7, 5, + 25, 34, 9, 0, 27, 37, 13, 19, 7, 14, + 14, 1, 24, 28, 23, 1, 1, 7, 8, 19, + 28, 11, 2, 28, 30, 48, 34, 20, 31, 0, + 13, 0, 6, 53, 1, 16, 8, 10, 14, 14, + 36, 20, 6, 2, 4, 2, 12, 14, 43, 7, + 2, 21, 8, 6, 26, 12, 2, 20, 12, 8, + 4, 26, 4, 25, 6, 33, 42, 42, 32, 28, + 34, 34, 20, 32, 38, 1, 20, 18, 12, 6, + 9, 10, 1, 2, 8, 2, 8, 18, 0, 33, + 1, 21, 3, 8, 37, 48, 62, 48, 46, 42, + 40, 36, 36, 24, 8, 4, 1, 9, 13, 57, + 9, 3, 29, 24, 16, 10, 12, 2, 9, 5, + 7, 27, 13, 23, 57, 45, 65, 9, 44, 36, + 22, 6, 14, 1, 11, 15, 31, 3, 72, 42, + 30, 16, 24, 4, 7, 17, 31, 8, 76, 58, + 48, 36, 32, 10, 1, 13, 25, 13, 80, 52, + 24, 6, 16, 9, 27, 37, 10, 96, 72, 54, + 38, 42, 10, 5, 15, 21, 124, 49, 43, 19, + 45, 45, 31, 35, 29, 31, 31, 27, 27, 37, + 31, 41, 35, 3, 31, 29, 21, 15, 9, 11, + 11, 7, 3, 11, 17, 25, 18, 16, 28, 10, + 2, 22, 18, 12, 10, 12, 12, 8, 5, 9, + 15, 5, 14, 27, 12, 42, 10, 14, 22, 24, + 30, 12, 18, 20, 32, 7, 13, 27, 68, 70, + 82, 58, 54, 70, 72, 72, 80, 70, 82, 82, + 62, 52, 14, 56, 78, 52, 42, 52, 44, 30, + 28, 20, 12, 0, 9, 17, 31, 56, 54, 54, + 44, 22, 28, 20, 2, 6, 3, 9, 29, 27, + 45, 51, 13, 13, 47, 24, 26, 12, 7, 8, + 6, 13, 0, 3, 17, 35, 35, 51, 73, 16, + 3, 27, 19, 4, 2, 10, 16, 18, 30, 20, + 28, 32, 56, 36, 36, 54, 54, 108, 76, 58, + 38, 24, 2, 19, 45, 85, 10, 76, 62, 58, + 44, 46, 22, 16, 16, 3, 29, 19, 0, 15, + 
12, 28, 9, 11, 8, 16, 8, 16, 32, 8, + 4, 98, 68, 38, 10, 9, 41, 67, 85, 107, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 22 */ + + 102, 14, 29, 102, 14, 29, 49, 17, 38, 22, + 6, 1, 60, 70, 106, 28, 7, 0, 6, 28, + 0, 3, 7, 19, 49, 16, 48, 77, 97, 105, + 0, 25, 9, 6, 28, 0, 27, 15, 22, 12, + 1, 17, 33, 0, 25, 31, 53, 10, 15, 33, + 6, 21, 25, 49, 12, 7, 9, 19, 2, 8, + 44, 0, 0, 0, 13, 55, 67, 2, 10, 5, + 36, 17, 55, 49, 8, 5, 56, 40, 5, 4, + 31, 2, 27, 23, 47, 37, 37, 45, 53, 44, + 0, 16, 33, 11, 31, 25, 61, 6, 9, 7, + 27, 34, 9, 0, 27, 37, 11, 19, 7, 14, + 14, 0, 24, 26, 23, 1, 1, 7, 8, 19, + 26, 11, 2, 26, 28, 46, 34, 20, 31, 0, + 13, 0, 4, 53, 3, 16, 8, 10, 14, 14, + 36, 20, 6, 2, 6, 2, 10, 14, 43, 9, + 2, 23, 8, 4, 26, 12, 0, 20, 12, 8, + 0, 26, 2, 25, 6, 35, 40, 42, 30, 28, + 34, 34, 20, 32, 38, 1, 18, 18, 10, 6, + 9, 8, 3, 2, 6, 0, 6, 16, 1, 33, + 1, 19, 5, 4, 37, 44, 58, 44, 42, 38, + 36, 32, 32, 20, 4, 2, 3, 13, 17, 59, + 9, 3, 31, 22, 14, 8, 8, 1, 11, 7, + 9, 29, 15, 25, 57, 45, 63, 7, 44, 36, + 22, 8, 14, 1, 11, 15, 29, 1, 70, 42, + 30, 16, 26, 4, 5, 15, 27, 8, 78, 58, + 48, 36, 34, 10, 1, 11, 25, 13, 80, 52, + 24, 6, 16, 7, 25, 35, 10, 94, 72, 52, + 38, 42, 10, 3, 13, 21, 124, 49, 41, 19, + 43, 43, 29, 31, 27, 29, 29, 25, 25, 37, + 29, 41, 33, 0, 31, 31, 21, 15, 9, 11, + 13, 7, 5, 13, 19, 27, 18, 16, 28, 10, + 0, 20, 16, 12, 10, 12, 10, 8, 5, 9, + 17, 5, 14, 29, 12, 40, 8, 12, 20, 24, + 32, 12, 16, 20, 32, 7, 13, 29, 66, 68, + 80, 56, 52, 68, 68, 68, 76, 66, 78, 76, + 56, 46, 12, 52, 72, 46, 34, 46, 40, 26, + 26, 18, 12, 2, 9, 13, 25, 52, 50, 50, + 40, 18, 24, 16, 1, 2, 5, 11, 31, 29, + 47, 53, 15, 15, 49, 22, 24, 10, 11, 4, + 2, 15, 3, 7, 19, 39, 39, 55, 75, 14, + 5, 29, 19, 4, 2, 12, 16, 18, 34, 22, + 30, 34, 58, 40, 38, 58, 56, 106, 74, 54, + 34, 18, 1, 25, 49, 89, 12, 76, 62, 58, + 44, 48, 24, 16, 16, 3, 29, 17, 0, 15, + 14, 30, 9, 11, 10, 18, 8, 16, 34, 8, + 4, 96, 64, 34, 4, 17, 49, 73, 91, 111, + }, + + { + /* 
Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 23 */ + + 100, 14, 29, 100, 14, 29, 45, 13, 38, 22, + 4, 5, 56, 66, 106, 28, 1, 1, 8, 28, + 0, 5, 7, 21, 51, 16, 44, 81, 99, 105, + 6, 23, 9, 8, 28, 0, 27, 13, 22, 12, + 1, 17, 33, 0, 27, 29, 53, 10, 15, 31, + 4, 21, 25, 49, 14, 5, 9, 17, 4, 8, + 44, 0, 0, 0, 13, 55, 67, 4, 8, 5, + 36, 17, 55, 47, 12, 3, 60, 44, 3, 6, + 29, 6, 25, 21, 43, 35, 35, 43, 51, 44, + 0, 16, 33, 11, 31, 23, 57, 4, 9, 7, + 27, 34, 9, 0, 25, 37, 11, 19, 5, 14, + 12, 0, 24, 26, 23, 1, 1, 7, 8, 19, + 26, 11, 2, 26, 26, 44, 32, 18, 29, 2, + 13, 1, 4, 51, 3, 16, 6, 10, 14, 14, + 38, 22, 6, 2, 6, 2, 8, 14, 45, 9, + 2, 23, 6, 2, 24, 12, 0, 22, 12, 8, + 1, 24, 0, 25, 6, 35, 40, 42, 30, 28, + 34, 34, 20, 32, 38, 1, 18, 18, 10, 6, + 9, 8, 3, 0, 6, 0, 6, 14, 1, 33, + 3, 19, 7, 2, 37, 42, 54, 40, 38, 34, + 32, 28, 28, 16, 0, 1, 7, 19, 21, 61, + 9, 5, 31, 20, 12, 6, 6, 3, 15, 9, + 11, 29, 15, 25, 57, 45, 63, 7, 44, 36, + 22, 8, 14, 1, 11, 13, 27, 1, 70, 42, + 30, 16, 26, 6, 3, 15, 23, 8, 78, 58, + 48, 36, 34, 10, 1, 11, 23, 13, 80, 50, + 22, 6, 16, 7, 25, 35, 10, 94, 70, 50, + 36, 42, 10, 3, 13, 19, 124, 47, 39, 17, + 39, 41, 27, 29, 23, 27, 25, 21, 21, 35, + 27, 39, 33, 2, 29, 31, 21, 13, 9, 13, + 13, 9, 7, 15, 21, 29, 16, 14, 28, 8, + 0, 20, 16, 12, 8, 10, 10, 6, 7, 9, + 17, 5, 14, 29, 10, 40, 6, 12, 18, 22, + 32, 10, 14, 18, 30, 9, 15, 33, 64, 68, + 80, 54, 48, 64, 64, 64, 72, 60, 72, 72, + 50, 42, 8, 46, 64, 40, 26, 42, 34, 22, + 24, 16, 12, 4, 7, 11, 21, 50, 46, 46, + 38, 14, 20, 14, 5, 0, 9, 15, 35, 33, + 49, 55, 15, 15, 51, 18, 20, 6, 13, 0, + 1, 19, 5, 11, 23, 41, 41, 57, 75, 12, + 5, 31, 19, 6, 4, 12, 18, 20, 36, 24, + 32, 36, 60, 42, 40, 60, 58, 106, 70, 50, + 28, 14, 7, 31, 55, 93, 12, 78, 64, 60, + 46, 50, 24, 18, 18, 1, 27, 17, 2, 13, + 16, 32, 9, 9, 10, 18, 8, 18, 36, 8, + 4, 92, 60, 28, 1, 23, 57, 81, 99, 117, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 24 */ + + 96, 12, 29, 96, 12, 29, 43, 
11, 38, 22, + 4, 7, 54, 64, 106, 28, 2, 1, 10, 30, + 0, 7, 7, 23, 55, 14, 40, 85, 101, 107, + 10, 21, 9, 10, 30, 0, 27, 11, 22, 10, + 1, 17, 31, 1, 27, 29, 55, 10, 15, 31, + 4, 23, 25, 49, 14, 5, 9, 17, 4, 8, + 44, 0, 0, 0, 11, 57, 67, 4, 6, 5, + 36, 17, 55, 43, 14, 0, 62, 46, 1, 8, + 27, 8, 23, 19, 39, 35, 35, 43, 49, 44, + 0, 16, 31, 11, 29, 23, 55, 4, 11, 9, + 29, 34, 9, 0, 25, 37, 11, 19, 5, 12, + 12, 0, 24, 24, 23, 1, 1, 7, 6, 19, + 24, 13, 2, 24, 22, 42, 30, 16, 29, 2, + 13, 3, 2, 51, 5, 14, 6, 10, 14, 14, + 40, 22, 6, 0, 6, 0, 6, 12, 47, 9, + 2, 25, 6, 0, 24, 10, 1, 22, 12, 8, + 5, 24, 1, 25, 6, 37, 38, 40, 30, 28, + 34, 32, 18, 32, 36, 3, 16, 18, 10, 4, + 11, 6, 5, 0, 4, 1, 4, 12, 3, 33, + 3, 19, 9, 1, 37, 38, 50, 36, 34, 30, + 26, 24, 24, 12, 3, 5, 11, 23, 25, 63, + 9, 5, 33, 18, 8, 2, 2, 7, 17, 13, + 15, 31, 17, 27, 57, 45, 61, 7, 44, 36, + 22, 8, 14, 1, 11, 13, 25, 0, 70, 42, + 30, 16, 26, 6, 3, 13, 21, 8, 78, 58, + 48, 34, 34, 10, 1, 11, 23, 13, 80, 50, + 22, 6, 16, 7, 25, 33, 10, 92, 68, 48, + 34, 40, 10, 3, 13, 19, 124, 45, 37, 17, + 37, 39, 27, 27, 21, 25, 23, 19, 19, 35, + 27, 39, 31, 4, 29, 33, 21, 13, 9, 13, + 15, 9, 9, 17, 25, 31, 14, 14, 28, 8, + 1, 18, 14, 10, 6, 8, 8, 6, 9, 11, + 19, 5, 14, 31, 8, 38, 4, 12, 16, 20, + 32, 8, 12, 18, 30, 11, 17, 37, 62, 66, + 78, 50, 46, 62, 60, 60, 66, 56, 66, 66, + 44, 36, 6, 40, 58, 34, 18, 36, 30, 18, + 20, 12, 12, 4, 7, 9, 17, 46, 42, 42, + 34, 8, 16, 10, 9, 3, 13, 17, 39, 35, + 51, 57, 17, 17, 53, 16, 18, 2, 17, 3, + 5, 21, 9, 15, 25, 43, 45, 61, 77, 10, + 7, 33, 19, 6, 4, 14, 18, 20, 38, 24, + 34, 38, 62, 44, 42, 62, 60, 104, 66, 46, + 24, 8, 13, 37, 61, 99, 12, 78, 64, 60, + 46, 50, 24, 18, 18, 1, 27, 17, 2, 13, + 18, 34, 9, 9, 12, 20, 8, 18, 36, 8, + 2, 90, 56, 24, 7, 29, 65, 89, 105, 123, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 25 */ + + 94, 12, 29, 94, 12, 29, 39, 9, 40, 22, + 4, 9, 52, 62, 106, 28, 6, 1, 12, 32, + 0, 7, 5, 25, 57, 12, 36, 87, 103, 109, + 16, 
17, 9, 12, 32, 0, 25, 9, 24, 8, + 1, 15, 29, 1, 27, 29, 55, 10, 15, 29, + 4, 23, 25, 49, 14, 5, 7, 17, 4, 8, + 44, 0, 0, 0, 9, 57, 67, 4, 4, 5, + 36, 15, 55, 39, 18, 4, 64, 50, 2, 10, + 23, 10, 19, 17, 35, 35, 35, 43, 47, 44, + 0, 16, 29, 11, 27, 21, 53, 4, 11, 9, + 29, 34, 9, 2, 23, 35, 9, 19, 5, 12, + 12, 0, 24, 24, 21, 1, 0, 7, 6, 17, + 22, 13, 2, 22, 20, 42, 28, 14, 27, 2, + 11, 5, 0, 51, 7, 14, 6, 12, 14, 14, + 42, 22, 6, 0, 8, 0, 6, 12, 49, 9, + 2, 27, 6, 1, 24, 10, 3, 22, 12, 8, + 7, 24, 1, 25, 6, 39, 36, 40, 30, 28, + 34, 32, 18, 32, 36, 3, 16, 18, 10, 4, + 11, 6, 5, 0, 4, 1, 2, 12, 3, 33, + 3, 19, 11, 3, 37, 34, 48, 34, 32, 26, + 22, 22, 22, 8, 7, 7, 13, 27, 27, 65, + 9, 5, 35, 16, 6, 0, 0, 9, 19, 15, + 17, 33, 19, 29, 57, 43, 59, 5, 46, 36, + 22, 8, 16, 0, 9, 11, 23, 2, 70, 42, + 30, 16, 28, 6, 1, 11, 17, 8, 78, 58, + 48, 34, 36, 10, 1, 11, 21, 13, 82, 50, + 22, 6, 18, 7, 25, 31, 10, 92, 68, 48, + 32, 40, 10, 3, 11, 17, 124, 43, 35, 15, + 35, 35, 25, 23, 19, 21, 21, 17, 15, 33, + 25, 39, 29, 8, 29, 35, 19, 11, 9, 13, + 17, 9, 9, 19, 27, 33, 14, 14, 28, 8, + 1, 16, 14, 10, 4, 8, 8, 6, 11, 11, + 19, 3, 16, 31, 8, 36, 4, 12, 16, 20, + 32, 6, 10, 18, 30, 13, 17, 39, 62, 64, + 76, 48, 44, 60, 56, 56, 62, 52, 62, 60, + 40, 32, 4, 34, 52, 28, 10, 30, 26, 16, + 18, 10, 12, 6, 7, 7, 13, 42, 40, 38, + 30, 4, 14, 8, 13, 5, 15, 19, 41, 37, + 53, 57, 17, 17, 55, 14, 16, 0, 21, 5, + 7, 23, 11, 17, 27, 45, 47, 63, 79, 10, + 9, 33, 17, 6, 4, 16, 20, 22, 40, 26, + 36, 40, 66, 46, 44, 66, 64, 102, 64, 42, + 20, 2, 19, 43, 67, 103, 14, 78, 64, 62, + 46, 52, 26, 18, 20, 0, 27, 15, 4, 13, + 20, 38, 7, 7, 14, 22, 8, 20, 38, 8, + 2, 88, 52, 20, 11, 35, 71, 97, 111, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 26 */ + + 92, 12, 29, 92, 12, 29, 35, 5, 40, 22, + 2, 13, 48, 58, 106, 28, 12, 3, 14, 32, + 0, 9, 5, 27, 59, 12, 32, 91, 105, 109, + 22, 15, 9, 14, 32, 0, 25, 7, 24, 8, + 1, 15, 29, 1, 29, 27, 55, 10, 15, 27, + 2, 23, 25, 49, 16, 3, 
7, 15, 6, 8, + 44, 0, 0, 0, 9, 57, 67, 6, 2, 5, + 36, 15, 55, 37, 22, 6, 68, 54, 4, 12, + 21, 14, 17, 15, 31, 33, 33, 41, 45, 44, + 0, 16, 29, 11, 27, 21, 49, 2, 11, 9, + 29, 34, 9, 2, 23, 35, 9, 19, 3, 12, + 10, 0, 24, 24, 21, 1, 0, 7, 6, 17, + 22, 13, 2, 22, 18, 40, 26, 12, 25, 4, + 11, 7, 0, 49, 7, 14, 4, 12, 14, 14, + 44, 24, 6, 0, 8, 0, 4, 12, 51, 9, + 2, 27, 4, 3, 22, 10, 3, 24, 12, 8, + 9, 22, 3, 25, 6, 39, 36, 40, 30, 28, + 34, 32, 18, 32, 36, 3, 16, 18, 10, 4, + 11, 6, 5, 1, 4, 1, 2, 10, 5, 33, + 5, 19, 13, 5, 37, 32, 44, 30, 28, 22, + 18, 18, 18, 4, 11, 11, 17, 33, 31, 67, + 9, 7, 35, 14, 4, 1, 1, 13, 23, 17, + 19, 33, 19, 29, 57, 43, 59, 5, 46, 36, + 22, 8, 16, 0, 9, 11, 21, 2, 70, 42, + 30, 16, 28, 8, 0, 11, 13, 8, 78, 58, + 48, 34, 36, 10, 1, 11, 19, 13, 82, 48, + 20, 6, 18, 7, 25, 31, 10, 90, 66, 46, + 30, 40, 10, 3, 11, 17, 124, 41, 33, 13, + 31, 33, 23, 21, 15, 19, 17, 13, 11, 31, + 23, 37, 29, 10, 27, 35, 19, 9, 9, 15, + 17, 11, 11, 21, 29, 35, 12, 12, 28, 6, + 3, 16, 14, 10, 2, 6, 8, 4, 13, 11, + 19, 3, 16, 31, 6, 36, 2, 12, 14, 18, + 32, 4, 8, 16, 28, 15, 19, 43, 60, 64, + 76, 46, 40, 56, 52, 52, 58, 46, 56, 56, + 34, 28, 0, 28, 44, 22, 2, 26, 20, 12, + 16, 8, 12, 8, 5, 5, 9, 40, 36, 34, + 28, 0, 10, 4, 17, 7, 19, 23, 45, 41, + 55, 59, 19, 17, 57, 10, 12, 3, 23, 9, + 11, 27, 15, 21, 31, 47, 49, 65, 79, 8, + 9, 35, 17, 8, 6, 16, 20, 24, 42, 28, + 38, 42, 68, 48, 46, 68, 66, 102, 60, 38, + 14, 1, 25, 49, 73, 107, 14, 80, 66, 62, + 48, 54, 26, 20, 20, 0, 25, 15, 6, 11, + 22, 40, 7, 5, 14, 22, 8, 20, 40, 8, + 2, 84, 48, 14, 17, 41, 79, 105, 119, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 27 */ + + 90, 12, 31, 90, 12, 31, 31, 3, 42, 22, + 2, 15, 46, 56, 106, 28, 16, 3, 14, 34, + 0, 11, 5, 29, 61, 10, 30, 93, 107, 111, + 28, 13, 9, 14, 34, 0, 23, 5, 26, 6, + 0, 13, 27, 3, 29, 27, 55, 10, 13, 27, + 2, 23, 23, 49, 16, 3, 7, 15, 6, 8, + 44, 0, 0, 0, 7, 57, 67, 6, 2, 7, + 36, 15, 53, 33, 26, 10, 70, 58, 6, 14, + 17, 
16, 15, 15, 29, 33, 33, 41, 45, 46, + 0, 16, 27, 9, 25, 19, 47, 2, 13, 11, + 31, 34, 9, 2, 21, 35, 7, 19, 3, 12, + 10, 2, 24, 22, 21, 1, 0, 7, 6, 17, + 20, 13, 2, 20, 16, 38, 26, 12, 25, 4, + 11, 7, 1, 49, 9, 14, 4, 12, 14, 14, + 44, 24, 6, 0, 10, 0, 2, 12, 51, 11, + 2, 29, 4, 5, 22, 10, 5, 24, 12, 8, + 13, 22, 5, 25, 6, 41, 34, 40, 28, 28, + 34, 32, 18, 32, 36, 3, 14, 18, 8, 4, + 11, 4, 7, 1, 2, 3, 0, 8, 5, 33, + 5, 17, 15, 9, 37, 28, 40, 26, 24, 18, + 14, 14, 14, 0, 15, 13, 19, 37, 35, 69, + 9, 7, 37, 12, 2, 3, 5, 15, 25, 19, + 21, 35, 21, 31, 57, 43, 57, 3, 46, 36, + 22, 10, 16, 0, 9, 9, 19, 4, 68, 42, + 30, 16, 30, 8, 2, 9, 9, 8, 80, 58, + 48, 34, 38, 10, 1, 9, 19, 13, 82, 48, + 20, 6, 18, 5, 23, 29, 10, 90, 66, 44, + 30, 40, 10, 1, 9, 15, 124, 41, 31, 13, + 29, 31, 21, 17, 13, 17, 15, 11, 9, 31, + 21, 37, 27, 14, 27, 37, 19, 9, 9, 15, + 19, 11, 13, 23, 31, 37, 12, 12, 28, 6, + 3, 14, 12, 10, 2, 6, 6, 4, 13, 11, + 21, 3, 16, 33, 6, 34, 0, 10, 12, 18, + 34, 4, 6, 16, 28, 15, 19, 45, 58, 62, + 74, 44, 38, 54, 48, 48, 54, 42, 52, 50, + 28, 22, 1, 24, 38, 16, 5, 20, 16, 8, + 14, 6, 12, 10, 5, 1, 3, 36, 32, 30, + 24, 3, 6, 2, 21, 11, 21, 25, 47, 43, + 57, 61, 19, 19, 59, 8, 10, 5, 27, 13, + 15, 29, 17, 25, 33, 51, 53, 69, 81, 6, + 11, 37, 17, 8, 6, 18, 22, 24, 46, 30, + 40, 44, 70, 52, 48, 72, 68, 100, 58, 34, + 10, 7, 29, 55, 77, 111, 16, 80, 66, 64, + 48, 56, 28, 20, 22, 2, 25, 13, 6, 11, + 24, 42, 7, 5, 16, 24, 8, 22, 42, 8, + 2, 82, 44, 10, 23, 49, 87, 111, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 28 */ + + 86, 12, 31, 86, 12, 31, 29, 0, 42, 22, + 0, 19, 42, 54, 106, 28, 20, 3, 16, 36, + 0, 13, 5, 31, 63, 10, 26, 97, 109, 111, + 34, 11, 9, 16, 36, 0, 23, 3, 26, 6, + 0, 13, 25, 3, 31, 27, 55, 10, 13, 25, + 2, 25, 23, 49, 18, 3, 7, 15, 8, 8, + 44, 0, 0, 0, 7, 59, 67, 8, 0, 7, + 36, 15, 53, 29, 28, 12, 74, 60, 8, 16, + 15, 18, 13, 13, 25, 31, 33, 39, 43, 46, + 0, 16, 27, 9, 25, 19, 43, 2, 13, 11, + 31, 34, 9, 2, 21, 
35, 7, 19, 1, 12, + 10, 2, 24, 22, 21, 1, 0, 7, 6, 17, + 18, 15, 2, 18, 14, 36, 24, 10, 23, 6, + 11, 9, 1, 47, 9, 14, 4, 12, 14, 14, + 46, 24, 6, 1, 10, 1, 0, 10, 53, 11, + 2, 29, 2, 7, 22, 8, 7, 24, 12, 8, + 15, 22, 7, 25, 6, 41, 32, 40, 28, 28, + 34, 32, 18, 32, 34, 3, 14, 18, 8, 4, + 13, 4, 7, 3, 2, 3, 1, 6, 7, 33, + 7, 17, 17, 11, 37, 26, 36, 22, 20, 14, + 10, 10, 10, 3, 19, 17, 23, 41, 39, 71, + 9, 7, 37, 10, 0, 5, 7, 19, 29, 21, + 25, 37, 21, 31, 57, 43, 57, 3, 46, 36, + 22, 10, 16, 0, 9, 9, 17, 4, 68, 42, + 30, 16, 30, 10, 2, 7, 5, 8, 80, 58, + 48, 34, 38, 10, 1, 9, 17, 13, 82, 48, + 18, 6, 18, 5, 23, 27, 10, 88, 64, 42, + 28, 40, 10, 1, 9, 15, 124, 39, 29, 11, + 27, 29, 19, 15, 11, 15, 13, 7, 5, 29, + 21, 37, 27, 16, 27, 37, 19, 7, 9, 17, + 19, 11, 15, 25, 33, 39, 10, 10, 28, 4, + 5, 14, 12, 10, 0, 4, 6, 4, 15, 13, + 21, 3, 16, 33, 4, 34, 1, 10, 10, 16, + 34, 2, 4, 14, 26, 17, 21, 49, 56, 60, + 74, 42, 36, 50, 44, 44, 50, 36, 46, 46, + 22, 18, 3, 18, 32, 10, 13, 14, 10, 4, + 12, 2, 12, 12, 5, 0, 0, 32, 28, 26, + 20, 7, 2, 1, 25, 13, 25, 27, 51, 47, + 59, 63, 21, 19, 61, 6, 6, 9, 31, 17, + 19, 33, 21, 29, 37, 53, 55, 71, 83, 4, + 13, 39, 17, 10, 6, 18, 22, 26, 48, 30, + 42, 46, 72, 54, 50, 74, 70, 100, 54, 30, + 4, 11, 35, 61, 83, 117, 16, 82, 68, 64, + 48, 56, 28, 20, 22, 2, 25, 13, 8, 9, + 26, 44, 7, 3, 16, 24, 8, 22, 44, 8, + 2, 80, 40, 4, 29, 55, 95, 119, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 29 */ + + 84, 12, 31, 84, 12, 31, 25, 2, 42, 22, + 0, 21, 40, 50, 106, 28, 26, 5, 18, 36, + 0, 13, 3, 33, 65, 8, 22, 101, 111, 113, + 40, 7, 9, 18, 36, 0, 23, 1, 26, 4, + 0, 13, 25, 3, 31, 25, 55, 10, 13, 23, + 0, 25, 23, 49, 18, 1, 5, 13, 8, 8, + 44, 0, 0, 0, 5, 59, 67, 8, 1, 7, + 36, 13, 53, 27, 32, 16, 76, 64, 12, 18, + 13, 22, 11, 11, 21, 31, 31, 39, 41, 46, + 0, 16, 25, 9, 23, 17, 41, 0, 13, 11, + 31, 34, 9, 4, 19, 35, 7, 19, 1, 12, + 8, 2, 24, 22, 19, 1, 2, 7, 6, 17, + 18, 15, 2, 18, 12, 36, 22, 8, 21, 6, + 
11, 11, 3, 47, 11, 14, 2, 12, 14, 14, + 48, 26, 6, 1, 10, 1, 1, 10, 55, 11, + 2, 31, 2, 9, 20, 8, 7, 26, 12, 8, + 17, 20, 9, 25, 6, 43, 32, 40, 28, 28, + 34, 32, 18, 32, 34, 3, 14, 18, 8, 4, + 13, 4, 7, 3, 2, 3, 1, 4, 7, 33, + 7, 17, 19, 13, 37, 22, 32, 20, 18, 10, + 6, 6, 6, 7, 23, 21, 27, 47, 41, 73, + 9, 9, 39, 8, 1, 7, 9, 21, 31, 23, + 27, 37, 23, 33, 57, 43, 55, 3, 46, 36, + 22, 10, 16, 2, 7, 7, 15, 6, 68, 42, + 30, 16, 30, 10, 4, 7, 1, 8, 80, 58, + 48, 34, 38, 10, 1, 9, 15, 13, 84, 46, + 18, 6, 18, 5, 23, 27, 10, 88, 62, 40, + 26, 40, 10, 1, 9, 13, 124, 37, 27, 9, + 23, 27, 17, 13, 7, 13, 9, 5, 1, 27, + 19, 35, 25, 18, 25, 39, 17, 5, 9, 17, + 21, 13, 15, 27, 35, 41, 8, 10, 28, 4, + 5, 12, 12, 10, 1, 2, 6, 2, 17, 13, + 21, 3, 16, 33, 2, 32, 1, 10, 8, 14, + 34, 0, 2, 14, 26, 19, 23, 53, 54, 60, + 72, 40, 32, 48, 40, 40, 46, 32, 40, 40, + 16, 14, 7, 12, 24, 4, 21, 10, 6, 0, + 10, 0, 12, 14, 3, 2, 4, 30, 26, 22, + 18, 11, 1, 3, 29, 15, 29, 31, 55, 49, + 61, 65, 21, 19, 63, 2, 4, 13, 33, 19, + 21, 35, 23, 31, 39, 55, 57, 73, 83, 4, + 13, 41, 15, 10, 8, 20, 24, 28, 50, 32, + 44, 48, 76, 56, 52, 76, 74, 98, 50, 26, + 0, 17, 41, 67, 89, 121, 16, 82, 68, 66, + 50, 58, 28, 22, 24, 4, 23, 13, 10, 9, + 28, 46, 5, 1, 18, 26, 8, 24, 46, 8, + 2, 76, 36, 0, 35, 61, 101, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 30 */ + + 82, 12, 31, 82, 12, 31, 21, 6, 44, 22, + 1, 25, 36, 48, 106, 28, 30, 5, 20, 38, + 0, 15, 3, 35, 67, 8, 18, 103, 113, 113, + 46, 5, 9, 20, 38, 0, 21, 0, 28, 4, + 0, 11, 23, 5, 33, 25, 55, 10, 13, 23, + 0, 25, 23, 49, 20, 1, 5, 13, 10, 8, + 44, 0, 0, 0, 5, 59, 67, 10, 3, 7, + 36, 13, 53, 23, 36, 18, 80, 68, 14, 20, + 9, 24, 9, 9, 17, 29, 31, 37, 39, 46, + 0, 16, 25, 9, 23, 17, 37, 0, 15, 13, + 33, 34, 9, 4, 19, 35, 5, 19, 0, 12, + 8, 2, 24, 20, 19, 1, 2, 7, 6, 17, + 16, 15, 2, 16, 10, 34, 20, 6, 21, 8, + 11, 13, 3, 45, 11, 14, 2, 12, 14, 14, + 50, 26, 6, 1, 12, 1, 3, 10, 57, 11, + 2, 31, 0, 11, 20, 8, 9, 26, 
12, 8, + 21, 20, 11, 25, 6, 43, 30, 40, 28, 28, + 34, 32, 18, 32, 34, 3, 12, 18, 8, 4, + 13, 2, 9, 5, 0, 5, 3, 2, 9, 33, + 9, 17, 21, 17, 37, 20, 28, 16, 14, 6, + 2, 2, 2, 11, 27, 23, 29, 51, 45, 75, + 9, 9, 39, 6, 3, 9, 13, 25, 35, 25, + 29, 39, 23, 33, 57, 43, 55, 1, 46, 36, + 22, 10, 16, 2, 7, 7, 13, 6, 68, 42, + 30, 16, 32, 12, 6, 5, 2, 8, 80, 58, + 48, 34, 40, 10, 1, 9, 15, 13, 84, 46, + 16, 6, 18, 5, 23, 25, 10, 86, 62, 38, + 24, 40, 10, 1, 7, 13, 124, 35, 25, 9, + 21, 25, 15, 9, 5, 11, 7, 1, 0, 27, + 17, 35, 25, 22, 25, 39, 17, 5, 9, 19, + 21, 13, 17, 29, 37, 43, 8, 8, 28, 2, + 7, 12, 10, 10, 3, 2, 4, 2, 19, 13, + 23, 3, 16, 35, 2, 32, 3, 10, 6, 14, + 34, 1, 0, 12, 24, 21, 23, 55, 52, 58, + 72, 38, 30, 44, 36, 36, 42, 26, 36, 36, + 10, 8, 9, 6, 18, 1, 29, 4, 0, 3, + 8, 1, 12, 16, 3, 4, 8, 26, 22, 18, + 14, 15, 5, 7, 33, 19, 31, 33, 57, 53, + 63, 67, 23, 21, 65, 0, 0, 15, 37, 23, + 25, 39, 27, 35, 43, 57, 61, 77, 85, 2, + 15, 43, 15, 12, 8, 20, 24, 28, 52, 34, + 46, 50, 78, 58, 54, 80, 76, 98, 48, 22, + 5, 21, 47, 73, 95, 125, 18, 84, 70, 66, + 50, 60, 30, 22, 24, 4, 23, 11, 10, 7, + 30, 48, 5, 1, 18, 26, 8, 24, 48, 8, + 2, 74, 32, 5, 41, 67, 109, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 31 */ + + 80, 12, 31, 80, 12, 31, 17, 8, 44, 22, + 1, 27, 34, 46, 106, 28, 34, 5, 22, 40, + 0, 17, 3, 37, 69, 6, 14, 107, 115, 115, + 52, 3, 9, 22, 40, 0, 21, 2, 28, 2, + 0, 11, 21, 5, 33, 25, 55, 10, 13, 21, + 0, 25, 23, 49, 20, 1, 5, 13, 10, 8, + 44, 0, 0, 0, 3, 59, 67, 10, 5, 7, + 36, 13, 53, 19, 40, 22, 82, 72, 16, 22, + 7, 26, 7, 7, 13, 29, 31, 37, 37, 46, + 0, 16, 23, 9, 21, 15, 35, 0, 15, 13, + 33, 34, 9, 4, 17, 35, 5, 19, 0, 12, + 8, 2, 24, 20, 19, 1, 2, 7, 6, 17, + 14, 15, 2, 14, 8, 32, 18, 4, 19, 8, + 11, 15, 5, 45, 13, 14, 2, 12, 14, 14, + 52, 26, 6, 1, 12, 1, 5, 10, 59, 11, + 2, 33, 0, 13, 20, 8, 11, 26, 12, 8, + 23, 20, 13, 25, 6, 45, 28, 40, 28, 28, + 34, 32, 18, 32, 34, 3, 12, 18, 8, 4, + 13, 2, 9, 5, 0, 5, 
5, 0, 9, 33, + 9, 17, 23, 19, 37, 16, 24, 12, 10, 2, + 1, 1, 1, 15, 31, 27, 33, 55, 49, 77, + 9, 9, 41, 4, 5, 11, 15, 27, 37, 27, + 31, 41, 25, 35, 57, 43, 53, 1, 46, 36, + 22, 10, 16, 2, 7, 5, 11, 8, 68, 42, + 30, 16, 32, 12, 8, 3, 6, 8, 80, 58, + 48, 34, 40, 10, 1, 9, 13, 13, 84, 46, + 16, 6, 18, 5, 23, 23, 10, 86, 60, 36, + 22, 40, 10, 1, 7, 11, 124, 33, 23, 7, + 19, 23, 13, 7, 3, 9, 5, 0, 4, 25, + 15, 35, 23, 24, 25, 41, 17, 3, 9, 19, + 23, 13, 19, 31, 39, 45, 6, 8, 28, 2, + 7, 10, 10, 10, 5, 0, 4, 2, 21, 13, + 23, 3, 16, 35, 0, 30, 5, 10, 4, 12, + 34, 3, 1, 12, 24, 23, 25, 59, 50, 56, + 70, 36, 28, 42, 32, 32, 38, 22, 30, 30, + 4, 4, 11, 0, 12, 7, 37, 1, 3, 7, + 6, 3, 12, 18, 3, 6, 12, 22, 18, 14, + 10, 19, 9, 9, 37, 21, 35, 35, 61, 55, + 65, 69, 23, 21, 67, 1, 1, 19, 41, 27, + 29, 41, 29, 39, 45, 59, 63, 79, 87, 0, + 17, 45, 15, 12, 8, 22, 26, 30, 54, 36, + 48, 52, 80, 60, 56, 82, 78, 96, 44, 18, + 9, 27, 53, 79, 101, 125, 18, 84, 70, 68, + 50, 62, 30, 22, 26, 6, 23, 11, 12, 7, + 32, 50, 5, 0, 20, 28, 8, 26, 50, 8, + 2, 72, 28, 9, 47, 73, 117, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 32 */ + + 76, 10, 33, 76, 10, 33, 15, 10, 44, 22, + 3, 31, 30, 42, 104, 28, 38, 7, 22, 40, + 1, 19, 3, 41, 73, 4, 10, 111, 117, 117, + 56, 1, 11, 22, 40, 1, 21, 4, 28, 0, + 0, 11, 21, 7, 35, 25, 57, 10, 13, 21, + 1, 27, 23, 49, 20, 1, 5, 13, 10, 6, + 44, 0, 0, 0, 3, 61, 67, 10, 7, 9, + 36, 13, 53, 17, 42, 24, 84, 74, 18, 24, + 5, 28, 5, 7, 11, 29, 31, 37, 37, 46, + 0, 16, 23, 9, 21, 15, 33, 1, 17, 15, + 35, 34, 9, 4, 17, 35, 5, 19, 0, 10, + 6, 2, 22, 18, 19, 3, 2, 7, 4, 17, + 12, 17, 0, 12, 4, 30, 16, 2, 19, 8, + 11, 17, 7, 45, 15, 12, 0, 12, 14, 14, + 52, 26, 6, 3, 12, 3, 7, 8, 61, 13, + 0, 35, 1, 15, 18, 6, 13, 26, 12, 8, + 27, 18, 15, 25, 6, 47, 26, 38, 26, 28, + 34, 30, 16, 32, 32, 5, 10, 18, 6, 2, + 15, 0, 11, 7, 1, 7, 7, 1, 11, 33, + 11, 17, 25, 23, 37, 12, 20, 8, 6, 1, + 7, 5, 5, 19, 35, 31, 37, 61, 53, 81, + 11, 11, 43, 
2, 9, 15, 19, 31, 41, 31, + 35, 43, 27, 37, 57, 43, 53, 1, 46, 36, + 22, 10, 16, 2, 7, 5, 9, 8, 66, 42, + 30, 16, 32, 12, 8, 3, 8, 6, 80, 58, + 46, 32, 40, 10, 1, 9, 13, 13, 84, 44, + 14, 4, 18, 5, 23, 23, 10, 84, 58, 34, + 20, 38, 10, 1, 7, 11, 124, 33, 21, 7, + 17, 21, 13, 5, 1, 7, 3, 2, 6, 25, + 15, 35, 23, 26, 25, 43, 17, 3, 9, 21, + 25, 15, 21, 33, 43, 49, 4, 6, 28, 0, + 9, 8, 8, 8, 7, 1, 2, 0, 23, 15, + 25, 3, 16, 37, 1, 28, 7, 8, 2, 10, + 34, 5, 5, 10, 22, 25, 27, 63, 48, 54, + 68, 32, 24, 38, 28, 28, 32, 16, 24, 24, + 1, 1, 15, 5, 4, 13, 47, 7, 9, 11, + 2, 7, 12, 18, 3, 8, 16, 18, 14, 10, + 6, 25, 13, 13, 43, 25, 39, 39, 65, 59, + 67, 71, 25, 23, 71, 5, 5, 23, 45, 31, + 33, 45, 33, 43, 49, 63, 67, 83, 89, 1, + 19, 47, 15, 12, 8, 22, 26, 30, 56, 36, + 50, 52, 82, 62, 58, 84, 80, 94, 40, 12, + 15, 33, 59, 87, 107, 125, 18, 84, 70, 68, + 50, 62, 30, 22, 26, 6, 23, 11, 12, 7, + 34, 52, 5, 0, 20, 28, 8, 26, 50, 8, + 0, 68, 22, 15, 53, 81, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 33 */ + + 74, 10, 33, 74, 10, 33, 11, 14, 46, 24, + 3, 33, 28, 40, 104, 28, 44, 7, 24, 42, + 1, 19, 1, 43, 75, 4, 8, 113, 119, 117, + 62, 2, 11, 24, 42, 1, 19, 8, 30, 0, + 2, 9, 19, 7, 35, 23, 57, 10, 11, 19, + 1, 27, 21, 49, 22, 0, 3, 11, 12, 6, + 44, 0, 0, 0, 1, 61, 67, 12, 7, 9, + 36, 11, 51, 13, 46, 28, 88, 78, 22, 26, + 1, 32, 1, 5, 7, 27, 29, 35, 35, 48, + 2, 18, 21, 7, 19, 13, 29, 1, 17, 15, + 35, 36, 9, 6, 15, 33, 3, 17, 2, 10, + 6, 4, 22, 18, 17, 3, 4, 7, 4, 15, + 12, 17, 0, 12, 2, 30, 16, 2, 17, 10, + 9, 17, 7, 43, 15, 12, 0, 14, 14, 14, + 54, 28, 8, 3, 14, 3, 7, 8, 61, 13, + 0, 35, 1, 17, 18, 6, 13, 28, 14, 8, + 29, 18, 15, 23, 6, 47, 26, 38, 26, 28, + 34, 30, 16, 32, 32, 5, 10, 18, 6, 2, + 15, 0, 11, 7, 1, 7, 7, 1, 11, 31, + 11, 15, 25, 25, 35, 10, 18, 6, 4, 5, + 11, 7, 7, 23, 37, 33, 39, 65, 55, 83, + 11, 11, 43, 0, 11, 17, 21, 33, 43, 33, + 37, 43, 27, 37, 55, 41, 51, 0, 48, 36, + 22, 12, 18, 4, 5, 3, 5, 10, 66, 42, 
+ 30, 16, 34, 14, 10, 1, 12, 6, 82, 60, + 46, 32, 42, 10, 1, 7, 11, 13, 86, 44, + 14, 4, 20, 3, 21, 21, 10, 84, 58, 34, + 20, 38, 10, 0, 5, 9, 124, 31, 19, 5, + 13, 17, 11, 1, 2, 3, 0, 6, 10, 23, + 13, 33, 21, 30, 23, 43, 15, 1, 7, 21, + 25, 15, 21, 33, 45, 51, 4, 6, 30, 0, + 9, 8, 8, 8, 7, 1, 2, 0, 23, 15, + 25, 1, 18, 37, 1, 28, 7, 8, 2, 10, + 36, 5, 7, 10, 22, 25, 27, 65, 48, 54, + 68, 30, 22, 36, 24, 24, 28, 12, 20, 20, + 5, 5, 17, 9, 1, 19, 55, 11, 13, 13, + 0, 9, 12, 20, 1, 12, 22, 16, 12, 8, + 4, 29, 15, 15, 47, 27, 41, 41, 67, 61, + 69, 71, 25, 23, 73, 7, 7, 25, 47, 33, + 35, 47, 35, 45, 51, 65, 69, 85, 89, 1, + 19, 47, 13, 14, 10, 24, 28, 32, 60, 38, + 52, 54, 86, 66, 60, 88, 84, 94, 38, 8, + 19, 37, 63, 93, 111, 125, 20, 86, 72, 70, + 52, 64, 32, 24, 28, 8, 21, 9, 14, 5, + 38, 56, 3, 2, 22, 30, 10, 28, 52, 8, + 0, 66, 18, 19, 57, 87, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 34 */ + + 72, 10, 33, 72, 10, 33, 7, 16, 46, 24, + 3, 35, 26, 38, 104, 28, 48, 7, 26, 44, + 1, 21, 1, 45, 77, 2, 4, 117, 121, 119, + 68, 4, 11, 26, 44, 1, 19, 10, 30, 1, + 2, 9, 17, 7, 35, 23, 57, 10, 11, 17, + 1, 27, 21, 49, 22, 0, 3, 11, 12, 6, + 44, 0, 0, 0, 0, 61, 67, 12, 9, 9, + 36, 11, 51, 9, 50, 32, 90, 82, 24, 28, + 0, 34, 0, 3, 3, 27, 29, 35, 33, 48, + 2, 18, 19, 7, 17, 13, 27, 1, 17, 15, + 35, 36, 9, 6, 15, 33, 3, 17, 2, 10, + 6, 4, 22, 18, 17, 3, 4, 7, 4, 15, + 10, 17, 0, 10, 0, 28, 14, 0, 15, 10, + 9, 19, 9, 43, 17, 12, 0, 14, 14, 14, + 56, 28, 8, 3, 14, 3, 9, 8, 63, 13, + 0, 37, 1, 19, 18, 6, 15, 28, 14, 8, + 31, 18, 17, 23, 6, 49, 24, 38, 26, 28, + 34, 30, 16, 32, 32, 5, 10, 18, 6, 2, + 15, 0, 11, 7, 1, 7, 9, 3, 13, 31, + 11, 15, 27, 27, 35, 6, 14, 2, 0, 9, + 15, 11, 11, 27, 41, 37, 43, 69, 59, 85, + 11, 11, 45, 1, 13, 19, 23, 37, 45, 35, + 39, 45, 29, 39, 55, 41, 49, 0, 48, 36, + 22, 12, 18, 4, 5, 3, 3, 12, 66, 42, + 30, 16, 34, 14, 12, 0, 16, 6, 82, 60, + 46, 32, 42, 10, 1, 7, 9, 13, 86, 44, + 14, 4, 20, 3, 21, 
19, 10, 82, 56, 32, + 18, 38, 10, 0, 5, 9, 124, 29, 17, 3, + 11, 15, 9, 0, 4, 1, 2, 8, 14, 21, + 11, 33, 19, 32, 23, 45, 15, 0, 7, 21, + 27, 15, 23, 35, 47, 53, 2, 6, 30, 0, + 11, 6, 8, 8, 9, 3, 2, 0, 25, 15, + 25, 1, 18, 37, 3, 26, 9, 8, 0, 8, + 36, 7, 9, 10, 22, 27, 29, 69, 46, 52, + 66, 28, 20, 34, 20, 20, 24, 8, 14, 14, + 11, 9, 19, 15, 7, 25, 63, 17, 17, 17, + 1, 11, 12, 22, 1, 14, 26, 12, 8, 4, + 0, 33, 19, 19, 51, 29, 45, 43, 71, 63, + 71, 73, 27, 23, 75, 9, 9, 29, 51, 37, + 39, 49, 39, 49, 53, 67, 71, 87, 91, 3, + 21, 49, 13, 14, 10, 26, 28, 34, 62, 40, + 54, 56, 88, 68, 62, 90, 86, 92, 34, 4, + 23, 43, 69, 99, 117, 125, 20, 86, 72, 70, + 52, 66, 32, 24, 28, 8, 21, 9, 16, 5, + 40, 58, 3, 4, 24, 32, 10, 28, 54, 8, + 0, 64, 14, 23, 63, 93, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 35 */ + + 70, 10, 33, 70, 10, 33, 3, 20, 48, 24, + 5, 39, 22, 36, 104, 28, 52, 7, 28, 46, + 1, 23, 1, 47, 79, 2, 0, 119, 123, 119, + 74, 6, 11, 28, 46, 1, 17, 12, 32, 1, + 2, 7, 15, 9, 37, 23, 57, 10, 11, 17, + 1, 27, 21, 49, 24, 0, 3, 11, 14, 6, + 44, 0, 0, 0, 0, 61, 67, 14, 11, 9, + 36, 11, 51, 5, 54, 34, 94, 86, 26, 30, + 4, 36, 2, 1, 0, 25, 29, 33, 31, 48, + 2, 18, 19, 7, 17, 11, 23, 1, 19, 17, + 37, 36, 9, 6, 13, 33, 1, 17, 4, 10, + 6, 4, 22, 16, 17, 3, 4, 7, 4, 15, + 8, 17, 0, 8, 1, 26, 12, 1, 15, 12, + 9, 21, 9, 41, 17, 12, 0, 14, 14, 14, + 58, 28, 8, 3, 16, 3, 11, 8, 65, 13, + 0, 37, 3, 21, 18, 6, 17, 28, 14, 8, + 35, 18, 19, 23, 6, 49, 22, 38, 26, 28, + 34, 30, 16, 32, 32, 5, 8, 18, 6, 2, + 15, 1, 13, 9, 3, 9, 11, 5, 13, 31, + 13, 15, 29, 31, 35, 4, 10, 1, 3, 13, + 19, 15, 15, 31, 45, 39, 45, 73, 63, 87, + 11, 11, 45, 3, 15, 21, 27, 39, 49, 37, + 41, 47, 29, 39, 55, 41, 49, 2, 48, 36, + 22, 12, 18, 4, 5, 1, 1, 12, 66, 42, + 30, 16, 36, 16, 14, 2, 20, 6, 82, 60, + 46, 32, 44, 10, 1, 7, 9, 13, 86, 44, + 12, 4, 20, 3, 21, 17, 10, 82, 56, 30, + 16, 38, 10, 0, 3, 7, 124, 27, 15, 3, + 9, 13, 7, 4, 6, 0, 4, 12, 16, 21, + 9, 
33, 19, 36, 23, 45, 15, 0, 7, 23, + 27, 15, 25, 37, 49, 55, 2, 4, 30, 1, + 11, 6, 6, 8, 11, 3, 0, 0, 27, 15, + 27, 1, 18, 39, 3, 26, 11, 8, 1, 8, + 36, 9, 11, 8, 20, 29, 29, 71, 44, 50, + 66, 26, 18, 30, 16, 16, 20, 2, 10, 10, + 17, 15, 21, 21, 13, 31, 71, 23, 23, 21, + 3, 13, 12, 24, 1, 16, 30, 8, 4, 0, + 3, 37, 23, 21, 55, 33, 47, 45, 73, 67, + 73, 75, 27, 25, 77, 11, 13, 31, 55, 41, + 43, 53, 41, 53, 57, 69, 75, 91, 93, 5, + 23, 51, 13, 16, 10, 26, 30, 34, 64, 42, + 56, 58, 90, 70, 64, 94, 88, 92, 32, 0, + 29, 47, 75, 105, 123, 125, 22, 88, 74, 72, + 52, 68, 34, 24, 30, 10, 21, 7, 16, 3, + 42, 60, 3, 4, 24, 32, 10, 30, 56, 8, + 0, 62, 10, 29, 69, 99, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 36 */ + + 66, 10, 33, 66, 10, 33, 1, 22, 48, 24, + 5, 41, 20, 32, 104, 28, 58, 9, 30, 46, + 1, 25, 1, 49, 81, 0, 3, 123, 125, 121, + 80, 8, 11, 30, 46, 1, 17, 14, 32, 3, + 2, 7, 15, 9, 37, 21, 57, 10, 11, 15, + 3, 29, 21, 49, 24, 2, 3, 9, 14, 6, + 44, 0, 0, 0, 2, 63, 67, 14, 13, 9, + 36, 11, 51, 3, 56, 38, 96, 88, 28, 32, + 6, 40, 4, 0, 4, 25, 27, 33, 29, 48, + 2, 18, 17, 7, 15, 11, 21, 3, 19, 17, + 37, 36, 9, 6, 13, 33, 1, 17, 4, 10, + 4, 4, 22, 16, 17, 3, 4, 7, 4, 15, + 8, 19, 0, 8, 3, 24, 10, 3, 13, 12, + 9, 23, 11, 41, 19, 12, 1, 14, 14, 14, + 60, 30, 8, 5, 16, 5, 13, 6, 67, 13, + 0, 39, 3, 23, 16, 4, 17, 30, 14, 8, + 37, 16, 21, 23, 6, 51, 22, 38, 26, 28, + 34, 30, 16, 32, 30, 5, 8, 18, 6, 2, + 17, 1, 13, 9, 3, 9, 11, 7, 15, 31, + 13, 15, 31, 33, 35, 0, 6, 5, 7, 17, + 23, 19, 19, 35, 49, 43, 49, 79, 67, 89, + 11, 13, 47, 5, 17, 23, 29, 43, 51, 39, + 45, 47, 31, 41, 55, 41, 47, 2, 48, 36, + 22, 12, 18, 4, 5, 1, 0, 14, 66, 42, + 30, 16, 36, 16, 14, 2, 24, 6, 82, 60, + 46, 32, 44, 10, 1, 7, 7, 13, 86, 42, + 12, 4, 20, 3, 21, 17, 10, 80, 54, 28, + 14, 38, 10, 0, 3, 7, 124, 25, 13, 1, + 5, 11, 5, 6, 10, 2, 8, 14, 20, 19, + 9, 31, 17, 38, 21, 47, 15, 2, 7, 23, + 29, 17, 27, 39, 51, 57, 0, 4, 30, 1, + 13, 4, 6, 8, 13, 5, 
0, 1, 29, 17, + 27, 1, 18, 39, 5, 24, 13, 8, 3, 6, + 36, 11, 13, 8, 20, 31, 31, 75, 42, 50, + 64, 24, 14, 28, 12, 12, 16, 1, 4, 4, + 23, 19, 25, 27, 21, 37, 79, 27, 27, 25, + 5, 17, 12, 26, 0, 18, 34, 6, 0, 3, + 5, 41, 27, 25, 59, 35, 51, 49, 77, 69, + 75, 77, 29, 25, 79, 15, 15, 35, 57, 45, + 47, 55, 45, 57, 59, 71, 77, 93, 93, 7, + 23, 53, 13, 16, 12, 28, 30, 36, 66, 42, + 58, 60, 92, 72, 66, 96, 90, 90, 28, 3, + 33, 53, 81, 111, 125, 125, 22, 88, 74, 72, + 54, 68, 34, 26, 30, 10, 19, 7, 18, 3, + 44, 62, 3, 6, 26, 34, 10, 30, 58, 8, + 0, 58, 6, 33, 75, 105, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 37 */ + + 64, 10, 33, 64, 10, 33, 2, 26, 48, 24, + 7, 45, 16, 30, 104, 28, 62, 9, 32, 48, + 1, 25, 0, 51, 83, 0, 7, 125, 125, 121, + 86, 12, 11, 32, 48, 1, 17, 16, 32, 3, + 2, 7, 13, 9, 39, 21, 57, 10, 11, 13, + 3, 29, 21, 49, 26, 2, 1, 9, 16, 6, + 44, 0, 0, 0, 2, 63, 67, 16, 15, 9, + 36, 9, 51, 0, 60, 40, 100, 92, 32, 34, + 8, 42, 6, 2, 8, 23, 27, 31, 27, 48, + 2, 18, 17, 7, 15, 9, 17, 3, 19, 17, + 37, 36, 9, 8, 11, 33, 1, 17, 6, 10, + 4, 4, 22, 16, 15, 3, 6, 7, 4, 15, + 6, 19, 0, 6, 5, 24, 8, 5, 11, 14, + 9, 25, 11, 39, 19, 12, 1, 14, 14, 14, + 62, 30, 8, 5, 16, 5, 15, 6, 69, 13, + 0, 39, 5, 25, 16, 4, 19, 30, 14, 8, + 39, 16, 23, 23, 6, 51, 20, 38, 26, 28, + 34, 30, 16, 32, 30, 5, 8, 18, 6, 2, + 17, 1, 13, 11, 3, 9, 13, 9, 15, 31, + 15, 15, 33, 35, 35, 1, 2, 7, 9, 21, + 27, 23, 23, 39, 53, 47, 53, 83, 69, 91, + 11, 13, 47, 7, 19, 25, 31, 45, 55, 41, + 47, 49, 31, 41, 55, 41, 47, 2, 48, 36, + 22, 12, 18, 6, 3, 0, 2, 14, 66, 42, + 30, 16, 36, 18, 16, 4, 28, 6, 82, 60, + 46, 32, 44, 10, 1, 7, 5, 13, 88, 42, + 10, 4, 20, 3, 21, 15, 10, 80, 52, 26, + 12, 38, 10, 0, 3, 5, 124, 23, 11, 0, + 3, 9, 3, 8, 12, 4, 10, 18, 24, 17, + 7, 31, 17, 40, 21, 47, 13, 4, 7, 25, + 29, 17, 27, 41, 53, 59, 1, 2, 30, 3, + 13, 4, 6, 8, 15, 7, 0, 1, 31, 17, + 27, 1, 18, 39, 7, 24, 13, 8, 5, 4, + 36, 13, 15, 6, 18, 33, 33, 79, 40, 48, + 
64, 22, 12, 24, 8, 8, 12, 7, 1, 0, + 29, 23, 27, 33, 27, 43, 87, 33, 33, 29, + 7, 19, 12, 28, 0, 20, 38, 2, 1, 7, + 9, 45, 31, 27, 63, 37, 55, 51, 81, 73, + 77, 79, 29, 25, 81, 17, 19, 39, 61, 47, + 49, 59, 47, 59, 63, 73, 79, 95, 95, 7, + 25, 55, 11, 18, 12, 28, 32, 38, 68, 44, + 60, 62, 96, 74, 68, 98, 94, 90, 24, 7, + 39, 57, 87, 117, 125, 125, 22, 90, 76, 74, + 54, 70, 34, 26, 32, 12, 19, 7, 20, 1, + 46, 64, 1, 8, 26, 34, 10, 32, 60, 8, + 0, 56, 2, 39, 81, 111, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 38 */ + + 62, 10, 35, 62, 10, 35, 6, 28, 50, 24, + 7, 47, 14, 28, 104, 28, 66, 9, 32, 50, + 1, 27, 0, 53, 85, 1, 9, 125, 125, 123, + 92, 14, 11, 32, 50, 1, 15, 18, 34, 5, + 4, 5, 11, 11, 39, 21, 57, 10, 9, 13, + 3, 29, 19, 49, 26, 2, 1, 9, 16, 6, + 44, 0, 0, 0, 4, 63, 67, 16, 15, 11, + 36, 9, 49, 4, 64, 44, 102, 96, 34, 36, + 12, 44, 8, 2, 10, 23, 27, 31, 27, 50, + 2, 18, 15, 5, 13, 9, 15, 3, 21, 19, + 39, 36, 9, 8, 11, 33, 0, 17, 6, 10, + 4, 6, 22, 14, 15, 3, 6, 7, 4, 15, + 4, 19, 0, 4, 7, 22, 8, 5, 11, 14, + 9, 25, 13, 39, 21, 12, 1, 14, 14, 14, + 62, 30, 8, 5, 18, 5, 17, 6, 69, 15, + 0, 41, 5, 27, 16, 4, 21, 30, 14, 8, + 43, 16, 25, 23, 6, 53, 18, 38, 24, 28, + 34, 30, 16, 32, 30, 5, 6, 18, 4, 2, + 17, 3, 15, 11, 5, 11, 15, 11, 17, 31, + 15, 13, 35, 39, 35, 5, 1, 11, 13, 25, + 31, 27, 27, 43, 57, 49, 55, 87, 73, 93, + 11, 13, 49, 9, 21, 27, 35, 49, 57, 43, + 49, 51, 33, 43, 55, 41, 45, 4, 48, 36, + 22, 14, 18, 6, 3, 0, 4, 16, 64, 42, + 30, 16, 38, 18, 18, 6, 32, 6, 84, 60, + 46, 32, 46, 10, 1, 5, 5, 13, 88, 42, + 10, 4, 20, 1, 19, 13, 10, 78, 52, 24, + 12, 38, 10, 2, 1, 5, 124, 23, 9, 0, + 1, 7, 1, 12, 14, 6, 12, 20, 26, 17, + 5, 31, 15, 44, 21, 49, 13, 4, 7, 25, + 31, 17, 29, 43, 55, 61, 1, 2, 30, 3, + 15, 2, 4, 8, 15, 7, 1, 1, 31, 17, + 29, 1, 18, 41, 7, 22, 15, 6, 7, 4, + 38, 13, 17, 6, 18, 33, 33, 81, 38, 46, + 62, 20, 10, 22, 4, 4, 8, 11, 5, 5, + 35, 29, 29, 37, 33, 49, 95, 39, 37, 33, + 9, 21, 12, 
30, 0, 24, 44, 1, 5, 11, + 13, 49, 35, 31, 67, 41, 57, 53, 83, 75, + 79, 81, 31, 27, 83, 19, 21, 41, 65, 51, + 53, 61, 51, 63, 65, 77, 83, 99, 97, 9, + 27, 57, 11, 18, 12, 30, 32, 38, 72, 46, + 62, 64, 98, 78, 70, 102, 96, 88, 22, 11, + 43, 63, 91, 123, 125, 125, 24, 90, 76, 74, + 54, 72, 36, 26, 32, 12, 19, 5, 20, 1, + 48, 66, 1, 8, 28, 36, 10, 32, 62, 8, + 0, 54, 1, 43, 87, 119, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 39 */ + + 60, 10, 35, 60, 10, 35, 10, 32, 50, 24, + 9, 51, 10, 24, 104, 28, 72, 11, 34, 50, + 1, 29, 0, 55, 87, 1, 13, 125, 125, 123, + 98, 16, 11, 34, 50, 1, 15, 20, 34, 5, + 4, 5, 11, 11, 41, 19, 57, 10, 9, 11, + 5, 29, 19, 49, 28, 4, 1, 7, 18, 6, + 44, 0, 0, 0, 4, 63, 67, 18, 17, 11, + 36, 9, 49, 6, 68, 46, 106, 100, 36, 38, + 14, 48, 10, 4, 14, 21, 25, 29, 25, 50, + 2, 18, 15, 5, 13, 7, 11, 5, 21, 19, + 39, 36, 9, 8, 9, 33, 0, 17, 8, 10, + 2, 6, 22, 14, 15, 3, 6, 7, 4, 15, + 4, 19, 0, 4, 9, 20, 6, 7, 9, 16, + 9, 27, 13, 37, 21, 12, 3, 14, 14, 14, + 64, 32, 8, 5, 18, 5, 19, 6, 71, 15, + 0, 41, 7, 29, 14, 4, 21, 32, 14, 8, + 45, 14, 27, 23, 6, 53, 18, 38, 24, 28, + 34, 30, 16, 32, 30, 5, 6, 18, 4, 2, + 17, 3, 15, 13, 5, 11, 15, 13, 17, 31, + 17, 13, 37, 41, 35, 7, 5, 15, 17, 29, + 35, 31, 31, 47, 61, 53, 59, 93, 77, 95, + 11, 15, 49, 11, 23, 29, 37, 51, 61, 45, + 51, 51, 33, 43, 55, 41, 45, 4, 48, 36, + 22, 14, 18, 6, 3, 2, 6, 16, 64, 42, + 30, 16, 38, 20, 20, 6, 36, 6, 84, 60, + 46, 32, 46, 10, 1, 5, 3, 13, 88, 40, + 8, 4, 20, 1, 19, 13, 10, 78, 50, 22, + 10, 38, 10, 2, 1, 3, 124, 21, 7, 2, + 2, 5, 0, 14, 18, 8, 16, 24, 30, 15, + 3, 29, 15, 46, 19, 49, 13, 6, 7, 27, + 31, 19, 31, 45, 57, 63, 3, 0, 30, 5, + 15, 2, 4, 8, 17, 9, 1, 3, 33, 17, + 29, 1, 18, 41, 9, 22, 17, 6, 9, 2, + 38, 15, 19, 4, 16, 35, 35, 85, 36, 46, + 62, 18, 6, 18, 0, 0, 4, 17, 11, 9, + 41, 33, 33, 43, 41, 55, 103, 43, 43, 37, + 11, 23, 12, 32, 2, 26, 48, 3, 9, 15, + 15, 53, 39, 33, 71, 43, 61, 57, 87, 79, + 81, 83, 31, 
27, 85, 23, 25, 45, 67, 55, + 57, 65, 53, 67, 69, 79, 85, 101, 97, 11, + 27, 59, 11, 20, 14, 30, 34, 40, 74, 48, + 64, 66, 100, 80, 72, 104, 98, 88, 18, 15, + 49, 67, 97, 125, 125, 125, 24, 92, 78, 76, + 56, 74, 36, 28, 34, 14, 17, 5, 22, 0, + 50, 68, 1, 10, 28, 36, 10, 34, 64, 8, + 0, 50, 5, 49, 93, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 40 */ + + 56, 8, 35, 56, 8, 35, 12, 34, 50, 24, + 9, 53, 8, 22, 104, 28, 76, 11, 36, 52, + 1, 31, 0, 57, 91, 3, 17, 125, 125, 125, + 102, 18, 11, 36, 52, 1, 15, 22, 34, 7, + 4, 5, 9, 13, 41, 19, 59, 10, 9, 11, + 5, 31, 19, 49, 28, 4, 1, 7, 18, 6, + 44, 0, 0, 0, 6, 65, 67, 18, 19, 11, + 36, 9, 49, 10, 70, 50, 108, 102, 38, 40, + 16, 50, 12, 6, 18, 21, 25, 29, 23, 50, + 2, 18, 13, 5, 11, 7, 9, 5, 23, 21, + 41, 36, 9, 8, 9, 33, 0, 17, 8, 8, + 2, 6, 22, 12, 15, 3, 6, 7, 2, 15, + 2, 21, 0, 2, 13, 18, 4, 9, 9, 16, + 9, 29, 15, 37, 23, 10, 3, 14, 14, 14, + 66, 32, 8, 7, 18, 7, 21, 4, 73, 15, + 0, 43, 7, 31, 14, 2, 23, 32, 14, 8, + 49, 14, 29, 23, 6, 55, 16, 36, 24, 28, + 34, 28, 14, 32, 28, 7, 4, 18, 4, 0, + 19, 5, 17, 13, 7, 13, 17, 15, 19, 31, + 17, 13, 39, 45, 35, 11, 9, 19, 21, 33, + 41, 35, 35, 51, 65, 57, 63, 97, 81, 97, + 11, 15, 51, 13, 27, 33, 41, 55, 63, 49, + 55, 53, 35, 45, 55, 41, 43, 4, 48, 36, + 22, 14, 18, 6, 3, 2, 8, 18, 64, 42, + 30, 16, 38, 20, 20, 8, 38, 6, 84, 60, + 46, 30, 46, 10, 1, 5, 3, 13, 88, 40, + 8, 4, 20, 1, 19, 11, 10, 76, 48, 20, + 8, 36, 10, 2, 1, 3, 124, 19, 5, 2, + 4, 3, 0, 16, 20, 10, 18, 26, 32, 15, + 3, 29, 13, 48, 19, 51, 13, 6, 7, 27, + 33, 19, 33, 47, 61, 65, 5, 0, 30, 5, + 17, 0, 2, 6, 19, 11, 3, 3, 35, 19, + 31, 1, 18, 43, 11, 20, 19, 6, 11, 0, + 38, 17, 21, 4, 16, 37, 37, 89, 34, 44, + 60, 14, 4, 16, 3, 3, 1, 21, 17, 15, + 47, 39, 35, 49, 47, 61, 111, 49, 47, 41, + 15, 27, 12, 32, 2, 28, 52, 7, 13, 19, + 19, 59, 43, 37, 75, 47, 65, 59, 91, 81, + 83, 85, 33, 29, 87, 25, 27, 49, 71, 59, + 61, 67, 57, 71, 71, 81, 89, 105, 99, 13, + 
29, 61, 11, 20, 14, 32, 34, 40, 76, 48, + 66, 68, 102, 82, 74, 106, 100, 86, 14, 19, + 53, 73, 103, 125, 125, 125, 24, 92, 78, 76, + 56, 74, 36, 28, 34, 14, 17, 5, 22, 0, + 52, 70, 1, 10, 30, 38, 10, 34, 64, 8, + 1, 48, 9, 53, 99, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 41 */ + + 54, 8, 35, 54, 8, 35, 16, 36, 52, 24, + 9, 55, 6, 20, 104, 28, 80, 11, 38, 54, + 1, 31, 2, 59, 93, 5, 21, 125, 125, 125, + 108, 22, 11, 38, 54, 1, 13, 24, 36, 9, + 4, 3, 7, 13, 41, 19, 59, 10, 9, 9, + 5, 31, 19, 49, 28, 4, 0, 7, 18, 6, + 44, 0, 0, 0, 8, 65, 67, 18, 21, 11, + 36, 7, 49, 14, 74, 54, 110, 106, 42, 42, + 20, 52, 16, 8, 22, 21, 25, 29, 21, 50, + 2, 18, 11, 5, 9, 5, 7, 5, 23, 21, + 41, 36, 9, 10, 7, 31, 2, 17, 8, 8, + 2, 6, 22, 12, 13, 3, 8, 7, 2, 13, + 0, 21, 0, 0, 15, 18, 2, 11, 7, 16, + 7, 31, 17, 37, 25, 10, 3, 16, 14, 14, + 68, 32, 8, 7, 20, 7, 21, 4, 75, 15, + 0, 45, 7, 33, 14, 2, 25, 32, 14, 8, + 51, 14, 29, 23, 6, 57, 14, 36, 24, 28, + 34, 28, 14, 32, 28, 7, 4, 18, 4, 0, + 19, 5, 17, 13, 7, 13, 19, 15, 19, 31, + 17, 13, 41, 47, 35, 15, 11, 21, 23, 37, + 45, 37, 37, 55, 69, 59, 65, 101, 83, 99, + 11, 15, 53, 15, 29, 35, 43, 57, 65, 51, + 57, 55, 37, 47, 55, 39, 41, 6, 50, 36, + 22, 14, 20, 8, 1, 4, 10, 20, 64, 42, + 30, 16, 40, 20, 22, 10, 42, 6, 84, 60, + 46, 30, 48, 10, 1, 5, 1, 13, 90, 40, + 8, 4, 22, 1, 19, 9, 10, 76, 48, 20, + 6, 36, 10, 2, 0, 1, 124, 17, 3, 4, + 6, 0, 2, 20, 22, 14, 20, 28, 36, 13, + 1, 29, 11, 52, 19, 53, 11, 8, 7, 27, + 35, 19, 33, 49, 63, 67, 5, 0, 30, 5, + 17, 1, 2, 6, 21, 11, 3, 3, 37, 19, + 31, 0, 20, 43, 11, 18, 19, 6, 11, 0, + 38, 19, 23, 4, 16, 39, 37, 91, 34, 42, + 58, 12, 2, 14, 7, 7, 5, 25, 21, 21, + 51, 43, 37, 55, 53, 67, 119, 55, 51, 43, + 17, 29, 12, 34, 2, 30, 56, 11, 15, 23, + 23, 63, 45, 39, 79, 49, 67, 61, 93, 83, + 85, 85, 33, 29, 89, 27, 29, 51, 75, 61, + 63, 69, 59, 73, 73, 83, 91, 107, 101, 13, + 31, 61, 9, 20, 14, 34, 36, 42, 78, 50, + 68, 70, 106, 84, 76, 110, 
104, 84, 12, 23, + 57, 79, 109, 125, 125, 125, 26, 92, 78, 78, + 56, 76, 38, 28, 36, 16, 17, 3, 24, 0, + 54, 74, 0, 12, 32, 40, 10, 36, 66, 8, + 1, 46, 13, 57, 103, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 42 */ + + 52, 8, 35, 52, 8, 35, 20, 40, 52, 24, + 11, 59, 2, 16, 104, 28, 86, 13, 40, 54, + 1, 33, 2, 61, 95, 5, 25, 125, 125, 125, + 114, 24, 11, 40, 54, 1, 13, 26, 36, 9, + 4, 3, 7, 13, 43, 17, 59, 10, 9, 7, + 7, 31, 19, 49, 30, 6, 0, 5, 20, 6, + 44, 0, 0, 0, 8, 65, 67, 20, 23, 11, + 36, 7, 49, 16, 78, 56, 114, 110, 44, 44, + 22, 56, 18, 10, 26, 19, 23, 27, 19, 50, + 2, 18, 11, 5, 9, 5, 3, 7, 23, 21, + 41, 36, 9, 10, 7, 31, 2, 17, 10, 8, + 0, 6, 22, 12, 13, 3, 8, 7, 2, 13, + 0, 21, 0, 0, 17, 16, 0, 13, 5, 18, + 7, 33, 17, 35, 25, 10, 5, 16, 14, 14, + 70, 34, 8, 7, 20, 7, 23, 4, 77, 15, + 0, 45, 9, 35, 12, 2, 25, 34, 14, 8, + 53, 12, 31, 23, 6, 57, 14, 36, 24, 28, + 34, 28, 14, 32, 28, 7, 4, 18, 4, 0, + 19, 5, 17, 15, 7, 13, 19, 17, 21, 31, + 19, 13, 43, 49, 35, 17, 15, 25, 27, 41, + 49, 41, 41, 59, 73, 63, 69, 107, 87, 101, + 11, 17, 53, 17, 31, 37, 45, 61, 69, 53, + 59, 55, 37, 47, 55, 39, 41, 6, 50, 36, + 22, 14, 20, 8, 1, 4, 12, 20, 64, 42, + 30, 16, 40, 22, 24, 10, 46, 6, 84, 60, + 46, 30, 48, 10, 1, 5, 0, 13, 90, 38, + 6, 4, 22, 1, 19, 9, 10, 74, 46, 18, + 4, 36, 10, 2, 0, 1, 124, 15, 1, 6, + 10, 2, 4, 22, 26, 16, 24, 32, 40, 11, + 0, 27, 11, 54, 17, 53, 11, 10, 7, 29, + 35, 21, 35, 51, 65, 69, 7, 1, 30, 7, + 19, 1, 2, 6, 23, 13, 3, 5, 39, 19, + 31, 0, 20, 43, 13, 18, 21, 6, 13, 1, + 38, 21, 25, 2, 14, 41, 39, 95, 32, 42, + 58, 10, 1, 10, 11, 11, 9, 31, 27, 25, + 57, 47, 41, 61, 61, 73, 125, 59, 57, 47, + 19, 31, 12, 36, 4, 32, 60, 13, 19, 27, + 25, 67, 49, 43, 83, 51, 71, 65, 97, 87, + 87, 87, 35, 29, 91, 31, 33, 55, 77, 65, + 67, 73, 63, 77, 77, 85, 93, 109, 101, 15, + 31, 63, 9, 22, 16, 34, 36, 44, 80, 52, + 70, 72, 108, 86, 78, 112, 106, 84, 8, 27, + 63, 83, 115, 125, 125, 125, 26, 94, 80, 
78, + 58, 78, 38, 30, 36, 16, 15, 3, 26, 2, + 56, 76, 0, 14, 32, 40, 10, 36, 68, 8, + 1, 42, 17, 63, 109, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 43 */ + + 50, 8, 37, 50, 8, 37, 24, 42, 54, 24, + 11, 61, 0, 14, 104, 28, 90, 13, 40, 56, + 1, 35, 2, 63, 97, 7, 27, 125, 125, 125, + 120, 26, 11, 40, 56, 1, 11, 28, 38, 11, + 6, 1, 5, 15, 43, 17, 59, 10, 7, 7, + 7, 31, 17, 49, 30, 6, 0, 5, 20, 6, + 44, 0, 0, 0, 10, 65, 67, 20, 23, 13, + 36, 7, 47, 20, 82, 60, 116, 114, 46, 46, + 26, 58, 20, 10, 28, 19, 23, 27, 19, 52, + 2, 18, 9, 3, 7, 3, 1, 7, 25, 23, + 43, 36, 9, 10, 5, 31, 4, 17, 10, 8, + 0, 8, 22, 10, 13, 3, 8, 7, 2, 13, + 1, 21, 0, 1, 19, 14, 0, 13, 5, 18, + 7, 33, 19, 35, 27, 10, 5, 16, 14, 14, + 70, 34, 8, 7, 22, 7, 25, 4, 77, 17, + 0, 47, 9, 37, 12, 2, 27, 34, 14, 8, + 57, 12, 33, 23, 6, 59, 12, 36, 22, 28, + 34, 28, 14, 32, 28, 7, 2, 18, 2, 0, + 19, 7, 19, 15, 9, 15, 21, 19, 21, 31, + 19, 11, 45, 53, 35, 21, 19, 29, 31, 45, + 53, 45, 45, 63, 77, 65, 71, 111, 91, 103, + 11, 17, 55, 19, 33, 39, 49, 63, 71, 55, + 61, 57, 39, 49, 55, 39, 39, 8, 50, 36, + 22, 16, 20, 8, 1, 6, 14, 22, 62, 42, + 30, 16, 42, 22, 26, 12, 50, 6, 86, 60, + 46, 30, 50, 10, 1, 3, 0, 13, 90, 38, + 6, 4, 22, 0, 17, 7, 10, 74, 46, 16, + 4, 36, 10, 4, 2, 0, 124, 15, 0, 6, + 12, 4, 6, 26, 28, 18, 26, 34, 42, 11, + 2, 27, 9, 58, 17, 55, 11, 10, 7, 29, + 37, 21, 37, 53, 67, 71, 7, 1, 30, 7, + 19, 3, 0, 6, 23, 13, 5, 5, 39, 19, + 33, 0, 20, 45, 13, 16, 23, 4, 15, 1, + 40, 21, 27, 2, 14, 41, 39, 97, 30, 40, + 56, 8, 3, 8, 15, 15, 13, 35, 31, 31, + 63, 53, 43, 65, 67, 79, 125, 65, 61, 51, + 21, 33, 12, 38, 4, 36, 66, 17, 23, 31, + 29, 71, 53, 45, 87, 55, 73, 67, 99, 89, + 89, 89, 35, 31, 93, 33, 35, 57, 81, 69, + 71, 75, 65, 81, 79, 89, 97, 113, 103, 17, + 33, 65, 9, 22, 16, 36, 38, 44, 84, 54, + 72, 74, 110, 90, 80, 116, 108, 82, 6, 31, + 67, 89, 119, 125, 125, 125, 28, 94, 80, 80, + 58, 80, 40, 30, 38, 18, 15, 1, 26, 2, + 58, 78, 0, 14, 
34, 42, 10, 38, 70, 8, + 1, 40, 21, 67, 115, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 44 */ + + 46, 8, 37, 46, 8, 37, 26, 46, 54, 24, + 13, 65, 3, 12, 104, 28, 94, 13, 42, 58, + 1, 37, 2, 65, 99, 7, 31, 125, 125, 125, + 124, 28, 11, 42, 58, 1, 11, 30, 38, 11, + 6, 1, 3, 15, 45, 17, 59, 10, 7, 5, + 7, 33, 17, 49, 32, 6, 0, 5, 22, 6, + 44, 0, 0, 0, 10, 67, 67, 22, 25, 13, + 36, 7, 47, 24, 84, 62, 120, 116, 48, 48, + 28, 60, 22, 12, 32, 17, 23, 25, 17, 52, + 2, 18, 9, 3, 7, 3, 2, 7, 25, 23, + 43, 36, 9, 10, 5, 31, 4, 17, 12, 8, + 0, 8, 22, 10, 13, 3, 8, 7, 2, 13, + 3, 23, 0, 3, 21, 12, 1, 15, 3, 20, + 7, 35, 19, 33, 27, 10, 5, 16, 14, 14, + 72, 34, 8, 9, 22, 9, 27, 2, 79, 17, + 0, 47, 11, 39, 12, 0, 29, 34, 14, 8, + 59, 12, 35, 23, 6, 59, 10, 36, 22, 28, + 34, 28, 14, 32, 26, 7, 2, 18, 2, 0, + 21, 7, 19, 17, 9, 15, 23, 21, 23, 31, + 21, 11, 47, 55, 35, 23, 23, 33, 35, 49, + 57, 49, 49, 67, 81, 69, 75, 115, 95, 105, + 11, 17, 55, 21, 35, 41, 51, 67, 75, 57, + 65, 59, 39, 49, 55, 39, 39, 8, 50, 36, + 22, 16, 20, 8, 1, 6, 16, 22, 62, 42, + 30, 16, 42, 24, 26, 14, 54, 6, 86, 60, + 46, 30, 50, 10, 1, 3, 2, 13, 90, 38, + 4, 4, 22, 0, 17, 5, 10, 72, 44, 14, + 2, 36, 10, 4, 2, 0, 124, 13, 2, 8, + 14, 6, 8, 28, 30, 20, 28, 38, 46, 9, + 2, 27, 9, 60, 17, 55, 11, 12, 7, 31, + 37, 21, 39, 55, 69, 73, 9, 3, 30, 9, + 21, 3, 0, 6, 25, 15, 5, 5, 41, 21, + 33, 0, 20, 45, 15, 16, 25, 4, 17, 3, + 40, 23, 29, 0, 12, 43, 41, 101, 28, 38, + 56, 6, 5, 4, 19, 19, 17, 41, 37, 35, + 69, 57, 45, 71, 73, 85, 125, 71, 67, 55, + 23, 37, 12, 40, 4, 38, 70, 21, 27, 35, + 33, 75, 57, 49, 91, 57, 77, 69, 103, 93, + 91, 91, 37, 31, 95, 35, 39, 61, 85, 73, + 75, 79, 69, 85, 83, 91, 99, 115, 105, 19, + 35, 67, 9, 24, 16, 36, 38, 46, 86, 54, + 74, 76, 112, 92, 82, 118, 110, 82, 2, 35, + 73, 93, 125, 125, 125, 125, 28, 96, 82, 80, + 58, 80, 40, 30, 38, 18, 15, 1, 28, 4, + 60, 80, 0, 16, 34, 42, 10, 38, 72, 8, + 1, 38, 25, 73, 121, 125, 125, 
125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 45 */ + + 44, 8, 37, 44, 8, 37, 30, 48, 54, 24, + 13, 67, 5, 8, 104, 28, 100, 15, 44, 58, + 1, 37, 4, 67, 101, 9, 35, 125, 125, 125, + 124, 32, 11, 44, 58, 1, 11, 32, 38, 13, + 6, 1, 3, 15, 45, 15, 59, 10, 7, 3, + 9, 33, 17, 49, 32, 8, 2, 3, 22, 6, + 44, 0, 0, 0, 12, 67, 67, 22, 27, 13, + 36, 5, 47, 26, 88, 66, 122, 120, 52, 50, + 30, 64, 24, 14, 36, 17, 21, 25, 15, 52, + 2, 18, 7, 3, 5, 1, 4, 9, 25, 23, + 43, 36, 9, 12, 3, 31, 4, 17, 12, 8, + 1, 8, 22, 10, 11, 3, 10, 7, 2, 13, + 3, 23, 0, 3, 23, 12, 3, 17, 1, 20, + 7, 37, 21, 33, 29, 10, 7, 16, 14, 14, + 74, 36, 8, 9, 22, 9, 29, 2, 81, 17, + 0, 49, 11, 41, 10, 0, 29, 36, 14, 8, + 61, 10, 37, 23, 6, 61, 10, 36, 22, 28, + 34, 28, 14, 32, 26, 7, 2, 18, 2, 0, + 21, 7, 19, 17, 9, 15, 23, 23, 23, 31, + 21, 11, 49, 57, 35, 27, 27, 35, 37, 53, + 61, 53, 53, 71, 85, 73, 79, 121, 97, 107, + 11, 19, 57, 23, 37, 43, 53, 69, 77, 59, + 67, 59, 41, 51, 55, 39, 37, 8, 50, 36, + 22, 16, 20, 10, 0, 8, 18, 24, 62, 42, + 30, 16, 42, 24, 28, 14, 58, 6, 86, 60, + 46, 30, 50, 10, 1, 3, 4, 13, 92, 36, + 4, 4, 22, 0, 17, 5, 10, 72, 42, 12, + 0, 36, 10, 4, 2, 2, 124, 11, 4, 10, + 18, 8, 10, 30, 34, 22, 32, 40, 50, 7, + 4, 25, 7, 62, 15, 57, 9, 14, 7, 31, + 39, 23, 39, 57, 71, 75, 11, 3, 30, 9, + 21, 5, 0, 6, 27, 17, 5, 7, 43, 21, + 33, 0, 20, 45, 17, 14, 25, 4, 19, 5, + 40, 25, 31, 0, 12, 45, 43, 105, 26, 38, + 54, 4, 9, 2, 23, 23, 21, 45, 43, 41, + 75, 61, 49, 77, 81, 91, 125, 75, 71, 59, + 25, 39, 12, 42, 6, 40, 74, 23, 29, 39, + 35, 79, 61, 51, 95, 59, 81, 73, 107, 95, + 93, 93, 37, 31, 97, 39, 41, 65, 87, 75, + 77, 81, 71, 87, 85, 93, 101, 117, 105, 19, + 35, 69, 7, 24, 18, 38, 40, 48, 88, 56, + 76, 78, 116, 94, 84, 120, 114, 80, 1, 39, + 77, 99, 125, 125, 125, 125, 28, 96, 82, 82, + 60, 82, 40, 32, 40, 20, 13, 1, 30, 4, + 62, 82, 2, 18, 36, 44, 10, 40, 74, 8, + 1, 34, 29, 77, 125, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, 
SP, B Slices :: cabac_init_idc = 0, qp = 46 */ + + 42, 8, 37, 42, 8, 37, 34, 52, 56, 24, + 15, 71, 9, 6, 104, 28, 104, 15, 46, 60, + 1, 39, 4, 69, 103, 9, 39, 125, 125, 125, + 124, 34, 11, 46, 60, 1, 9, 34, 40, 13, + 6, 0, 1, 17, 47, 15, 59, 10, 7, 3, + 9, 33, 17, 49, 34, 8, 2, 3, 24, 6, + 44, 0, 0, 0, 12, 67, 67, 24, 29, 13, + 36, 5, 47, 30, 92, 68, 124, 124, 54, 52, + 34, 66, 26, 16, 40, 15, 21, 23, 13, 52, + 2, 18, 7, 3, 5, 1, 8, 9, 27, 25, + 45, 36, 9, 12, 3, 31, 6, 17, 14, 8, + 1, 8, 22, 8, 11, 3, 10, 7, 2, 13, + 5, 23, 0, 5, 25, 10, 5, 19, 1, 22, + 7, 39, 21, 31, 29, 10, 7, 16, 14, 14, + 76, 36, 8, 9, 24, 9, 31, 2, 83, 17, + 0, 49, 13, 43, 10, 0, 31, 36, 14, 8, + 65, 10, 39, 23, 6, 61, 8, 36, 22, 28, + 34, 28, 14, 32, 26, 7, 0, 18, 2, 0, + 21, 9, 21, 19, 11, 17, 25, 25, 25, 31, + 23, 11, 51, 61, 35, 29, 31, 39, 41, 57, + 65, 57, 57, 75, 89, 75, 81, 125, 101, 109, + 11, 19, 57, 25, 39, 45, 57, 73, 81, 61, + 69, 61, 41, 51, 55, 39, 37, 10, 50, 36, + 22, 16, 20, 10, 0, 8, 20, 24, 62, 42, + 30, 16, 44, 26, 30, 16, 62, 6, 86, 60, + 46, 30, 52, 10, 1, 3, 4, 13, 92, 36, + 2, 4, 22, 0, 17, 3, 10, 70, 42, 10, + 1, 36, 10, 4, 4, 2, 124, 9, 6, 10, + 20, 10, 12, 34, 36, 24, 34, 44, 52, 7, + 6, 25, 7, 66, 15, 57, 9, 14, 7, 33, + 39, 23, 41, 59, 73, 77, 11, 5, 30, 11, + 23, 5, 1, 6, 29, 17, 7, 7, 45, 21, + 35, 0, 20, 47, 17, 14, 27, 4, 21, 5, + 40, 27, 33, 1, 10, 47, 43, 107, 24, 36, + 54, 2, 11, 1, 27, 27, 25, 51, 47, 45, + 81, 67, 51, 83, 87, 97, 125, 81, 77, 63, + 27, 41, 12, 44, 6, 42, 78, 27, 33, 43, + 39, 83, 65, 55, 99, 63, 83, 75, 109, 99, + 95, 95, 39, 33, 99, 41, 45, 67, 91, 79, + 81, 85, 75, 91, 89, 95, 105, 121, 107, 21, + 37, 71, 7, 26, 18, 38, 40, 48, 90, 58, + 78, 80, 118, 96, 86, 124, 116, 80, 3, 43, + 83, 103, 125, 125, 125, 125, 30, 98, 84, 82, + 60, 84, 42, 32, 40, 20, 13, 0, 30, 6, + 64, 84, 2, 18, 36, 44, 10, 40, 76, 8, + 1, 32, 33, 83, 125, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 47 */ + 
+ 40, 8, 37, 40, 8, 37, 38, 54, 56, 24, + 15, 73, 11, 4, 104, 28, 108, 15, 48, 62, + 1, 41, 4, 71, 105, 11, 43, 125, 125, 125, + 124, 36, 11, 48, 62, 1, 9, 36, 40, 15, + 6, 0, 0, 17, 47, 15, 59, 10, 7, 1, + 9, 33, 17, 49, 34, 8, 2, 3, 24, 6, + 44, 0, 0, 0, 14, 67, 67, 24, 31, 13, + 36, 5, 47, 34, 96, 72, 124, 124, 56, 54, + 36, 68, 28, 18, 44, 15, 21, 23, 11, 52, + 2, 18, 5, 3, 3, 0, 10, 9, 27, 25, + 45, 36, 9, 12, 1, 31, 6, 17, 14, 8, + 1, 8, 22, 8, 11, 3, 10, 7, 2, 13, + 7, 23, 0, 7, 27, 8, 7, 21, 0, 22, + 7, 41, 23, 31, 31, 10, 7, 16, 14, 14, + 78, 36, 8, 9, 24, 9, 33, 2, 85, 17, + 0, 51, 13, 45, 10, 0, 33, 36, 14, 8, + 67, 10, 41, 23, 6, 63, 6, 36, 22, 28, + 34, 28, 14, 32, 26, 7, 0, 18, 2, 0, + 21, 9, 21, 19, 11, 17, 27, 27, 25, 31, + 23, 11, 53, 63, 35, 33, 35, 43, 45, 61, + 69, 61, 61, 79, 93, 79, 85, 125, 105, 111, + 11, 19, 59, 27, 41, 47, 59, 75, 83, 63, + 71, 63, 43, 53, 55, 39, 35, 10, 50, 36, + 22, 16, 20, 10, 0, 10, 22, 26, 62, 42, + 30, 16, 44, 26, 32, 18, 66, 6, 86, 60, + 46, 30, 52, 10, 1, 3, 6, 13, 92, 36, + 2, 4, 22, 0, 17, 1, 10, 70, 40, 8, + 3, 36, 10, 4, 4, 4, 124, 7, 8, 12, + 22, 12, 14, 36, 38, 26, 36, 46, 56, 5, + 8, 25, 5, 68, 15, 59, 9, 16, 7, 33, + 41, 23, 43, 61, 75, 79, 13, 5, 30, 11, + 23, 7, 1, 6, 31, 19, 7, 7, 47, 21, + 35, 0, 20, 47, 19, 12, 29, 4, 23, 7, + 40, 29, 35, 1, 10, 49, 45, 111, 22, 34, + 52, 0, 13, 3, 31, 31, 29, 55, 53, 51, + 87, 71, 53, 89, 93, 103, 125, 87, 81, 67, + 29, 43, 12, 46, 6, 44, 82, 31, 37, 47, + 43, 87, 69, 57, 103, 65, 87, 77, 113, 101, + 97, 97, 39, 33, 101, 43, 47, 71, 95, 83, + 85, 87, 77, 95, 91, 97, 107, 123, 109, 23, + 39, 73, 7, 26, 18, 40, 42, 50, 92, 60, + 80, 82, 120, 98, 88, 124, 118, 78, 7, 47, + 87, 109, 125, 125, 125, 125, 30, 98, 84, 84, + 60, 86, 42, 32, 42, 22, 13, 0, 32, 6, + 66, 86, 2, 20, 38, 46, 10, 42, 78, 8, + 1, 30, 37, 87, 125, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 48 */ + + 36, 6, 39, 36, 6, 39, 40, 56, 56, 24, + 
17, 77, 15, 0, 102, 28, 112, 17, 48, 62, + 3, 43, 4, 75, 109, 13, 47, 125, 125, 125, + 124, 38, 13, 48, 62, 3, 9, 38, 40, 17, + 6, 0, 0, 19, 49, 15, 61, 10, 7, 1, + 11, 35, 17, 49, 34, 8, 2, 3, 24, 4, + 44, 0, 0, 0, 14, 69, 67, 24, 33, 15, + 36, 5, 47, 36, 98, 74, 124, 124, 58, 56, + 38, 70, 30, 18, 46, 15, 21, 23, 11, 52, + 2, 18, 5, 3, 3, 0, 12, 11, 29, 27, + 47, 36, 9, 12, 1, 31, 6, 17, 14, 6, + 3, 8, 20, 6, 11, 5, 10, 7, 0, 13, + 9, 25, 1, 9, 31, 6, 9, 23, 0, 22, + 7, 43, 25, 31, 33, 8, 9, 16, 14, 14, + 78, 36, 8, 11, 24, 11, 35, 0, 87, 19, + 1, 53, 15, 47, 8, 1, 35, 36, 14, 8, + 71, 8, 43, 23, 6, 65, 4, 34, 20, 28, + 34, 26, 12, 32, 24, 9, 1, 18, 0, 1, + 23, 11, 23, 21, 13, 19, 29, 29, 27, 31, + 25, 11, 55, 67, 35, 37, 39, 47, 49, 65, + 75, 65, 65, 83, 97, 83, 89, 125, 109, 115, + 13, 21, 61, 29, 45, 51, 63, 79, 87, 67, + 75, 65, 45, 55, 55, 39, 35, 10, 50, 36, + 22, 16, 20, 10, 0, 10, 24, 26, 60, 42, + 30, 16, 44, 26, 32, 18, 68, 4, 86, 60, + 44, 28, 52, 10, 1, 3, 6, 13, 92, 34, + 0, 2, 22, 0, 17, 1, 10, 68, 38, 6, + 5, 34, 10, 4, 4, 4, 124, 7, 10, 12, + 24, 14, 14, 38, 40, 28, 38, 48, 58, 5, + 8, 25, 5, 70, 15, 61, 9, 16, 7, 35, + 43, 25, 45, 63, 79, 83, 15, 7, 30, 13, + 25, 9, 3, 4, 33, 21, 9, 9, 49, 23, + 37, 0, 20, 49, 21, 10, 31, 2, 25, 9, + 40, 31, 39, 3, 8, 51, 47, 115, 20, 32, + 50, 3, 17, 7, 35, 35, 35, 61, 59, 57, + 93, 77, 57, 95, 101, 109, 125, 93, 87, 71, + 33, 47, 12, 46, 6, 46, 86, 35, 41, 51, + 47, 93, 73, 61, 109, 69, 91, 81, 117, 105, + 99, 99, 41, 35, 105, 47, 51, 75, 99, 87, + 89, 91, 81, 99, 95, 101, 111, 125, 111, 25, + 41, 75, 7, 26, 18, 40, 42, 50, 94, 60, + 82, 82, 122, 100, 90, 124, 120, 76, 11, 53, + 93, 115, 125, 125, 125, 125, 30, 98, 84, 84, + 60, 86, 42, 32, 42, 22, 13, 0, 32, 6, + 68, 88, 2, 20, 38, 46, 10, 42, 78, 8, + 3, 26, 43, 93, 125, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 49 */ + + 34, 6, 39, 34, 6, 39, 44, 60, 58, 26, + 17, 79, 17, 1, 102, 28, 118, 17, 50, 
64, + 3, 43, 6, 77, 111, 13, 49, 125, 125, 125, + 124, 42, 13, 50, 64, 3, 7, 42, 42, 17, + 8, 2, 2, 19, 49, 13, 61, 10, 5, 0, + 11, 35, 15, 49, 36, 10, 4, 1, 26, 4, + 44, 0, 0, 0, 16, 69, 67, 26, 33, 15, + 36, 3, 45, 40, 102, 78, 124, 124, 62, 58, + 42, 74, 34, 20, 50, 13, 19, 21, 9, 54, + 4, 20, 3, 1, 1, 2, 16, 11, 29, 27, + 47, 38, 9, 14, 0, 29, 8, 15, 16, 6, + 3, 10, 20, 6, 9, 5, 12, 7, 0, 11, + 9, 25, 1, 9, 33, 6, 9, 23, 2, 24, + 5, 43, 25, 29, 33, 8, 9, 18, 14, 14, + 80, 38, 10, 11, 26, 11, 35, 0, 87, 19, + 1, 53, 15, 49, 8, 1, 35, 38, 16, 8, + 73, 8, 43, 21, 6, 65, 4, 34, 20, 28, + 34, 26, 12, 32, 24, 9, 1, 18, 0, 1, + 23, 11, 23, 21, 13, 19, 29, 29, 27, 29, + 25, 9, 55, 69, 33, 39, 41, 49, 51, 69, + 79, 67, 67, 87, 99, 85, 91, 125, 111, 117, + 13, 21, 61, 31, 47, 53, 65, 81, 89, 69, + 77, 65, 45, 55, 53, 37, 33, 12, 52, 36, + 22, 18, 22, 12, 2, 12, 28, 28, 60, 42, + 30, 16, 46, 28, 34, 20, 72, 4, 88, 62, + 44, 28, 54, 10, 1, 1, 8, 13, 94, 34, + 0, 2, 24, 2, 15, 0, 10, 68, 38, 6, + 5, 34, 10, 6, 6, 6, 124, 5, 12, 14, + 28, 18, 16, 42, 44, 32, 42, 52, 62, 3, + 10, 23, 3, 74, 13, 61, 7, 18, 5, 35, + 43, 25, 45, 63, 81, 85, 15, 7, 32, 13, + 25, 9, 3, 4, 33, 21, 9, 9, 49, 23, + 37, 2, 22, 49, 21, 10, 31, 2, 25, 9, + 42, 31, 41, 3, 8, 51, 47, 117, 20, 32, + 50, 5, 19, 9, 39, 39, 39, 65, 63, 61, + 97, 81, 59, 99, 107, 115, 125, 97, 91, 73, + 35, 49, 12, 48, 8, 50, 92, 37, 43, 53, + 49, 97, 75, 63, 113, 71, 93, 83, 119, 107, + 101, 99, 41, 35, 107, 49, 53, 77, 101, 89, + 91, 93, 83, 101, 97, 103, 113, 125, 111, 25, + 41, 75, 5, 28, 20, 42, 44, 52, 98, 62, + 84, 84, 124, 104, 92, 124, 124, 76, 13, 57, + 97, 119, 125, 125, 125, 125, 32, 100, 86, 86, + 62, 88, 44, 34, 44, 24, 11, 2, 34, 8, + 72, 92, 4, 22, 40, 48, 12, 44, 80, 8, + 3, 24, 47, 97, 125, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 50 */ + + 32, 6, 39, 32, 6, 39, 48, 62, 58, 26, + 17, 81, 19, 3, 102, 28, 122, 17, 52, 66, + 3, 45, 6, 79, 113, 15, 
53, 125, 125, 125, + 124, 44, 13, 52, 66, 3, 7, 44, 42, 19, + 8, 2, 4, 19, 49, 13, 61, 10, 5, 2, + 11, 35, 15, 49, 36, 10, 4, 1, 26, 4, + 44, 0, 0, 0, 18, 69, 67, 26, 35, 15, + 36, 3, 45, 44, 106, 82, 124, 124, 64, 60, + 44, 76, 36, 22, 54, 13, 19, 21, 7, 54, + 4, 20, 1, 1, 0, 2, 18, 11, 29, 27, + 47, 38, 9, 14, 0, 29, 8, 15, 16, 6, + 3, 10, 20, 6, 9, 5, 12, 7, 0, 11, + 11, 25, 1, 11, 35, 4, 11, 25, 4, 24, + 5, 45, 27, 29, 35, 8, 9, 18, 14, 14, + 82, 38, 10, 11, 26, 11, 37, 0, 89, 19, + 1, 55, 15, 51, 8, 1, 37, 38, 16, 8, + 75, 8, 45, 21, 6, 67, 2, 34, 20, 28, + 34, 26, 12, 32, 24, 9, 1, 18, 0, 1, + 23, 11, 23, 21, 13, 19, 31, 31, 29, 29, + 25, 9, 57, 71, 33, 43, 45, 53, 55, 73, + 83, 71, 71, 91, 103, 89, 95, 125, 115, 119, + 13, 21, 63, 33, 49, 55, 67, 85, 91, 71, + 79, 67, 47, 57, 53, 37, 31, 12, 52, 36, + 22, 18, 22, 12, 2, 12, 30, 30, 60, 42, + 30, 16, 46, 28, 36, 22, 76, 4, 88, 62, + 44, 28, 54, 10, 1, 1, 10, 13, 94, 34, + 0, 2, 24, 2, 15, 2, 10, 66, 36, 4, + 7, 34, 10, 6, 6, 6, 124, 3, 14, 16, + 30, 20, 18, 44, 46, 34, 44, 54, 66, 1, + 12, 23, 1, 76, 13, 63, 7, 20, 5, 35, + 45, 25, 47, 65, 83, 87, 17, 7, 32, 13, + 27, 11, 3, 4, 35, 23, 9, 9, 51, 23, + 37, 2, 22, 49, 23, 8, 33, 2, 27, 11, + 42, 33, 43, 3, 8, 53, 49, 121, 18, 30, + 48, 7, 21, 11, 43, 43, 43, 69, 69, 67, + 103, 85, 61, 105, 113, 121, 125, 103, 95, 77, + 37, 51, 12, 50, 8, 52, 96, 41, 47, 57, + 53, 101, 79, 67, 117, 73, 97, 85, 123, 109, + 103, 101, 43, 35, 109, 51, 55, 81, 105, 93, + 95, 95, 87, 105, 99, 105, 115, 125, 113, 27, + 43, 77, 5, 28, 20, 44, 44, 54, 100, 64, + 86, 86, 124, 106, 94, 124, 124, 74, 17, 61, + 101, 125, 125, 125, 125, 125, 32, 100, 86, 86, + 62, 90, 44, 34, 44, 24, 11, 2, 36, 8, + 74, 94, 4, 24, 42, 50, 12, 44, 82, 8, + 3, 22, 51, 101, 125, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 51 */ + + 30, 6, 39, 30, 6, 39, 52, 66, 60, 26, + 19, 85, 23, 5, 102, 28, 124, 17, 54, 68, + 3, 47, 6, 81, 115, 15, 57, 125, 125, 
125, + 124, 46, 13, 54, 68, 3, 5, 46, 44, 19, + 8, 4, 6, 21, 51, 13, 61, 10, 5, 2, + 11, 35, 15, 49, 38, 10, 4, 1, 28, 4, + 44, 0, 0, 0, 18, 69, 67, 28, 37, 15, + 36, 3, 45, 48, 110, 84, 124, 124, 66, 62, + 48, 78, 38, 24, 58, 11, 19, 19, 5, 54, + 4, 20, 1, 1, 0, 4, 22, 11, 31, 29, + 49, 38, 9, 14, 2, 29, 10, 15, 18, 6, + 3, 10, 20, 4, 9, 5, 12, 7, 0, 11, + 13, 25, 1, 13, 37, 2, 13, 27, 4, 26, + 5, 47, 27, 27, 35, 8, 9, 18, 14, 14, + 84, 38, 10, 11, 28, 11, 39, 0, 91, 19, + 1, 55, 17, 53, 8, 1, 39, 38, 16, 8, + 79, 8, 47, 21, 6, 67, 0, 34, 20, 28, + 34, 26, 12, 32, 24, 9, 3, 18, 0, 1, + 23, 13, 25, 23, 15, 21, 33, 33, 29, 29, + 27, 9, 59, 75, 33, 45, 49, 57, 59, 77, + 87, 75, 75, 95, 107, 91, 97, 125, 119, 121, + 13, 21, 63, 35, 51, 57, 71, 87, 95, 73, + 81, 69, 47, 57, 53, 37, 31, 14, 52, 36, + 22, 18, 22, 12, 2, 14, 32, 30, 60, 42, + 30, 16, 48, 30, 38, 24, 80, 4, 88, 62, + 44, 28, 56, 10, 1, 1, 10, 13, 94, 34, + 1, 2, 24, 2, 15, 4, 10, 66, 36, 2, + 9, 34, 10, 6, 8, 8, 124, 1, 16, 16, + 32, 22, 20, 48, 48, 36, 46, 58, 68, 1, + 14, 23, 1, 80, 13, 63, 7, 20, 5, 37, + 45, 25, 49, 67, 85, 89, 17, 9, 32, 15, + 27, 11, 5, 4, 37, 23, 11, 9, 53, 23, + 39, 2, 22, 51, 23, 8, 35, 2, 29, 11, + 42, 35, 45, 5, 6, 55, 49, 123, 16, 28, + 48, 9, 23, 15, 47, 47, 47, 75, 73, 71, + 109, 91, 63, 111, 119, 125, 125, 109, 101, 81, + 39, 53, 12, 52, 8, 54, 100, 45, 51, 61, + 57, 105, 83, 69, 121, 77, 99, 87, 125, 113, + 105, 103, 43, 37, 111, 53, 59, 83, 109, 97, + 99, 99, 89, 109, 103, 107, 119, 125, 115, 29, + 45, 79, 5, 30, 20, 44, 46, 54, 102, 66, + 88, 88, 124, 108, 96, 124, 124, 74, 19, 65, + 107, 125, 125, 125, 125, 125, 34, 102, 88, 88, + 62, 92, 46, 34, 46, 26, 11, 4, 36, 10, + 76, 96, 4, 24, 42, 50, 12, 46, 84, 8, + 3, 20, 55, 107, 125, 125, 125, 125, 125, 125, + }, + + }, + + { + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 0 */ + + 124, 18, 21, 124, 18, 21, 125, 81, 20, 18, + 24, 76, 124, 124, 108, 44, 109, 3, 15, 31, + 22, 26, 13, 18, 58, 82, 124, 
122, 54, 11, + 125, 75, 25, 15, 31, 22, 11, 53, 22, 40, + 11, 37, 65, 8, 23, 47, 73, 14, 21, 43, + 8, 35, 45, 63, 5, 27, 13, 45, 17, 4, + 44, 0, 0, 0, 39, 45, 67, 17, 44, 2, + 96, 24, 33, 125, 55, 65, 35, 69, 77, 67, + 111, 71, 93, 77, 125, 33, 51, 61, 57, 48, + 3, 41, 125, 19, 81, 55, 125, 16, 14, 16, + 4, 20, 9, 21, 49, 79, 55, 51, 57, 25, + 47, 93, 83, 29, 97, 71, 125, 125, 125, 125, + 5, 29, 15, 17, 8, 16, 13, 23, 51, 111, + 23, 86, 82, 125, 18, 4, 10, 6, 4, 7, + 41, 21, 3, 22, 12, 4, 11, 13, 16, 15, + 10, 4, 44, 76, 62, 40, 32, 38, 24, 34, + 50, 5, 50, 42, 58, 51, 36, 70, 64, 124, + 124, 96, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 106, 124, 124, 124, 124, 124, 124, 124, + 112, 124, 124, 124, 54, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 106, 90, 76, 44, + 23, 17, 27, 56, 64, 56, 66, 36, 42, 36, + 74, 18, 5, 14, 19, 7, 105, 97, 15, 4, + 20, 5, 27, 33, 41, 47, 125, 75, 48, 20, + 4, 23, 27, 55, 87, 95, 117, 25, 38, 22, + 12, 10, 17, 11, 11, 21, 45, 5, 58, 62, + 64, 22, 16, 7, 19, 51, 22, 118, 110, 110, + 88, 52, 4, 19, 13, 29, 124, 125, 121, 93, + 125, 121, 83, 115, 107, 77, 107, 105, 117, 63, + 73, 63, 95, 101, 51, 33, 37, 43, 35, 17, + 1, 7, 14, 11, 11, 11, 11, 7, 27, 1, + 4, 7, 1, 12, 3, 5, 2, 24, 5, 15, + 23, 13, 17, 6, 52, 32, 56, 52, 44, 44, + 30, 44, 44, 8, 26, 46, 5, 26, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 108, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 98, 74, 52, 16, 3, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 86, + 66, 38, 30, 28, 36, 82, 82, 84, 86, 70, + 78, 58, 42, 48, 26, 13, 18, 15, 39, 62, + 28, 18, 43, 35, 27, 35, 33, 19, 21, 39, + 15, 7, 4, 5, 5, 8, 8, 124, 124, 124, + 124, 124, 120, 106, 72, 12, 15, 78, 54, 42, + 22, 12, 0, 3, 7, 37, 35, 25, 17, 29, + 17, 9, 13, 25, 5, 2, 12, 4, 6, 18, + 10, 124, 124, 124, 124, 124, 120, 106, 72, 12, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 1 */ + + 124, 18, 21, 124, 18, 21, 123, 77, 
22, 20, + 24, 74, 122, 124, 110, 44, 105, 3, 13, 29, + 22, 26, 11, 18, 56, 80, 122, 116, 50, 13, + 121, 73, 23, 13, 29, 22, 11, 51, 22, 40, + 9, 35, 63, 8, 23, 45, 71, 14, 19, 41, + 8, 33, 43, 61, 3, 25, 13, 43, 15, 4, + 44, 0, 0, 0, 37, 45, 67, 15, 44, 2, + 96, 24, 33, 121, 51, 61, 31, 63, 73, 63, + 107, 67, 89, 73, 121, 33, 49, 59, 55, 48, + 3, 39, 121, 17, 79, 53, 123, 16, 14, 16, + 4, 22, 9, 19, 47, 77, 53, 49, 55, 23, + 45, 89, 79, 27, 93, 67, 117, 117, 119, 121, + 3, 27, 13, 15, 8, 18, 11, 21, 49, 105, + 21, 82, 80, 121, 18, 6, 10, 8, 6, 5, + 37, 19, 1, 22, 12, 4, 9, 11, 14, 13, + 10, 4, 44, 74, 62, 40, 32, 38, 24, 34, + 48, 3, 50, 42, 58, 51, 36, 70, 64, 124, + 124, 94, 124, 124, 124, 122, 124, 124, 124, 124, + 124, 124, 104, 124, 124, 124, 124, 124, 124, 124, + 108, 124, 120, 124, 52, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 122, 104, 88, 74, 42, + 23, 17, 27, 56, 62, 54, 64, 34, 40, 34, + 72, 16, 5, 12, 19, 7, 103, 93, 13, 6, + 20, 3, 25, 31, 39, 45, 121, 71, 50, 22, + 6, 21, 25, 51, 83, 91, 113, 23, 40, 24, + 14, 12, 15, 9, 9, 19, 43, 5, 60, 62, + 64, 22, 18, 5, 19, 49, 22, 118, 110, 108, + 86, 52, 6, 17, 11, 27, 124, 121, 117, 89, + 121, 117, 79, 111, 103, 73, 103, 101, 111, 61, + 71, 61, 91, 97, 49, 31, 35, 41, 33, 15, + 1, 7, 14, 11, 11, 11, 9, 5, 25, 0, + 4, 5, 0, 12, 1, 3, 2, 24, 3, 13, + 21, 11, 15, 6, 50, 32, 54, 52, 44, 44, + 30, 44, 44, 8, 26, 44, 5, 24, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 104, 124, 124, 124, 124, 124, 124, 124, + 122, 124, 96, 72, 50, 16, 3, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 84, + 64, 36, 30, 28, 34, 80, 80, 82, 82, 68, + 76, 56, 40, 46, 24, 13, 16, 15, 39, 60, + 26, 16, 41, 33, 25, 33, 29, 15, 19, 37, + 13, 5, 6, 3, 3, 8, 8, 124, 124, 124, + 124, 120, 112, 98, 64, 8, 13, 78, 56, 44, + 24, 14, 2, 1, 5, 35, 33, 23, 15, 27, + 15, 7, 11, 23, 3, 4, 12, 6, 8, 18, + 10, 124, 124, 124, 124, 120, 112, 98, 64, 8, + }, + + { + /* Context Tables for P, SP, B 
Slices :: cabac_init_idc = 1, qp = 2 */ + + 124, 18, 21, 124, 18, 21, 119, 75, 22, 20, + 24, 72, 118, 122, 110, 44, 101, 3, 13, 27, + 22, 24, 11, 16, 52, 78, 116, 108, 44, 17, + 115, 71, 23, 13, 27, 22, 11, 49, 22, 38, + 9, 35, 61, 8, 23, 45, 71, 14, 19, 41, + 8, 33, 43, 61, 3, 25, 13, 43, 15, 4, + 44, 0, 0, 0, 35, 45, 67, 15, 42, 2, + 94, 24, 33, 117, 49, 59, 27, 59, 71, 61, + 103, 65, 87, 71, 117, 33, 49, 59, 55, 48, + 3, 37, 117, 17, 77, 51, 119, 16, 14, 16, + 2, 22, 9, 19, 45, 75, 51, 47, 53, 23, + 43, 87, 77, 25, 91, 65, 107, 109, 113, 115, + 3, 27, 13, 15, 8, 18, 11, 21, 49, 101, + 21, 78, 76, 115, 18, 6, 10, 8, 6, 5, + 33, 17, 1, 22, 12, 4, 7, 9, 12, 13, + 10, 4, 42, 72, 60, 40, 30, 38, 24, 34, + 46, 3, 48, 40, 56, 51, 36, 68, 62, 124, + 124, 92, 120, 124, 124, 118, 124, 124, 124, 124, + 124, 124, 100, 124, 124, 124, 124, 124, 124, 124, + 104, 124, 116, 124, 48, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 118, 100, 84, 70, 38, + 23, 17, 29, 54, 60, 52, 62, 32, 38, 32, + 68, 14, 5, 10, 21, 9, 101, 91, 11, 6, + 20, 3, 23, 29, 37, 43, 117, 69, 50, 22, + 6, 19, 23, 49, 79, 87, 109, 21, 42, 26, + 16, 14, 13, 9, 9, 19, 41, 5, 62, 62, + 62, 22, 18, 5, 19, 49, 22, 118, 108, 106, + 84, 52, 6, 17, 11, 27, 124, 119, 115, 87, + 117, 113, 77, 107, 99, 71, 99, 97, 107, 59, + 69, 61, 89, 93, 49, 31, 35, 39, 33, 15, + 1, 7, 12, 11, 11, 11, 9, 5, 23, 0, + 4, 5, 0, 12, 1, 3, 2, 22, 3, 13, + 21, 11, 13, 4, 48, 32, 52, 50, 42, 42, + 30, 42, 42, 8, 26, 42, 5, 22, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 100, 124, 124, 124, 124, 124, 124, 124, + 118, 118, 92, 68, 48, 14, 5, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 80, + 60, 32, 28, 26, 30, 78, 78, 78, 78, 64, + 72, 52, 38, 42, 22, 15, 14, 17, 41, 56, + 24, 14, 41, 33, 23, 33, 27, 13, 19, 35, + 11, 3, 6, 3, 1, 8, 8, 124, 124, 124, + 124, 114, 104, 90, 56, 2, 13, 78, 56, 44, + 24, 16, 2, 1, 5, 35, 33, 23, 15, 27, + 13, 5, 11, 23, 3, 4, 12, 6, 10, 18, + 10, 124, 124, 124, 
124, 114, 104, 90, 56, 2, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 3 */ + + 124, 18, 21, 124, 18, 21, 115, 71, 24, 20, + 22, 68, 114, 120, 110, 44, 97, 3, 11, 25, + 22, 24, 11, 16, 50, 76, 112, 102, 40, 19, + 109, 69, 23, 11, 25, 22, 13, 47, 22, 38, + 9, 35, 61, 8, 23, 45, 71, 14, 19, 39, + 8, 33, 41, 61, 3, 25, 13, 43, 15, 4, + 44, 0, 0, 0, 35, 45, 67, 13, 40, 2, + 92, 22, 33, 111, 47, 57, 25, 55, 67, 57, + 99, 61, 85, 69, 113, 33, 49, 57, 55, 48, + 3, 35, 113, 17, 75, 51, 115, 16, 12, 14, + 2, 22, 9, 17, 45, 73, 49, 47, 51, 21, + 41, 83, 73, 25, 89, 63, 97, 99, 107, 109, + 3, 27, 13, 13, 8, 18, 9, 19, 47, 97, + 21, 74, 72, 109, 18, 6, 10, 8, 6, 3, + 31, 15, 1, 22, 12, 4, 7, 7, 10, 13, + 10, 2, 42, 70, 60, 40, 30, 38, 24, 34, + 44, 3, 46, 38, 56, 51, 36, 68, 62, 124, + 124, 90, 116, 124, 124, 114, 124, 124, 124, 124, + 124, 122, 96, 124, 124, 124, 124, 124, 124, 120, + 100, 124, 112, 124, 44, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 114, 96, 80, 68, 34, + 23, 17, 29, 52, 58, 50, 60, 30, 36, 30, + 64, 12, 7, 8, 23, 9, 101, 87, 9, 8, + 20, 3, 21, 29, 37, 43, 113, 67, 50, 22, + 8, 17, 21, 47, 77, 85, 105, 19, 42, 26, + 16, 14, 11, 7, 9, 19, 41, 5, 62, 62, + 60, 22, 18, 5, 19, 47, 22, 116, 108, 104, + 82, 52, 6, 17, 11, 27, 124, 117, 111, 85, + 115, 111, 75, 103, 95, 69, 97, 93, 103, 59, + 67, 59, 87, 89, 47, 31, 35, 39, 31, 15, + 1, 7, 12, 11, 11, 13, 7, 3, 21, 0, + 4, 3, 0, 12, 1, 3, 2, 22, 3, 13, + 21, 11, 13, 2, 46, 32, 50, 48, 40, 42, + 30, 40, 40, 8, 26, 40, 5, 20, 124, 124, + 122, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 96, 124, 124, 124, 124, 124, 124, 124, + 114, 114, 88, 64, 44, 12, 7, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 118, 120, 76, + 56, 30, 26, 24, 28, 74, 74, 74, 74, 62, + 68, 48, 36, 40, 20, 17, 12, 19, 43, 54, + 22, 12, 41, 31, 23, 31, 25, 11, 19, 35, + 11, 3, 6, 1, 0, 8, 8, 124, 124, 124, + 118, 108, 96, 82, 48, 3, 13, 78, 56, 44, + 24, 16, 4, 1, 5, 33, 33, 23, 13, 25, 
+ 11, 3, 11, 21, 3, 4, 12, 6, 10, 18, + 10, 124, 124, 124, 118, 108, 96, 82, 48, 3, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 4 */ + + 124, 18, 21, 124, 18, 21, 113, 69, 24, 20, + 22, 66, 110, 118, 110, 42, 93, 3, 11, 23, + 20, 22, 11, 14, 46, 74, 106, 94, 34, 23, + 103, 67, 23, 11, 23, 20, 13, 45, 22, 36, + 9, 33, 59, 8, 23, 45, 71, 14, 19, 39, + 8, 33, 41, 59, 3, 25, 13, 43, 13, 4, + 44, 0, 0, 0, 33, 47, 67, 13, 38, 2, + 90, 22, 33, 107, 45, 55, 21, 51, 65, 55, + 97, 59, 81, 67, 109, 33, 47, 57, 55, 48, + 3, 33, 109, 17, 75, 49, 111, 16, 12, 14, + 0, 22, 9, 17, 43, 71, 47, 45, 49, 21, + 41, 81, 71, 23, 87, 61, 87, 91, 101, 103, + 3, 25, 13, 13, 8, 18, 9, 19, 47, 93, + 21, 70, 68, 105, 18, 8, 10, 8, 6, 3, + 27, 13, 0, 20, 12, 4, 5, 7, 8, 13, + 10, 2, 40, 68, 58, 38, 28, 38, 24, 34, + 42, 3, 44, 36, 54, 51, 34, 66, 60, 124, + 124, 88, 112, 124, 124, 110, 124, 124, 124, 124, + 124, 118, 92, 118, 124, 124, 124, 124, 124, 114, + 96, 124, 108, 124, 42, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 120, 110, 92, 76, 64, 30, + 23, 17, 31, 50, 56, 48, 56, 28, 32, 28, + 62, 10, 7, 6, 23, 11, 99, 85, 7, 8, + 20, 1, 21, 27, 35, 41, 109, 63, 50, 24, + 8, 17, 19, 45, 73, 81, 103, 19, 44, 28, + 18, 16, 9, 7, 9, 17, 39, 5, 64, 62, + 60, 20, 18, 5, 19, 47, 22, 116, 106, 102, + 80, 52, 6, 15, 11, 27, 124, 113, 109, 83, + 111, 107, 73, 101, 93, 67, 93, 91, 99, 57, + 65, 59, 85, 87, 47, 31, 35, 37, 31, 15, + 3, 7, 10, 11, 11, 13, 7, 3, 19, 0, + 4, 3, 0, 12, 1, 3, 2, 20, 3, 13, + 21, 11, 11, 0, 44, 32, 48, 48, 38, 40, + 30, 38, 38, 8, 26, 38, 5, 18, 124, 124, + 120, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 92, 124, 124, 124, 124, 124, 124, 124, + 108, 108, 84, 60, 42, 10, 7, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 114, 114, 72, + 52, 26, 24, 24, 24, 72, 72, 72, 70, 58, + 64, 46, 34, 36, 18, 19, 8, 21, 43, 50, + 18, 8, 39, 31, 21, 31, 23, 9, 19, 33, + 9, 1, 6, 1, 2, 8, 8, 124, 124, 124, + 112, 100, 88, 72, 40, 9, 11, 
78, 56, 44, + 24, 18, 4, 1, 5, 33, 33, 23, 13, 25, + 11, 1, 11, 21, 1, 6, 12, 6, 12, 18, + 10, 124, 124, 124, 112, 100, 88, 72, 40, 9, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 5 */ + + 124, 18, 21, 124, 18, 21, 109, 65, 24, 20, + 20, 64, 106, 116, 110, 42, 89, 3, 11, 21, + 20, 22, 11, 12, 42, 72, 102, 88, 30, 27, + 97, 65, 21, 11, 21, 20, 13, 43, 22, 36, + 9, 33, 57, 8, 23, 45, 71, 14, 19, 39, + 8, 33, 39, 59, 3, 25, 13, 43, 13, 4, + 44, 0, 0, 0, 33, 47, 67, 11, 36, 2, + 88, 20, 33, 101, 43, 53, 17, 47, 61, 51, + 93, 55, 79, 65, 103, 33, 47, 55, 53, 48, + 3, 31, 105, 17, 73, 49, 107, 16, 10, 12, + 0, 22, 9, 15, 43, 69, 45, 45, 47, 19, + 39, 77, 67, 21, 83, 59, 77, 83, 95, 97, + 1, 25, 11, 11, 8, 18, 7, 19, 45, 89, + 21, 66, 64, 99, 18, 8, 10, 8, 6, 1, + 25, 11, 0, 20, 12, 4, 5, 5, 6, 11, + 10, 0, 40, 66, 58, 38, 28, 38, 24, 34, + 40, 1, 42, 36, 54, 51, 34, 64, 58, 124, + 124, 86, 110, 124, 124, 106, 124, 124, 124, 124, + 122, 114, 88, 114, 124, 120, 124, 124, 124, 110, + 92, 124, 104, 124, 38, 124, 124, 124, 124, 124, + 124, 124, 124, 122, 116, 106, 88, 74, 60, 26, + 23, 17, 31, 48, 54, 46, 54, 26, 30, 26, + 58, 8, 9, 4, 25, 13, 97, 81, 5, 10, + 20, 1, 19, 27, 35, 39, 105, 61, 50, 24, + 10, 15, 17, 43, 71, 79, 99, 17, 46, 30, + 20, 16, 7, 5, 7, 17, 39, 5, 64, 62, + 58, 20, 18, 5, 19, 45, 22, 114, 104, 100, + 78, 52, 6, 15, 11, 25, 124, 111, 105, 79, + 107, 105, 71, 97, 89, 65, 89, 87, 95, 55, + 63, 57, 83, 83, 47, 31, 33, 37, 29, 15, + 3, 7, 10, 11, 11, 15, 5, 3, 17, 0, + 4, 3, 0, 12, 1, 3, 2, 20, 3, 13, + 21, 11, 11, 1, 42, 32, 46, 46, 38, 38, + 30, 38, 36, 8, 26, 36, 5, 16, 124, 124, + 118, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 88, 124, 124, 124, 124, 124, 124, 122, + 104, 104, 80, 58, 38, 10, 9, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 122, 110, 108, 68, + 48, 24, 24, 22, 20, 70, 68, 68, 66, 54, + 60, 42, 32, 34, 16, 19, 6, 23, 45, 48, + 16, 6, 39, 31, 19, 29, 21, 7, 17, 31, + 9, 1, 6, 0, 4, 8, 8, 
124, 124, 118, + 106, 94, 80, 64, 32, 15, 11, 78, 56, 44, + 24, 18, 4, 0, 3, 31, 33, 23, 11, 25, + 9, 0, 11, 21, 1, 6, 12, 8, 12, 18, + 10, 124, 124, 118, 106, 94, 80, 64, 32, 15, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 6 */ + + 124, 18, 23, 124, 18, 23, 105, 63, 26, 20, + 20, 60, 102, 114, 110, 42, 87, 3, 9, 21, + 20, 20, 9, 12, 40, 68, 96, 80, 24, 29, + 93, 63, 21, 9, 21, 20, 15, 43, 22, 34, + 9, 33, 57, 8, 23, 43, 69, 14, 17, 37, + 8, 31, 39, 59, 3, 25, 13, 43, 13, 4, + 44, 0, 0, 0, 31, 47, 67, 11, 36, 0, + 88, 20, 33, 97, 41, 51, 15, 41, 59, 49, + 89, 53, 77, 63, 99, 33, 47, 55, 53, 48, + 3, 29, 99, 17, 71, 47, 103, 14, 10, 12, + 1, 24, 9, 15, 41, 69, 45, 43, 45, 19, + 37, 75, 65, 21, 81, 57, 67, 73, 89, 91, + 1, 25, 11, 11, 8, 18, 7, 17, 45, 85, + 19, 62, 60, 93, 18, 8, 10, 8, 8, 1, + 21, 9, 0, 20, 12, 4, 3, 3, 4, 11, + 10, 0, 38, 64, 56, 38, 26, 38, 24, 34, + 36, 1, 40, 34, 52, 51, 34, 64, 58, 124, + 124, 84, 106, 124, 124, 102, 124, 124, 124, 124, + 114, 110, 86, 110, 124, 116, 124, 124, 124, 104, + 88, 124, 100, 124, 34, 124, 124, 124, 124, 124, + 124, 124, 124, 118, 112, 100, 84, 70, 58, 24, + 23, 17, 33, 46, 52, 44, 52, 24, 28, 24, + 54, 6, 9, 2, 27, 13, 97, 79, 3, 10, + 20, 1, 17, 25, 33, 39, 101, 59, 52, 24, + 10, 13, 15, 41, 67, 75, 95, 15, 46, 30, + 20, 18, 5, 5, 7, 17, 37, 5, 66, 62, + 56, 20, 18, 5, 19, 45, 20, 114, 104, 98, + 76, 50, 6, 15, 11, 25, 124, 109, 103, 77, + 105, 101, 69, 93, 85, 63, 87, 83, 91, 55, + 61, 57, 81, 79, 45, 31, 33, 35, 29, 15, + 3, 7, 8, 11, 11, 15, 5, 1, 15, 0, + 4, 1, 2, 12, 0, 1, 2, 18, 3, 13, + 21, 11, 9, 3, 40, 32, 44, 44, 36, 38, + 30, 36, 36, 8, 24, 32, 7, 14, 124, 124, + 116, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 84, 124, 124, 124, 124, 124, 124, 116, + 100, 98, 76, 54, 36, 8, 11, 124, 124, 124, + 124, 124, 124, 124, 124, 122, 116, 104, 102, 64, + 46, 20, 22, 20, 18, 66, 66, 64, 62, 52, + 56, 38, 30, 30, 14, 21, 4, 25, 47, 44, + 14, 4, 39, 29, 19, 29, 19, 5, 
17, 31, + 7, 0, 6, 0, 6, 8, 8, 124, 124, 114, + 100, 88, 72, 56, 24, 21, 11, 78, 56, 44, + 24, 20, 6, 0, 3, 31, 31, 21, 11, 23, + 7, 2, 9, 19, 1, 6, 12, 8, 14, 18, + 10, 124, 124, 114, 100, 88, 72, 56, 24, 21, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 7 */ + + 124, 18, 23, 124, 18, 23, 101, 59, 26, 20, + 18, 58, 98, 112, 110, 42, 83, 3, 9, 19, + 18, 20, 9, 10, 36, 66, 92, 74, 20, 33, + 87, 61, 21, 9, 19, 18, 15, 41, 22, 34, + 9, 31, 55, 8, 23, 43, 69, 14, 17, 37, + 8, 31, 37, 57, 3, 25, 13, 43, 11, 4, + 44, 0, 0, 0, 31, 47, 67, 9, 34, 0, + 86, 18, 33, 91, 39, 49, 11, 37, 55, 45, + 87, 49, 73, 61, 95, 33, 45, 53, 53, 48, + 3, 27, 95, 17, 69, 47, 99, 14, 8, 10, + 1, 24, 9, 13, 41, 67, 43, 43, 43, 17, + 35, 71, 61, 19, 79, 55, 57, 65, 83, 85, + 1, 23, 11, 9, 8, 18, 5, 17, 43, 81, + 19, 58, 56, 87, 18, 10, 10, 8, 8, 0, + 19, 7, 2, 18, 12, 4, 3, 3, 2, 11, + 10, 1, 38, 62, 56, 36, 26, 38, 24, 34, + 34, 1, 38, 32, 52, 51, 34, 62, 56, 120, + 124, 82, 102, 124, 124, 98, 124, 122, 124, 124, + 108, 106, 82, 104, 124, 110, 124, 124, 124, 98, + 84, 124, 96, 124, 32, 124, 124, 124, 124, 124, + 124, 124, 124, 114, 106, 96, 80, 66, 54, 20, + 23, 17, 33, 44, 50, 42, 48, 22, 26, 22, + 52, 4, 11, 0, 27, 15, 95, 75, 1, 12, + 20, 0, 17, 25, 33, 37, 97, 55, 52, 26, + 12, 13, 13, 39, 65, 73, 91, 15, 48, 32, + 22, 18, 3, 3, 7, 15, 37, 5, 66, 62, + 56, 18, 18, 5, 19, 43, 20, 112, 102, 96, + 74, 50, 6, 13, 11, 25, 124, 105, 99, 75, + 101, 99, 67, 91, 83, 61, 83, 81, 87, 53, + 59, 55, 79, 75, 45, 31, 33, 35, 27, 15, + 5, 7, 8, 11, 11, 17, 3, 1, 13, 0, + 4, 1, 2, 12, 0, 1, 2, 18, 3, 13, + 21, 11, 9, 5, 38, 32, 42, 44, 34, 36, + 30, 34, 34, 8, 24, 30, 7, 12, 122, 124, + 114, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 80, 124, 124, 124, 124, 124, 124, 112, + 96, 94, 72, 50, 32, 6, 11, 124, 124, 124, + 124, 124, 124, 124, 124, 118, 112, 100, 96, 60, + 42, 18, 20, 20, 14, 64, 62, 62, 58, 48, + 52, 36, 28, 28, 12, 23, 0, 27, 47, 42, + 10, 0, 37, 
29, 17, 27, 17, 3, 17, 29, + 7, 0, 6, 2, 8, 8, 8, 124, 124, 108, + 94, 80, 64, 48, 16, 27, 9, 78, 56, 44, + 24, 20, 6, 0, 3, 29, 31, 21, 9, 23, + 5, 4, 9, 19, 0, 8, 12, 8, 14, 18, + 10, 124, 124, 108, 94, 80, 64, 48, 16, 27, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 8 */ + + 124, 16, 23, 124, 16, 23, 99, 57, 26, 20, + 18, 54, 92, 110, 110, 40, 79, 5, 9, 17, + 18, 18, 9, 8, 32, 64, 86, 66, 14, 37, + 81, 59, 21, 9, 17, 18, 17, 39, 22, 32, + 9, 31, 55, 6, 25, 43, 69, 14, 17, 37, + 8, 31, 37, 57, 3, 25, 13, 43, 11, 4, + 44, 0, 0, 0, 29, 49, 67, 9, 32, 0, + 84, 18, 35, 87, 37, 47, 9, 33, 53, 43, + 83, 47, 71, 59, 91, 33, 45, 53, 53, 48, + 3, 25, 91, 17, 69, 45, 95, 14, 8, 10, + 3, 24, 9, 13, 39, 65, 41, 41, 43, 17, + 35, 69, 59, 19, 77, 53, 49, 57, 77, 81, + 1, 23, 11, 9, 6, 18, 5, 17, 43, 77, + 19, 54, 52, 83, 18, 10, 10, 8, 8, 0, + 15, 7, 2, 18, 10, 4, 1, 1, 1, 11, + 10, 1, 36, 58, 54, 36, 24, 38, 24, 32, + 32, 1, 36, 30, 50, 51, 32, 60, 54, 116, + 124, 78, 98, 124, 124, 92, 124, 118, 124, 124, + 100, 102, 78, 100, 124, 106, 124, 124, 124, 92, + 80, 124, 92, 124, 28, 124, 124, 124, 124, 124, + 124, 124, 120, 110, 102, 92, 76, 62, 50, 16, + 23, 19, 35, 42, 46, 40, 46, 20, 22, 18, + 48, 2, 11, 1, 29, 17, 95, 73, 0, 12, + 20, 0, 15, 23, 31, 37, 93, 53, 52, 26, + 12, 11, 11, 37, 61, 69, 89, 13, 48, 32, + 22, 20, 1, 3, 7, 15, 35, 7, 68, 62, + 54, 18, 18, 5, 19, 43, 20, 112, 100, 94, + 72, 50, 6, 13, 11, 25, 124, 103, 97, 73, + 99, 95, 65, 87, 79, 59, 81, 77, 83, 53, + 59, 55, 77, 73, 45, 31, 33, 33, 27, 15, + 5, 7, 6, 11, 11, 17, 3, 1, 11, 0, + 2, 1, 2, 10, 0, 1, 2, 16, 3, 13, + 21, 11, 7, 7, 36, 32, 38, 42, 32, 34, + 28, 32, 32, 8, 24, 28, 7, 8, 120, 120, + 112, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 120, 76, 124, 124, 124, 124, 124, 124, 106, + 90, 88, 68, 46, 30, 4, 13, 124, 124, 124, + 124, 124, 124, 124, 124, 112, 106, 94, 90, 56, + 38, 14, 18, 18, 10, 60, 60, 58, 54, 44, + 48, 32, 24, 24, 8, 25, 1, 29, 49, 38, + 8, 1, 
37, 29, 17, 27, 15, 1, 17, 29, + 5, 2, 6, 2, 8, 8, 6, 124, 120, 102, + 88, 74, 56, 38, 6, 33, 9, 78, 56, 44, + 24, 22, 6, 0, 3, 29, 31, 21, 9, 23, + 5, 4, 9, 19, 0, 8, 12, 8, 16, 18, + 8, 124, 120, 102, 88, 74, 56, 38, 6, 33, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 9 */ + + 124, 16, 23, 124, 16, 23, 95, 55, 28, 20, + 18, 52, 88, 108, 112, 40, 75, 5, 7, 15, + 18, 16, 9, 8, 30, 62, 82, 58, 8, 39, + 75, 57, 19, 7, 15, 18, 17, 37, 22, 32, + 7, 31, 53, 6, 25, 43, 69, 14, 17, 35, + 8, 31, 37, 57, 3, 25, 13, 41, 11, 4, + 44, 0, 0, 0, 27, 49, 67, 9, 30, 0, + 82, 18, 35, 83, 33, 45, 5, 29, 49, 41, + 79, 43, 69, 55, 85, 33, 45, 53, 51, 48, + 3, 23, 87, 15, 67, 43, 91, 14, 8, 10, + 3, 24, 9, 13, 37, 63, 39, 39, 41, 15, + 33, 67, 55, 17, 73, 51, 39, 47, 69, 75, + 0, 23, 9, 7, 6, 18, 5, 15, 41, 71, + 19, 50, 50, 77, 18, 10, 10, 8, 8, 2, + 11, 5, 2, 18, 10, 4, 0, 0, 3, 9, + 10, 1, 34, 56, 52, 36, 22, 38, 24, 32, + 30, 0, 34, 30, 48, 51, 32, 60, 54, 112, + 124, 76, 96, 124, 124, 88, 120, 114, 124, 124, + 94, 98, 74, 96, 124, 102, 124, 124, 124, 88, + 76, 124, 88, 124, 24, 124, 124, 124, 124, 124, + 124, 120, 116, 106, 98, 88, 74, 60, 48, 12, + 23, 19, 35, 42, 44, 38, 44, 18, 20, 16, + 44, 0, 11, 3, 31, 17, 93, 71, 2, 12, + 20, 0, 13, 21, 29, 35, 87, 51, 52, 26, + 12, 9, 9, 35, 57, 65, 85, 11, 50, 34, + 24, 22, 0, 3, 5, 15, 33, 7, 70, 62, + 52, 18, 20, 3, 19, 41, 20, 112, 100, 92, + 70, 50, 6, 13, 11, 23, 124, 101, 95, 69, + 95, 91, 63, 83, 75, 57, 77, 73, 79, 51, + 57, 53, 75, 69, 43, 29, 31, 31, 25, 15, + 5, 7, 4, 11, 11, 17, 3, 0, 9, 2, + 2, 0, 2, 10, 0, 1, 2, 14, 3, 11, + 19, 11, 5, 7, 34, 32, 36, 40, 32, 34, + 28, 32, 30, 8, 24, 26, 7, 6, 118, 118, + 112, 122, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 114, 72, 124, 124, 124, 124, 124, 124, 100, + 86, 84, 66, 44, 28, 4, 15, 124, 124, 124, + 124, 124, 124, 124, 124, 108, 102, 90, 86, 52, + 34, 10, 18, 16, 8, 58, 58, 54, 50, 42, + 46, 28, 22, 20, 6, 25, 3, 29, 51, 34, + 6, 3, 37, 27, 
15, 27, 13, 2, 15, 27, + 3, 4, 6, 4, 10, 8, 6, 124, 116, 98, + 82, 68, 48, 30, 1, 39, 9, 78, 56, 46, + 26, 24, 8, 2, 1, 29, 31, 21, 9, 21, + 3, 6, 9, 17, 0, 8, 12, 10, 18, 18, + 8, 124, 116, 98, 82, 68, 48, 30, 1, 39, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 10 */ + + 124, 16, 23, 124, 16, 23, 91, 51, 28, 20, + 16, 50, 84, 106, 112, 40, 71, 5, 7, 13, + 16, 16, 9, 6, 26, 60, 76, 52, 4, 43, + 69, 55, 19, 7, 13, 16, 17, 35, 22, 30, + 7, 29, 51, 6, 25, 43, 69, 14, 17, 35, + 8, 31, 35, 55, 3, 25, 13, 41, 9, 4, + 44, 0, 0, 0, 27, 49, 67, 7, 28, 0, + 80, 16, 35, 77, 31, 43, 1, 25, 47, 37, + 77, 41, 65, 53, 81, 33, 43, 51, 51, 48, + 3, 21, 83, 15, 65, 43, 87, 14, 6, 8, + 5, 24, 9, 11, 37, 61, 37, 39, 39, 15, + 31, 63, 53, 15, 71, 49, 29, 39, 63, 69, + 0, 21, 9, 7, 6, 18, 3, 15, 41, 67, + 19, 46, 46, 71, 18, 12, 10, 8, 8, 2, + 9, 3, 4, 16, 10, 4, 0, 0, 5, 9, + 10, 3, 34, 54, 52, 34, 22, 38, 24, 32, + 28, 0, 32, 28, 48, 51, 32, 58, 52, 108, + 124, 74, 92, 124, 124, 84, 114, 110, 124, 124, + 86, 94, 70, 90, 122, 96, 124, 124, 124, 82, + 72, 116, 84, 124, 22, 124, 124, 124, 124, 124, + 120, 116, 112, 102, 92, 84, 70, 56, 44, 8, + 23, 19, 37, 40, 42, 36, 40, 16, 18, 14, + 42, 1, 13, 5, 31, 19, 91, 67, 4, 14, + 20, 2, 13, 21, 29, 33, 83, 47, 52, 28, + 14, 9, 7, 33, 55, 63, 81, 11, 52, 36, + 26, 22, 2, 1, 5, 13, 33, 7, 70, 62, + 52, 16, 20, 3, 19, 41, 20, 110, 98, 90, + 68, 50, 6, 11, 11, 23, 124, 97, 91, 67, + 91, 89, 61, 81, 73, 55, 73, 71, 75, 49, + 55, 53, 73, 65, 43, 29, 31, 31, 25, 15, + 7, 7, 4, 11, 11, 19, 1, 0, 7, 2, + 2, 0, 2, 10, 0, 1, 2, 14, 3, 11, + 19, 11, 5, 9, 32, 32, 34, 40, 30, 32, + 28, 30, 28, 8, 24, 24, 7, 4, 116, 116, + 110, 118, 120, 124, 124, 124, 124, 124, 124, 124, + 124, 110, 68, 124, 124, 124, 124, 124, 124, 96, + 82, 78, 62, 40, 24, 2, 15, 124, 124, 124, + 124, 124, 124, 124, 124, 104, 96, 86, 80, 48, + 30, 8, 16, 16, 4, 56, 54, 52, 46, 38, + 42, 26, 20, 18, 4, 27, 7, 31, 51, 32, + 2, 7, 35, 27, 13, 25, 11, 4, 15, 
25, + 3, 4, 6, 4, 12, 8, 6, 124, 112, 92, + 76, 60, 40, 22, 9, 45, 7, 78, 56, 46, + 26, 24, 8, 2, 1, 27, 31, 21, 7, 21, + 1, 8, 9, 17, 2, 10, 12, 10, 18, 18, + 8, 124, 112, 92, 76, 60, 40, 22, 9, 45, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 11 */ + + 124, 16, 25, 124, 16, 25, 87, 49, 30, 20, + 16, 46, 80, 104, 112, 40, 69, 5, 5, 13, + 16, 14, 7, 6, 24, 56, 72, 44, 1, 45, + 65, 53, 19, 5, 13, 16, 19, 35, 22, 30, + 7, 29, 51, 6, 25, 41, 67, 14, 15, 33, + 8, 29, 35, 55, 3, 25, 13, 41, 9, 4, + 44, 0, 0, 0, 25, 49, 67, 7, 28, 1, + 80, 16, 35, 73, 29, 41, 0, 19, 43, 35, + 73, 37, 63, 51, 77, 33, 43, 51, 51, 48, + 3, 19, 77, 15, 63, 41, 83, 12, 6, 8, + 5, 26, 9, 11, 35, 61, 37, 37, 37, 13, + 29, 61, 49, 15, 69, 47, 19, 29, 57, 63, + 0, 21, 9, 5, 6, 18, 3, 13, 39, 63, + 17, 42, 42, 65, 18, 12, 10, 8, 10, 4, + 5, 1, 4, 16, 10, 4, 2, 2, 7, 9, + 10, 3, 32, 52, 50, 34, 20, 38, 24, 32, + 24, 0, 30, 26, 46, 51, 32, 58, 52, 104, + 124, 72, 88, 122, 124, 80, 110, 106, 124, 124, + 80, 90, 68, 86, 114, 92, 124, 124, 124, 76, + 68, 110, 80, 124, 18, 124, 124, 124, 124, 124, + 116, 110, 108, 98, 88, 78, 66, 52, 42, 6, + 23, 19, 37, 38, 40, 34, 38, 14, 16, 12, + 38, 3, 13, 7, 33, 19, 91, 65, 6, 14, + 20, 2, 11, 19, 27, 33, 79, 45, 54, 28, + 14, 7, 5, 31, 51, 59, 77, 9, 52, 36, + 26, 24, 4, 1, 5, 13, 31, 7, 72, 62, + 50, 16, 20, 3, 19, 39, 18, 110, 98, 88, + 66, 48, 6, 11, 11, 23, 124, 95, 89, 65, + 89, 85, 59, 77, 69, 53, 71, 67, 71, 49, + 53, 51, 71, 61, 41, 29, 31, 29, 23, 15, + 7, 7, 2, 11, 11, 19, 1, 2, 5, 2, + 2, 2, 4, 10, 2, 0, 2, 12, 3, 11, + 19, 11, 3, 11, 30, 32, 32, 38, 28, 32, + 28, 28, 28, 8, 22, 20, 9, 2, 112, 114, + 108, 116, 116, 124, 124, 124, 124, 124, 124, 124, + 124, 104, 64, 124, 124, 124, 124, 124, 124, 90, + 78, 74, 58, 36, 22, 0, 17, 124, 124, 124, + 124, 124, 124, 120, 118, 98, 92, 80, 74, 44, + 28, 4, 14, 14, 2, 52, 52, 48, 42, 36, + 38, 22, 18, 14, 2, 29, 9, 33, 53, 28, + 0, 9, 35, 25, 13, 25, 9, 6, 15, 25, + 1, 6, 6, 6, 
14, 8, 6, 124, 108, 88, + 70, 54, 32, 14, 17, 51, 7, 78, 56, 46, + 26, 26, 10, 2, 1, 27, 29, 19, 7, 19, + 0, 10, 7, 15, 2, 10, 12, 10, 20, 18, + 8, 124, 108, 88, 70, 54, 32, 14, 17, 51, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 12 */ + + 124, 16, 25, 124, 16, 25, 85, 45, 30, 20, + 14, 44, 76, 102, 112, 38, 65, 5, 5, 11, + 16, 14, 7, 4, 20, 54, 66, 38, 5, 49, + 59, 51, 19, 5, 11, 16, 19, 33, 22, 28, + 7, 29, 49, 6, 25, 41, 67, 14, 15, 33, + 8, 29, 33, 55, 3, 25, 13, 41, 9, 4, + 44, 0, 0, 0, 25, 51, 67, 5, 26, 1, + 78, 14, 35, 67, 27, 39, 4, 15, 41, 31, + 69, 35, 61, 49, 73, 33, 43, 49, 51, 48, + 3, 17, 73, 15, 63, 41, 79, 12, 4, 6, + 7, 26, 9, 9, 35, 59, 35, 37, 35, 13, + 29, 57, 47, 13, 67, 45, 9, 21, 51, 57, + 0, 21, 9, 5, 6, 18, 1, 13, 39, 59, + 17, 38, 38, 61, 18, 12, 10, 8, 10, 4, + 3, 0, 4, 16, 10, 4, 2, 4, 9, 9, + 10, 5, 32, 50, 50, 34, 20, 38, 24, 32, + 22, 0, 28, 24, 46, 51, 30, 56, 50, 100, + 124, 70, 84, 118, 120, 76, 104, 102, 124, 124, + 72, 86, 64, 82, 108, 86, 116, 124, 124, 70, + 64, 102, 76, 124, 14, 124, 124, 124, 124, 124, + 112, 106, 104, 94, 84, 74, 62, 48, 38, 2, + 23, 19, 39, 36, 38, 32, 36, 12, 12, 10, + 34, 5, 15, 9, 35, 21, 89, 61, 8, 16, + 20, 2, 9, 19, 27, 31, 75, 43, 54, 28, + 16, 5, 3, 29, 49, 57, 75, 7, 54, 38, + 28, 24, 6, 0, 5, 13, 31, 7, 72, 62, + 48, 16, 20, 3, 19, 39, 18, 108, 96, 86, + 64, 48, 6, 11, 11, 23, 124, 93, 85, 63, + 85, 83, 57, 73, 65, 51, 67, 63, 67, 47, + 51, 51, 69, 59, 41, 29, 31, 29, 23, 15, + 7, 7, 2, 11, 11, 21, 0, 2, 3, 2, + 2, 2, 4, 10, 2, 0, 2, 12, 3, 11, + 19, 11, 3, 13, 28, 32, 30, 36, 26, 30, + 28, 26, 26, 8, 22, 18, 9, 0, 110, 112, + 106, 112, 112, 124, 122, 124, 124, 124, 124, 124, + 122, 100, 60, 124, 124, 124, 124, 124, 118, 86, + 72, 68, 54, 32, 18, 1, 19, 124, 124, 124, + 124, 124, 124, 114, 112, 94, 86, 76, 68, 40, + 24, 2, 12, 12, 1, 50, 48, 44, 38, 32, + 34, 18, 16, 12, 0, 31, 11, 35, 55, 26, + 1, 11, 35, 25, 11, 23, 7, 8, 15, 23, + 1, 6, 6, 6, 16, 8, 6, 122, 
104, 82, + 64, 48, 24, 4, 25, 57, 7, 78, 56, 46, + 26, 26, 10, 2, 1, 25, 29, 19, 5, 19, + 0, 12, 7, 15, 2, 10, 12, 10, 20, 18, + 8, 122, 104, 82, 64, 48, 24, 4, 25, 57, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 13 */ + + 124, 16, 25, 124, 16, 25, 81, 43, 30, 20, + 14, 42, 72, 100, 112, 38, 61, 5, 5, 9, + 14, 12, 7, 2, 16, 52, 62, 30, 11, 53, + 53, 49, 17, 5, 9, 14, 19, 31, 22, 28, + 7, 27, 47, 6, 25, 41, 67, 14, 15, 33, + 8, 29, 33, 53, 3, 25, 13, 41, 7, 4, + 44, 0, 0, 0, 23, 51, 67, 5, 24, 1, + 76, 14, 35, 63, 25, 37, 8, 11, 37, 29, + 67, 31, 57, 47, 67, 33, 41, 49, 49, 48, + 3, 15, 69, 15, 61, 39, 75, 12, 4, 6, + 7, 26, 9, 9, 33, 57, 33, 35, 33, 11, + 27, 55, 43, 11, 63, 43, 0, 13, 45, 51, + 2, 19, 7, 3, 6, 18, 1, 13, 37, 55, + 17, 34, 34, 55, 18, 14, 10, 8, 10, 6, + 0, 2, 6, 14, 10, 4, 4, 4, 11, 7, + 10, 5, 30, 48, 48, 32, 18, 38, 24, 32, + 20, 2, 26, 24, 44, 51, 30, 54, 48, 96, + 124, 68, 82, 114, 116, 72, 100, 98, 124, 124, + 66, 82, 60, 76, 102, 82, 110, 124, 124, 66, + 60, 96, 72, 124, 12, 124, 124, 124, 122, 120, + 108, 102, 100, 90, 78, 70, 58, 46, 34, 1, + 23, 19, 39, 34, 36, 30, 32, 10, 10, 8, + 32, 7, 15, 11, 35, 23, 87, 59, 10, 16, + 20, 4, 9, 17, 25, 29, 71, 39, 54, 30, + 16, 5, 1, 27, 45, 53, 71, 7, 56, 40, + 30, 26, 8, 0, 3, 11, 29, 7, 74, 62, + 48, 14, 20, 3, 19, 37, 18, 108, 94, 84, + 62, 48, 6, 9, 11, 21, 124, 89, 83, 59, + 81, 79, 55, 71, 63, 49, 63, 61, 63, 45, + 49, 49, 67, 55, 41, 29, 29, 27, 21, 15, + 9, 7, 0, 11, 11, 21, 0, 2, 1, 2, + 2, 2, 4, 10, 2, 0, 2, 10, 3, 11, + 19, 11, 1, 15, 26, 32, 28, 36, 26, 28, + 28, 26, 24, 8, 22, 16, 9, 1, 108, 110, + 104, 108, 108, 124, 118, 122, 124, 118, 124, 124, + 116, 94, 56, 124, 124, 124, 124, 118, 112, 80, + 68, 64, 50, 30, 16, 1, 19, 124, 124, 124, + 124, 118, 118, 110, 106, 90, 82, 72, 62, 36, + 20, 1, 12, 12, 5, 48, 46, 42, 34, 28, + 30, 16, 14, 8, 1, 31, 15, 37, 55, 22, + 5, 15, 33, 25, 9, 23, 5, 10, 13, 21, + 0, 8, 6, 8, 18, 8, 6, 120, 100, 76, + 58, 40, 16, 
3, 33, 63, 5, 78, 56, 46, + 26, 28, 10, 4, 0, 25, 29, 19, 5, 19, + 2, 14, 7, 15, 4, 12, 12, 12, 22, 18, + 8, 120, 100, 76, 58, 40, 16, 3, 33, 63, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 14 */ + + 122, 16, 25, 122, 16, 25, 77, 39, 32, 20, + 12, 38, 68, 98, 112, 38, 57, 5, 3, 7, + 14, 12, 7, 2, 14, 50, 56, 24, 15, 55, + 47, 47, 17, 3, 7, 14, 21, 29, 22, 26, + 7, 27, 47, 6, 25, 41, 67, 14, 15, 31, + 8, 29, 31, 53, 3, 25, 13, 41, 7, 4, + 44, 0, 0, 0, 23, 51, 67, 3, 22, 1, + 74, 12, 35, 57, 23, 35, 10, 7, 35, 25, + 63, 29, 55, 45, 63, 33, 41, 47, 49, 48, + 3, 13, 65, 15, 59, 39, 71, 12, 2, 4, + 9, 26, 9, 7, 33, 55, 31, 35, 31, 11, + 25, 51, 41, 11, 61, 41, 10, 3, 39, 45, + 2, 19, 7, 3, 6, 18, 0, 11, 37, 51, + 17, 30, 30, 49, 18, 14, 10, 8, 10, 6, + 2, 4, 6, 14, 10, 4, 4, 6, 13, 7, + 10, 7, 30, 46, 48, 32, 18, 38, 24, 32, + 18, 2, 24, 22, 44, 51, 30, 54, 48, 92, + 122, 66, 78, 110, 110, 68, 94, 94, 124, 124, + 58, 78, 56, 72, 96, 76, 104, 122, 124, 60, + 56, 88, 68, 124, 8, 120, 124, 120, 116, 114, + 104, 98, 96, 86, 74, 66, 54, 42, 32, 5, + 23, 19, 41, 32, 34, 28, 30, 8, 8, 6, + 28, 9, 17, 13, 37, 23, 87, 55, 12, 18, + 20, 4, 7, 17, 25, 29, 67, 37, 54, 30, + 18, 3, 0, 25, 43, 51, 67, 5, 56, 40, + 30, 26, 10, 2, 3, 11, 29, 7, 74, 62, + 46, 14, 20, 3, 19, 37, 18, 106, 94, 82, + 60, 48, 6, 9, 11, 21, 124, 87, 79, 57, + 79, 77, 53, 67, 59, 47, 61, 57, 59, 45, + 47, 49, 65, 51, 39, 29, 29, 27, 21, 15, + 9, 7, 0, 11, 11, 23, 2, 4, 0, 2, + 2, 4, 4, 10, 2, 0, 2, 10, 3, 11, + 19, 11, 1, 17, 24, 32, 26, 34, 24, 28, + 28, 24, 22, 8, 22, 14, 9, 3, 106, 108, + 102, 106, 104, 120, 114, 118, 118, 114, 124, 120, + 110, 90, 52, 124, 124, 124, 124, 110, 106, 76, + 64, 58, 46, 26, 12, 3, 21, 124, 124, 124, + 120, 112, 114, 104, 100, 84, 76, 66, 56, 32, + 16, 3, 10, 10, 7, 44, 42, 38, 30, 26, + 26, 12, 12, 6, 3, 33, 17, 39, 57, 20, + 7, 17, 33, 23, 9, 21, 3, 12, 13, 21, + 0, 8, 6, 8, 20, 8, 6, 118, 96, 72, + 52, 34, 8, 11, 41, 69, 5, 78, 56, 46, + 26, 
28, 12, 4, 0, 23, 29, 19, 3, 17, + 4, 16, 7, 13, 4, 12, 12, 12, 22, 18, + 8, 118, 96, 72, 52, 34, 8, 11, 41, 69, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 15 */ + + 120, 16, 25, 120, 16, 25, 73, 37, 32, 20, + 12, 36, 64, 96, 112, 38, 53, 5, 3, 5, + 14, 10, 7, 0, 10, 48, 52, 16, 21, 59, + 41, 45, 17, 3, 5, 14, 21, 27, 22, 26, + 7, 27, 45, 6, 25, 41, 67, 14, 15, 31, + 8, 29, 31, 53, 3, 25, 13, 41, 7, 4, + 44, 0, 0, 0, 21, 51, 67, 3, 20, 1, + 72, 12, 35, 53, 21, 33, 14, 3, 31, 23, + 59, 25, 53, 43, 59, 33, 41, 47, 49, 48, + 3, 11, 61, 15, 57, 37, 67, 12, 2, 4, + 9, 26, 9, 7, 31, 53, 29, 33, 29, 9, + 23, 49, 37, 9, 59, 39, 20, 4, 33, 39, + 2, 19, 7, 1, 6, 18, 0, 11, 35, 47, + 17, 26, 26, 43, 18, 14, 10, 8, 10, 8, + 6, 6, 6, 14, 10, 4, 6, 8, 15, 7, + 10, 7, 28, 44, 46, 32, 16, 38, 24, 32, + 16, 2, 22, 20, 42, 51, 30, 52, 46, 88, + 116, 64, 74, 106, 106, 64, 90, 90, 124, 124, + 52, 74, 52, 68, 90, 72, 98, 114, 124, 54, + 52, 82, 64, 124, 4, 116, 124, 116, 112, 110, + 100, 94, 92, 82, 70, 62, 50, 38, 28, 9, + 23, 19, 41, 30, 32, 26, 28, 6, 6, 4, + 24, 11, 17, 15, 39, 25, 85, 53, 14, 18, + 20, 4, 5, 15, 23, 27, 63, 35, 54, 30, + 18, 1, 2, 23, 39, 47, 63, 3, 58, 42, + 32, 28, 12, 2, 3, 11, 27, 7, 76, 62, + 44, 14, 20, 3, 19, 35, 18, 106, 92, 80, + 58, 48, 6, 9, 11, 21, 124, 85, 77, 55, + 75, 73, 51, 63, 55, 45, 57, 53, 55, 43, + 45, 47, 63, 47, 39, 29, 29, 25, 19, 15, + 9, 7, 1, 11, 11, 23, 2, 4, 2, 2, + 2, 4, 4, 10, 2, 0, 2, 8, 3, 11, + 19, 11, 0, 19, 22, 32, 24, 32, 22, 26, + 28, 22, 20, 8, 22, 12, 9, 5, 104, 106, + 100, 102, 100, 116, 110, 114, 114, 108, 122, 114, + 104, 84, 48, 124, 124, 124, 124, 104, 100, 70, + 60, 54, 42, 22, 10, 5, 23, 124, 124, 124, + 116, 106, 108, 100, 94, 80, 72, 62, 50, 28, + 12, 7, 8, 8, 11, 42, 40, 34, 26, 22, + 22, 8, 10, 2, 5, 35, 19, 41, 59, 16, + 9, 19, 33, 23, 7, 21, 1, 14, 13, 19, + 2, 10, 6, 10, 22, 8, 6, 116, 92, 66, + 46, 28, 0, 19, 49, 75, 5, 78, 56, 46, + 26, 30, 12, 4, 0, 23, 29, 19, 3, 17, + 
6, 18, 7, 13, 4, 12, 12, 12, 24, 18, + 8, 116, 92, 66, 46, 28, 0, 19, 49, 75, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 16 */ + + 116, 14, 27, 116, 14, 27, 71, 35, 32, 20, + 10, 32, 58, 94, 112, 36, 51, 7, 3, 5, + 12, 8, 7, 1, 6, 44, 46, 8, 27, 63, + 37, 45, 17, 3, 5, 12, 23, 27, 22, 24, + 7, 27, 45, 4, 27, 41, 67, 12, 15, 31, + 8, 29, 31, 53, 3, 25, 15, 41, 7, 4, + 44, 0, 0, 0, 21, 53, 67, 3, 18, 3, + 70, 10, 37, 49, 19, 31, 16, 0, 29, 21, + 57, 23, 51, 41, 55, 33, 41, 47, 49, 48, + 3, 11, 57, 15, 57, 37, 65, 10, 0, 2, + 11, 26, 9, 7, 31, 53, 29, 33, 29, 9, + 23, 47, 35, 9, 57, 37, 28, 12, 27, 35, + 2, 19, 7, 1, 4, 18, 0, 11, 35, 43, + 17, 22, 22, 39, 18, 14, 10, 8, 10, 8, + 8, 6, 6, 12, 8, 4, 6, 8, 19, 7, + 10, 9, 26, 40, 44, 30, 14, 38, 24, 30, + 12, 2, 20, 18, 40, 51, 28, 50, 44, 82, + 108, 60, 70, 100, 100, 58, 84, 86, 110, 124, + 44, 68, 48, 62, 82, 66, 90, 104, 118, 48, + 48, 74, 60, 124, 0, 110, 118, 110, 106, 104, + 94, 88, 86, 78, 64, 56, 46, 34, 24, 13, + 23, 21, 43, 28, 28, 22, 24, 2, 2, 0, + 20, 13, 19, 17, 41, 27, 85, 51, 14, 18, + 20, 4, 5, 15, 23, 27, 59, 33, 54, 30, + 18, 1, 2, 21, 37, 45, 61, 3, 58, 42, + 32, 28, 14, 2, 3, 11, 27, 9, 76, 60, + 42, 12, 20, 3, 19, 35, 16, 104, 90, 76, + 56, 46, 6, 9, 11, 21, 124, 83, 75, 53, + 73, 71, 49, 61, 53, 43, 55, 51, 51, 43, + 45, 47, 61, 45, 39, 29, 29, 25, 19, 15, + 11, 9, 3, 11, 13, 25, 2, 4, 4, 2, + 0, 4, 4, 8, 2, 0, 2, 6, 3, 11, + 19, 11, 0, 21, 20, 32, 20, 30, 20, 24, + 26, 20, 18, 8, 20, 8, 11, 9, 100, 102, + 98, 98, 96, 110, 104, 108, 108, 102, 116, 108, + 96, 78, 44, 124, 124, 122, 120, 96, 92, 64, + 54, 48, 38, 18, 6, 7, 25, 118, 120, 120, + 110, 100, 102, 94, 86, 74, 66, 56, 44, 24, + 8, 11, 6, 6, 15, 38, 36, 30, 20, 18, + 18, 4, 6, 1, 9, 37, 23, 43, 61, 12, + 13, 23, 33, 23, 7, 21, 0, 16, 13, 19, + 2, 10, 6, 10, 22, 8, 4, 112, 88, 60, + 38, 20, 7, 29, 59, 81, 5, 78, 56, 46, + 26, 30, 12, 4, 0, 23, 29, 19, 3, 17, + 6, 18, 7, 13, 4, 12, 12, 12, 24, 16, + 6, 112, 
88, 60, 38, 20, 7, 29, 59, 81, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 17 */ + + 114, 14, 27, 114, 14, 27, 67, 31, 34, 22, + 10, 30, 54, 92, 114, 36, 47, 7, 1, 3, + 12, 8, 5, 1, 4, 42, 42, 2, 31, 65, + 31, 43, 15, 1, 3, 12, 23, 25, 22, 24, + 5, 25, 43, 4, 27, 39, 65, 12, 13, 29, + 8, 27, 29, 51, 1, 23, 15, 39, 5, 4, + 44, 0, 0, 0, 19, 53, 67, 1, 18, 3, + 70, 10, 37, 43, 15, 27, 20, 6, 25, 17, + 53, 19, 47, 37, 49, 33, 39, 45, 47, 48, + 3, 9, 51, 13, 55, 35, 61, 10, 0, 2, + 11, 28, 9, 5, 29, 51, 27, 31, 27, 7, + 21, 43, 31, 7, 53, 33, 38, 22, 19, 29, + 4, 17, 5, 0, 4, 20, 2, 9, 33, 37, + 15, 18, 20, 33, 18, 16, 10, 10, 12, 10, + 12, 8, 8, 12, 8, 4, 8, 10, 21, 5, + 10, 9, 26, 38, 44, 30, 14, 38, 24, 30, + 10, 4, 20, 18, 40, 51, 28, 50, 44, 78, + 102, 58, 68, 96, 96, 54, 80, 82, 98, 124, + 38, 64, 46, 58, 76, 62, 84, 96, 110, 44, + 44, 68, 56, 124, 1, 106, 114, 106, 102, 100, + 90, 84, 82, 74, 60, 52, 44, 32, 22, 15, + 23, 21, 43, 28, 26, 20, 22, 0, 0, 1, + 18, 15, 19, 19, 41, 27, 83, 47, 16, 20, + 20, 6, 3, 13, 21, 25, 53, 29, 56, 32, + 20, 0, 4, 17, 33, 41, 57, 1, 60, 44, + 34, 30, 16, 4, 1, 9, 25, 9, 78, 60, + 42, 12, 22, 1, 19, 33, 16, 104, 90, 74, + 54, 46, 8, 7, 9, 19, 124, 79, 71, 49, + 69, 67, 45, 57, 49, 39, 51, 47, 45, 41, + 43, 45, 57, 41, 37, 27, 27, 23, 17, 13, + 11, 9, 3, 11, 13, 25, 4, 6, 6, 4, + 0, 6, 6, 8, 4, 2, 2, 6, 1, 9, + 17, 9, 2, 21, 18, 32, 18, 30, 20, 24, + 26, 20, 18, 8, 20, 6, 11, 11, 98, 100, + 98, 96, 94, 106, 100, 104, 104, 98, 112, 104, + 90, 74, 40, 122, 120, 114, 112, 90, 86, 60, + 50, 44, 36, 16, 4, 7, 25, 114, 116, 116, + 106, 96, 98, 90, 80, 70, 62, 52, 40, 22, + 6, 13, 6, 6, 17, 36, 34, 28, 16, 16, + 16, 2, 4, 3, 11, 37, 25, 43, 61, 10, + 15, 25, 31, 21, 5, 19, 4, 20, 11, 17, + 4, 12, 8, 12, 24, 8, 4, 110, 84, 56, + 32, 14, 15, 37, 67, 85, 3, 78, 58, 48, + 28, 32, 14, 6, 2, 21, 27, 17, 1, 15, + 8, 20, 5, 11, 6, 14, 12, 14, 26, 16, + 6, 110, 84, 56, 32, 14, 15, 37, 67, 85, + }, + + { + /* 
Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 18 */ + + 112, 14, 27, 112, 14, 27, 63, 29, 34, 22, + 10, 28, 50, 90, 114, 36, 43, 7, 1, 1, + 12, 6, 5, 3, 0, 40, 36, 5, 37, 69, + 25, 41, 15, 1, 1, 12, 23, 23, 22, 22, + 5, 25, 41, 4, 27, 39, 65, 12, 13, 29, + 8, 27, 29, 51, 1, 23, 15, 39, 5, 4, + 44, 0, 0, 0, 17, 53, 67, 1, 16, 3, + 68, 10, 37, 39, 13, 25, 24, 10, 23, 15, + 49, 17, 45, 35, 45, 33, 39, 45, 47, 48, + 3, 7, 47, 13, 53, 33, 57, 10, 0, 2, + 13, 28, 9, 5, 27, 49, 25, 29, 25, 7, + 19, 41, 29, 5, 51, 31, 48, 30, 13, 23, + 4, 17, 5, 0, 4, 20, 2, 9, 33, 33, + 15, 14, 16, 27, 18, 16, 10, 10, 12, 10, + 16, 10, 8, 12, 8, 4, 10, 12, 23, 5, + 10, 9, 24, 36, 42, 30, 12, 38, 24, 30, + 8, 4, 18, 16, 38, 51, 28, 48, 42, 74, + 96, 56, 64, 92, 92, 50, 76, 78, 86, 124, + 30, 60, 42, 54, 70, 58, 78, 88, 102, 38, + 40, 62, 52, 124, 5, 102, 110, 102, 98, 96, + 86, 80, 78, 70, 56, 48, 40, 28, 18, 19, + 23, 21, 45, 26, 24, 18, 20, 1, 1, 3, + 14, 17, 19, 21, 43, 29, 81, 45, 18, 20, + 20, 6, 1, 11, 19, 23, 49, 27, 56, 32, + 20, 2, 6, 15, 29, 37, 53, 0, 62, 46, + 36, 32, 18, 4, 1, 9, 23, 9, 80, 60, + 40, 12, 22, 1, 19, 33, 16, 104, 88, 72, + 52, 46, 8, 7, 9, 19, 124, 77, 69, 47, + 65, 63, 43, 53, 45, 37, 47, 43, 41, 39, + 41, 45, 55, 37, 37, 27, 27, 21, 17, 13, + 11, 9, 5, 11, 13, 25, 4, 6, 8, 4, + 0, 6, 6, 8, 4, 2, 2, 4, 1, 9, + 17, 9, 4, 23, 16, 32, 16, 28, 18, 22, + 26, 18, 16, 8, 20, 4, 11, 13, 96, 98, + 96, 92, 90, 102, 96, 100, 100, 92, 106, 98, + 84, 68, 36, 114, 112, 106, 102, 84, 80, 54, + 46, 38, 32, 12, 2, 9, 27, 110, 112, 110, + 102, 90, 92, 84, 74, 66, 56, 48, 34, 18, + 2, 17, 4, 4, 21, 34, 32, 24, 12, 12, + 12, 1, 2, 7, 13, 39, 27, 45, 63, 6, + 17, 27, 31, 21, 3, 19, 6, 22, 11, 15, + 6, 14, 8, 12, 26, 8, 4, 108, 80, 50, + 26, 8, 23, 45, 75, 91, 3, 78, 58, 48, + 28, 34, 14, 6, 2, 21, 27, 17, 1, 15, + 10, 22, 5, 11, 6, 14, 12, 14, 28, 16, + 6, 108, 80, 50, 26, 8, 23, 45, 75, 91, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, 
qp = 19 */ + + 110, 14, 27, 110, 14, 27, 59, 25, 36, 22, + 8, 24, 46, 88, 114, 36, 39, 7, 0, 0, + 12, 6, 5, 3, 1, 38, 32, 11, 41, 71, + 19, 39, 15, 0, 0, 12, 25, 21, 22, 22, + 5, 25, 41, 4, 27, 39, 65, 12, 13, 27, + 8, 27, 27, 51, 1, 23, 15, 39, 5, 4, + 44, 0, 0, 0, 17, 53, 67, 0, 14, 3, + 66, 8, 37, 33, 11, 23, 26, 14, 19, 11, + 45, 13, 43, 33, 41, 33, 39, 43, 47, 48, + 3, 5, 43, 13, 51, 33, 53, 10, 1, 0, + 13, 28, 9, 3, 27, 47, 23, 29, 23, 5, + 17, 37, 25, 5, 49, 29, 58, 40, 7, 17, + 4, 17, 5, 2, 4, 20, 4, 7, 31, 29, + 15, 10, 12, 21, 18, 16, 10, 10, 12, 12, + 18, 12, 8, 12, 8, 4, 10, 14, 25, 5, + 10, 11, 24, 34, 42, 30, 12, 38, 24, 30, + 6, 4, 16, 14, 38, 51, 28, 48, 42, 70, + 90, 54, 60, 88, 86, 46, 70, 74, 72, 124, + 24, 56, 38, 50, 64, 52, 72, 80, 94, 32, + 36, 54, 48, 124, 9, 98, 106, 98, 92, 90, + 82, 76, 74, 66, 52, 44, 36, 24, 16, 23, + 23, 21, 45, 24, 22, 16, 18, 3, 3, 5, + 10, 19, 21, 23, 45, 29, 81, 41, 20, 22, + 20, 6, 0, 11, 19, 23, 45, 25, 56, 32, + 22, 4, 8, 13, 27, 35, 49, 2, 62, 46, + 36, 32, 20, 6, 1, 9, 23, 9, 80, 60, + 38, 12, 22, 1, 19, 31, 16, 102, 88, 70, + 50, 46, 8, 7, 9, 19, 124, 75, 65, 45, + 63, 61, 41, 49, 41, 35, 45, 39, 37, 39, + 39, 43, 53, 33, 35, 27, 27, 21, 15, 13, + 11, 9, 5, 11, 13, 27, 6, 8, 10, 4, + 0, 8, 6, 8, 4, 2, 2, 4, 1, 9, + 17, 9, 4, 25, 14, 32, 14, 26, 16, 22, + 26, 16, 14, 8, 20, 2, 11, 15, 94, 96, + 94, 90, 86, 98, 92, 96, 94, 88, 100, 92, + 78, 64, 32, 106, 104, 98, 92, 76, 74, 50, + 42, 34, 28, 8, 1, 11, 29, 106, 106, 106, + 96, 84, 88, 80, 68, 60, 52, 42, 28, 14, + 1, 19, 2, 2, 23, 30, 28, 20, 8, 10, + 8, 5, 0, 9, 15, 41, 29, 47, 65, 4, + 19, 29, 31, 19, 3, 17, 8, 24, 11, 15, + 6, 14, 8, 14, 28, 8, 4, 106, 76, 46, + 20, 2, 31, 53, 83, 97, 3, 78, 58, 48, + 28, 34, 16, 6, 2, 19, 27, 17, 0, 13, + 12, 24, 5, 9, 6, 14, 12, 14, 28, 16, + 6, 106, 76, 46, 20, 2, 31, 53, 83, 97, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 20 */ + + 106, 14, 27, 106, 14, 27, 57, 23, 36, 22, + 8, 22, 42, 
86, 114, 34, 35, 7, 0, 2, + 10, 4, 5, 5, 5, 36, 26, 19, 47, 75, + 13, 37, 15, 0, 2, 10, 25, 19, 22, 20, + 5, 23, 39, 4, 27, 39, 65, 12, 13, 27, + 8, 27, 27, 49, 1, 23, 15, 39, 3, 4, + 44, 0, 0, 0, 15, 55, 67, 0, 12, 3, + 64, 8, 37, 29, 9, 21, 30, 18, 17, 9, + 43, 11, 39, 31, 37, 33, 37, 43, 47, 48, + 3, 3, 39, 13, 51, 31, 49, 10, 1, 0, + 15, 28, 9, 3, 25, 45, 21, 27, 21, 5, + 17, 35, 23, 3, 47, 27, 68, 48, 1, 11, + 4, 15, 5, 2, 4, 20, 4, 7, 31, 25, + 15, 6, 8, 17, 18, 18, 10, 10, 12, 12, + 22, 14, 10, 10, 8, 4, 12, 14, 27, 5, + 10, 11, 22, 32, 40, 28, 10, 38, 24, 30, + 4, 4, 14, 12, 36, 51, 26, 46, 40, 66, + 82, 52, 56, 84, 82, 42, 66, 70, 60, 124, + 16, 52, 34, 44, 58, 48, 64, 70, 86, 26, + 32, 48, 44, 124, 11, 94, 102, 92, 88, 86, + 78, 72, 70, 62, 46, 40, 32, 20, 12, 27, + 23, 21, 47, 22, 20, 14, 14, 5, 7, 7, + 8, 21, 21, 25, 45, 31, 79, 39, 22, 22, + 20, 8, 0, 9, 17, 21, 41, 21, 56, 34, + 22, 4, 10, 11, 23, 31, 47, 2, 64, 48, + 38, 34, 22, 6, 1, 7, 21, 9, 82, 60, + 38, 10, 22, 1, 19, 31, 16, 102, 86, 68, + 48, 46, 8, 5, 9, 19, 124, 71, 63, 43, + 59, 57, 39, 47, 39, 33, 41, 37, 33, 37, + 37, 43, 51, 31, 35, 27, 27, 19, 15, 13, + 13, 9, 7, 11, 13, 27, 6, 8, 12, 4, + 0, 8, 6, 8, 4, 2, 2, 2, 1, 9, + 17, 9, 6, 27, 12, 32, 12, 26, 14, 20, + 26, 14, 12, 8, 20, 0, 11, 17, 92, 94, + 92, 86, 82, 94, 88, 90, 90, 82, 94, 86, + 72, 58, 28, 96, 96, 90, 82, 70, 66, 44, + 36, 28, 24, 4, 3, 13, 29, 100, 102, 100, + 92, 78, 82, 74, 62, 56, 46, 38, 22, 10, + 5, 23, 0, 2, 27, 28, 26, 18, 4, 6, + 4, 7, 1, 13, 17, 43, 33, 49, 65, 0, + 23, 33, 29, 19, 1, 17, 10, 26, 11, 13, + 8, 16, 8, 14, 30, 8, 4, 104, 72, 40, + 14, 5, 39, 63, 91, 103, 1, 78, 58, 48, + 28, 36, 16, 6, 2, 19, 27, 17, 0, 13, + 12, 26, 5, 9, 8, 16, 12, 14, 30, 16, + 6, 104, 72, 40, 14, 5, 39, 63, 91, 103, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 21 */ + + 104, 14, 27, 104, 14, 27, 53, 19, 36, 22, + 6, 20, 38, 84, 114, 34, 31, 7, 0, 4, + 10, 4, 5, 7, 9, 34, 22, 25, 51, 79, + 7, 35, 
13, 0, 4, 10, 25, 17, 22, 20, + 5, 23, 37, 4, 27, 39, 65, 12, 13, 27, + 8, 27, 25, 49, 1, 23, 15, 39, 3, 4, + 44, 0, 0, 0, 15, 55, 67, 2, 10, 3, + 62, 6, 37, 23, 7, 19, 34, 22, 13, 5, + 39, 7, 37, 29, 31, 33, 37, 41, 45, 48, + 3, 1, 35, 13, 49, 31, 45, 10, 3, 1, + 15, 28, 9, 1, 25, 43, 19, 27, 19, 3, + 15, 31, 19, 1, 43, 25, 78, 56, 4, 5, + 6, 15, 3, 4, 4, 20, 6, 7, 29, 21, + 15, 2, 4, 11, 18, 18, 10, 10, 12, 14, + 24, 16, 10, 10, 8, 4, 12, 16, 29, 3, + 10, 13, 22, 30, 40, 28, 10, 38, 24, 30, + 2, 6, 12, 12, 36, 51, 26, 44, 38, 62, + 76, 50, 54, 80, 78, 38, 60, 66, 48, 124, + 10, 48, 30, 40, 52, 42, 58, 62, 78, 22, + 28, 40, 40, 124, 15, 90, 98, 88, 84, 82, + 74, 68, 66, 58, 42, 36, 28, 18, 8, 31, + 23, 21, 47, 20, 18, 12, 12, 7, 9, 9, + 4, 23, 23, 27, 47, 33, 77, 35, 24, 24, + 20, 8, 2, 9, 17, 19, 37, 19, 56, 34, + 24, 6, 12, 9, 21, 29, 43, 4, 66, 50, + 40, 34, 24, 8, 0, 7, 21, 9, 82, 60, + 36, 10, 22, 1, 19, 29, 16, 100, 84, 66, + 46, 46, 8, 5, 9, 17, 124, 69, 59, 39, + 55, 55, 37, 43, 35, 31, 37, 33, 29, 35, + 35, 41, 49, 27, 35, 27, 25, 19, 13, 13, + 13, 9, 7, 11, 13, 29, 8, 8, 14, 4, + 0, 8, 6, 8, 4, 2, 2, 2, 1, 9, + 17, 9, 6, 29, 10, 32, 10, 24, 14, 18, + 26, 14, 10, 8, 20, 1, 11, 19, 90, 92, + 90, 82, 78, 90, 84, 86, 84, 76, 88, 80, + 66, 54, 24, 88, 88, 82, 72, 64, 60, 40, + 32, 24, 20, 2, 7, 13, 31, 96, 96, 96, + 88, 72, 76, 70, 56, 52, 42, 34, 16, 6, + 9, 25, 0, 0, 31, 26, 22, 14, 0, 2, + 0, 11, 3, 15, 19, 43, 35, 51, 67, 1, + 25, 35, 29, 19, 0, 15, 12, 28, 9, 11, + 8, 16, 8, 16, 32, 8, 4, 102, 68, 34, + 8, 11, 47, 71, 99, 109, 1, 78, 58, 48, + 28, 36, 16, 8, 4, 17, 27, 17, 2, 13, + 14, 28, 5, 9, 8, 16, 12, 16, 30, 16, + 6, 102, 68, 34, 8, 11, 47, 71, 99, 109, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 22 */ + + 102, 14, 29, 102, 14, 29, 49, 17, 38, 22, + 6, 16, 34, 82, 114, 34, 29, 7, 2, 4, + 10, 2, 3, 7, 11, 30, 16, 33, 57, 81, + 3, 33, 13, 2, 4, 10, 27, 17, 22, 18, + 5, 23, 37, 4, 27, 37, 63, 12, 11, 25, + 8, 25, 25, 
49, 1, 23, 15, 39, 3, 4, + 44, 0, 0, 0, 13, 55, 67, 2, 10, 5, + 62, 6, 37, 19, 5, 17, 36, 28, 11, 3, + 35, 5, 35, 27, 27, 33, 37, 41, 45, 48, + 3, 0, 29, 13, 47, 29, 41, 8, 3, 1, + 17, 30, 9, 1, 23, 43, 19, 25, 17, 3, + 13, 29, 17, 1, 41, 23, 88, 66, 10, 0, + 6, 15, 3, 4, 4, 20, 6, 5, 29, 17, + 13, 1, 0, 5, 18, 18, 10, 10, 14, 14, + 28, 18, 10, 10, 8, 4, 14, 18, 31, 3, + 10, 13, 20, 28, 38, 28, 8, 38, 24, 30, + 1, 6, 10, 10, 34, 51, 26, 44, 38, 58, + 70, 48, 50, 74, 72, 34, 56, 62, 34, 124, + 2, 44, 28, 36, 44, 38, 52, 54, 68, 16, + 24, 34, 36, 124, 19, 86, 94, 84, 78, 76, + 70, 62, 62, 54, 38, 30, 24, 14, 6, 33, + 23, 21, 49, 18, 16, 10, 10, 9, 11, 11, + 0, 25, 23, 29, 49, 33, 77, 33, 26, 24, + 20, 8, 4, 7, 15, 19, 33, 17, 58, 34, + 24, 8, 14, 7, 17, 25, 39, 6, 66, 50, + 40, 36, 26, 8, 0, 7, 19, 9, 84, 60, + 34, 10, 22, 1, 19, 29, 14, 100, 84, 64, + 44, 44, 8, 5, 9, 17, 124, 67, 57, 37, + 53, 51, 35, 39, 31, 29, 35, 29, 25, 35, + 33, 41, 47, 23, 33, 27, 25, 17, 13, 13, + 13, 9, 9, 11, 13, 29, 8, 10, 16, 4, + 0, 10, 8, 8, 6, 4, 2, 0, 1, 9, + 17, 9, 8, 31, 8, 32, 8, 22, 12, 18, + 26, 12, 10, 8, 18, 5, 13, 21, 86, 90, + 88, 80, 74, 86, 80, 82, 80, 72, 82, 76, + 60, 48, 20, 80, 80, 74, 64, 56, 54, 34, + 28, 18, 16, 1, 9, 15, 33, 92, 92, 90, + 82, 66, 72, 64, 50, 46, 36, 28, 10, 2, + 11, 29, 1, 1, 33, 22, 20, 10, 3, 0, + 3, 15, 5, 19, 21, 45, 37, 53, 69, 5, + 27, 37, 29, 17, 0, 15, 14, 30, 9, 11, + 10, 18, 8, 16, 34, 8, 4, 100, 64, 30, + 2, 17, 55, 79, 107, 115, 1, 78, 58, 48, + 28, 38, 18, 8, 4, 17, 25, 15, 2, 11, + 16, 30, 3, 7, 8, 16, 12, 16, 32, 16, + 6, 100, 64, 30, 2, 17, 55, 79, 107, 115, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 23 */ + + 100, 14, 29, 100, 14, 29, 45, 13, 38, 22, + 4, 14, 30, 80, 114, 34, 25, 7, 2, 6, + 8, 2, 3, 9, 15, 28, 12, 39, 61, 85, + 2, 31, 13, 2, 6, 8, 27, 15, 22, 18, + 5, 21, 35, 4, 27, 37, 63, 12, 11, 25, + 8, 25, 23, 47, 1, 23, 15, 39, 1, 4, + 44, 0, 0, 0, 13, 55, 67, 4, 8, 5, + 60, 4, 37, 13, 3, 
15, 40, 32, 7, 0, + 33, 1, 31, 25, 23, 33, 35, 39, 45, 48, + 3, 2, 25, 13, 45, 29, 37, 8, 5, 3, + 17, 30, 9, 0, 23, 41, 17, 25, 15, 1, + 11, 25, 13, 0, 39, 21, 98, 74, 16, 6, + 6, 13, 3, 6, 4, 20, 8, 5, 27, 13, + 13, 5, 3, 0, 18, 20, 10, 10, 14, 16, + 30, 20, 12, 8, 8, 4, 14, 18, 33, 3, + 10, 15, 20, 26, 38, 26, 8, 38, 24, 30, + 3, 6, 8, 8, 34, 51, 26, 42, 36, 54, + 64, 46, 46, 70, 68, 30, 50, 58, 22, 124, + 3, 40, 24, 30, 38, 32, 46, 44, 60, 10, + 20, 26, 32, 124, 21, 82, 90, 80, 74, 72, + 66, 58, 58, 50, 32, 26, 20, 10, 2, 37, + 23, 21, 49, 16, 14, 8, 6, 11, 13, 13, + 1, 27, 25, 31, 49, 35, 75, 29, 28, 26, + 20, 10, 4, 7, 15, 17, 29, 13, 58, 36, + 26, 8, 16, 5, 15, 23, 35, 6, 68, 52, + 42, 36, 28, 10, 0, 5, 19, 9, 84, 60, + 34, 8, 22, 1, 19, 27, 14, 98, 82, 62, + 42, 44, 8, 3, 9, 17, 124, 63, 53, 35, + 49, 49, 33, 37, 29, 27, 31, 27, 21, 33, + 31, 39, 45, 19, 33, 27, 25, 17, 11, 13, + 15, 9, 9, 11, 13, 31, 10, 10, 18, 4, + 0, 10, 8, 8, 6, 4, 2, 0, 1, 9, + 17, 9, 8, 33, 6, 32, 6, 22, 10, 16, + 26, 10, 8, 8, 18, 7, 13, 23, 84, 88, + 86, 76, 70, 82, 76, 76, 74, 66, 76, 70, + 54, 44, 16, 70, 72, 66, 54, 50, 48, 30, + 24, 14, 12, 5, 13, 17, 33, 86, 86, 86, + 78, 60, 66, 60, 44, 42, 32, 24, 4, 1, + 15, 31, 3, 1, 37, 20, 16, 8, 7, 3, + 7, 17, 7, 21, 23, 47, 41, 55, 69, 7, + 31, 41, 27, 17, 2, 13, 16, 32, 9, 9, + 10, 18, 8, 18, 36, 8, 4, 98, 60, 24, + 3, 25, 63, 87, 115, 121, 0, 78, 58, 48, + 28, 38, 18, 8, 4, 15, 25, 15, 4, 11, + 18, 32, 3, 7, 10, 18, 12, 16, 32, 16, + 6, 98, 60, 24, 3, 25, 63, 87, 115, 121, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 24 */ + + 96, 12, 29, 96, 12, 29, 43, 11, 38, 22, + 4, 10, 24, 78, 114, 32, 21, 9, 2, 8, + 8, 0, 3, 11, 19, 26, 6, 47, 67, 89, + 8, 29, 13, 2, 8, 8, 29, 13, 22, 16, + 5, 21, 35, 2, 29, 37, 63, 12, 11, 25, + 8, 25, 23, 47, 1, 23, 15, 39, 1, 4, + 44, 0, 0, 0, 11, 57, 67, 4, 6, 5, + 58, 4, 39, 9, 1, 13, 42, 36, 5, 2, + 29, 0, 29, 23, 19, 33, 35, 39, 45, 48, + 3, 4, 21, 13, 45, 27, 33, 8, 5, 
3, + 19, 30, 9, 0, 21, 39, 15, 23, 15, 1, + 11, 23, 11, 0, 37, 19, 106, 82, 22, 10, + 6, 13, 3, 6, 2, 20, 8, 5, 27, 9, + 13, 9, 7, 4, 18, 20, 10, 10, 14, 16, + 34, 20, 12, 8, 6, 4, 16, 20, 37, 3, + 10, 15, 18, 22, 36, 26, 6, 38, 24, 28, + 5, 6, 6, 6, 32, 51, 24, 40, 34, 50, + 56, 42, 42, 66, 62, 24, 46, 54, 8, 124, + 11, 36, 20, 26, 32, 28, 38, 36, 52, 4, + 16, 20, 28, 124, 25, 78, 84, 74, 68, 66, + 60, 54, 52, 46, 28, 22, 16, 6, 1, 41, + 23, 23, 51, 14, 10, 6, 4, 13, 17, 17, + 5, 29, 25, 33, 51, 37, 75, 27, 30, 26, + 20, 10, 6, 5, 13, 17, 25, 11, 58, 36, + 26, 10, 18, 3, 11, 19, 33, 8, 68, 52, + 42, 38, 30, 10, 0, 5, 17, 11, 86, 60, + 32, 8, 22, 1, 19, 27, 14, 98, 80, 60, + 40, 44, 8, 3, 9, 17, 124, 61, 51, 33, + 47, 45, 31, 33, 25, 25, 29, 23, 17, 33, + 31, 39, 43, 17, 33, 27, 25, 15, 11, 13, + 15, 9, 11, 11, 13, 31, 10, 10, 20, 4, + 1, 10, 8, 6, 6, 4, 2, 1, 1, 9, + 17, 9, 10, 35, 4, 32, 2, 20, 8, 14, + 24, 8, 6, 8, 18, 9, 13, 27, 82, 84, + 84, 72, 66, 78, 72, 72, 70, 60, 70, 64, + 48, 38, 12, 62, 64, 56, 44, 42, 40, 24, + 18, 8, 8, 9, 15, 19, 35, 82, 82, 80, + 72, 54, 60, 54, 38, 36, 26, 18, 1, 5, + 19, 35, 5, 3, 41, 16, 14, 4, 11, 7, + 11, 21, 11, 25, 27, 49, 43, 57, 71, 11, + 33, 43, 27, 17, 2, 13, 18, 34, 9, 9, + 12, 20, 8, 18, 36, 8, 2, 96, 56, 18, + 9, 31, 71, 97, 125, 125, 0, 78, 58, 48, + 28, 40, 18, 8, 4, 15, 25, 15, 4, 11, + 18, 32, 3, 7, 10, 18, 12, 16, 34, 16, + 4, 96, 56, 18, 9, 31, 71, 97, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 25 */ + + 94, 12, 29, 94, 12, 29, 39, 9, 40, 22, + 4, 8, 20, 76, 116, 32, 17, 9, 4, 10, + 8, 1, 3, 11, 21, 24, 2, 55, 73, 91, + 14, 27, 11, 4, 10, 8, 29, 11, 22, 16, + 3, 21, 33, 2, 29, 37, 63, 12, 11, 23, + 8, 25, 23, 47, 1, 23, 15, 37, 1, 4, + 44, 0, 0, 0, 9, 57, 67, 4, 4, 5, + 56, 4, 39, 5, 2, 11, 46, 40, 1, 4, + 25, 4, 27, 19, 13, 33, 35, 39, 43, 48, + 3, 6, 17, 11, 43, 25, 29, 8, 5, 3, + 19, 30, 9, 0, 19, 37, 13, 21, 13, 0, + 9, 21, 7, 2, 33, 17, 116, 92, 30, 16, + 8, 13, 
1, 8, 2, 20, 8, 3, 25, 3, + 13, 13, 9, 10, 18, 20, 10, 10, 14, 18, + 38, 22, 12, 8, 6, 4, 18, 22, 39, 1, + 10, 15, 16, 20, 34, 26, 4, 38, 24, 28, + 7, 8, 4, 6, 30, 51, 24, 40, 34, 46, + 50, 40, 40, 62, 58, 20, 42, 50, 3, 124, + 17, 32, 16, 22, 26, 24, 32, 28, 44, 0, + 12, 14, 24, 124, 29, 74, 80, 70, 64, 62, + 56, 50, 48, 42, 24, 18, 14, 4, 3, 45, + 23, 23, 51, 14, 8, 4, 2, 15, 19, 19, + 9, 31, 25, 35, 53, 37, 73, 25, 32, 26, + 20, 10, 8, 3, 11, 15, 19, 9, 58, 36, + 26, 12, 20, 1, 7, 15, 29, 10, 70, 54, + 44, 40, 32, 10, 2, 5, 15, 11, 88, 60, + 30, 8, 24, 0, 19, 25, 14, 98, 80, 58, + 38, 44, 8, 3, 9, 15, 124, 59, 49, 29, + 43, 41, 29, 29, 21, 23, 25, 19, 13, 31, + 29, 37, 41, 13, 31, 25, 23, 13, 9, 13, + 15, 9, 13, 11, 13, 31, 10, 12, 22, 6, + 1, 12, 8, 6, 6, 4, 2, 3, 1, 7, + 15, 9, 12, 35, 2, 32, 0, 18, 8, 14, + 24, 8, 4, 8, 18, 11, 13, 29, 80, 82, + 84, 70, 62, 74, 68, 68, 66, 56, 64, 58, + 42, 32, 8, 54, 56, 48, 34, 36, 34, 18, + 14, 4, 6, 11, 17, 19, 37, 78, 78, 76, + 68, 50, 56, 50, 32, 32, 22, 14, 5, 9, + 23, 39, 5, 5, 43, 14, 12, 0, 15, 9, + 13, 25, 13, 29, 29, 49, 45, 57, 73, 15, + 35, 45, 27, 15, 4, 13, 20, 38, 7, 7, + 14, 22, 8, 20, 38, 8, 2, 94, 52, 14, + 15, 37, 79, 105, 125, 125, 0, 78, 58, 50, + 30, 42, 20, 10, 6, 15, 25, 15, 4, 9, + 20, 34, 3, 5, 10, 18, 12, 18, 36, 16, + 4, 94, 52, 14, 15, 37, 79, 105, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 26 */ + + 92, 12, 29, 92, 12, 29, 35, 5, 40, 22, + 2, 6, 16, 74, 116, 32, 13, 9, 4, 12, + 6, 1, 3, 13, 25, 22, 3, 61, 77, 95, + 20, 25, 11, 4, 12, 6, 29, 9, 22, 14, + 3, 19, 31, 2, 29, 37, 63, 12, 11, 23, + 8, 25, 21, 45, 1, 23, 15, 37, 0, 4, + 44, 0, 0, 0, 9, 57, 67, 6, 2, 5, + 54, 2, 39, 0, 4, 9, 50, 44, 0, 8, + 23, 6, 23, 17, 9, 33, 33, 37, 43, 48, + 3, 8, 13, 11, 41, 25, 25, 8, 7, 5, + 21, 30, 9, 2, 19, 35, 11, 21, 11, 0, + 7, 17, 5, 4, 31, 15, 124, 100, 36, 22, + 8, 11, 1, 8, 2, 20, 10, 3, 25, 0, + 13, 17, 13, 16, 18, 22, 10, 10, 14, 18, + 40, 24, 14, 6, 6, 
4, 18, 22, 41, 1, + 10, 17, 16, 18, 34, 24, 4, 38, 24, 28, + 9, 8, 2, 4, 30, 51, 24, 38, 32, 42, + 44, 38, 36, 58, 54, 16, 36, 46, 15, 124, + 25, 28, 12, 16, 20, 18, 26, 18, 36, 5, + 8, 6, 20, 124, 31, 70, 76, 66, 60, 58, + 52, 46, 44, 38, 18, 14, 10, 0, 7, 49, + 23, 23, 53, 12, 6, 2, 1, 17, 21, 21, + 11, 33, 27, 37, 53, 39, 71, 21, 34, 28, + 20, 12, 8, 3, 11, 13, 15, 5, 58, 38, + 28, 12, 22, 0, 5, 13, 25, 10, 72, 56, + 46, 40, 34, 12, 2, 3, 15, 11, 88, 60, + 30, 6, 24, 0, 19, 25, 14, 96, 78, 56, + 36, 44, 8, 1, 9, 15, 124, 55, 45, 27, + 39, 39, 27, 27, 19, 21, 21, 17, 9, 29, + 27, 37, 39, 9, 31, 25, 23, 13, 9, 13, + 17, 9, 13, 11, 13, 33, 12, 12, 24, 6, + 1, 12, 8, 6, 6, 4, 2, 3, 1, 7, + 15, 9, 12, 37, 0, 32, 1, 18, 6, 12, + 24, 6, 2, 8, 18, 13, 13, 31, 78, 80, + 82, 66, 58, 70, 64, 62, 60, 50, 58, 52, + 36, 28, 4, 44, 48, 40, 24, 30, 28, 14, + 10, 1, 2, 15, 21, 21, 37, 72, 72, 70, + 64, 44, 50, 44, 26, 28, 16, 10, 11, 13, + 27, 41, 7, 5, 47, 12, 8, 1, 19, 13, + 17, 27, 15, 31, 31, 51, 49, 59, 73, 17, + 39, 49, 25, 15, 6, 11, 22, 40, 7, 5, + 14, 22, 8, 20, 40, 8, 2, 92, 48, 8, + 21, 45, 87, 113, 125, 125, 2, 78, 58, 50, + 30, 42, 20, 10, 6, 13, 25, 15, 6, 9, + 22, 36, 3, 5, 12, 20, 12, 18, 36, 16, + 4, 92, 48, 8, 21, 45, 87, 113, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 27 */ + + 90, 12, 31, 90, 12, 31, 31, 3, 42, 22, + 2, 2, 12, 72, 116, 32, 11, 9, 6, 12, + 6, 3, 1, 13, 27, 18, 7, 69, 83, 97, + 24, 23, 11, 6, 12, 6, 31, 9, 22, 14, + 3, 19, 31, 2, 29, 35, 61, 12, 9, 21, + 8, 23, 21, 45, 1, 23, 15, 37, 0, 4, + 44, 0, 0, 0, 7, 57, 67, 6, 2, 7, + 54, 2, 39, 4, 6, 7, 52, 50, 4, 10, + 19, 10, 21, 15, 5, 33, 33, 37, 43, 48, + 3, 10, 7, 11, 39, 23, 21, 6, 7, 5, + 21, 32, 9, 2, 17, 35, 11, 19, 9, 2, + 5, 15, 1, 4, 29, 13, 124, 110, 42, 28, + 8, 11, 1, 10, 2, 20, 10, 1, 23, 4, + 11, 21, 17, 22, 18, 22, 10, 10, 16, 20, + 44, 26, 14, 6, 6, 4, 20, 24, 43, 1, + 10, 17, 14, 16, 32, 24, 2, 38, 24, 28, + 13, 8, 0, 2, 28, 51, 24, 38, 
32, 38, + 38, 36, 32, 52, 48, 12, 32, 42, 29, 124, + 31, 24, 10, 12, 12, 14, 20, 10, 26, 11, + 4, 0, 16, 124, 35, 66, 72, 62, 54, 52, + 48, 40, 40, 34, 14, 8, 6, 3, 9, 51, + 23, 23, 53, 10, 4, 0, 3, 19, 23, 23, + 15, 35, 27, 39, 55, 39, 71, 19, 36, 28, + 20, 12, 10, 1, 9, 13, 11, 3, 60, 38, + 28, 14, 24, 2, 1, 9, 21, 12, 72, 56, + 46, 42, 36, 12, 2, 3, 13, 11, 90, 60, + 28, 6, 24, 0, 19, 23, 12, 96, 78, 54, + 34, 42, 8, 1, 9, 15, 124, 53, 43, 25, + 37, 35, 25, 23, 15, 19, 19, 13, 5, 29, + 25, 35, 37, 5, 29, 25, 23, 11, 7, 13, + 17, 9, 15, 11, 13, 33, 12, 14, 26, 6, + 1, 14, 10, 6, 8, 6, 2, 5, 1, 7, + 15, 9, 14, 39, 1, 32, 3, 16, 4, 12, + 24, 4, 2, 8, 16, 17, 15, 33, 74, 78, + 80, 64, 54, 66, 60, 58, 56, 46, 52, 48, + 30, 22, 0, 36, 40, 32, 16, 22, 22, 8, + 6, 5, 1, 19, 23, 23, 39, 68, 68, 66, + 58, 38, 46, 40, 20, 22, 12, 4, 17, 17, + 29, 45, 9, 7, 49, 8, 6, 5, 23, 15, + 21, 31, 17, 35, 33, 53, 51, 61, 75, 21, + 41, 51, 25, 13, 6, 11, 24, 42, 7, 5, + 16, 24, 8, 22, 42, 8, 2, 90, 44, 4, + 27, 51, 95, 121, 125, 125, 2, 78, 58, 50, + 30, 44, 22, 10, 6, 13, 23, 13, 6, 7, + 24, 38, 1, 3, 12, 20, 12, 18, 38, 16, + 4, 90, 44, 4, 27, 51, 95, 121, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 28 */ + + 86, 12, 31, 86, 12, 31, 29, 0, 42, 22, + 0, 0, 8, 70, 116, 30, 7, 9, 6, 14, + 6, 3, 1, 15, 31, 16, 13, 75, 87, 101, + 30, 21, 11, 6, 14, 6, 31, 7, 22, 12, + 3, 19, 29, 2, 29, 35, 61, 12, 9, 21, + 8, 23, 19, 45, 1, 23, 15, 37, 0, 4, + 44, 0, 0, 0, 7, 59, 67, 8, 0, 7, + 52, 0, 39, 10, 8, 5, 56, 54, 6, 14, + 15, 12, 19, 13, 1, 33, 33, 35, 43, 48, + 3, 12, 3, 11, 39, 23, 17, 6, 9, 7, + 23, 32, 9, 4, 17, 33, 9, 19, 7, 2, + 5, 11, 0, 6, 27, 11, 124, 118, 48, 34, + 8, 11, 1, 10, 2, 20, 12, 1, 23, 8, + 11, 25, 21, 26, 18, 22, 10, 10, 16, 20, + 46, 28, 14, 6, 6, 4, 20, 26, 45, 1, + 10, 19, 14, 14, 32, 24, 2, 38, 24, 28, + 15, 8, 1, 0, 28, 51, 22, 36, 30, 34, + 30, 34, 28, 48, 44, 8, 26, 38, 41, 124, + 39, 20, 6, 8, 6, 8, 12, 2, 18, 17, + 0, 
7, 12, 124, 39, 62, 68, 56, 50, 48, + 44, 36, 36, 30, 10, 4, 2, 7, 13, 55, + 23, 23, 55, 8, 2, 1, 5, 21, 27, 25, + 19, 37, 29, 41, 57, 41, 69, 15, 38, 30, + 20, 12, 12, 1, 9, 11, 7, 1, 60, 38, + 30, 16, 26, 4, 0, 7, 19, 14, 74, 58, + 48, 42, 38, 14, 2, 3, 13, 11, 90, 60, + 26, 6, 24, 0, 19, 23, 12, 94, 76, 52, + 32, 42, 8, 1, 9, 15, 124, 51, 39, 23, + 33, 33, 23, 19, 11, 17, 15, 9, 1, 27, + 23, 35, 35, 3, 29, 25, 23, 11, 7, 13, + 17, 9, 15, 11, 13, 35, 14, 14, 28, 6, + 1, 14, 10, 6, 8, 6, 2, 5, 1, 7, + 15, 9, 14, 41, 3, 32, 5, 14, 2, 10, + 24, 2, 0, 8, 16, 19, 15, 35, 72, 76, + 78, 60, 50, 62, 56, 54, 50, 40, 46, 42, + 24, 18, 3, 28, 32, 24, 6, 16, 14, 4, + 0, 11, 5, 23, 27, 25, 41, 64, 62, 60, + 54, 32, 40, 34, 14, 18, 6, 0, 23, 21, + 33, 47, 11, 9, 53, 6, 2, 9, 27, 19, + 25, 35, 19, 37, 35, 55, 53, 63, 77, 23, + 43, 53, 25, 13, 8, 9, 26, 44, 7, 3, + 16, 24, 8, 22, 44, 8, 2, 88, 40, 1, + 33, 57, 103, 125, 125, 125, 2, 78, 58, 50, + 30, 44, 22, 10, 6, 11, 23, 13, 8, 7, + 24, 40, 1, 3, 12, 20, 12, 18, 38, 16, + 4, 88, 40, 1, 33, 57, 103, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 29 */ + + 84, 12, 31, 84, 12, 31, 25, 2, 42, 22, + 0, 1, 4, 68, 116, 30, 3, 9, 6, 16, + 4, 5, 1, 17, 35, 14, 17, 83, 93, 105, + 36, 19, 9, 6, 16, 4, 31, 5, 22, 12, + 3, 17, 27, 2, 29, 35, 61, 12, 9, 21, + 8, 23, 19, 43, 1, 23, 15, 37, 2, 4, + 44, 0, 0, 0, 5, 59, 67, 8, 1, 7, + 50, 0, 39, 14, 10, 3, 60, 58, 10, 16, + 13, 16, 15, 11, 4, 33, 31, 35, 41, 48, + 3, 14, 0, 11, 37, 21, 13, 6, 9, 7, + 23, 32, 9, 4, 15, 31, 7, 17, 5, 4, + 3, 9, 4, 8, 23, 9, 124, 124, 54, 40, + 10, 9, 0, 12, 2, 20, 12, 1, 21, 12, + 11, 29, 25, 32, 18, 24, 10, 10, 16, 22, + 50, 30, 16, 4, 6, 4, 22, 26, 47, 0, + 10, 19, 12, 12, 30, 22, 0, 38, 24, 28, + 17, 10, 3, 0, 26, 51, 22, 34, 28, 30, + 24, 32, 26, 44, 40, 4, 22, 34, 53, 124, + 45, 16, 2, 2, 0, 4, 6, 7, 10, 21, + 3, 13, 8, 124, 41, 58, 64, 52, 46, 44, + 40, 32, 32, 26, 4, 0, 1, 9, 17, 59, + 23, 23, 55, 6, 0, 3, 9, 
23, 29, 27, + 21, 39, 29, 43, 57, 43, 67, 13, 40, 30, + 20, 14, 12, 0, 7, 9, 3, 2, 60, 40, + 30, 16, 28, 6, 4, 3, 15, 14, 76, 60, + 50, 44, 40, 14, 4, 1, 11, 11, 92, 60, + 26, 4, 24, 0, 19, 21, 12, 94, 74, 50, + 30, 42, 8, 0, 9, 13, 124, 47, 37, 19, + 29, 29, 21, 17, 9, 15, 11, 7, 2, 25, + 21, 33, 33, 0, 29, 25, 21, 9, 5, 13, + 19, 9, 17, 11, 13, 35, 14, 14, 30, 6, + 1, 14, 10, 6, 8, 6, 2, 7, 1, 7, + 15, 9, 16, 43, 5, 32, 7, 14, 2, 8, + 24, 2, 1, 8, 16, 21, 15, 37, 70, 74, + 76, 56, 46, 58, 52, 48, 46, 34, 40, 36, + 18, 12, 7, 18, 24, 16, 3, 10, 8, 1, + 3, 15, 9, 25, 29, 25, 41, 58, 58, 56, + 50, 26, 34, 30, 8, 14, 2, 3, 29, 25, + 37, 51, 11, 9, 57, 4, 0, 11, 31, 23, + 29, 37, 21, 41, 37, 55, 57, 65, 77, 27, + 47, 57, 23, 13, 10, 9, 28, 46, 5, 1, + 18, 26, 8, 24, 46, 8, 2, 86, 36, 7, + 39, 65, 111, 125, 125, 125, 4, 78, 58, 50, + 30, 46, 22, 12, 8, 11, 23, 13, 8, 7, + 26, 42, 1, 3, 14, 22, 12, 20, 40, 16, + 4, 86, 36, 7, 39, 65, 111, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 30 */ + + 82, 12, 31, 82, 12, 31, 21, 6, 44, 22, + 1, 5, 0, 66, 116, 30, 0, 9, 8, 18, + 4, 5, 1, 17, 37, 12, 23, 89, 97, 107, + 42, 17, 9, 8, 18, 4, 33, 3, 22, 10, + 3, 17, 27, 2, 29, 35, 61, 12, 9, 19, + 8, 23, 17, 43, 1, 23, 15, 37, 2, 4, + 44, 0, 0, 0, 5, 59, 67, 10, 3, 7, + 48, 1, 39, 20, 12, 1, 62, 62, 12, 20, + 9, 18, 13, 9, 8, 33, 31, 33, 41, 48, + 3, 16, 4, 11, 35, 21, 9, 6, 11, 9, + 25, 32, 9, 6, 15, 29, 5, 17, 3, 4, + 1, 5, 6, 8, 21, 7, 124, 124, 60, 46, + 10, 9, 0, 12, 2, 20, 14, 0, 21, 16, + 11, 33, 29, 38, 18, 24, 10, 10, 16, 22, + 52, 32, 16, 4, 6, 4, 22, 28, 49, 0, + 10, 21, 12, 10, 30, 22, 0, 38, 24, 28, + 19, 10, 5, 1, 26, 51, 22, 34, 28, 26, + 18, 30, 22, 40, 34, 0, 16, 30, 67, 124, + 53, 12, 1, 1, 5, 1, 0, 15, 2, 27, + 7, 21, 4, 124, 45, 54, 60, 48, 40, 38, + 36, 28, 28, 22, 0, 3, 5, 13, 19, 63, + 23, 23, 57, 4, 1, 5, 11, 25, 31, 29, + 25, 41, 31, 45, 59, 43, 67, 9, 42, 32, + 20, 14, 14, 0, 7, 9, 0, 4, 60, 40, + 32, 18, 30, 
8, 6, 1, 11, 16, 76, 60, + 50, 44, 42, 16, 4, 1, 11, 11, 92, 60, + 24, 4, 24, 0, 19, 21, 12, 92, 74, 48, + 28, 42, 8, 0, 9, 13, 124, 45, 33, 17, + 27, 27, 19, 13, 5, 13, 9, 3, 6, 25, + 19, 33, 31, 4, 27, 25, 21, 9, 5, 13, + 19, 9, 17, 11, 13, 37, 16, 16, 32, 6, + 1, 16, 10, 6, 8, 6, 2, 7, 1, 7, + 15, 9, 16, 45, 7, 32, 9, 12, 0, 8, + 24, 0, 3, 8, 16, 23, 15, 39, 68, 72, + 74, 54, 42, 54, 48, 44, 40, 30, 34, 30, + 12, 8, 11, 10, 16, 8, 13, 2, 2, 5, + 7, 21, 13, 29, 33, 27, 43, 54, 52, 50, + 44, 20, 30, 24, 2, 8, 3, 9, 35, 29, + 41, 53, 13, 11, 59, 0, 3, 15, 35, 25, + 33, 41, 23, 43, 39, 57, 59, 67, 79, 29, + 49, 59, 23, 11, 10, 7, 30, 48, 5, 1, + 18, 26, 8, 24, 48, 8, 2, 84, 32, 11, + 45, 71, 119, 125, 125, 125, 4, 78, 58, 50, + 30, 46, 24, 12, 8, 9, 23, 13, 10, 5, + 28, 44, 1, 1, 14, 22, 12, 20, 40, 16, + 4, 84, 32, 11, 45, 71, 119, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 31 */ + + 80, 12, 31, 80, 12, 31, 17, 8, 44, 22, + 1, 7, 3, 64, 116, 30, 4, 9, 8, 20, + 4, 7, 1, 19, 41, 10, 27, 97, 103, 111, + 48, 15, 9, 8, 20, 4, 33, 1, 22, 10, + 3, 17, 25, 2, 29, 35, 61, 12, 9, 19, + 8, 23, 17, 43, 1, 23, 15, 37, 2, 4, + 44, 0, 0, 0, 3, 59, 67, 10, 5, 7, + 46, 1, 39, 24, 14, 0, 66, 66, 16, 22, + 5, 22, 11, 7, 12, 33, 31, 33, 41, 48, + 3, 18, 8, 11, 33, 19, 5, 6, 11, 9, + 25, 32, 9, 6, 13, 27, 3, 15, 1, 6, + 0, 3, 10, 10, 19, 5, 124, 124, 66, 52, + 10, 9, 0, 14, 2, 20, 14, 0, 19, 20, + 11, 37, 33, 44, 18, 24, 10, 10, 16, 24, + 56, 34, 16, 4, 6, 4, 24, 30, 51, 0, + 10, 21, 10, 8, 28, 22, 1, 38, 24, 28, + 21, 10, 7, 3, 24, 51, 22, 32, 26, 22, + 12, 28, 18, 36, 30, 3, 12, 26, 79, 124, + 59, 8, 5, 5, 11, 5, 5, 23, 5, 33, + 11, 27, 0, 124, 49, 50, 56, 44, 36, 34, + 32, 24, 24, 18, 3, 7, 9, 17, 23, 67, + 23, 23, 57, 2, 3, 7, 13, 27, 33, 31, + 29, 43, 31, 47, 61, 45, 65, 7, 44, 32, + 20, 14, 16, 2, 5, 7, 4, 6, 60, 40, + 32, 20, 32, 10, 10, 2, 7, 18, 78, 62, + 52, 46, 44, 16, 4, 1, 9, 11, 94, 60, + 22, 4, 24, 0, 19, 19, 12, 92, 72, 
46, + 26, 42, 8, 0, 9, 13, 124, 43, 31, 15, + 23, 23, 17, 9, 1, 11, 5, 0, 10, 23, + 17, 31, 29, 8, 27, 25, 21, 7, 3, 13, + 19, 9, 19, 11, 13, 37, 16, 16, 34, 6, + 1, 16, 10, 6, 8, 6, 2, 9, 1, 7, + 15, 9, 18, 47, 9, 32, 11, 10, 1, 6, + 24, 1, 5, 8, 16, 25, 15, 41, 66, 70, + 72, 50, 38, 50, 44, 40, 36, 24, 28, 24, + 6, 2, 15, 2, 8, 0, 23, 3, 3, 11, + 11, 25, 17, 33, 35, 29, 45, 50, 48, 46, + 40, 14, 24, 20, 3, 4, 7, 13, 41, 33, + 45, 57, 15, 13, 63, 1, 5, 19, 39, 29, + 37, 45, 25, 47, 41, 59, 61, 69, 81, 33, + 51, 61, 23, 11, 12, 7, 32, 50, 5, 0, + 20, 28, 8, 26, 50, 8, 2, 82, 28, 17, + 51, 77, 125, 125, 125, 125, 4, 78, 58, 50, + 30, 48, 24, 12, 8, 9, 23, 13, 10, 5, + 30, 46, 1, 1, 14, 22, 12, 20, 42, 16, + 4, 82, 28, 17, 51, 77, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 32 */ + + 76, 10, 33, 76, 10, 33, 15, 10, 44, 22, + 3, 11, 9, 62, 116, 28, 6, 11, 8, 20, + 2, 9, 1, 21, 45, 6, 33, 105, 109, 115, + 52, 15, 9, 8, 20, 2, 35, 1, 22, 8, + 3, 17, 25, 0, 31, 35, 61, 10, 9, 19, + 8, 23, 17, 43, 1, 23, 17, 37, 2, 4, + 44, 0, 0, 0, 3, 61, 67, 10, 7, 9, + 44, 3, 41, 28, 16, 2, 68, 70, 18, 24, + 3, 24, 9, 5, 16, 33, 31, 33, 41, 48, + 3, 18, 12, 11, 33, 19, 3, 4, 13, 11, + 27, 32, 9, 6, 13, 27, 3, 15, 1, 6, + 0, 1, 12, 10, 17, 3, 124, 124, 72, 56, + 10, 9, 0, 14, 0, 20, 14, 0, 19, 24, + 11, 41, 37, 48, 18, 24, 10, 10, 16, 24, + 58, 34, 16, 2, 4, 4, 24, 30, 55, 0, + 10, 23, 8, 4, 26, 20, 3, 38, 24, 26, + 25, 10, 9, 5, 22, 51, 20, 30, 24, 16, + 4, 24, 14, 30, 24, 9, 6, 22, 93, 124, + 67, 2, 9, 11, 19, 11, 13, 33, 15, 39, + 15, 35, 3, 124, 53, 44, 50, 38, 30, 28, + 26, 18, 18, 14, 9, 13, 13, 21, 27, 71, + 23, 25, 59, 0, 7, 11, 17, 31, 37, 35, + 33, 45, 33, 49, 63, 47, 65, 5, 44, 32, + 20, 14, 16, 2, 5, 7, 8, 8, 60, 40, + 32, 20, 32, 12, 12, 4, 5, 18, 78, 62, + 52, 46, 46, 16, 4, 1, 9, 13, 94, 58, + 20, 2, 24, 0, 19, 19, 10, 90, 70, 42, + 24, 40, 8, 0, 9, 13, 124, 41, 29, 13, + 21, 21, 15, 7, 0, 9, 3, 2, 14, 23, + 17, 31, 
27, 10, 27, 25, 21, 7, 3, 13, + 21, 11, 21, 11, 15, 39, 16, 16, 36, 6, + 3, 16, 10, 4, 8, 6, 2, 11, 1, 7, + 15, 9, 18, 49, 11, 32, 15, 8, 3, 4, + 22, 3, 7, 8, 14, 29, 17, 45, 62, 66, + 70, 46, 34, 44, 38, 34, 30, 18, 22, 18, + 1, 3, 19, 7, 0, 9, 33, 11, 11, 17, + 17, 31, 21, 37, 39, 31, 47, 44, 42, 40, + 34, 8, 18, 14, 11, 1, 13, 19, 47, 37, + 49, 61, 17, 15, 67, 5, 9, 23, 45, 33, + 41, 49, 29, 51, 45, 61, 65, 71, 83, 37, + 55, 65, 23, 11, 12, 7, 34, 52, 5, 0, + 20, 28, 8, 26, 50, 8, 0, 78, 24, 23, + 59, 85, 125, 125, 125, 125, 4, 78, 58, 50, + 30, 48, 24, 12, 8, 9, 23, 13, 10, 5, + 30, 46, 1, 1, 14, 22, 12, 20, 42, 14, + 2, 78, 24, 23, 59, 85, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 33 */ + + 74, 10, 33, 74, 10, 33, 11, 14, 46, 24, + 3, 13, 13, 60, 118, 28, 10, 11, 10, 22, + 2, 9, 0, 21, 47, 4, 37, 111, 113, 117, + 58, 13, 7, 10, 22, 2, 35, 0, 22, 8, + 1, 15, 23, 0, 31, 33, 59, 10, 7, 17, + 8, 21, 15, 41, 0, 21, 17, 35, 4, 4, + 44, 0, 0, 0, 1, 61, 67, 12, 7, 9, + 44, 3, 41, 34, 20, 6, 72, 76, 22, 28, + 0, 28, 5, 1, 22, 33, 29, 31, 39, 48, + 3, 20, 18, 9, 31, 17, 0, 4, 13, 11, + 27, 34, 9, 8, 11, 25, 1, 13, 0, 8, + 2, 2, 16, 12, 13, 0, 124, 124, 80, 62, + 12, 7, 2, 16, 0, 22, 16, 2, 17, 30, + 9, 45, 39, 54, 18, 26, 10, 12, 18, 26, + 62, 36, 18, 2, 4, 4, 26, 32, 57, 2, + 10, 23, 8, 2, 26, 20, 3, 38, 24, 26, + 27, 12, 9, 5, 22, 51, 20, 30, 24, 12, + 1, 22, 12, 26, 20, 13, 2, 18, 105, 124, + 73, 1, 11, 15, 25, 15, 19, 41, 23, 43, + 19, 41, 7, 124, 55, 40, 46, 34, 26, 24, + 22, 14, 14, 10, 13, 17, 15, 23, 29, 73, + 23, 25, 59, 0, 9, 13, 19, 33, 39, 37, + 35, 47, 33, 51, 63, 47, 63, 1, 46, 34, + 20, 16, 18, 4, 3, 5, 14, 12, 62, 42, + 34, 22, 34, 16, 16, 8, 1, 20, 80, 64, + 54, 48, 48, 18, 6, 0, 7, 13, 96, 58, + 20, 2, 26, 2, 19, 17, 10, 90, 70, 40, + 22, 40, 10, 2, 7, 11, 124, 37, 25, 9, + 17, 17, 11, 3, 4, 5, 0, 6, 20, 21, + 15, 29, 23, 14, 25, 23, 19, 5, 1, 11, + 21, 11, 21, 11, 15, 39, 18, 18, 38, 8, + 3, 18, 
12, 4, 10, 8, 2, 11, 0, 5, + 13, 7, 20, 49, 13, 32, 17, 8, 3, 4, + 22, 3, 7, 8, 14, 31, 17, 47, 60, 64, + 70, 44, 32, 40, 34, 30, 26, 14, 18, 14, + 7, 7, 23, 15, 5, 17, 41, 17, 17, 21, + 21, 35, 23, 39, 41, 31, 47, 40, 38, 36, + 30, 4, 14, 10, 17, 5, 17, 23, 51, 39, + 51, 63, 17, 15, 69, 7, 11, 25, 49, 35, + 43, 51, 31, 53, 47, 61, 67, 71, 83, 39, + 57, 67, 21, 9, 14, 5, 38, 56, 3, 2, + 22, 30, 10, 28, 52, 8, 0, 76, 20, 27, + 65, 91, 125, 125, 125, 125, 6, 78, 60, 52, + 32, 50, 26, 14, 10, 7, 21, 11, 12, 3, + 32, 48, 0, 0, 16, 24, 12, 22, 44, 14, + 2, 76, 20, 27, 65, 91, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 34 */ + + 72, 10, 33, 72, 10, 33, 7, 16, 46, 24, + 3, 15, 17, 58, 118, 28, 14, 11, 10, 24, + 2, 11, 0, 23, 51, 2, 43, 119, 119, 121, + 64, 11, 7, 10, 24, 2, 35, 2, 22, 6, + 1, 15, 21, 0, 31, 33, 59, 10, 7, 17, + 8, 21, 15, 41, 0, 21, 17, 35, 4, 4, + 44, 0, 0, 0, 0, 61, 67, 12, 9, 9, + 42, 3, 41, 38, 22, 8, 76, 80, 24, 30, + 4, 30, 3, 0, 26, 33, 29, 31, 39, 48, + 3, 22, 22, 9, 29, 15, 4, 4, 13, 11, + 29, 34, 9, 8, 9, 23, 0, 11, 2, 8, + 4, 4, 18, 14, 11, 2, 124, 124, 86, 68, + 12, 7, 2, 16, 0, 22, 16, 2, 17, 34, + 9, 49, 43, 60, 18, 26, 10, 12, 18, 26, + 66, 38, 18, 2, 4, 4, 28, 34, 59, 2, + 10, 23, 6, 0, 24, 20, 5, 38, 24, 26, + 29, 12, 11, 7, 20, 51, 20, 28, 22, 8, + 7, 20, 8, 22, 16, 17, 1, 14, 117, 124, + 81, 5, 15, 19, 31, 19, 25, 49, 31, 49, + 23, 47, 11, 124, 59, 36, 42, 30, 22, 20, + 18, 10, 10, 6, 17, 21, 19, 27, 33, 77, + 23, 25, 61, 1, 11, 15, 21, 35, 41, 39, + 39, 49, 33, 53, 65, 49, 61, 0, 48, 34, + 20, 16, 20, 6, 1, 3, 18, 14, 62, 42, + 34, 24, 36, 18, 20, 12, 2, 22, 82, 66, + 56, 50, 50, 18, 6, 0, 5, 13, 98, 58, + 18, 2, 26, 2, 19, 17, 10, 90, 68, 38, + 20, 40, 10, 2, 7, 11, 124, 35, 23, 7, + 13, 13, 9, 0, 8, 3, 4, 10, 24, 19, + 13, 29, 21, 18, 25, 23, 19, 3, 1, 11, + 21, 11, 23, 11, 15, 39, 18, 18, 40, 8, + 3, 18, 12, 4, 10, 8, 2, 13, 0, 5, + 13, 7, 22, 51, 15, 32, 19, 6, 5, 2, + 22, 5, 
9, 8, 14, 33, 17, 49, 58, 62, + 68, 40, 28, 36, 30, 26, 22, 8, 12, 8, + 13, 13, 27, 23, 13, 25, 51, 23, 23, 27, + 25, 41, 27, 43, 43, 33, 49, 36, 34, 30, + 26, 1, 8, 4, 23, 9, 23, 27, 57, 43, + 55, 67, 19, 17, 73, 9, 13, 29, 53, 39, + 47, 55, 33, 57, 49, 63, 69, 73, 85, 43, + 59, 69, 21, 9, 16, 5, 40, 58, 3, 4, + 24, 32, 10, 28, 54, 8, 0, 74, 16, 33, + 71, 97, 125, 125, 125, 125, 6, 78, 60, 52, + 32, 52, 26, 14, 10, 7, 21, 11, 12, 3, + 34, 50, 0, 0, 16, 24, 12, 22, 46, 14, + 2, 74, 16, 33, 71, 97, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 35 */ + + 70, 10, 33, 70, 10, 33, 3, 20, 48, 24, + 5, 19, 21, 56, 118, 28, 18, 11, 12, 26, + 2, 11, 0, 23, 53, 0, 47, 125, 123, 123, + 70, 9, 7, 12, 26, 2, 37, 4, 22, 6, + 1, 15, 21, 0, 31, 33, 59, 10, 7, 15, + 8, 21, 13, 41, 0, 21, 17, 35, 4, 4, + 44, 0, 0, 0, 0, 61, 67, 14, 11, 9, + 40, 5, 41, 44, 24, 10, 78, 84, 28, 34, + 8, 34, 1, 2, 30, 33, 29, 29, 39, 48, + 3, 24, 26, 9, 27, 15, 8, 4, 15, 13, + 29, 34, 9, 10, 9, 21, 2, 11, 4, 10, + 6, 8, 22, 14, 9, 4, 124, 124, 92, 74, + 12, 7, 2, 18, 0, 22, 18, 4, 15, 38, + 9, 53, 47, 66, 18, 26, 10, 12, 18, 28, + 68, 40, 18, 2, 4, 4, 28, 36, 61, 2, + 10, 25, 6, 1, 24, 20, 5, 38, 24, 26, + 31, 12, 13, 9, 20, 51, 20, 28, 22, 4, + 13, 18, 4, 18, 10, 21, 7, 10, 125, 124, + 87, 9, 19, 23, 37, 25, 31, 57, 39, 55, + 27, 55, 15, 124, 63, 32, 38, 26, 16, 14, + 14, 6, 6, 2, 21, 25, 23, 31, 35, 81, + 23, 25, 61, 3, 13, 17, 23, 37, 43, 41, + 43, 51, 35, 55, 67, 49, 61, 4, 50, 36, + 20, 16, 22, 6, 1, 3, 22, 16, 62, 42, + 36, 26, 38, 20, 22, 14, 6, 24, 82, 66, + 56, 50, 52, 20, 6, 0, 5, 13, 98, 58, + 16, 2, 26, 2, 19, 15, 10, 88, 68, 36, + 18, 40, 10, 2, 7, 11, 124, 33, 19, 5, + 11, 11, 7, 4, 12, 1, 6, 14, 28, 19, + 11, 27, 19, 22, 23, 23, 19, 3, 0, 11, + 21, 11, 23, 11, 15, 41, 20, 20, 42, 8, + 3, 20, 12, 4, 10, 8, 2, 13, 0, 5, + 13, 7, 22, 53, 17, 32, 21, 4, 7, 2, + 22, 7, 11, 8, 14, 35, 17, 51, 56, 60, + 66, 38, 24, 32, 26, 22, 16, 4, 6, 2, + 
19, 17, 31, 31, 21, 33, 61, 31, 29, 31, + 29, 45, 31, 47, 47, 35, 51, 32, 28, 26, + 20, 7, 4, 0, 29, 15, 27, 33, 63, 47, + 59, 69, 21, 19, 75, 13, 17, 33, 57, 41, + 51, 59, 35, 59, 51, 65, 71, 75, 87, 45, + 61, 71, 21, 7, 16, 3, 42, 60, 3, 4, + 24, 32, 10, 30, 56, 8, 0, 72, 12, 37, + 77, 103, 125, 125, 125, 125, 6, 78, 60, 52, + 32, 52, 28, 14, 10, 5, 21, 11, 14, 1, + 36, 52, 0, 2, 16, 24, 12, 22, 46, 14, + 2, 72, 12, 37, 77, 103, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 36 */ + + 66, 10, 33, 66, 10, 33, 1, 22, 48, 24, + 5, 21, 25, 54, 118, 26, 22, 11, 12, 28, + 0, 13, 0, 25, 57, 1, 53, 125, 125, 125, + 76, 7, 7, 12, 28, 0, 37, 6, 22, 4, + 1, 13, 19, 0, 31, 33, 59, 10, 7, 15, + 8, 21, 13, 39, 0, 21, 17, 35, 6, 4, + 44, 0, 0, 0, 2, 63, 67, 14, 13, 9, + 38, 5, 41, 48, 26, 12, 82, 88, 30, 36, + 10, 36, 2, 4, 34, 33, 27, 29, 39, 48, + 3, 26, 30, 9, 27, 13, 12, 4, 15, 13, + 31, 34, 9, 10, 7, 19, 4, 9, 6, 10, + 6, 10, 24, 16, 7, 6, 124, 124, 98, 80, + 12, 5, 2, 18, 0, 22, 18, 4, 15, 42, + 9, 57, 51, 70, 18, 28, 10, 12, 18, 28, + 72, 42, 20, 0, 4, 4, 30, 36, 63, 2, + 10, 25, 4, 3, 22, 18, 7, 38, 24, 26, + 33, 12, 15, 11, 18, 51, 18, 26, 20, 0, + 21, 16, 0, 14, 6, 25, 11, 6, 125, 124, + 95, 13, 23, 29, 43, 29, 39, 67, 47, 61, + 31, 61, 19, 124, 65, 28, 34, 20, 12, 10, + 10, 2, 2, 1, 27, 29, 27, 35, 39, 85, + 23, 25, 63, 5, 15, 19, 27, 39, 47, 43, + 45, 53, 35, 57, 67, 51, 59, 6, 52, 36, + 20, 18, 22, 8, 0, 1, 26, 20, 62, 44, + 36, 26, 40, 22, 26, 18, 8, 24, 84, 68, + 58, 52, 54, 20, 6, 2, 3, 13, 100, 58, + 16, 0, 26, 2, 19, 15, 10, 88, 66, 34, + 16, 40, 10, 4, 7, 11, 124, 29, 17, 3, + 7, 7, 5, 6, 14, 0, 10, 16, 32, 17, + 9, 27, 17, 24, 23, 23, 19, 1, 0, 11, + 23, 11, 25, 11, 15, 41, 20, 20, 44, 8, + 3, 20, 12, 4, 10, 8, 2, 15, 0, 5, + 13, 7, 24, 55, 19, 32, 23, 4, 9, 0, + 22, 9, 13, 8, 14, 37, 17, 53, 54, 58, + 64, 34, 20, 28, 22, 16, 12, 1, 0, 3, + 25, 23, 35, 41, 29, 41, 71, 37, 37, 37, + 35, 51, 35, 51, 49, 37, 
51, 26, 24, 20, + 16, 13, 1, 5, 35, 19, 33, 37, 69, 51, + 63, 73, 23, 19, 79, 15, 19, 35, 61, 45, + 55, 61, 37, 63, 53, 67, 75, 77, 87, 49, + 65, 75, 19, 7, 18, 3, 44, 62, 3, 6, + 26, 34, 10, 30, 58, 8, 0, 70, 8, 43, + 83, 111, 125, 125, 125, 125, 8, 78, 60, 52, + 32, 54, 28, 14, 10, 5, 21, 11, 14, 1, + 36, 54, 0, 2, 18, 26, 12, 22, 48, 14, + 2, 70, 8, 43, 83, 111, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 37 */ + + 64, 10, 33, 64, 10, 33, 2, 26, 48, 24, + 7, 23, 29, 52, 118, 26, 26, 11, 12, 30, + 0, 13, 0, 27, 61, 3, 57, 125, 125, 125, + 82, 5, 5, 12, 30, 0, 37, 8, 22, 4, + 1, 13, 17, 0, 31, 33, 59, 10, 7, 15, + 8, 21, 11, 39, 0, 21, 17, 35, 6, 4, + 44, 0, 0, 0, 2, 63, 67, 16, 15, 9, + 36, 7, 41, 54, 28, 14, 86, 92, 34, 40, + 14, 40, 4, 6, 40, 33, 27, 27, 37, 48, + 3, 28, 34, 9, 25, 13, 16, 4, 17, 15, + 31, 34, 9, 12, 7, 17, 6, 9, 8, 12, + 8, 14, 28, 18, 3, 8, 124, 124, 104, 86, + 14, 5, 4, 20, 0, 22, 20, 4, 13, 46, + 9, 61, 55, 76, 18, 28, 10, 12, 18, 30, + 74, 44, 20, 0, 4, 4, 30, 38, 65, 4, + 10, 27, 4, 5, 22, 18, 7, 38, 24, 26, + 35, 14, 17, 11, 18, 51, 18, 24, 18, 3, + 27, 14, 1, 10, 2, 29, 17, 2, 125, 124, + 101, 17, 27, 33, 49, 35, 45, 75, 55, 65, + 35, 69, 23, 124, 69, 24, 30, 16, 8, 6, + 6, 1, 1, 5, 31, 33, 31, 37, 43, 89, + 23, 25, 63, 7, 17, 21, 29, 41, 49, 45, + 49, 55, 37, 59, 69, 53, 57, 10, 54, 38, + 20, 18, 24, 8, 0, 0, 30, 22, 62, 44, + 38, 28, 42, 24, 28, 20, 12, 26, 86, 70, + 60, 52, 56, 22, 8, 2, 3, 13, 100, 58, + 14, 0, 26, 2, 19, 13, 10, 86, 64, 32, + 14, 40, 10, 4, 7, 9, 124, 27, 13, 0, + 3, 5, 3, 10, 18, 2, 14, 20, 36, 15, + 7, 25, 15, 28, 23, 23, 17, 1, 2, 11, + 23, 11, 25, 11, 15, 43, 22, 20, 46, 8, + 3, 20, 12, 4, 10, 8, 2, 15, 0, 5, + 13, 7, 24, 57, 21, 32, 25, 2, 9, 1, + 22, 9, 15, 8, 14, 39, 17, 55, 52, 56, + 62, 30, 16, 24, 18, 12, 6, 7, 5, 9, + 31, 27, 39, 49, 37, 49, 81, 43, 43, 41, + 39, 55, 39, 53, 53, 37, 53, 22, 18, 16, + 12, 19, 7, 9, 41, 23, 37, 41, 75, 55, + 67, 75, 
23, 21, 83, 17, 23, 39, 65, 49, + 59, 65, 39, 65, 55, 67, 77, 79, 89, 51, + 67, 77, 19, 7, 20, 1, 46, 64, 1, 8, + 26, 34, 10, 32, 60, 8, 0, 68, 4, 49, + 89, 117, 125, 125, 125, 125, 8, 78, 60, 52, + 32, 54, 28, 16, 12, 3, 21, 11, 16, 1, + 38, 56, 0, 2, 18, 26, 12, 24, 48, 14, + 2, 68, 4, 49, 89, 117, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 38 */ + + 62, 10, 35, 62, 10, 35, 6, 28, 50, 24, + 7, 27, 33, 50, 118, 26, 28, 11, 14, 30, + 0, 15, 2, 27, 63, 7, 63, 125, 125, 125, + 86, 3, 5, 14, 30, 0, 39, 8, 22, 2, + 1, 13, 17, 0, 31, 31, 57, 10, 5, 13, + 8, 19, 11, 39, 0, 21, 17, 35, 6, 4, + 44, 0, 0, 0, 4, 63, 67, 16, 15, 11, + 36, 7, 41, 58, 30, 16, 88, 98, 36, 42, + 18, 42, 6, 8, 44, 33, 27, 27, 37, 48, + 3, 30, 40, 9, 23, 11, 20, 2, 17, 15, + 33, 36, 9, 12, 5, 17, 6, 7, 10, 12, + 10, 16, 30, 18, 1, 10, 124, 124, 110, 92, + 14, 5, 4, 20, 0, 22, 20, 6, 13, 50, + 7, 65, 59, 82, 18, 28, 10, 12, 20, 30, + 78, 46, 20, 0, 4, 4, 32, 40, 67, 4, + 10, 27, 2, 7, 20, 18, 9, 38, 24, 26, + 39, 14, 19, 13, 16, 51, 18, 24, 18, 7, + 33, 12, 5, 4, 3, 33, 21, 1, 125, 124, + 109, 21, 29, 37, 57, 39, 51, 83, 65, 71, + 39, 75, 27, 124, 73, 20, 26, 12, 2, 0, + 2, 7, 5, 9, 35, 39, 35, 41, 45, 91, + 23, 25, 65, 9, 19, 23, 31, 43, 51, 47, + 53, 57, 37, 61, 71, 53, 57, 12, 56, 38, + 20, 18, 26, 10, 2, 0, 34, 24, 64, 44, + 38, 30, 44, 26, 32, 24, 16, 28, 86, 70, + 60, 54, 58, 22, 8, 2, 1, 13, 102, 58, + 12, 0, 26, 2, 19, 13, 8, 86, 64, 30, + 12, 38, 10, 4, 7, 9, 124, 25, 11, 2, + 1, 1, 1, 14, 22, 4, 16, 24, 40, 15, + 5, 25, 13, 32, 21, 23, 17, 0, 2, 11, + 23, 11, 27, 11, 15, 43, 22, 22, 48, 8, + 3, 22, 14, 4, 12, 10, 2, 17, 0, 5, + 13, 7, 26, 59, 23, 32, 27, 0, 11, 1, + 22, 11, 15, 8, 12, 43, 19, 57, 48, 54, + 60, 28, 12, 20, 14, 8, 2, 11, 11, 13, + 37, 33, 43, 57, 45, 57, 89, 51, 49, 47, + 43, 61, 43, 57, 55, 39, 55, 18, 14, 10, + 6, 25, 11, 15, 47, 29, 43, 47, 81, 59, + 69, 79, 25, 23, 85, 21, 25, 43, 69, 51, + 63, 69, 41, 69, 57, 69, 
79, 81, 91, 55, + 69, 79, 19, 5, 20, 1, 48, 66, 1, 8, + 28, 36, 10, 32, 62, 8, 0, 66, 0, 53, + 95, 123, 125, 125, 125, 125, 8, 78, 60, 52, + 32, 56, 30, 16, 12, 3, 19, 9, 16, 0, + 40, 58, 2, 4, 18, 26, 12, 24, 50, 14, + 2, 66, 0, 53, 95, 123, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 39 */ + + 60, 10, 35, 60, 10, 35, 10, 32, 50, 24, + 9, 29, 37, 48, 118, 26, 32, 11, 14, 32, + 1, 15, 2, 29, 67, 9, 67, 125, 125, 125, + 92, 1, 5, 14, 32, 1, 39, 10, 22, 2, + 1, 11, 15, 0, 31, 31, 57, 10, 5, 13, + 8, 19, 9, 37, 0, 21, 17, 35, 8, 4, + 44, 0, 0, 0, 4, 63, 67, 18, 17, 11, + 34, 9, 41, 64, 32, 18, 92, 102, 40, 46, + 20, 46, 10, 10, 48, 33, 25, 25, 37, 48, + 3, 32, 44, 9, 21, 11, 24, 2, 19, 17, + 33, 36, 9, 14, 5, 15, 8, 7, 12, 14, + 12, 20, 34, 20, 0, 12, 124, 124, 116, 98, + 14, 3, 4, 22, 0, 22, 22, 6, 11, 54, + 7, 69, 63, 88, 18, 30, 10, 12, 20, 32, + 80, 48, 22, 1, 4, 4, 32, 40, 69, 4, + 10, 29, 2, 9, 20, 16, 9, 38, 24, 26, + 41, 14, 21, 15, 16, 51, 18, 22, 16, 11, + 39, 10, 9, 0, 7, 37, 27, 5, 125, 124, + 115, 25, 33, 43, 63, 45, 57, 93, 73, 77, + 43, 83, 31, 124, 75, 16, 22, 8, 1, 3, + 1, 11, 9, 13, 41, 43, 39, 45, 49, 95, + 23, 25, 65, 11, 21, 25, 35, 45, 53, 49, + 55, 59, 39, 63, 71, 55, 55, 16, 58, 40, + 20, 20, 26, 10, 2, 2, 38, 28, 64, 46, + 40, 30, 46, 28, 34, 26, 20, 28, 88, 72, + 62, 54, 60, 24, 8, 4, 1, 13, 102, 58, + 12, 1, 26, 2, 19, 11, 8, 84, 62, 28, + 10, 38, 10, 6, 7, 9, 124, 21, 7, 4, + 2, 0, 0, 16, 24, 6, 20, 26, 44, 13, + 3, 23, 11, 36, 21, 23, 17, 0, 4, 11, + 25, 11, 27, 11, 15, 45, 24, 22, 50, 8, + 3, 22, 14, 4, 12, 10, 2, 17, 0, 5, + 13, 7, 26, 61, 25, 32, 29, 0, 13, 3, + 22, 13, 17, 8, 12, 45, 19, 59, 46, 52, + 58, 24, 8, 16, 10, 2, 3, 17, 17, 19, + 43, 37, 47, 67, 53, 65, 99, 57, 55, 51, + 47, 65, 47, 61, 59, 41, 55, 12, 8, 6, + 2, 31, 17, 19, 53, 33, 47, 51, 87, 63, + 73, 81, 27, 23, 89, 23, 29, 45, 73, 55, + 67, 71, 43, 71, 59, 71, 83, 83, 91, 57, + 73, 83, 17, 5, 22, 0, 50, 68, 1, 10, + 
28, 36, 10, 34, 64, 8, 0, 64, 3, 59, + 101, 125, 125, 125, 125, 125, 10, 78, 60, 52, + 32, 56, 30, 16, 12, 1, 19, 9, 18, 0, + 42, 60, 2, 4, 20, 28, 12, 24, 50, 14, + 2, 64, 3, 59, 101, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 40 */ + + 56, 8, 35, 56, 8, 35, 12, 34, 50, 24, + 9, 33, 43, 46, 118, 24, 36, 13, 14, 34, + 1, 17, 2, 31, 71, 11, 73, 125, 125, 125, + 98, 0, 5, 14, 34, 1, 41, 12, 22, 0, + 1, 11, 15, 1, 33, 31, 57, 10, 5, 13, + 8, 19, 9, 37, 0, 21, 17, 35, 8, 4, + 44, 0, 0, 0, 6, 65, 67, 18, 19, 11, + 32, 9, 43, 68, 34, 20, 94, 106, 42, 48, + 24, 48, 12, 12, 52, 33, 25, 25, 37, 48, + 3, 34, 48, 9, 21, 9, 28, 2, 19, 17, + 35, 36, 9, 14, 3, 13, 10, 5, 12, 14, + 12, 22, 36, 20, 2, 14, 124, 124, 122, 102, + 14, 3, 4, 22, 1, 22, 22, 6, 11, 58, + 7, 73, 67, 92, 18, 30, 10, 12, 20, 32, + 84, 48, 22, 1, 2, 4, 34, 42, 73, 4, + 10, 29, 0, 13, 18, 16, 11, 38, 24, 24, + 43, 14, 23, 17, 14, 51, 16, 20, 14, 15, + 47, 6, 13, 3, 13, 43, 31, 9, 125, 124, + 123, 29, 37, 47, 69, 49, 65, 101, 81, 83, + 47, 89, 35, 124, 79, 12, 16, 2, 7, 9, + 7, 15, 15, 17, 45, 47, 43, 49, 53, 99, + 23, 27, 67, 13, 25, 27, 37, 47, 57, 53, + 59, 61, 39, 65, 73, 57, 55, 18, 60, 40, + 20, 20, 28, 12, 4, 2, 42, 30, 64, 46, + 40, 32, 48, 30, 38, 30, 22, 30, 88, 72, + 62, 56, 62, 24, 8, 4, 0, 15, 104, 58, + 10, 1, 26, 2, 19, 11, 8, 84, 60, 26, + 8, 38, 10, 6, 7, 9, 124, 19, 5, 6, + 4, 4, 2, 20, 28, 8, 22, 30, 48, 13, + 3, 23, 9, 38, 21, 23, 17, 2, 4, 11, + 25, 11, 29, 11, 15, 45, 24, 22, 52, 8, + 5, 22, 14, 2, 12, 10, 2, 19, 0, 5, + 13, 7, 28, 63, 27, 32, 33, 1, 15, 5, + 20, 15, 19, 8, 12, 47, 19, 63, 44, 48, + 56, 20, 4, 12, 6, 1, 7, 23, 23, 25, + 49, 43, 51, 75, 61, 75, 109, 65, 63, 57, + 53, 71, 51, 65, 61, 43, 57, 8, 4, 0, + 3, 37, 23, 25, 59, 39, 53, 57, 93, 67, + 77, 85, 29, 25, 93, 27, 31, 49, 77, 59, + 71, 75, 47, 75, 63, 73, 85, 85, 93, 61, + 75, 85, 17, 5, 22, 0, 52, 70, 1, 10, + 30, 38, 10, 34, 64, 8, 1, 62, 7, 65, + 107, 125, 
125, 125, 125, 125, 10, 78, 60, 52, + 32, 58, 30, 16, 12, 1, 19, 9, 18, 0, + 42, 60, 2, 4, 20, 28, 12, 24, 52, 14, + 0, 62, 7, 65, 107, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 41 */ + + 54, 8, 35, 54, 8, 35, 16, 36, 52, 24, + 9, 35, 47, 44, 120, 24, 40, 13, 16, 36, + 1, 19, 2, 31, 73, 13, 77, 125, 125, 125, + 104, 2, 3, 16, 36, 1, 41, 14, 22, 0, + 0, 11, 13, 1, 33, 31, 57, 10, 5, 11, + 8, 19, 9, 37, 0, 21, 17, 33, 8, 4, + 44, 0, 0, 0, 8, 65, 67, 18, 21, 11, + 30, 9, 43, 72, 38, 22, 98, 110, 46, 50, + 28, 52, 14, 16, 58, 33, 25, 25, 35, 48, + 3, 36, 52, 7, 19, 7, 32, 2, 19, 17, + 35, 36, 9, 14, 1, 11, 12, 3, 14, 16, + 14, 24, 40, 22, 6, 16, 124, 124, 124, 108, + 16, 3, 6, 24, 1, 22, 22, 8, 9, 64, + 7, 77, 69, 98, 18, 30, 10, 12, 20, 34, + 88, 50, 22, 1, 2, 4, 36, 44, 75, 6, + 10, 29, 1, 15, 16, 16, 13, 38, 24, 24, + 45, 16, 25, 17, 12, 51, 16, 20, 14, 19, + 53, 4, 15, 7, 17, 47, 35, 13, 125, 124, + 125, 33, 41, 51, 75, 53, 71, 109, 89, 87, + 51, 95, 39, 124, 83, 8, 12, 1, 11, 13, + 11, 19, 19, 21, 49, 51, 45, 51, 55, 103, + 23, 27, 67, 13, 27, 29, 39, 49, 59, 55, + 63, 63, 39, 67, 75, 57, 53, 20, 62, 40, + 20, 20, 30, 14, 6, 4, 48, 32, 64, 46, + 40, 34, 50, 32, 42, 34, 26, 32, 90, 74, + 64, 58, 64, 24, 10, 4, 2, 15, 106, 58, + 8, 1, 28, 4, 19, 9, 8, 84, 60, 24, + 6, 38, 10, 6, 7, 7, 124, 17, 3, 10, + 8, 8, 4, 24, 32, 10, 26, 34, 52, 11, + 1, 21, 7, 42, 19, 21, 15, 4, 6, 11, + 25, 11, 31, 11, 15, 45, 24, 24, 54, 10, + 5, 24, 14, 2, 12, 10, 2, 21, 0, 3, + 11, 7, 30, 63, 29, 32, 35, 3, 15, 5, + 20, 15, 21, 8, 12, 49, 19, 65, 42, 46, + 56, 18, 0, 8, 2, 5, 11, 27, 29, 31, + 55, 49, 55, 83, 69, 83, 119, 71, 69, 63, + 57, 75, 53, 67, 63, 43, 59, 4, 0, 3, + 7, 41, 27, 29, 65, 43, 57, 61, 97, 71, + 81, 89, 29, 27, 95, 29, 33, 53, 81, 61, + 73, 79, 49, 79, 65, 73, 87, 85, 95, 65, + 77, 87, 17, 3, 24, 0, 54, 74, 0, 12, + 32, 40, 10, 36, 66, 8, 1, 60, 11, 69, + 113, 125, 125, 125, 125, 125, 10, 78, 60, 54, + 34, 
60, 32, 18, 14, 1, 19, 9, 18, 2, + 44, 62, 2, 6, 20, 28, 12, 26, 54, 14, + 0, 60, 11, 69, 113, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 42 */ + + 52, 8, 35, 52, 8, 35, 20, 40, 52, 24, + 11, 37, 51, 42, 120, 24, 44, 13, 16, 38, + 3, 19, 2, 33, 77, 15, 83, 125, 125, 125, + 110, 4, 3, 16, 38, 3, 41, 16, 22, 1, + 0, 9, 11, 1, 33, 31, 57, 10, 5, 11, + 8, 19, 7, 35, 0, 21, 17, 33, 10, 4, + 44, 0, 0, 0, 8, 65, 67, 20, 23, 11, + 28, 11, 43, 78, 40, 24, 102, 114, 48, 54, + 30, 54, 18, 18, 62, 33, 23, 23, 35, 48, + 3, 38, 56, 7, 17, 7, 36, 2, 21, 19, + 37, 36, 9, 16, 1, 9, 14, 3, 16, 16, + 16, 28, 42, 24, 8, 18, 124, 124, 124, 114, + 16, 1, 6, 24, 1, 22, 24, 8, 9, 68, + 7, 81, 73, 104, 18, 32, 10, 12, 20, 34, + 90, 52, 24, 3, 2, 4, 36, 44, 77, 6, + 10, 31, 1, 17, 16, 14, 13, 38, 24, 24, + 47, 16, 27, 19, 12, 51, 16, 18, 12, 23, + 59, 2, 19, 11, 21, 51, 41, 17, 125, 124, + 125, 37, 45, 57, 81, 59, 77, 119, 97, 93, + 55, 103, 43, 124, 85, 4, 8, 5, 15, 17, + 15, 23, 23, 25, 55, 55, 49, 55, 59, 107, + 23, 27, 69, 15, 29, 31, 43, 51, 61, 57, + 65, 65, 41, 69, 75, 59, 51, 24, 64, 42, + 20, 22, 30, 14, 6, 6, 52, 36, 64, 48, + 42, 34, 52, 34, 44, 36, 30, 32, 92, 76, + 66, 58, 66, 26, 10, 6, 2, 15, 106, 58, + 8, 3, 28, 4, 19, 9, 8, 82, 58, 22, + 4, 38, 10, 8, 7, 7, 124, 13, 0, 12, + 12, 10, 6, 26, 34, 12, 30, 36, 56, 9, + 0, 21, 5, 46, 19, 21, 15, 4, 6, 11, + 27, 11, 31, 11, 15, 47, 26, 24, 56, 10, + 5, 24, 14, 2, 12, 10, 2, 21, 0, 3, + 11, 7, 30, 65, 31, 32, 37, 3, 17, 7, + 20, 17, 23, 8, 12, 51, 19, 67, 40, 44, + 54, 14, 3, 4, 1, 11, 17, 33, 35, 37, + 61, 53, 59, 93, 77, 91, 125, 77, 75, 67, + 61, 81, 57, 71, 67, 45, 59, 1, 5, 9, + 11, 47, 33, 35, 71, 47, 63, 65, 103, 75, + 85, 91, 31, 27, 99, 31, 37, 55, 85, 65, + 77, 81, 51, 81, 67, 75, 91, 87, 95, 67, + 81, 91, 15, 3, 26, 2, 56, 76, 0, 14, + 32, 40, 10, 36, 68, 8, 1, 58, 15, 75, + 119, 125, 125, 125, 125, 125, 12, 78, 60, 54, + 34, 60, 32, 18, 14, 0, 19, 9, 20, 2, + 
46, 64, 2, 6, 22, 30, 12, 26, 54, 14, + 0, 58, 15, 75, 119, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 43 */ + + 50, 8, 37, 50, 8, 37, 24, 42, 54, 24, + 11, 41, 55, 40, 120, 24, 46, 13, 18, 38, + 3, 21, 4, 33, 79, 19, 87, 125, 125, 125, + 114, 6, 3, 18, 38, 3, 43, 16, 22, 1, + 0, 9, 11, 1, 33, 29, 55, 10, 3, 9, + 8, 17, 7, 35, 0, 21, 17, 33, 10, 4, + 44, 0, 0, 0, 10, 65, 67, 20, 23, 13, + 28, 11, 43, 82, 42, 26, 104, 120, 52, 56, + 34, 58, 20, 20, 66, 33, 23, 23, 35, 48, + 3, 40, 62, 7, 15, 5, 40, 0, 21, 19, + 37, 38, 9, 16, 0, 9, 14, 1, 18, 18, + 18, 30, 46, 24, 10, 20, 124, 124, 124, 120, + 16, 1, 6, 26, 1, 22, 24, 10, 7, 72, + 5, 85, 77, 110, 18, 32, 10, 12, 22, 36, + 94, 54, 24, 3, 2, 4, 38, 46, 79, 6, + 10, 31, 3, 19, 14, 14, 15, 38, 24, 24, + 51, 16, 29, 21, 10, 51, 16, 18, 12, 27, + 65, 0, 23, 17, 27, 55, 45, 21, 125, 124, + 125, 41, 47, 61, 89, 63, 83, 125, 107, 99, + 59, 109, 47, 124, 89, 0, 4, 9, 21, 23, + 19, 29, 27, 29, 59, 61, 53, 59, 61, 109, + 23, 27, 69, 17, 31, 33, 45, 53, 63, 59, + 69, 67, 41, 71, 77, 59, 51, 26, 66, 42, + 20, 22, 32, 16, 8, 6, 56, 38, 66, 48, + 42, 36, 54, 36, 48, 40, 34, 34, 92, 76, + 66, 60, 68, 26, 10, 6, 4, 15, 108, 58, + 6, 3, 28, 4, 19, 7, 6, 82, 58, 20, + 2, 36, 10, 8, 7, 7, 124, 11, 2, 14, + 14, 14, 8, 30, 38, 14, 32, 40, 60, 9, + 2, 19, 3, 50, 17, 21, 15, 6, 8, 11, + 27, 11, 33, 11, 15, 47, 26, 26, 58, 10, + 5, 26, 16, 2, 14, 12, 2, 23, 0, 3, + 11, 7, 32, 67, 33, 32, 39, 5, 19, 7, + 20, 19, 23, 8, 10, 55, 21, 69, 36, 42, + 52, 12, 7, 0, 5, 15, 21, 37, 41, 41, + 67, 59, 63, 101, 85, 99, 125, 85, 81, 73, + 65, 85, 61, 75, 69, 47, 61, 5, 9, 13, + 17, 53, 37, 39, 77, 53, 67, 71, 109, 79, + 87, 95, 33, 29, 101, 35, 39, 59, 89, 67, + 81, 85, 53, 85, 69, 77, 93, 89, 97, 71, + 83, 93, 15, 1, 26, 2, 58, 78, 0, 14, + 34, 42, 10, 38, 70, 8, 1, 56, 19, 79, + 125, 125, 125, 125, 125, 125, 12, 78, 60, 54, + 34, 62, 34, 18, 14, 0, 17, 7, 20, 4, + 48, 66, 4, 8, 22, 30, 12, 26, 
56, 14, + 0, 56, 19, 79, 125, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 44 */ + + 46, 8, 37, 46, 8, 37, 26, 46, 54, 24, + 13, 43, 59, 38, 120, 22, 50, 13, 18, 40, + 3, 21, 4, 35, 83, 21, 93, 125, 125, 125, + 120, 8, 3, 18, 40, 3, 43, 18, 22, 3, + 0, 9, 9, 1, 33, 29, 55, 10, 3, 9, + 8, 17, 5, 35, 0, 21, 17, 33, 10, 4, + 44, 0, 0, 0, 10, 67, 67, 22, 25, 13, + 26, 13, 43, 88, 44, 28, 108, 124, 54, 60, + 38, 60, 22, 22, 70, 33, 23, 21, 35, 48, + 3, 42, 66, 7, 15, 5, 44, 0, 23, 21, + 39, 38, 9, 18, 0, 7, 16, 1, 20, 18, + 18, 34, 48, 26, 12, 22, 124, 124, 124, 124, + 16, 1, 6, 26, 1, 22, 26, 10, 7, 76, + 5, 89, 81, 114, 18, 32, 10, 12, 22, 36, + 96, 56, 24, 3, 2, 4, 38, 48, 81, 6, + 10, 33, 3, 21, 14, 14, 15, 38, 24, 24, + 53, 16, 31, 23, 10, 51, 14, 16, 10, 31, + 73, 1, 27, 21, 31, 59, 51, 25, 125, 124, + 125, 45, 51, 65, 95, 69, 91, 125, 115, 105, + 63, 117, 51, 124, 93, 3, 0, 15, 25, 27, + 23, 33, 31, 33, 63, 65, 57, 63, 65, 113, + 23, 27, 71, 19, 33, 35, 47, 55, 67, 61, + 73, 69, 43, 73, 79, 61, 49, 30, 68, 44, + 20, 22, 34, 16, 8, 8, 60, 40, 66, 48, + 44, 38, 56, 38, 50, 42, 36, 36, 94, 78, + 68, 60, 70, 28, 10, 6, 4, 15, 108, 58, + 4, 3, 28, 4, 19, 7, 6, 80, 56, 18, + 0, 36, 10, 8, 7, 7, 124, 9, 6, 16, + 18, 16, 10, 34, 42, 16, 36, 44, 64, 7, + 4, 19, 1, 52, 17, 21, 15, 6, 8, 11, + 27, 11, 33, 11, 15, 49, 28, 26, 60, 10, + 5, 26, 16, 2, 14, 12, 2, 23, 0, 3, + 11, 7, 32, 69, 35, 32, 41, 7, 21, 9, + 20, 21, 25, 8, 10, 57, 21, 71, 34, 40, + 50, 8, 11, 3, 9, 19, 27, 43, 47, 47, + 73, 63, 67, 109, 93, 107, 125, 91, 89, 77, + 71, 91, 65, 79, 73, 49, 63, 9, 15, 19, + 21, 59, 43, 45, 83, 57, 73, 75, 115, 83, + 91, 97, 35, 31, 105, 37, 43, 63, 93, 71, + 85, 89, 55, 87, 71, 79, 95, 91, 99, 73, + 85, 95, 15, 1, 28, 4, 60, 80, 0, 16, + 34, 42, 10, 38, 72, 8, 1, 54, 23, 85, + 125, 125, 125, 125, 125, 125, 12, 78, 60, 54, + 34, 62, 34, 18, 14, 2, 17, 7, 22, 4, + 48, 68, 4, 8, 22, 30, 12, 26, 56, 14, + 0, 54, 23, 85, 
125, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 45 */ + + 44, 8, 37, 44, 8, 37, 30, 48, 54, 24, + 13, 45, 63, 36, 120, 22, 54, 13, 18, 42, + 5, 23, 4, 37, 87, 23, 97, 125, 125, 125, + 124, 10, 1, 18, 42, 5, 43, 20, 22, 3, + 0, 7, 7, 1, 33, 29, 55, 10, 3, 9, + 8, 17, 5, 33, 0, 21, 17, 33, 12, 4, + 44, 0, 0, 0, 12, 67, 67, 22, 27, 13, + 24, 13, 43, 92, 46, 30, 112, 124, 58, 62, + 40, 64, 26, 24, 76, 33, 21, 21, 33, 48, + 3, 44, 70, 7, 13, 3, 48, 0, 23, 21, + 39, 38, 9, 18, 2, 5, 18, 0, 22, 20, + 20, 36, 52, 28, 16, 24, 124, 124, 124, 124, + 18, 0, 8, 28, 1, 22, 26, 10, 5, 80, + 5, 93, 85, 120, 18, 34, 10, 12, 22, 38, + 100, 58, 26, 5, 2, 4, 40, 48, 83, 8, + 10, 33, 5, 23, 12, 12, 17, 38, 24, 24, + 55, 18, 33, 23, 8, 51, 14, 14, 8, 35, + 79, 3, 29, 25, 35, 63, 55, 29, 125, 124, + 125, 49, 55, 71, 101, 73, 97, 125, 123, 109, + 67, 123, 55, 124, 95, 7, 3, 19, 29, 31, + 27, 37, 35, 37, 69, 69, 61, 65, 69, 117, + 23, 27, 71, 21, 35, 37, 51, 57, 69, 63, + 75, 71, 43, 75, 79, 63, 47, 32, 70, 44, + 20, 24, 34, 18, 10, 10, 64, 44, 66, 50, + 44, 38, 58, 40, 54, 46, 40, 36, 96, 80, + 70, 62, 72, 28, 12, 8, 6, 15, 110, 58, + 4, 5, 28, 4, 19, 5, 6, 80, 54, 16, + 1, 36, 10, 10, 7, 5, 124, 5, 8, 20, + 22, 20, 12, 36, 44, 18, 40, 46, 68, 5, + 6, 17, 0, 56, 17, 21, 13, 8, 10, 11, + 29, 11, 35, 11, 15, 49, 28, 26, 62, 10, + 5, 26, 16, 2, 14, 12, 2, 25, 0, 3, + 11, 7, 34, 71, 37, 32, 43, 7, 21, 11, + 20, 21, 27, 8, 10, 59, 21, 73, 32, 38, + 48, 4, 15, 7, 13, 25, 31, 49, 53, 53, + 79, 69, 71, 119, 101, 115, 125, 97, 95, 83, + 75, 95, 69, 81, 75, 49, 63, 15, 19, 23, + 25, 65, 49, 49, 89, 61, 77, 79, 121, 87, + 95, 101, 35, 31, 109, 39, 45, 65, 97, 75, + 89, 91, 57, 91, 73, 79, 99, 93, 99, 77, + 89, 99, 13, 1, 30, 4, 62, 82, 2, 18, + 36, 44, 10, 40, 74, 8, 1, 52, 27, 91, + 125, 125, 125, 125, 125, 125, 14, 78, 60, 54, + 34, 64, 34, 20, 16, 2, 17, 7, 22, 4, + 50, 70, 4, 8, 24, 32, 12, 28, 58, 14, + 0, 52, 27, 91, 125, 125, 125, 
125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 46 */ + + 42, 8, 37, 42, 8, 37, 34, 52, 56, 24, + 15, 49, 67, 34, 120, 22, 58, 13, 20, 44, + 5, 23, 4, 37, 89, 25, 103, 125, 125, 125, + 124, 12, 1, 20, 44, 5, 45, 22, 22, 5, + 0, 7, 7, 1, 33, 29, 55, 10, 3, 7, + 8, 17, 3, 33, 0, 21, 17, 33, 12, 4, + 44, 0, 0, 0, 12, 67, 67, 24, 29, 13, + 22, 15, 43, 98, 48, 32, 114, 124, 60, 66, + 44, 66, 28, 26, 80, 33, 21, 19, 33, 48, + 3, 46, 74, 7, 11, 3, 52, 0, 25, 23, + 41, 38, 9, 20, 2, 3, 20, 0, 24, 20, + 22, 40, 54, 28, 18, 26, 124, 124, 124, 124, + 18, 0, 8, 28, 1, 22, 28, 12, 5, 84, + 5, 97, 89, 124, 18, 34, 10, 12, 22, 38, + 102, 60, 26, 5, 2, 4, 40, 50, 85, 8, + 10, 35, 5, 25, 12, 12, 17, 38, 24, 24, + 57, 18, 35, 25, 8, 51, 14, 14, 8, 39, + 85, 5, 33, 29, 41, 67, 61, 33, 125, 124, + 125, 53, 59, 75, 107, 79, 103, 125, 125, 115, + 71, 125, 59, 124, 99, 11, 7, 23, 35, 37, + 31, 41, 39, 41, 73, 73, 65, 69, 71, 121, + 23, 27, 73, 23, 37, 39, 53, 59, 71, 65, + 79, 73, 45, 77, 81, 63, 47, 36, 72, 46, + 20, 24, 36, 18, 10, 10, 68, 46, 66, 50, + 46, 40, 60, 42, 56, 48, 44, 38, 96, 80, + 70, 62, 74, 30, 12, 8, 6, 15, 110, 58, + 2, 5, 28, 4, 19, 5, 6, 78, 54, 14, + 3, 36, 10, 10, 7, 5, 124, 3, 12, 22, + 24, 22, 14, 40, 48, 20, 42, 50, 72, 5, + 8, 17, 2, 60, 15, 21, 13, 8, 10, 11, + 29, 11, 35, 11, 15, 51, 30, 28, 64, 10, + 5, 28, 16, 2, 14, 12, 2, 25, 0, 3, + 11, 7, 34, 73, 39, 32, 45, 9, 23, 11, + 20, 23, 29, 8, 10, 61, 21, 75, 30, 36, + 46, 2, 19, 11, 17, 29, 37, 53, 59, 59, + 85, 73, 75, 125, 109, 123, 125, 105, 101, 87, + 79, 101, 73, 85, 79, 51, 65, 19, 25, 29, + 31, 71, 53, 55, 95, 67, 83, 85, 125, 91, + 99, 103, 37, 33, 111, 43, 49, 69, 101, 77, + 93, 95, 59, 93, 75, 81, 101, 95, 101, 79, + 91, 101, 13, 0, 30, 6, 64, 84, 2, 18, + 36, 44, 10, 40, 76, 8, 1, 50, 31, 95, + 125, 125, 125, 125, 125, 125, 14, 78, 60, 54, + 34, 64, 36, 20, 16, 4, 17, 7, 24, 6, + 52, 72, 4, 10, 24, 32, 12, 28, 58, 14, + 0, 50, 31, 95, 125, 125, 125, 
125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 47 */ + + 40, 8, 37, 40, 8, 37, 38, 54, 56, 24, + 15, 51, 71, 32, 120, 22, 62, 13, 20, 46, + 5, 25, 4, 39, 93, 27, 107, 125, 125, 125, + 124, 14, 1, 20, 46, 5, 45, 24, 22, 5, + 0, 7, 5, 1, 33, 29, 55, 10, 3, 7, + 8, 17, 3, 33, 0, 21, 17, 33, 12, 4, + 44, 0, 0, 0, 14, 67, 67, 24, 31, 13, + 20, 15, 43, 102, 50, 34, 118, 124, 64, 68, + 48, 70, 30, 28, 84, 33, 21, 19, 33, 48, + 3, 48, 78, 7, 9, 1, 56, 0, 25, 23, + 41, 38, 9, 20, 4, 1, 22, 2, 26, 22, + 24, 42, 58, 30, 20, 28, 124, 124, 124, 124, + 18, 0, 8, 30, 1, 22, 28, 12, 3, 88, + 5, 101, 93, 124, 18, 34, 10, 12, 22, 40, + 106, 62, 26, 5, 2, 4, 42, 52, 87, 8, + 10, 35, 7, 27, 10, 12, 19, 38, 24, 24, + 59, 18, 37, 27, 6, 51, 14, 12, 6, 43, + 91, 7, 37, 33, 45, 71, 65, 37, 125, 124, + 125, 57, 63, 79, 113, 83, 109, 125, 125, 121, + 75, 125, 63, 124, 103, 15, 11, 27, 39, 41, + 35, 45, 43, 45, 77, 77, 69, 73, 75, 125, + 23, 27, 73, 25, 39, 41, 55, 61, 73, 67, + 83, 75, 45, 79, 83, 65, 45, 38, 74, 46, + 20, 24, 38, 20, 12, 12, 72, 48, 66, 50, + 46, 42, 62, 44, 60, 52, 48, 40, 98, 82, + 72, 64, 76, 30, 12, 8, 8, 15, 112, 58, + 0, 5, 28, 4, 19, 3, 6, 78, 52, 12, + 5, 36, 10, 10, 7, 5, 124, 1, 14, 24, + 28, 26, 16, 44, 52, 22, 46, 54, 76, 3, + 10, 15, 4, 64, 15, 21, 13, 10, 12, 11, + 29, 11, 37, 11, 15, 51, 30, 28, 66, 10, + 5, 28, 16, 2, 14, 12, 2, 27, 0, 3, + 11, 7, 36, 75, 41, 32, 47, 11, 25, 13, + 20, 25, 31, 8, 10, 63, 21, 77, 28, 34, + 44, 1, 23, 15, 21, 33, 41, 59, 65, 65, + 91, 79, 79, 125, 117, 125, 125, 111, 107, 93, + 83, 105, 77, 89, 81, 53, 67, 23, 29, 33, + 35, 77, 59, 59, 101, 71, 87, 89, 125, 95, + 103, 107, 39, 35, 115, 45, 51, 73, 105, 81, + 97, 99, 61, 97, 77, 83, 103, 97, 103, 83, + 93, 103, 13, 0, 32, 6, 66, 86, 2, 20, + 38, 46, 10, 42, 78, 8, 1, 48, 35, 101, + 125, 125, 125, 125, 125, 125, 14, 78, 60, 54, + 34, 66, 36, 20, 16, 4, 17, 7, 24, 6, + 54, 74, 4, 10, 24, 32, 12, 28, 60, 14, + 0, 48, 35, 101, 125, 
125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 48 */ + + 36, 6, 39, 36, 6, 39, 40, 56, 56, 24, + 17, 55, 77, 30, 120, 20, 64, 15, 20, 46, + 7, 27, 4, 41, 97, 31, 113, 125, 125, 125, + 124, 14, 1, 20, 46, 7, 47, 24, 22, 7, + 0, 7, 5, 3, 35, 29, 55, 8, 3, 7, + 8, 17, 3, 33, 0, 21, 19, 33, 12, 4, + 44, 0, 0, 0, 14, 69, 67, 24, 33, 15, + 18, 17, 45, 106, 52, 36, 120, 124, 66, 70, + 50, 72, 32, 30, 88, 33, 21, 19, 33, 48, + 3, 48, 82, 7, 9, 1, 58, 1, 27, 25, + 43, 38, 9, 20, 4, 1, 22, 2, 26, 22, + 24, 44, 60, 30, 22, 30, 124, 124, 124, 124, + 18, 0, 8, 30, 3, 22, 28, 12, 3, 92, + 5, 105, 97, 124, 18, 34, 10, 12, 22, 40, + 108, 62, 26, 7, 0, 4, 42, 52, 91, 8, + 10, 37, 9, 31, 8, 10, 21, 38, 24, 22, + 63, 18, 39, 29, 4, 51, 12, 10, 4, 49, + 99, 11, 41, 39, 51, 77, 71, 41, 125, 124, + 125, 63, 67, 85, 121, 89, 117, 125, 125, 125, + 79, 125, 67, 124, 107, 21, 17, 33, 45, 47, + 41, 51, 49, 49, 83, 83, 73, 77, 79, 125, + 23, 29, 75, 27, 43, 45, 59, 65, 77, 71, + 87, 77, 47, 81, 85, 67, 45, 40, 74, 46, + 20, 24, 38, 20, 12, 12, 76, 50, 66, 50, + 46, 42, 62, 46, 62, 54, 50, 40, 98, 82, + 72, 64, 78, 30, 12, 8, 8, 17, 112, 56, + 1, 7, 28, 4, 19, 3, 4, 76, 50, 8, + 7, 34, 10, 10, 7, 5, 124, 0, 16, 26, + 30, 28, 18, 46, 54, 24, 48, 56, 80, 3, + 10, 15, 6, 66, 15, 21, 13, 10, 12, 11, + 31, 13, 39, 11, 17, 53, 30, 28, 68, 10, + 7, 28, 16, 0, 14, 12, 2, 29, 0, 3, + 11, 7, 36, 77, 43, 32, 51, 13, 27, 15, + 18, 27, 33, 8, 8, 67, 23, 81, 24, 30, + 42, 5, 27, 21, 27, 39, 47, 65, 71, 71, + 99, 85, 83, 125, 125, 125, 125, 119, 115, 99, + 89, 111, 81, 93, 85, 55, 69, 29, 35, 39, + 41, 83, 65, 65, 109, 77, 93, 95, 125, 99, + 107, 111, 41, 37, 119, 49, 55, 77, 111, 85, + 101, 103, 65, 101, 81, 85, 107, 99, 105, 87, + 97, 107, 13, 0, 32, 6, 68, 88, 2, 20, + 38, 46, 10, 42, 78, 8, 3, 44, 39, 107, + 125, 125, 125, 125, 125, 125, 14, 78, 60, 54, + 34, 66, 36, 20, 16, 4, 17, 7, 24, 6, + 54, 74, 4, 10, 24, 32, 12, 28, 60, 12, + 1, 44, 39, 
107, 125, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 49 */ + + 34, 6, 39, 34, 6, 39, 44, 60, 58, 26, + 17, 57, 81, 28, 122, 20, 68, 15, 22, 48, + 7, 27, 6, 41, 99, 33, 117, 125, 125, 125, + 124, 16, 0, 22, 48, 7, 47, 26, 22, 7, + 2, 5, 3, 3, 35, 27, 53, 8, 1, 5, + 8, 15, 1, 31, 2, 19, 19, 31, 14, 4, + 44, 0, 0, 0, 16, 69, 67, 26, 33, 15, + 18, 17, 45, 112, 56, 40, 124, 124, 70, 74, + 54, 76, 36, 34, 94, 33, 19, 17, 31, 48, + 3, 50, 88, 5, 7, 0, 62, 1, 27, 25, + 43, 40, 9, 22, 6, 0, 24, 4, 28, 24, + 26, 48, 64, 32, 26, 34, 124, 124, 124, 124, + 20, 2, 10, 32, 3, 24, 30, 14, 1, 98, + 3, 109, 99, 124, 18, 36, 10, 14, 24, 42, + 112, 64, 28, 7, 0, 4, 44, 54, 93, 10, + 10, 37, 9, 33, 8, 10, 21, 38, 24, 22, + 65, 20, 39, 29, 4, 51, 12, 10, 4, 53, + 105, 13, 43, 43, 55, 81, 75, 45, 125, 124, + 125, 67, 69, 89, 125, 93, 123, 125, 125, 125, + 83, 125, 71, 124, 109, 25, 21, 37, 49, 51, + 45, 55, 53, 53, 87, 87, 75, 79, 81, 125, + 23, 29, 75, 27, 45, 47, 61, 67, 79, 73, + 89, 79, 47, 83, 85, 67, 43, 44, 76, 48, + 20, 26, 40, 22, 14, 14, 82, 54, 68, 52, + 48, 44, 64, 50, 66, 58, 54, 42, 100, 84, + 74, 66, 80, 32, 14, 10, 10, 17, 114, 56, + 1, 7, 30, 6, 19, 1, 4, 76, 50, 6, + 9, 34, 12, 12, 5, 3, 124, 4, 20, 30, + 34, 32, 22, 50, 58, 28, 52, 60, 86, 1, + 12, 13, 10, 70, 13, 19, 11, 12, 14, 9, + 31, 13, 39, 11, 17, 53, 32, 30, 70, 12, + 7, 30, 18, 0, 16, 14, 2, 29, 2, 1, + 9, 5, 38, 77, 45, 32, 53, 13, 27, 15, + 18, 27, 33, 8, 8, 69, 23, 83, 22, 28, + 42, 7, 29, 25, 31, 43, 51, 69, 75, 75, + 105, 89, 87, 125, 125, 125, 125, 125, 121, 103, + 93, 115, 83, 95, 87, 55, 69, 33, 39, 43, + 45, 87, 69, 69, 115, 81, 97, 99, 125, 101, + 109, 113, 41, 37, 121, 51, 57, 79, 115, 87, + 103, 105, 67, 103, 83, 85, 109, 99, 105, 89, + 99, 109, 11, 2, 34, 8, 72, 92, 4, 22, + 40, 48, 12, 44, 80, 8, 3, 42, 43, 111, + 125, 125, 125, 125, 125, 125, 16, 78, 62, 56, + 36, 68, 38, 22, 18, 6, 15, 5, 26, 8, + 56, 76, 6, 12, 26, 34, 12, 30, 62, 
12, + 1, 42, 43, 111, 125, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 50 */ + + 32, 6, 39, 32, 6, 39, 48, 62, 58, 26, + 17, 59, 85, 26, 122, 20, 72, 15, 22, 50, + 7, 29, 6, 43, 103, 35, 123, 125, 125, 125, + 124, 18, 0, 22, 50, 7, 47, 28, 22, 9, + 2, 5, 1, 3, 35, 27, 53, 8, 1, 5, + 8, 15, 1, 31, 2, 19, 19, 31, 14, 4, + 44, 0, 0, 0, 18, 69, 67, 26, 35, 15, + 16, 17, 45, 116, 58, 42, 124, 124, 72, 76, + 58, 78, 38, 36, 98, 33, 19, 17, 31, 48, + 3, 52, 92, 5, 5, 2, 66, 1, 27, 25, + 45, 40, 9, 22, 8, 2, 26, 6, 30, 24, + 28, 50, 66, 34, 28, 36, 124, 124, 124, 124, + 20, 2, 10, 32, 3, 24, 30, 14, 1, 102, + 3, 113, 103, 124, 18, 36, 10, 14, 24, 42, + 116, 66, 28, 7, 0, 4, 46, 56, 95, 10, + 10, 37, 11, 35, 6, 10, 23, 38, 24, 22, + 67, 20, 41, 31, 2, 51, 12, 8, 2, 57, + 111, 15, 47, 47, 59, 85, 79, 49, 125, 124, + 125, 71, 73, 93, 125, 97, 125, 125, 125, 125, + 87, 125, 75, 124, 113, 29, 25, 41, 53, 55, + 49, 59, 57, 57, 91, 91, 79, 83, 85, 125, + 23, 29, 77, 29, 47, 49, 63, 69, 81, 75, + 93, 81, 47, 85, 87, 69, 41, 46, 78, 48, + 20, 26, 42, 24, 16, 16, 86, 56, 68, 52, + 48, 46, 66, 52, 70, 62, 58, 44, 102, 86, + 76, 68, 82, 32, 14, 10, 12, 17, 116, 56, + 3, 7, 30, 6, 19, 1, 4, 76, 48, 4, + 11, 34, 12, 12, 5, 3, 124, 6, 22, 32, + 38, 36, 24, 54, 62, 30, 56, 64, 90, 0, + 14, 13, 12, 74, 13, 19, 11, 14, 14, 9, + 31, 13, 41, 11, 17, 53, 32, 30, 72, 12, + 7, 30, 18, 0, 16, 14, 2, 31, 2, 1, + 9, 5, 40, 79, 47, 32, 55, 15, 29, 17, + 18, 29, 35, 8, 8, 71, 23, 85, 20, 26, + 40, 11, 33, 29, 35, 47, 55, 75, 81, 81, + 111, 95, 91, 125, 125, 125, 125, 125, 125, 109, + 97, 121, 87, 99, 89, 57, 71, 37, 43, 49, + 49, 93, 75, 75, 121, 85, 103, 103, 125, 105, + 113, 117, 43, 39, 125, 53, 59, 83, 119, 91, + 107, 109, 69, 107, 85, 87, 111, 101, 107, 93, + 101, 111, 11, 2, 36, 8, 74, 94, 4, 24, + 42, 50, 12, 44, 82, 8, 3, 40, 47, 117, + 125, 125, 125, 125, 125, 125, 16, 78, 62, 56, + 36, 70, 38, 22, 18, 6, 15, 5, 26, 8, + 58, 78, 6, 
12, 26, 34, 12, 30, 64, 12, + 1, 40, 47, 117, 125, 125, 125, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 51 */ + + 30, 6, 39, 30, 6, 39, 52, 66, 60, 26, + 19, 63, 89, 24, 122, 20, 76, 15, 24, 52, + 7, 29, 6, 43, 105, 37, 125, 125, 125, 125, + 124, 20, 0, 24, 52, 7, 49, 30, 22, 9, + 2, 5, 1, 3, 35, 27, 53, 8, 1, 3, + 8, 15, 0, 31, 2, 19, 19, 31, 14, 4, + 44, 0, 0, 0, 18, 69, 67, 28, 37, 15, + 14, 19, 45, 122, 60, 44, 124, 124, 76, 80, + 62, 82, 40, 38, 102, 33, 19, 15, 31, 48, + 3, 54, 96, 5, 3, 2, 70, 1, 29, 27, + 45, 40, 9, 24, 8, 4, 28, 6, 32, 26, + 30, 54, 70, 34, 30, 38, 124, 124, 124, 124, + 20, 2, 10, 34, 3, 24, 32, 16, 0, 106, + 3, 117, 107, 124, 18, 36, 10, 14, 24, 44, + 118, 68, 28, 7, 0, 4, 46, 58, 97, 10, + 10, 39, 11, 37, 6, 10, 23, 38, 24, 22, + 69, 20, 43, 33, 2, 51, 12, 8, 2, 61, + 117, 17, 51, 51, 65, 89, 85, 53, 125, 124, + 125, 75, 77, 97, 125, 103, 125, 125, 125, 125, + 91, 125, 79, 124, 117, 33, 29, 45, 59, 61, + 53, 63, 61, 61, 95, 95, 83, 87, 87, 125, + 23, 29, 77, 31, 49, 51, 65, 71, 83, 77, + 97, 83, 49, 87, 89, 69, 41, 50, 80, 50, + 20, 26, 44, 24, 16, 16, 90, 58, 68, 52, + 50, 48, 68, 54, 72, 64, 62, 46, 102, 86, + 76, 68, 84, 34, 14, 10, 12, 17, 116, 56, + 5, 7, 30, 6, 19, 0, 4, 74, 48, 2, + 13, 34, 12, 12, 5, 3, 124, 8, 26, 34, + 40, 38, 26, 58, 66, 32, 58, 68, 94, 0, + 16, 11, 14, 78, 11, 19, 11, 14, 16, 9, + 31, 13, 41, 11, 17, 55, 34, 32, 74, 12, + 7, 32, 18, 0, 16, 14, 2, 31, 2, 1, + 9, 5, 40, 81, 49, 32, 57, 17, 31, 17, + 18, 31, 37, 8, 8, 73, 23, 87, 18, 24, + 38, 13, 37, 33, 39, 51, 61, 79, 87, 87, + 117, 99, 95, 125, 125, 125, 125, 125, 125, 113, + 101, 125, 91, 103, 93, 59, 73, 41, 49, 53, + 55, 99, 79, 79, 125, 91, 107, 109, 125, 109, + 117, 119, 45, 41, 125, 57, 63, 87, 123, 93, + 111, 113, 71, 109, 87, 89, 113, 103, 109, 95, + 103, 113, 11, 4, 36, 10, 76, 96, 4, 24, + 42, 50, 12, 46, 84, 8, 3, 38, 51, 121, + 125, 125, 125, 125, 125, 125, 16, 78, 62, 56, + 36, 70, 40, 22, 18, 
8, 15, 5, 28, 10, + 60, 80, 6, 14, 26, 34, 12, 30, 64, 12, + 1, 38, 51, 121, 125, 125, 125, 125, 125, 125, + }, + + }, + + { + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 0 */ + + 124, 18, 21, 124, 18, 21, 125, 81, 20, 18, + 24, 94, 124, 124, 24, 2, 71, 94, 43, 77, + 12, 12, 19, 12, 46, 106, 124, 124, 42, 67, + 125, 107, 21, 43, 77, 12, 59, 49, 38, 16, + 51, 79, 105, 12, 10, 41, 65, 0, 43, 85, + 0, 23, 53, 75, 16, 31, 23, 67, 26, 6, + 44, 0, 0, 0, 39, 45, 67, 17, 44, 2, + 58, 49, 125, 125, 55, 63, 41, 45, 51, 55, + 125, 25, 79, 53, 125, 33, 25, 41, 29, 16, + 4, 39, 125, 31, 81, 55, 125, 3, 31, 17, + 57, 14, 9, 15, 69, 45, 49, 37, 17, 7, + 17, 51, 11, 8, 5, 12, 15, 15, 10, 21, + 38, 11, 2, 24, 32, 42, 44, 20, 25, 29, + 39, 22, 7, 53, 7, 17, 23, 33, 39, 1, + 64, 1, 61, 23, 0, 21, 56, 72, 55, 3, + 11, 27, 5, 2, 9, 35, 66, 112, 80, 21, + 5, 121, 52, 124, 124, 125, 48, 42, 58, 68, + 64, 52, 42, 46, 60, 40, 54, 32, 16, 10, + 6, 38, 38, 42, 30, 14, 22, 52, 28, 10, + 30, 36, 11, 60, 0, 124, 124, 124, 106, 124, + 124, 124, 124, 92, 76, 68, 60, 96, 86, 19, + 58, 64, 38, 94, 54, 54, 70, 84, 86, 102, + 94, 42, 59, 14, 12, 50, 125, 103, 37, 2, + 20, 8, 43, 51, 61, 57, 125, 73, 12, 7, + 15, 27, 43, 49, 81, 69, 125, 37, 30, 4, + 5, 13, 23, 31, 39, 57, 89, 31, 11, 23, + 10, 10, 29, 39, 35, 71, 35, 50, 2, 10, + 8, 19, 25, 45, 39, 47, 124, 125, 125, 113, + 125, 101, 107, 109, 107, 99, 109, 113, 121, 61, + 77, 71, 85, 125, 57, 12, 45, 61, 55, 27, + 15, 19, 1, 35, 1, 12, 7, 9, 7, 9, + 27, 1, 9, 29, 16, 8, 3, 18, 38, 6, + 13, 25, 45, 13, 1, 13, 16, 14, 11, 3, + 21, 18, 18, 25, 37, 27, 27, 42, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 104, 124, 124, 124, 124, 124, 124, 96, + 124, 124, 92, 50, 36, 18, 31, 124, 124, 124, + 124, 96, 96, 76, 82, 94, 90, 70, 44, 70, + 32, 2, 64, 74, 78, 80, 94, 66, 68, 44, + 42, 6, 22, 6, 29, 119, 20, 14, 4, 60, + 26, 4, 29, 21, 17, 17, 23, 15, 0, 13, + 23, 17, 7, 20, 8, 22, 9, 124, 124, 124, + 124, 
112, 102, 80, 50, 1, 15, 52, 38, 28, + 14, 8, 0, 7, 9, 31, 29, 21, 17, 17, + 23, 15, 0, 13, 23, 17, 7, 20, 8, 22, + 9, 124, 124, 124, 124, 112, 102, 80, 50, 1, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 1 */ + + 124, 18, 21, 124, 18, 21, 123, 77, 22, 20, + 24, 92, 124, 124, 26, 4, 67, 92, 41, 73, + 12, 12, 15, 12, 44, 104, 124, 120, 38, 67, + 123, 103, 19, 41, 73, 12, 57, 47, 40, 16, + 49, 77, 101, 10, 8, 41, 65, 0, 41, 83, + 0, 23, 51, 73, 16, 29, 21, 65, 28, 6, + 44, 0, 0, 0, 37, 45, 67, 15, 44, 2, + 58, 47, 123, 121, 51, 61, 37, 41, 49, 51, + 123, 23, 75, 51, 121, 33, 25, 41, 29, 18, + 4, 37, 121, 29, 79, 53, 123, 3, 29, 17, + 55, 16, 9, 13, 67, 43, 47, 35, 15, 5, + 15, 49, 9, 10, 5, 12, 13, 13, 10, 19, + 40, 9, 2, 26, 34, 44, 46, 22, 25, 27, + 37, 22, 7, 51, 7, 15, 21, 31, 35, 2, + 66, 2, 57, 23, 1, 19, 58, 74, 55, 3, + 9, 27, 3, 2, 7, 31, 66, 112, 82, 17, + 7, 117, 50, 124, 124, 123, 48, 42, 58, 68, + 64, 52, 42, 46, 60, 40, 54, 32, 16, 10, + 6, 38, 38, 42, 30, 14, 22, 52, 28, 8, + 30, 36, 11, 58, 0, 124, 124, 124, 104, 124, + 124, 124, 124, 90, 74, 64, 58, 92, 84, 21, + 56, 62, 36, 92, 54, 54, 68, 82, 84, 100, + 92, 40, 59, 14, 12, 48, 123, 99, 33, 4, + 20, 8, 41, 49, 59, 55, 123, 69, 14, 5, + 13, 25, 39, 47, 77, 67, 121, 35, 32, 6, + 3, 11, 21, 29, 37, 55, 85, 29, 7, 21, + 12, 10, 27, 37, 33, 69, 33, 52, 4, 12, + 10, 17, 23, 43, 37, 45, 124, 123, 123, 109, + 123, 97, 103, 105, 103, 95, 105, 109, 115, 59, + 75, 69, 83, 119, 55, 10, 43, 59, 53, 25, + 15, 17, 1, 33, 1, 12, 7, 9, 5, 9, + 27, 1, 9, 27, 16, 8, 3, 18, 38, 6, + 13, 23, 41, 13, 1, 11, 16, 14, 11, 3, + 19, 18, 18, 23, 35, 25, 25, 40, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 100, 124, 124, 124, 124, 124, 124, 94, + 120, 120, 90, 48, 34, 18, 31, 124, 124, 124, + 120, 92, 94, 74, 78, 92, 86, 68, 40, 66, + 30, 0, 62, 72, 74, 78, 92, 64, 66, 42, + 40, 4, 22, 6, 29, 117, 18, 12, 2, 58, + 24, 2, 27, 19, 15, 15, 19, 13, 2, 11, + 19, 15, 5, 
22, 10, 24, 7, 124, 124, 124, + 124, 108, 100, 76, 48, 3, 13, 54, 40, 30, + 16, 10, 2, 5, 7, 29, 27, 19, 15, 15, + 19, 13, 2, 11, 19, 15, 5, 22, 10, 24, + 7, 124, 124, 124, 124, 108, 100, 76, 48, 3, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 2 */ + + 124, 18, 21, 124, 18, 21, 119, 75, 22, 20, + 24, 88, 120, 124, 28, 4, 63, 88, 41, 71, + 12, 12, 13, 10, 42, 102, 120, 114, 34, 69, + 119, 101, 19, 41, 71, 12, 57, 45, 40, 16, + 47, 75, 99, 8, 6, 41, 65, 0, 41, 81, + 0, 23, 51, 73, 16, 29, 21, 63, 28, 6, + 44, 0, 0, 0, 35, 45, 67, 15, 42, 2, + 58, 45, 121, 117, 49, 59, 33, 37, 47, 49, + 119, 21, 73, 49, 117, 35, 25, 41, 29, 18, + 4, 35, 117, 29, 77, 51, 119, 3, 29, 17, + 55, 16, 9, 13, 65, 43, 45, 35, 15, 5, + 15, 47, 7, 10, 5, 12, 13, 13, 10, 19, + 40, 9, 2, 26, 34, 44, 46, 22, 27, 25, + 35, 20, 7, 51, 7, 13, 21, 31, 33, 4, + 68, 6, 53, 25, 3, 19, 58, 74, 57, 3, + 9, 29, 1, 2, 7, 29, 66, 112, 82, 15, + 9, 115, 48, 124, 124, 121, 48, 42, 58, 66, + 62, 52, 42, 46, 58, 38, 52, 32, 16, 10, + 6, 36, 36, 40, 30, 14, 22, 50, 26, 6, + 28, 34, 11, 56, 1, 124, 124, 124, 100, 120, + 124, 124, 124, 88, 70, 60, 54, 88, 80, 23, + 54, 60, 32, 90, 52, 52, 66, 78, 80, 96, + 88, 36, 59, 12, 10, 44, 121, 97, 31, 6, + 20, 8, 39, 47, 57, 53, 119, 67, 16, 3, + 11, 23, 37, 45, 75, 65, 117, 33, 32, 6, + 3, 11, 19, 27, 35, 53, 83, 29, 5, 19, + 12, 10, 25, 35, 33, 67, 31, 52, 6, 12, + 10, 15, 21, 41, 35, 43, 124, 121, 119, 105, + 119, 95, 101, 101, 99, 93, 101, 105, 111, 57, + 73, 67, 81, 113, 55, 8, 43, 57, 51, 25, + 15, 17, 1, 33, 1, 10, 7, 9, 3, 9, + 27, 1, 9, 27, 16, 8, 3, 16, 36, 6, + 13, 23, 39, 15, 1, 9, 14, 14, 11, 3, + 19, 18, 18, 23, 33, 25, 25, 36, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 96, 124, 124, 124, 124, 124, 122, 90, + 116, 116, 86, 46, 32, 16, 31, 124, 124, 124, + 116, 88, 90, 70, 74, 88, 82, 64, 36, 62, + 26, 1, 60, 70, 70, 74, 88, 60, 62, 40, + 38, 2, 20, 4, 29, 115, 16, 10, 1, 56, + 22, 0, 27, 19, 13, 
13, 17, 11, 4, 11, + 17, 13, 3, 22, 12, 26, 5, 124, 124, 124, + 120, 104, 96, 72, 44, 5, 11, 54, 40, 32, + 18, 12, 2, 3, 7, 27, 27, 19, 13, 13, + 17, 11, 4, 11, 17, 13, 3, 22, 12, 26, + 5, 124, 124, 124, 120, 104, 96, 72, 44, 5, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 3 */ + + 124, 18, 21, 124, 18, 21, 115, 71, 24, 20, + 22, 84, 118, 122, 28, 4, 59, 86, 41, 67, + 12, 10, 11, 8, 40, 100, 116, 106, 30, 71, + 115, 97, 19, 41, 67, 12, 55, 43, 42, 16, + 45, 73, 97, 6, 4, 41, 67, 0, 41, 79, + 0, 25, 51, 73, 16, 29, 21, 61, 30, 6, + 44, 0, 0, 0, 35, 45, 67, 13, 40, 2, + 56, 45, 119, 113, 47, 57, 31, 35, 45, 47, + 115, 19, 71, 47, 113, 37, 25, 41, 29, 20, + 4, 33, 113, 29, 75, 49, 115, 3, 29, 17, + 55, 18, 9, 11, 63, 43, 43, 35, 15, 5, + 13, 45, 7, 10, 5, 12, 13, 13, 10, 19, + 40, 9, 2, 28, 34, 46, 46, 24, 27, 25, + 33, 20, 7, 51, 7, 11, 21, 29, 31, 6, + 70, 8, 49, 25, 5, 17, 58, 74, 59, 3, + 7, 29, 1, 2, 7, 27, 66, 112, 82, 13, + 11, 111, 46, 124, 124, 117, 48, 42, 56, 64, + 62, 50, 40, 46, 58, 36, 50, 32, 16, 10, + 4, 36, 34, 38, 28, 14, 22, 48, 26, 4, + 28, 32, 11, 54, 1, 124, 124, 122, 98, 116, + 124, 124, 124, 86, 66, 56, 52, 84, 76, 27, + 52, 58, 28, 88, 50, 50, 64, 76, 76, 92, + 84, 34, 59, 10, 8, 42, 117, 93, 27, 6, + 20, 8, 37, 45, 55, 51, 115, 65, 18, 1, + 9, 23, 35, 43, 71, 63, 113, 33, 34, 8, + 1, 9, 17, 27, 35, 51, 81, 29, 1, 17, + 12, 10, 23, 35, 33, 65, 29, 54, 8, 14, + 10, 13, 21, 39, 35, 43, 124, 117, 117, 103, + 115, 93, 97, 99, 97, 89, 97, 101, 107, 57, + 71, 67, 79, 107, 55, 6, 43, 55, 49, 25, + 15, 17, 1, 31, 1, 8, 7, 9, 3, 9, + 27, 1, 9, 27, 14, 8, 3, 14, 34, 6, + 13, 23, 37, 17, 1, 7, 12, 14, 11, 3, + 17, 18, 16, 21, 31, 25, 25, 34, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 92, 124, 124, 124, 124, 124, 118, 86, + 112, 110, 82, 44, 30, 14, 31, 124, 124, 124, + 112, 84, 86, 68, 70, 84, 78, 60, 32, 58, + 22, 3, 58, 68, 66, 72, 84, 58, 58, 36, + 34, 0, 18, 2, 29, 113, 14, 6, 3, 54, + 
20, 1, 27, 17, 13, 13, 15, 9, 6, 11, + 15, 11, 1, 24, 14, 26, 3, 124, 124, 124, + 116, 100, 92, 68, 40, 7, 11, 56, 42, 34, + 18, 14, 4, 3, 5, 27, 27, 17, 13, 13, + 15, 9, 6, 11, 15, 11, 1, 24, 14, 26, + 3, 124, 124, 124, 116, 100, 92, 68, 40, 7, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 4 */ + + 124, 18, 21, 124, 18, 21, 113, 69, 24, 20, + 22, 80, 114, 120, 30, 4, 57, 82, 41, 65, + 10, 10, 9, 6, 36, 96, 112, 100, 24, 73, + 111, 95, 19, 41, 65, 10, 55, 41, 42, 14, + 45, 71, 93, 4, 0, 43, 67, 0, 39, 77, + 1, 25, 51, 73, 16, 29, 21, 61, 30, 6, + 44, 0, 0, 0, 33, 47, 67, 13, 38, 2, + 56, 43, 117, 109, 45, 55, 27, 31, 45, 45, + 111, 17, 69, 45, 107, 37, 27, 41, 31, 20, + 2, 31, 107, 27, 75, 49, 111, 3, 29, 17, + 55, 18, 9, 11, 61, 43, 43, 33, 15, 5, + 13, 43, 5, 10, 7, 10, 13, 13, 10, 19, + 40, 9, 2, 28, 34, 46, 46, 24, 29, 23, + 33, 18, 7, 49, 7, 9, 19, 29, 27, 10, + 72, 12, 45, 27, 7, 17, 60, 74, 61, 3, + 7, 31, 0, 2, 7, 25, 66, 112, 82, 9, + 13, 109, 44, 124, 124, 115, 46, 42, 56, 64, + 60, 50, 40, 46, 56, 34, 48, 30, 16, 10, + 4, 34, 34, 36, 28, 12, 20, 46, 24, 2, + 26, 30, 11, 50, 3, 124, 124, 118, 94, 114, + 124, 124, 124, 84, 62, 50, 48, 80, 72, 29, + 48, 56, 26, 86, 48, 48, 60, 72, 72, 88, + 82, 30, 59, 8, 6, 38, 115, 91, 25, 8, + 20, 8, 35, 43, 53, 51, 111, 61, 20, 1, + 9, 21, 31, 41, 69, 61, 107, 31, 34, 8, + 1, 9, 15, 25, 33, 51, 79, 29, 0, 15, + 12, 10, 21, 33, 33, 63, 27, 54, 10, 14, + 10, 11, 19, 37, 33, 41, 124, 115, 113, 99, + 113, 91, 95, 95, 93, 87, 95, 97, 101, 55, + 69, 65, 77, 101, 53, 4, 41, 53, 49, 25, + 15, 17, 3, 31, 3, 6, 7, 9, 1, 9, + 27, 1, 9, 25, 14, 6, 3, 12, 32, 4, + 13, 23, 35, 19, 3, 7, 12, 12, 11, 3, + 17, 16, 16, 21, 31, 25, 25, 30, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 88, 124, 124, 124, 124, 124, 114, 82, + 108, 106, 78, 40, 28, 12, 31, 124, 124, 124, + 108, 80, 82, 64, 66, 80, 74, 56, 28, 52, + 20, 7, 56, 66, 60, 68, 82, 54, 54, 34, + 32, 1, 16, 0, 29, 111, 10, 
4, 7, 50, + 18, 3, 27, 17, 11, 11, 13, 9, 6, 9, + 13, 9, 0, 24, 16, 28, 3, 124, 124, 120, + 112, 96, 88, 62, 36, 11, 9, 56, 42, 34, + 20, 14, 4, 1, 5, 25, 27, 17, 11, 11, + 13, 9, 6, 9, 13, 9, 0, 24, 16, 28, + 3, 124, 124, 120, 112, 96, 88, 62, 36, 11, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 5 */ + + 124, 18, 21, 124, 18, 21, 109, 65, 24, 20, + 20, 76, 112, 118, 32, 4, 53, 78, 39, 61, + 10, 10, 7, 4, 34, 94, 108, 94, 20, 73, + 107, 93, 19, 39, 61, 10, 55, 39, 42, 14, + 43, 69, 91, 2, 1, 43, 67, 0, 39, 75, + 1, 25, 51, 73, 16, 27, 21, 59, 32, 6, + 44, 0, 0, 0, 33, 47, 67, 11, 36, 2, + 54, 43, 113, 103, 43, 53, 25, 29, 43, 43, + 107, 15, 67, 43, 103, 39, 27, 41, 31, 20, + 2, 29, 103, 27, 73, 47, 107, 3, 29, 17, + 53, 18, 9, 9, 59, 41, 41, 33, 15, 3, + 11, 41, 5, 10, 7, 10, 11, 13, 10, 19, + 42, 9, 2, 30, 36, 46, 46, 24, 29, 23, + 31, 18, 7, 49, 7, 7, 19, 27, 25, 12, + 74, 14, 41, 27, 9, 15, 60, 74, 63, 3, + 5, 31, 2, 2, 7, 21, 66, 112, 82, 7, + 15, 105, 42, 124, 124, 113, 46, 42, 54, 62, + 60, 50, 38, 46, 56, 32, 46, 30, 16, 10, + 4, 34, 32, 34, 26, 12, 20, 44, 24, 0, + 24, 30, 11, 48, 3, 124, 124, 116, 92, 110, + 124, 124, 124, 82, 58, 46, 46, 76, 68, 31, + 46, 54, 22, 84, 46, 46, 58, 70, 68, 84, + 78, 28, 59, 6, 4, 34, 111, 87, 23, 8, + 20, 8, 33, 41, 51, 49, 107, 59, 22, 0, + 7, 19, 29, 39, 65, 59, 103, 29, 36, 10, + 0, 7, 13, 23, 33, 49, 77, 27, 2, 13, + 12, 10, 19, 33, 31, 61, 25, 54, 12, 14, + 10, 9, 17, 35, 33, 39, 124, 113, 111, 97, + 109, 89, 91, 93, 89, 83, 91, 93, 97, 53, + 67, 63, 75, 95, 53, 2, 41, 51, 47, 25, + 15, 17, 3, 29, 3, 4, 7, 9, 0, 9, + 27, 1, 9, 25, 12, 6, 3, 10, 30, 4, + 13, 23, 33, 19, 3, 5, 10, 12, 11, 3, + 17, 16, 14, 21, 29, 25, 25, 28, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 84, 124, 124, 124, 124, 124, 110, 80, + 104, 100, 74, 38, 26, 10, 31, 124, 124, 124, + 104, 76, 78, 62, 62, 76, 70, 52, 24, 48, + 16, 9, 54, 64, 56, 66, 78, 52, 50, 32, + 30, 3, 14, 1, 29, 109, 
8, 2, 9, 48, + 16, 5, 27, 15, 11, 9, 11, 7, 8, 9, + 11, 7, 2, 26, 18, 28, 1, 124, 124, 116, + 108, 92, 84, 58, 32, 13, 9, 58, 44, 36, + 22, 16, 6, 1, 5, 23, 27, 15, 11, 9, + 11, 7, 8, 9, 11, 7, 2, 26, 18, 28, + 1, 124, 124, 116, 108, 92, 84, 58, 32, 13, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 6 */ + + 124, 18, 23, 124, 18, 23, 105, 63, 26, 20, + 20, 74, 108, 116, 32, 6, 49, 76, 39, 59, + 10, 8, 5, 2, 32, 92, 106, 86, 16, 75, + 103, 89, 19, 39, 59, 10, 53, 37, 44, 14, + 41, 67, 89, 1, 3, 43, 69, 0, 39, 75, + 1, 27, 49, 73, 14, 27, 21, 57, 32, 6, + 44, 0, 0, 0, 31, 47, 67, 11, 36, 0, + 54, 41, 111, 99, 41, 51, 21, 25, 41, 41, + 103, 13, 65, 43, 99, 41, 27, 41, 31, 22, + 2, 27, 99, 27, 71, 45, 103, 3, 29, 17, + 53, 20, 11, 9, 59, 41, 39, 33, 13, 3, + 11, 39, 3, 10, 7, 10, 11, 13, 10, 19, + 42, 9, 2, 30, 36, 48, 48, 26, 31, 21, + 29, 16, 7, 49, 7, 5, 19, 27, 23, 14, + 74, 18, 39, 29, 11, 15, 60, 74, 63, 5, + 5, 33, 2, 0, 5, 19, 66, 112, 84, 5, + 17, 103, 40, 124, 124, 109, 46, 42, 54, 60, + 58, 48, 38, 44, 54, 32, 46, 30, 14, 10, + 2, 32, 30, 32, 26, 12, 20, 44, 22, 3, + 24, 28, 11, 46, 5, 124, 124, 112, 88, 106, + 124, 124, 124, 78, 54, 42, 42, 72, 64, 35, + 44, 50, 18, 80, 44, 44, 56, 66, 64, 80, + 74, 24, 59, 4, 2, 32, 109, 85, 19, 10, + 20, 8, 31, 41, 51, 47, 105, 57, 24, 2, + 5, 19, 27, 37, 63, 57, 99, 29, 36, 10, + 0, 7, 11, 23, 31, 47, 75, 27, 6, 11, + 12, 10, 19, 31, 31, 61, 25, 56, 12, 16, + 10, 7, 17, 35, 31, 39, 124, 109, 107, 93, + 105, 85, 89, 89, 87, 81, 87, 89, 93, 53, + 65, 63, 75, 89, 53, 0, 41, 51, 45, 25, + 15, 17, 3, 29, 3, 2, 7, 9, 0, 9, + 27, 1, 9, 25, 12, 6, 3, 8, 28, 4, + 13, 23, 31, 21, 3, 3, 8, 12, 11, 3, + 15, 16, 14, 19, 27, 25, 25, 24, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 80, 124, 124, 124, 124, 124, 104, 76, + 100, 96, 70, 36, 24, 8, 31, 124, 124, 124, + 100, 72, 76, 58, 58, 72, 64, 48, 20, 44, + 12, 11, 52, 60, 52, 62, 74, 48, 46, 28, + 26, 5, 12, 3, 31, 107, 6, 
1, 13, 46, + 12, 7, 25, 15, 9, 9, 9, 5, 10, 9, + 9, 5, 4, 26, 20, 30, 0, 124, 124, 112, + 104, 88, 80, 54, 28, 15, 7, 58, 44, 38, + 22, 18, 6, 0, 3, 23, 25, 15, 9, 9, + 9, 5, 10, 9, 9, 5, 4, 26, 20, 30, + 0, 124, 124, 112, 104, 88, 80, 54, 28, 15, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 7 */ + + 124, 18, 23, 124, 18, 23, 101, 59, 26, 20, + 18, 70, 106, 114, 34, 6, 47, 72, 39, 55, + 8, 8, 3, 0, 30, 90, 102, 80, 10, 77, + 99, 87, 19, 39, 55, 8, 53, 35, 44, 14, + 41, 65, 85, 3, 5, 43, 69, 0, 37, 73, + 3, 27, 49, 73, 14, 27, 21, 55, 34, 6, + 44, 0, 0, 0, 31, 47, 67, 9, 34, 0, + 52, 41, 109, 95, 39, 49, 19, 23, 39, 39, + 99, 11, 63, 41, 93, 41, 29, 41, 33, 22, + 2, 25, 93, 25, 71, 45, 99, 3, 29, 17, + 53, 20, 11, 7, 57, 41, 37, 31, 13, 3, + 9, 37, 3, 10, 9, 10, 11, 13, 10, 19, + 42, 9, 2, 32, 36, 48, 48, 26, 31, 21, + 29, 16, 7, 47, 7, 3, 17, 25, 19, 18, + 76, 20, 35, 29, 13, 13, 62, 74, 65, 5, + 3, 33, 4, 0, 5, 17, 66, 112, 84, 1, + 19, 99, 38, 124, 124, 107, 46, 42, 52, 60, + 58, 48, 36, 44, 54, 30, 44, 30, 14, 10, + 2, 32, 30, 30, 24, 12, 20, 42, 22, 5, + 22, 26, 11, 44, 5, 124, 124, 108, 86, 104, + 124, 124, 124, 76, 50, 38, 40, 68, 60, 37, + 42, 48, 16, 78, 42, 42, 52, 64, 60, 76, + 72, 22, 59, 2, 0, 28, 105, 81, 17, 10, + 20, 8, 29, 39, 49, 47, 101, 53, 26, 4, + 5, 17, 23, 35, 59, 55, 93, 27, 38, 12, + 2, 5, 9, 21, 31, 45, 73, 27, 8, 9, + 12, 10, 17, 31, 31, 59, 23, 56, 14, 16, + 10, 5, 15, 33, 31, 37, 124, 107, 105, 91, + 103, 83, 85, 87, 83, 77, 83, 85, 87, 51, + 63, 61, 73, 83, 51, 1, 39, 49, 43, 25, + 15, 17, 3, 27, 5, 0, 7, 9, 2, 9, + 27, 1, 9, 23, 10, 4, 3, 6, 26, 2, + 13, 23, 29, 23, 5, 1, 8, 10, 11, 3, + 15, 14, 12, 19, 27, 25, 25, 22, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 124, 76, 124, 124, 124, 124, 124, 100, 72, + 96, 90, 66, 34, 22, 6, 31, 124, 122, 124, + 96, 68, 72, 56, 54, 68, 60, 44, 16, 40, + 10, 15, 50, 58, 48, 60, 72, 46, 42, 26, + 24, 7, 10, 5, 31, 105, 2, 3, 15, 42, + 10, 9, 
25, 13, 9, 7, 7, 3, 10, 7, + 7, 3, 6, 28, 22, 30, 0, 124, 120, 108, + 100, 84, 76, 48, 24, 17, 7, 60, 46, 38, + 24, 20, 8, 0, 3, 21, 25, 13, 9, 7, + 7, 3, 10, 7, 7, 3, 6, 28, 22, 30, + 0, 124, 120, 108, 100, 84, 76, 48, 24, 17, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 8 */ + + 124, 16, 23, 124, 16, 23, 99, 57, 26, 20, + 18, 66, 102, 112, 34, 6, 43, 68, 39, 53, + 8, 6, 1, 1, 26, 86, 98, 72, 6, 79, + 95, 85, 19, 39, 53, 8, 53, 35, 44, 12, + 39, 63, 83, 5, 9, 45, 71, 0, 37, 71, + 3, 29, 49, 73, 14, 27, 21, 55, 34, 6, + 44, 0, 0, 0, 29, 49, 67, 9, 32, 0, + 52, 39, 107, 91, 37, 49, 15, 19, 39, 37, + 95, 11, 61, 39, 89, 43, 29, 43, 33, 22, + 0, 25, 89, 25, 69, 43, 97, 3, 29, 17, + 53, 20, 11, 7, 55, 41, 37, 31, 13, 3, + 9, 35, 1, 10, 9, 8, 11, 13, 8, 19, + 42, 9, 2, 32, 36, 48, 48, 26, 33, 19, + 27, 14, 7, 47, 7, 1, 17, 25, 17, 20, + 78, 24, 31, 31, 15, 13, 62, 74, 67, 5, + 3, 35, 4, 0, 5, 15, 66, 112, 84, 0, + 21, 97, 36, 118, 124, 105, 44, 42, 52, 58, + 56, 46, 36, 44, 52, 28, 42, 28, 14, 8, + 0, 30, 28, 28, 24, 10, 18, 40, 20, 7, + 20, 24, 11, 40, 7, 124, 124, 104, 82, 100, + 120, 124, 124, 74, 46, 32, 36, 62, 56, 41, + 38, 46, 12, 76, 40, 40, 50, 60, 56, 72, + 68, 18, 59, 0, 1, 24, 103, 79, 15, 12, + 20, 8, 29, 37, 47, 45, 97, 51, 26, 4, + 3, 17, 21, 33, 57, 53, 89, 27, 38, 12, + 2, 5, 9, 21, 29, 45, 71, 27, 10, 7, + 12, 10, 15, 29, 31, 57, 21, 56, 16, 16, + 10, 3, 15, 31, 29, 37, 124, 105, 101, 87, + 99, 81, 83, 83, 81, 75, 81, 81, 83, 51, + 61, 61, 71, 77, 51, 3, 39, 47, 43, 25, + 15, 17, 5, 27, 5, 1, 7, 9, 2, 9, + 27, 3, 9, 23, 10, 4, 5, 4, 24, 2, + 15, 23, 27, 25, 5, 1, 6, 10, 11, 5, + 15, 14, 12, 19, 25, 25, 25, 18, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 122, 72, 124, 124, 124, 124, 122, 96, 68, + 90, 86, 62, 30, 18, 4, 31, 122, 118, 124, + 92, 62, 68, 52, 48, 64, 56, 40, 12, 34, + 6, 17, 46, 56, 42, 56, 68, 42, 38, 22, + 20, 9, 8, 7, 31, 103, 0, 7, 19, 40, + 8, 11, 25, 13, 7, 7, 5, 3, 12, 7, + 
5, 3, 8, 28, 22, 32, 2, 122, 116, 104, + 96, 80, 72, 44, 20, 21, 5, 60, 46, 40, + 24, 20, 8, 2, 3, 21, 25, 13, 7, 7, + 5, 3, 12, 7, 5, 3, 8, 28, 22, 32, + 2, 122, 116, 104, 96, 80, 72, 44, 20, 21, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 9 */ + + 124, 16, 23, 124, 16, 23, 95, 55, 28, 20, + 18, 62, 98, 112, 36, 6, 39, 66, 37, 49, + 8, 6, 0, 1, 24, 84, 94, 66, 2, 79, + 91, 81, 17, 37, 49, 8, 51, 33, 46, 12, + 37, 61, 81, 7, 11, 45, 71, 0, 37, 69, + 3, 29, 49, 73, 14, 25, 19, 53, 34, 6, + 44, 0, 0, 0, 27, 49, 67, 9, 30, 0, + 52, 37, 103, 85, 35, 47, 11, 15, 37, 35, + 91, 9, 57, 37, 85, 45, 29, 43, 33, 24, + 0, 23, 85, 25, 67, 41, 93, 3, 27, 17, + 51, 22, 11, 5, 53, 39, 35, 31, 13, 1, + 7, 33, 0, 10, 9, 8, 9, 11, 8, 19, + 44, 9, 2, 32, 38, 50, 48, 28, 33, 17, + 25, 12, 7, 47, 7, 0, 17, 23, 15, 22, + 80, 28, 27, 33, 17, 11, 62, 76, 69, 5, + 3, 35, 6, 0, 5, 11, 66, 112, 84, 2, + 23, 95, 34, 114, 124, 101, 44, 42, 52, 56, + 56, 46, 36, 44, 52, 26, 40, 28, 14, 8, + 0, 30, 26, 28, 24, 10, 18, 38, 18, 9, + 20, 24, 11, 38, 7, 124, 124, 102, 80, 96, + 116, 124, 124, 72, 42, 28, 34, 58, 54, 43, + 36, 44, 8, 74, 38, 38, 48, 56, 54, 68, + 64, 16, 59, 0, 3, 22, 99, 75, 11, 14, + 20, 8, 27, 35, 45, 43, 93, 49, 28, 6, + 1, 15, 19, 31, 55, 51, 85, 25, 40, 14, + 4, 5, 7, 19, 27, 43, 67, 25, 14, 5, + 14, 10, 13, 27, 29, 55, 19, 58, 18, 18, + 12, 1, 13, 29, 27, 35, 124, 101, 97, 83, + 95, 79, 81, 79, 77, 71, 77, 77, 79, 49, + 59, 59, 69, 69, 51, 5, 39, 45, 41, 23, + 15, 17, 5, 27, 5, 3, 7, 9, 4, 9, + 27, 3, 9, 23, 10, 4, 5, 4, 22, 2, + 15, 21, 23, 25, 5, 0, 4, 10, 11, 5, + 13, 14, 12, 17, 23, 23, 23, 14, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 116, 68, 124, 124, 124, 124, 116, 92, 66, + 86, 82, 60, 28, 16, 2, 31, 118, 114, 120, + 88, 58, 64, 50, 44, 60, 52, 36, 8, 30, + 2, 19, 44, 54, 38, 54, 64, 40, 34, 20, + 18, 11, 6, 7, 31, 101, 1, 9, 23, 38, + 6, 13, 25, 11, 5, 5, 1, 1, 14, 7, + 3, 1, 10, 30, 24, 34, 4, 120, 114, 
100, + 92, 76, 68, 40, 16, 23, 3, 60, 48, 42, + 26, 22, 10, 4, 1, 19, 25, 11, 5, 5, + 1, 1, 14, 7, 3, 1, 10, 30, 24, 34, + 4, 120, 114, 100, 92, 76, 68, 40, 16, 23, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 10 */ + + 124, 16, 23, 124, 16, 23, 91, 51, 28, 20, + 16, 58, 96, 110, 38, 6, 37, 62, 37, 47, + 6, 6, 2, 3, 22, 82, 90, 60, 3, 81, + 87, 79, 17, 37, 47, 6, 51, 31, 46, 12, + 37, 59, 77, 9, 13, 45, 71, 0, 35, 67, + 5, 29, 49, 73, 14, 25, 19, 51, 36, 6, + 44, 0, 0, 0, 27, 49, 67, 7, 28, 0, + 50, 37, 101, 81, 33, 45, 9, 13, 35, 33, + 87, 7, 55, 35, 79, 45, 31, 43, 35, 24, + 0, 21, 79, 23, 67, 41, 89, 3, 27, 17, + 51, 22, 11, 5, 51, 39, 33, 29, 13, 1, + 7, 31, 0, 10, 11, 8, 9, 11, 8, 19, + 44, 9, 2, 34, 38, 50, 48, 28, 35, 17, + 25, 12, 7, 45, 7, 2, 15, 23, 11, 26, + 82, 30, 23, 33, 19, 11, 64, 76, 71, 5, + 1, 37, 8, 0, 5, 9, 66, 112, 84, 6, + 25, 91, 32, 108, 124, 99, 44, 42, 50, 56, + 54, 46, 34, 44, 50, 24, 38, 28, 14, 8, + 0, 28, 26, 26, 22, 10, 18, 36, 18, 11, + 18, 22, 11, 36, 9, 120, 124, 98, 76, 94, + 112, 124, 124, 70, 38, 24, 30, 54, 50, 45, + 34, 42, 6, 72, 36, 36, 44, 54, 50, 64, + 62, 12, 59, 1, 5, 18, 97, 73, 9, 14, + 20, 8, 25, 33, 43, 43, 89, 45, 30, 8, + 1, 13, 15, 29, 51, 49, 79, 23, 40, 14, + 4, 3, 5, 17, 27, 41, 65, 25, 16, 3, + 14, 10, 11, 27, 29, 53, 17, 58, 20, 18, + 12, 0, 11, 27, 27, 33, 124, 99, 95, 81, + 93, 77, 77, 77, 73, 69, 73, 73, 73, 47, + 57, 57, 67, 63, 49, 7, 37, 43, 39, 23, + 15, 17, 5, 25, 7, 5, 7, 9, 6, 9, + 27, 3, 9, 21, 8, 2, 5, 2, 20, 0, + 15, 21, 21, 27, 7, 2, 4, 8, 11, 5, + 13, 12, 10, 17, 23, 23, 23, 12, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 112, 64, 124, 124, 124, 124, 110, 88, 62, + 82, 76, 56, 26, 14, 0, 31, 114, 108, 114, + 84, 54, 60, 46, 40, 56, 48, 32, 4, 26, + 0, 23, 42, 52, 34, 50, 62, 36, 30, 18, + 16, 13, 4, 9, 31, 99, 5, 11, 25, 34, + 4, 15, 25, 11, 5, 3, 0, 0, 14, 5, + 1, 0, 12, 30, 26, 34, 4, 120, 110, 96, + 88, 72, 64, 34, 12, 25, 3, 62, 
48, 42, + 28, 24, 10, 4, 1, 17, 25, 11, 5, 3, + 0, 0, 14, 5, 1, 0, 12, 30, 26, 34, + 4, 120, 110, 96, 88, 72, 64, 34, 12, 25, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 11 */ + + 124, 16, 25, 124, 16, 25, 87, 49, 30, 20, + 16, 56, 92, 108, 38, 8, 33, 60, 37, 43, + 6, 4, 4, 5, 20, 80, 88, 52, 7, 83, + 83, 75, 17, 37, 43, 6, 49, 29, 48, 12, + 35, 57, 75, 13, 15, 45, 73, 0, 35, 67, + 5, 31, 47, 73, 12, 25, 19, 49, 36, 6, + 44, 0, 0, 0, 25, 49, 67, 7, 28, 1, + 50, 35, 99, 77, 31, 43, 5, 9, 33, 31, + 83, 5, 53, 35, 75, 47, 31, 43, 35, 26, + 0, 19, 75, 23, 65, 39, 85, 3, 27, 17, + 51, 24, 13, 3, 51, 39, 31, 29, 11, 1, + 5, 29, 2, 10, 11, 8, 9, 11, 8, 19, + 44, 9, 2, 34, 38, 52, 50, 30, 35, 15, + 23, 10, 7, 45, 7, 4, 15, 21, 9, 28, + 82, 34, 21, 35, 21, 9, 64, 76, 71, 7, + 1, 37, 8, 1, 3, 7, 66, 112, 86, 8, + 27, 89, 30, 102, 124, 95, 44, 42, 50, 54, + 54, 44, 34, 42, 50, 24, 38, 28, 12, 8, + 1, 28, 24, 24, 22, 10, 18, 36, 16, 15, + 18, 20, 11, 34, 9, 114, 124, 94, 74, 90, + 108, 124, 122, 66, 34, 20, 28, 50, 46, 49, + 32, 38, 2, 68, 34, 34, 42, 50, 46, 60, + 58, 10, 59, 3, 7, 16, 93, 69, 5, 16, + 20, 8, 23, 33, 43, 41, 87, 43, 32, 10, + 0, 13, 13, 27, 49, 47, 75, 23, 42, 16, + 6, 3, 3, 17, 25, 39, 63, 25, 20, 1, + 14, 10, 11, 25, 29, 53, 17, 60, 20, 20, + 12, 2, 11, 27, 25, 33, 124, 95, 91, 77, + 89, 73, 75, 73, 71, 65, 69, 69, 69, 47, + 55, 57, 67, 57, 49, 9, 37, 43, 37, 23, + 15, 17, 5, 25, 7, 7, 7, 9, 6, 9, + 27, 3, 9, 21, 8, 2, 5, 0, 18, 0, + 15, 21, 19, 29, 7, 4, 2, 8, 11, 5, + 11, 12, 10, 15, 21, 23, 23, 8, 124, 122, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 106, 60, 124, 124, 124, 124, 106, 82, 58, + 78, 72, 52, 24, 12, 1, 31, 110, 104, 110, + 80, 50, 58, 44, 36, 52, 42, 28, 0, 22, + 3, 25, 40, 48, 30, 48, 58, 34, 26, 14, + 12, 15, 2, 11, 33, 97, 7, 15, 29, 32, + 0, 17, 23, 9, 3, 3, 2, 2, 16, 5, + 0, 2, 14, 32, 28, 36, 6, 118, 106, 92, + 84, 68, 60, 30, 8, 27, 1, 62, 50, 44, + 28, 26, 12, 6, 0, 17, 23, 9, 3, 3, 
+ 2, 2, 16, 5, 0, 2, 14, 32, 28, 36, + 6, 118, 106, 92, 84, 68, 60, 30, 8, 27, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 12 */ + + 124, 16, 25, 124, 16, 25, 85, 45, 30, 20, + 14, 52, 90, 106, 40, 8, 29, 56, 37, 41, + 6, 4, 6, 7, 16, 76, 84, 46, 11, 85, + 79, 73, 17, 37, 41, 6, 49, 27, 48, 10, + 33, 55, 73, 15, 19, 47, 73, 0, 35, 65, + 5, 31, 47, 73, 12, 25, 19, 49, 38, 6, + 44, 0, 0, 0, 25, 51, 67, 5, 26, 1, + 48, 35, 97, 73, 29, 41, 3, 7, 33, 29, + 79, 3, 51, 33, 71, 49, 31, 43, 35, 26, + 1, 17, 71, 23, 63, 37, 81, 3, 27, 17, + 51, 24, 13, 3, 49, 39, 31, 29, 11, 1, + 5, 27, 2, 10, 11, 6, 9, 11, 8, 19, + 44, 9, 2, 36, 38, 52, 50, 30, 37, 15, + 21, 10, 7, 45, 7, 6, 15, 21, 7, 30, + 84, 36, 17, 35, 23, 9, 64, 76, 73, 7, + 0, 39, 10, 1, 3, 5, 66, 112, 86, 10, + 29, 85, 28, 96, 120, 93, 42, 42, 48, 52, + 52, 44, 32, 42, 48, 22, 36, 26, 12, 8, + 1, 26, 22, 22, 20, 8, 16, 34, 16, 17, + 16, 18, 11, 30, 11, 110, 124, 90, 70, 86, + 104, 124, 116, 64, 30, 14, 24, 46, 42, 51, + 28, 36, 1, 66, 32, 32, 40, 48, 42, 56, + 54, 6, 59, 5, 9, 12, 91, 67, 3, 16, + 20, 8, 21, 31, 41, 39, 83, 41, 34, 10, + 2, 11, 11, 25, 45, 45, 71, 21, 42, 16, + 6, 1, 1, 15, 25, 39, 61, 25, 22, 0, + 14, 10, 9, 25, 29, 51, 15, 60, 22, 20, + 12, 4, 9, 25, 25, 31, 124, 93, 89, 75, + 85, 71, 71, 71, 67, 63, 67, 65, 65, 45, + 53, 55, 65, 51, 49, 11, 37, 41, 37, 23, + 15, 17, 7, 23, 7, 9, 7, 9, 8, 9, + 27, 3, 9, 21, 6, 2, 5, 1, 16, 0, + 15, 21, 17, 31, 7, 4, 0, 8, 11, 5, + 11, 12, 8, 15, 19, 23, 23, 6, 124, 120, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 124, 100, 56, 124, 124, 124, 124, 100, 78, 54, + 74, 66, 48, 20, 10, 3, 31, 104, 100, 106, + 76, 46, 54, 40, 32, 48, 38, 24, 3, 16, + 7, 27, 38, 46, 24, 44, 54, 30, 22, 12, + 10, 17, 0, 13, 33, 95, 9, 17, 31, 30, + 1, 19, 23, 9, 3, 1, 4, 2, 18, 5, + 2, 4, 16, 32, 30, 36, 8, 118, 102, 88, + 80, 64, 56, 26, 4, 31, 1, 64, 50, 46, + 30, 26, 12, 6, 0, 15, 23, 9, 3, 1, + 4, 2, 18, 5, 2, 4, 16, 32, 30, 36, + 8, 118, 
102, 88, 80, 64, 56, 26, 4, 31, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 13 */ + + 124, 16, 25, 124, 16, 25, 81, 43, 30, 20, + 14, 48, 86, 104, 42, 8, 27, 52, 35, 37, + 4, 4, 8, 9, 14, 74, 80, 40, 17, 85, + 75, 71, 17, 35, 37, 4, 49, 25, 48, 10, + 33, 53, 69, 17, 21, 47, 73, 0, 33, 63, + 7, 31, 47, 73, 12, 23, 19, 47, 38, 6, + 44, 0, 0, 0, 23, 51, 67, 5, 24, 1, + 48, 33, 93, 67, 27, 39, 0, 3, 31, 27, + 75, 1, 49, 31, 65, 49, 33, 43, 37, 26, + 1, 15, 65, 21, 63, 37, 77, 3, 27, 17, + 49, 24, 13, 1, 47, 37, 29, 27, 11, 0, + 3, 25, 4, 10, 13, 6, 7, 11, 8, 19, + 46, 9, 2, 36, 40, 52, 50, 30, 37, 13, + 21, 8, 7, 43, 7, 8, 13, 19, 3, 34, + 86, 40, 13, 37, 25, 7, 66, 76, 75, 7, + 0, 39, 12, 1, 3, 1, 66, 112, 86, 14, + 31, 83, 26, 92, 114, 91, 42, 42, 48, 52, + 52, 44, 32, 42, 48, 20, 34, 26, 12, 8, + 1, 26, 22, 20, 20, 8, 16, 32, 14, 19, + 14, 18, 11, 28, 11, 106, 124, 88, 68, 84, + 100, 124, 112, 62, 26, 10, 22, 42, 38, 53, + 26, 34, 3, 64, 30, 30, 36, 44, 38, 52, + 52, 4, 59, 7, 11, 8, 87, 63, 1, 18, + 20, 8, 19, 29, 39, 39, 79, 37, 36, 12, + 2, 9, 7, 23, 43, 43, 65, 19, 44, 18, + 8, 1, 0, 13, 23, 37, 59, 23, 24, 2, + 14, 10, 7, 23, 27, 49, 13, 60, 24, 20, + 12, 6, 7, 23, 23, 29, 124, 91, 85, 71, + 83, 69, 69, 67, 63, 59, 63, 61, 59, 43, + 51, 53, 63, 45, 47, 13, 35, 39, 35, 23, + 15, 17, 7, 23, 9, 11, 7, 9, 10, 9, + 27, 3, 9, 19, 6, 0, 5, 3, 14, 1, + 15, 21, 15, 31, 9, 6, 0, 6, 11, 5, + 11, 10, 8, 15, 19, 23, 23, 2, 124, 118, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 120, 96, 52, 124, 124, 124, 124, 94, 74, 52, + 70, 62, 44, 18, 8, 5, 31, 100, 94, 100, + 72, 42, 50, 38, 28, 44, 34, 20, 7, 12, + 9, 31, 36, 44, 20, 42, 52, 28, 18, 10, + 8, 19, 1, 15, 33, 93, 13, 19, 35, 26, + 3, 21, 23, 7, 1, 0, 6, 4, 18, 3, + 4, 6, 18, 34, 32, 38, 8, 116, 98, 84, + 76, 60, 52, 20, 0, 33, 0, 64, 52, 46, + 32, 28, 14, 8, 0, 13, 23, 7, 1, 0, + 6, 4, 18, 3, 4, 6, 18, 34, 32, 38, + 8, 116, 98, 84, 76, 60, 52, 20, 0, 33, + }, + + { + /* Context 
Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 14 */ + + 122, 16, 25, 122, 16, 25, 77, 39, 32, 20, + 12, 44, 84, 102, 42, 8, 23, 50, 35, 35, + 4, 2, 10, 11, 12, 72, 76, 32, 21, 87, + 71, 67, 17, 35, 35, 4, 47, 23, 50, 10, + 31, 51, 67, 19, 23, 47, 75, 0, 33, 61, + 7, 33, 47, 73, 12, 23, 19, 45, 40, 6, + 44, 0, 0, 0, 23, 51, 67, 3, 22, 1, + 46, 33, 91, 63, 25, 37, 2, 1, 29, 25, + 71, 0, 47, 29, 61, 51, 33, 43, 37, 28, + 1, 13, 61, 21, 61, 35, 73, 3, 27, 17, + 49, 26, 13, 1, 45, 37, 27, 27, 11, 0, + 3, 23, 4, 10, 13, 6, 7, 11, 8, 19, + 46, 9, 2, 38, 40, 54, 50, 32, 39, 13, + 19, 8, 7, 43, 7, 10, 13, 19, 1, 36, + 88, 42, 9, 37, 27, 7, 66, 76, 77, 7, + 2, 41, 12, 1, 3, 0, 66, 112, 86, 16, + 33, 79, 24, 86, 108, 87, 42, 42, 46, 50, + 50, 42, 30, 42, 46, 18, 32, 26, 12, 8, + 3, 24, 20, 18, 18, 8, 16, 30, 14, 21, + 14, 16, 11, 26, 13, 102, 120, 84, 64, 80, + 96, 124, 106, 60, 22, 6, 18, 38, 34, 57, + 24, 32, 7, 62, 28, 28, 34, 42, 34, 48, + 48, 0, 59, 9, 13, 6, 85, 61, 2, 18, + 20, 8, 17, 27, 37, 37, 75, 35, 38, 14, + 4, 9, 5, 21, 39, 41, 61, 19, 44, 18, + 8, 0, 2, 13, 23, 35, 57, 23, 28, 4, + 14, 10, 5, 23, 27, 47, 11, 62, 26, 22, + 12, 8, 7, 21, 23, 29, 124, 87, 83, 69, + 79, 67, 65, 65, 61, 57, 59, 57, 55, 43, + 49, 53, 61, 39, 47, 15, 35, 37, 33, 23, + 15, 17, 7, 21, 9, 13, 7, 9, 10, 9, + 27, 3, 9, 19, 4, 0, 5, 5, 12, 1, + 15, 21, 13, 33, 9, 8, 1, 6, 11, 5, + 9, 10, 6, 13, 17, 23, 23, 0, 124, 116, + 122, 122, 122, 124, 124, 124, 122, 124, 124, 124, + 114, 90, 48, 124, 120, 118, 120, 88, 70, 48, + 66, 56, 40, 16, 6, 7, 31, 96, 90, 96, + 68, 38, 46, 34, 24, 40, 30, 16, 11, 8, + 13, 33, 34, 42, 16, 38, 48, 24, 14, 6, + 4, 21, 3, 17, 33, 91, 15, 23, 37, 24, + 5, 23, 23, 7, 1, 0, 8, 6, 20, 3, + 6, 8, 20, 34, 34, 38, 10, 116, 94, 80, + 72, 56, 48, 16, 3, 35, 0, 66, 52, 48, + 32, 30, 14, 8, 2, 13, 23, 7, 1, 0, + 8, 6, 20, 3, 6, 8, 20, 34, 34, 38, + 10, 116, 94, 80, 72, 56, 48, 16, 3, 35, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 
15 */ + + 120, 16, 25, 120, 16, 25, 73, 37, 32, 20, + 12, 40, 80, 100, 44, 8, 19, 46, 35, 31, + 4, 2, 12, 13, 10, 70, 72, 26, 25, 89, + 67, 65, 17, 35, 31, 4, 47, 21, 50, 10, + 29, 49, 65, 21, 25, 47, 75, 0, 33, 59, + 7, 33, 47, 73, 12, 23, 19, 43, 40, 6, + 44, 0, 0, 0, 21, 51, 67, 3, 20, 1, + 46, 31, 89, 59, 23, 35, 6, 2, 27, 23, + 67, 2, 45, 27, 57, 53, 33, 43, 37, 28, + 1, 11, 57, 21, 59, 33, 69, 3, 27, 17, + 49, 26, 13, 0, 43, 37, 25, 27, 11, 0, + 1, 21, 6, 10, 13, 6, 7, 11, 8, 19, + 46, 9, 2, 38, 40, 54, 50, 32, 39, 11, + 17, 6, 7, 43, 7, 12, 13, 17, 0, 38, + 90, 46, 5, 39, 29, 5, 66, 76, 79, 7, + 2, 41, 14, 1, 3, 2, 66, 112, 86, 18, + 35, 77, 22, 80, 102, 85, 42, 42, 46, 48, + 50, 42, 30, 42, 46, 16, 30, 26, 12, 8, + 3, 24, 18, 16, 18, 8, 16, 28, 12, 23, + 12, 14, 11, 24, 13, 98, 116, 80, 62, 76, + 92, 118, 102, 58, 18, 2, 16, 34, 30, 59, + 22, 30, 11, 60, 26, 26, 32, 38, 30, 44, + 44, 1, 59, 11, 15, 2, 81, 57, 4, 20, + 20, 8, 15, 25, 35, 35, 71, 33, 40, 16, + 6, 7, 3, 19, 37, 39, 57, 17, 46, 20, + 10, 0, 4, 11, 21, 33, 55, 23, 30, 6, + 14, 10, 3, 21, 27, 45, 9, 62, 28, 22, + 12, 10, 5, 19, 21, 27, 124, 85, 79, 65, + 75, 65, 63, 61, 57, 53, 55, 53, 51, 41, + 47, 51, 59, 33, 47, 17, 35, 35, 31, 23, + 15, 17, 7, 21, 9, 15, 7, 9, 12, 9, + 27, 3, 9, 19, 4, 0, 5, 7, 10, 1, + 15, 21, 11, 35, 9, 10, 3, 6, 11, 5, + 9, 10, 6, 13, 15, 23, 23, 3, 122, 114, + 120, 118, 118, 124, 124, 124, 118, 120, 124, 122, + 108, 84, 44, 122, 114, 110, 110, 82, 66, 44, + 62, 52, 36, 14, 4, 9, 31, 92, 86, 92, + 64, 34, 42, 32, 20, 36, 26, 12, 15, 4, + 17, 35, 32, 40, 12, 36, 44, 22, 10, 4, + 2, 23, 5, 19, 33, 89, 17, 25, 41, 22, + 7, 25, 23, 5, 0, 2, 10, 8, 22, 3, + 8, 10, 22, 36, 36, 40, 12, 114, 90, 76, + 68, 52, 44, 12, 7, 37, 2, 66, 54, 50, + 34, 32, 16, 10, 2, 11, 23, 5, 0, 2, + 10, 8, 22, 3, 8, 10, 22, 36, 36, 40, + 12, 114, 90, 76, 68, 52, 44, 12, 7, 37, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 16 */ + + 116, 14, 27, 116, 14, 27, 71, 35, 32, 
20, + 10, 36, 76, 98, 44, 8, 17, 42, 35, 29, + 2, 0, 14, 15, 6, 66, 68, 18, 31, 91, + 63, 63, 17, 35, 29, 2, 47, 21, 50, 8, + 29, 49, 63, 25, 29, 49, 77, 1, 33, 59, + 9, 35, 47, 73, 10, 23, 19, 43, 40, 4, + 44, 0, 0, 0, 21, 53, 67, 3, 18, 3, + 44, 31, 87, 55, 21, 35, 8, 4, 27, 21, + 65, 2, 43, 27, 53, 55, 35, 45, 39, 28, + 3, 11, 53, 21, 59, 33, 67, 3, 27, 17, + 49, 26, 15, 0, 43, 37, 25, 27, 11, 0, + 1, 19, 6, 10, 15, 4, 7, 11, 6, 19, + 46, 9, 2, 38, 40, 54, 50, 32, 41, 11, + 17, 4, 7, 43, 9, 12, 13, 17, 2, 40, + 90, 48, 3, 41, 33, 5, 66, 76, 81, 9, + 2, 43, 14, 3, 3, 4, 66, 110, 86, 20, + 37, 75, 18, 74, 94, 83, 40, 42, 44, 46, + 48, 40, 28, 40, 44, 14, 28, 24, 10, 6, + 5, 22, 16, 14, 16, 6, 14, 26, 10, 27, + 10, 12, 11, 20, 15, 92, 110, 76, 58, 72, + 86, 110, 96, 54, 14, 3, 12, 28, 26, 63, + 18, 26, 15, 56, 24, 24, 28, 34, 26, 40, + 40, 5, 59, 13, 17, 1, 79, 55, 6, 20, + 20, 8, 15, 25, 35, 35, 69, 31, 40, 16, + 6, 7, 1, 17, 35, 39, 53, 17, 46, 20, + 10, 0, 4, 11, 21, 33, 53, 23, 32, 8, + 14, 8, 3, 21, 27, 45, 9, 62, 28, 22, + 12, 12, 5, 19, 21, 27, 124, 83, 77, 63, + 73, 63, 61, 59, 55, 51, 53, 51, 47, 41, + 47, 51, 59, 27, 47, 21, 35, 35, 31, 23, + 15, 17, 9, 21, 11, 17, 9, 9, 12, 11, + 27, 5, 9, 19, 2, 1, 7, 9, 8, 3, + 17, 21, 9, 37, 11, 10, 5, 4, 11, 7, + 9, 8, 4, 13, 15, 23, 23, 7, 118, 112, + 116, 114, 112, 124, 124, 124, 112, 114, 124, 116, + 100, 78, 40, 114, 106, 102, 98, 76, 60, 40, + 56, 46, 32, 10, 0, 11, 31, 86, 80, 86, + 60, 28, 38, 28, 14, 32, 20, 8, 21, 1, + 21, 39, 28, 36, 6, 32, 40, 18, 6, 0, + 1, 25, 7, 21, 35, 87, 21, 29, 45, 18, + 11, 29, 23, 5, 0, 2, 12, 8, 22, 3, + 10, 10, 24, 36, 36, 40, 12, 112, 86, 72, + 62, 46, 40, 6, 11, 41, 2, 66, 54, 50, + 34, 32, 16, 10, 2, 11, 23, 5, 0, 2, + 12, 8, 22, 3, 10, 10, 24, 36, 36, 40, + 12, 112, 86, 72, 62, 46, 40, 6, 11, 41, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 17 */ + + 114, 14, 27, 114, 14, 27, 67, 31, 34, 22, + 10, 34, 74, 98, 46, 10, 13, 40, 33, 25, + 2, 0, 
18, 15, 4, 64, 66, 12, 35, 91, + 59, 59, 15, 33, 25, 2, 45, 19, 52, 8, + 27, 47, 59, 27, 31, 49, 77, 1, 31, 57, + 9, 35, 45, 71, 10, 21, 17, 41, 42, 4, + 44, 0, 0, 0, 19, 53, 67, 1, 18, 3, + 44, 29, 83, 49, 17, 33, 12, 8, 25, 17, + 61, 4, 39, 25, 47, 55, 35, 45, 39, 30, + 3, 9, 47, 19, 57, 31, 63, 3, 25, 17, + 47, 28, 15, 2, 41, 35, 23, 25, 9, 2, + 0, 17, 8, 12, 15, 4, 5, 9, 6, 17, + 48, 7, 2, 40, 42, 56, 52, 34, 41, 9, + 15, 4, 7, 41, 9, 14, 11, 15, 6, 44, + 92, 52, 0, 41, 35, 3, 68, 78, 81, 9, + 4, 43, 16, 3, 1, 8, 66, 110, 88, 24, + 39, 71, 16, 70, 88, 79, 40, 42, 44, 46, + 48, 40, 28, 40, 44, 14, 28, 24, 10, 6, + 5, 22, 16, 14, 16, 6, 14, 26, 10, 29, + 10, 12, 11, 18, 15, 88, 106, 74, 56, 70, + 82, 104, 92, 52, 12, 7, 10, 24, 24, 65, + 16, 24, 17, 54, 24, 24, 26, 32, 24, 38, + 38, 7, 59, 13, 17, 3, 75, 51, 10, 22, + 20, 8, 13, 23, 33, 33, 65, 27, 42, 18, + 8, 5, 2, 15, 31, 37, 47, 15, 48, 22, + 12, 2, 6, 9, 19, 31, 49, 21, 36, 10, + 16, 8, 1, 19, 25, 43, 7, 64, 30, 24, + 14, 14, 3, 17, 19, 25, 124, 79, 73, 59, + 69, 59, 57, 55, 51, 47, 49, 47, 41, 39, + 45, 49, 57, 19, 45, 23, 33, 33, 29, 21, + 15, 15, 9, 19, 11, 17, 9, 9, 14, 11, + 27, 5, 9, 17, 2, 1, 7, 9, 8, 3, + 17, 19, 5, 37, 11, 12, 5, 4, 11, 7, + 7, 8, 4, 11, 13, 21, 21, 9, 116, 110, + 114, 112, 108, 120, 120, 118, 108, 110, 118, 112, + 94, 74, 36, 108, 100, 96, 88, 72, 56, 38, + 52, 42, 30, 8, 1, 11, 31, 82, 76, 82, + 56, 24, 36, 26, 10, 30, 16, 6, 25, 5, + 23, 41, 26, 34, 2, 30, 38, 16, 4, 1, + 3, 27, 7, 21, 35, 85, 23, 31, 47, 16, + 13, 31, 21, 3, 2, 4, 16, 10, 24, 1, + 14, 12, 26, 38, 38, 42, 14, 112, 84, 70, + 58, 42, 38, 2, 13, 43, 4, 68, 56, 52, + 36, 34, 18, 12, 4, 9, 21, 3, 2, 4, + 16, 10, 24, 1, 14, 12, 26, 38, 38, 42, + 14, 112, 84, 70, 58, 42, 38, 2, 13, 43, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 18 */ + + 112, 14, 27, 112, 14, 27, 63, 29, 34, 22, + 10, 30, 70, 96, 48, 10, 9, 36, 33, 23, + 2, 0, 20, 17, 2, 62, 62, 6, 39, 93, + 55, 57, 15, 33, 23, 2, 45, 
17, 52, 8, + 25, 45, 57, 29, 33, 49, 77, 1, 31, 55, + 9, 35, 45, 71, 10, 21, 17, 39, 42, 4, + 44, 0, 0, 0, 17, 53, 67, 1, 16, 3, + 44, 27, 81, 45, 15, 31, 16, 12, 23, 15, + 57, 6, 37, 23, 43, 57, 35, 45, 39, 30, + 3, 7, 43, 19, 55, 29, 59, 3, 25, 17, + 47, 28, 15, 2, 39, 35, 21, 25, 9, 2, + 0, 15, 10, 12, 15, 4, 5, 9, 6, 17, + 48, 7, 2, 40, 42, 56, 52, 34, 43, 7, + 13, 2, 7, 41, 9, 16, 11, 15, 8, 46, + 94, 56, 4, 43, 37, 3, 68, 78, 83, 9, + 4, 45, 18, 3, 1, 10, 66, 110, 88, 26, + 41, 69, 14, 64, 82, 77, 40, 42, 44, 44, + 46, 40, 28, 40, 42, 12, 26, 24, 10, 6, + 5, 20, 14, 12, 16, 6, 14, 24, 8, 31, + 8, 10, 11, 16, 17, 84, 102, 70, 52, 66, + 78, 98, 88, 50, 8, 11, 6, 20, 20, 67, + 14, 22, 21, 52, 22, 22, 24, 28, 20, 34, + 34, 11, 59, 15, 19, 7, 73, 49, 12, 24, + 20, 8, 11, 21, 31, 31, 61, 25, 44, 20, + 10, 3, 4, 13, 29, 35, 43, 13, 48, 22, + 12, 2, 8, 7, 17, 29, 47, 21, 38, 12, + 16, 8, 0, 17, 25, 41, 5, 64, 32, 24, + 14, 16, 1, 15, 17, 23, 124, 77, 69, 55, + 65, 57, 55, 51, 47, 45, 45, 43, 37, 37, + 43, 47, 55, 13, 45, 25, 33, 31, 27, 21, + 15, 15, 9, 19, 11, 19, 9, 9, 16, 11, + 27, 5, 9, 17, 2, 1, 7, 11, 6, 3, + 17, 19, 3, 39, 11, 14, 7, 4, 11, 7, + 7, 8, 4, 11, 11, 21, 21, 13, 114, 108, + 112, 108, 104, 114, 114, 112, 104, 104, 112, 106, + 88, 68, 32, 100, 92, 88, 78, 66, 52, 34, + 48, 38, 26, 6, 3, 13, 31, 78, 72, 78, + 52, 20, 32, 22, 6, 26, 12, 2, 29, 9, + 27, 43, 24, 32, 1, 26, 34, 12, 0, 3, + 5, 29, 9, 23, 35, 83, 25, 33, 51, 14, + 15, 33, 21, 3, 4, 6, 18, 12, 26, 1, + 16, 14, 28, 38, 40, 44, 16, 110, 80, 66, + 54, 38, 34, 1, 17, 45, 6, 68, 56, 54, + 38, 36, 18, 14, 4, 7, 21, 3, 4, 6, + 18, 12, 26, 1, 16, 14, 28, 38, 40, 44, + 16, 110, 80, 66, 54, 38, 34, 1, 17, 45, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 19 */ + + 110, 14, 27, 110, 14, 27, 59, 25, 36, 22, + 8, 26, 68, 94, 48, 10, 5, 34, 33, 19, + 2, 1, 22, 19, 0, 60, 58, 1, 43, 95, + 51, 53, 15, 33, 19, 2, 43, 15, 54, 8, + 23, 43, 55, 31, 35, 49, 79, 1, 31, 53, + 9, 
37, 45, 71, 10, 21, 17, 37, 44, 4, + 44, 0, 0, 0, 17, 53, 67, 0, 14, 3, + 42, 27, 79, 41, 13, 29, 18, 14, 21, 13, + 53, 8, 35, 21, 39, 59, 35, 45, 39, 32, + 3, 5, 39, 19, 53, 27, 55, 3, 25, 17, + 47, 30, 15, 4, 37, 35, 19, 25, 9, 2, + 2, 13, 10, 12, 15, 4, 5, 9, 6, 17, + 48, 7, 2, 42, 42, 58, 52, 36, 43, 7, + 11, 2, 7, 41, 9, 18, 11, 13, 10, 48, + 96, 58, 8, 43, 39, 1, 68, 78, 85, 9, + 6, 45, 18, 3, 1, 12, 66, 110, 88, 28, + 43, 65, 12, 58, 76, 73, 40, 42, 42, 42, + 46, 38, 26, 40, 42, 10, 24, 24, 10, 6, + 7, 20, 12, 10, 14, 6, 14, 22, 8, 33, + 8, 8, 11, 14, 17, 80, 98, 66, 50, 62, + 74, 92, 82, 48, 4, 15, 4, 16, 16, 71, + 12, 20, 25, 50, 20, 20, 22, 26, 16, 30, + 30, 13, 59, 17, 21, 9, 69, 45, 16, 24, + 20, 8, 9, 19, 29, 29, 57, 23, 46, 22, + 12, 3, 6, 11, 25, 33, 39, 13, 50, 24, + 14, 4, 10, 7, 17, 27, 45, 21, 42, 14, + 16, 8, 2, 17, 25, 39, 3, 66, 34, 26, + 14, 18, 1, 13, 17, 23, 124, 73, 67, 53, + 61, 55, 51, 49, 45, 41, 41, 39, 33, 37, + 41, 47, 53, 7, 45, 27, 33, 29, 25, 21, + 15, 15, 9, 17, 11, 21, 9, 9, 16, 11, + 27, 5, 9, 17, 0, 1, 7, 13, 4, 3, + 17, 19, 1, 41, 11, 16, 9, 4, 11, 7, + 5, 8, 2, 9, 9, 21, 21, 15, 112, 106, + 110, 104, 100, 110, 110, 106, 98, 98, 106, 100, + 82, 62, 28, 92, 86, 80, 68, 60, 48, 30, + 44, 32, 22, 4, 5, 15, 31, 74, 68, 74, + 48, 16, 28, 20, 2, 22, 8, 1, 33, 13, + 31, 45, 22, 30, 5, 24, 30, 10, 3, 7, + 9, 31, 11, 25, 35, 81, 27, 37, 53, 12, + 17, 35, 21, 1, 4, 6, 20, 14, 28, 1, + 18, 16, 30, 40, 42, 44, 18, 110, 76, 62, + 50, 34, 30, 5, 21, 47, 6, 70, 58, 56, + 38, 38, 20, 14, 6, 7, 21, 1, 4, 6, + 20, 14, 28, 1, 18, 16, 30, 40, 42, 44, + 18, 110, 76, 62, 50, 34, 30, 5, 21, 47, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 20 */ + + 106, 14, 27, 106, 14, 27, 57, 23, 36, 22, + 8, 22, 64, 92, 50, 10, 3, 30, 33, 17, + 0, 1, 24, 21, 3, 56, 54, 7, 49, 97, + 47, 51, 15, 33, 17, 0, 43, 13, 54, 6, + 23, 41, 51, 33, 39, 51, 79, 1, 29, 51, + 11, 37, 45, 71, 10, 21, 17, 37, 44, 4, + 44, 0, 0, 0, 15, 55, 67, 0, 
12, 3, + 42, 25, 77, 37, 11, 27, 22, 18, 21, 11, + 49, 10, 33, 19, 33, 59, 37, 45, 41, 32, + 5, 3, 33, 17, 53, 27, 51, 3, 25, 17, + 47, 30, 15, 4, 35, 35, 19, 23, 9, 2, + 2, 11, 12, 12, 17, 2, 5, 9, 6, 17, + 48, 7, 2, 42, 42, 58, 52, 36, 45, 5, + 11, 0, 7, 39, 9, 20, 9, 13, 14, 52, + 98, 62, 12, 45, 41, 1, 70, 78, 87, 9, + 6, 47, 20, 3, 1, 14, 66, 110, 88, 32, + 45, 63, 10, 52, 70, 71, 38, 42, 42, 42, + 44, 38, 26, 40, 40, 8, 22, 22, 10, 6, + 7, 18, 12, 8, 14, 4, 12, 20, 6, 35, + 6, 6, 11, 10, 19, 76, 94, 62, 46, 60, + 70, 84, 78, 46, 0, 21, 0, 12, 12, 73, + 8, 18, 27, 48, 18, 18, 18, 22, 12, 26, + 28, 17, 59, 19, 23, 13, 67, 43, 18, 26, + 20, 8, 7, 17, 27, 29, 53, 19, 48, 22, + 12, 1, 10, 9, 23, 31, 33, 11, 50, 24, + 14, 4, 12, 5, 15, 27, 43, 21, 44, 16, + 16, 8, 4, 15, 25, 37, 1, 66, 36, 26, + 14, 20, 0, 11, 15, 21, 124, 71, 63, 49, + 59, 53, 49, 45, 41, 39, 39, 35, 27, 35, + 39, 45, 51, 1, 43, 29, 31, 27, 25, 21, + 15, 15, 11, 17, 13, 23, 9, 9, 18, 11, + 27, 5, 9, 15, 0, 3, 7, 15, 2, 5, + 17, 19, 0, 43, 13, 16, 9, 2, 11, 7, + 5, 6, 2, 9, 9, 21, 21, 19, 110, 104, + 108, 102, 94, 104, 104, 100, 94, 92, 98, 94, + 74, 58, 24, 84, 78, 72, 58, 54, 44, 26, + 40, 28, 18, 0, 7, 17, 31, 68, 62, 68, + 44, 12, 24, 16, 1, 18, 4, 5, 37, 19, + 33, 49, 20, 28, 11, 20, 28, 6, 7, 9, + 11, 33, 13, 27, 35, 79, 31, 39, 57, 8, + 19, 37, 21, 1, 6, 8, 22, 14, 28, 0, + 20, 18, 32, 40, 44, 46, 18, 108, 72, 58, + 46, 30, 26, 11, 25, 51, 8, 70, 58, 56, + 40, 38, 20, 16, 6, 5, 21, 1, 6, 8, + 22, 14, 28, 0, 20, 18, 32, 40, 44, 46, + 18, 108, 72, 58, 46, 30, 26, 11, 25, 51, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 21 */ + + 104, 14, 27, 104, 14, 27, 53, 19, 36, 22, + 6, 18, 62, 90, 52, 10, 0, 26, 31, 13, + 0, 1, 26, 23, 5, 54, 50, 13, 53, 97, + 43, 49, 15, 31, 13, 0, 43, 11, 54, 6, + 21, 39, 49, 35, 41, 51, 79, 1, 29, 49, + 11, 37, 45, 71, 10, 19, 17, 35, 46, 4, + 44, 0, 0, 0, 15, 55, 67, 2, 10, 3, + 40, 25, 73, 31, 9, 25, 24, 20, 19, 9, + 45, 12, 31, 17, 
29, 61, 37, 45, 41, 32, + 5, 1, 29, 17, 51, 25, 47, 3, 25, 17, + 45, 30, 15, 6, 33, 33, 17, 23, 9, 4, + 4, 9, 12, 12, 17, 2, 3, 9, 6, 17, + 50, 7, 2, 44, 44, 58, 52, 36, 45, 5, + 9, 0, 7, 39, 9, 22, 9, 11, 16, 54, + 100, 64, 16, 45, 43, 0, 70, 78, 89, 9, + 8, 47, 22, 3, 1, 18, 66, 110, 88, 34, + 47, 59, 8, 48, 64, 69, 38, 42, 40, 40, + 44, 38, 24, 40, 40, 6, 20, 22, 10, 6, + 7, 18, 10, 6, 12, 4, 12, 18, 6, 37, + 4, 6, 11, 8, 19, 72, 90, 60, 44, 56, + 66, 78, 72, 44, 3, 25, 1, 8, 8, 75, + 6, 16, 31, 46, 16, 16, 16, 20, 8, 22, + 24, 19, 59, 21, 25, 17, 63, 39, 20, 26, + 20, 8, 5, 15, 25, 27, 49, 17, 50, 24, + 14, 0, 12, 7, 19, 29, 29, 9, 52, 26, + 16, 6, 14, 3, 15, 25, 41, 19, 46, 18, + 16, 8, 6, 15, 23, 35, 0, 66, 38, 26, + 14, 22, 2, 9, 15, 19, 124, 69, 61, 47, + 55, 51, 45, 43, 37, 35, 35, 31, 23, 33, + 37, 43, 49, 4, 43, 31, 31, 25, 23, 21, + 15, 15, 11, 15, 13, 25, 9, 9, 20, 11, + 27, 5, 9, 15, 1, 3, 7, 17, 0, 5, + 17, 19, 2, 43, 13, 18, 11, 2, 11, 7, + 5, 6, 0, 9, 7, 21, 21, 21, 108, 102, + 106, 98, 90, 100, 98, 94, 88, 86, 92, 88, + 68, 52, 20, 76, 72, 64, 48, 48, 40, 24, + 36, 22, 14, 1, 9, 19, 31, 64, 58, 64, + 40, 8, 20, 14, 5, 14, 0, 9, 41, 23, + 37, 51, 18, 26, 15, 18, 24, 4, 11, 11, + 13, 35, 15, 29, 35, 77, 33, 41, 59, 6, + 21, 39, 21, 0, 6, 10, 24, 16, 30, 0, + 22, 20, 34, 42, 46, 46, 20, 108, 68, 54, + 42, 26, 22, 15, 29, 53, 8, 72, 60, 58, + 42, 40, 22, 16, 6, 3, 21, 0, 6, 10, + 24, 16, 30, 0, 22, 20, 34, 42, 46, 46, + 20, 108, 68, 54, 42, 26, 22, 15, 29, 53, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 22 */ + + 102, 14, 29, 102, 14, 29, 49, 17, 38, 22, + 6, 16, 58, 88, 52, 12, 4, 24, 31, 11, + 0, 3, 28, 25, 7, 52, 48, 21, 57, 99, + 39, 45, 15, 31, 11, 0, 41, 9, 56, 6, + 19, 37, 47, 39, 43, 51, 81, 1, 29, 49, + 11, 39, 43, 71, 8, 19, 17, 33, 46, 4, + 44, 0, 0, 0, 13, 55, 67, 2, 10, 5, + 40, 23, 71, 27, 7, 23, 28, 24, 17, 7, + 41, 14, 29, 17, 25, 63, 37, 45, 41, 34, + 5, 0, 25, 17, 49, 23, 43, 3, 25, 17, + 45, 32, 
17, 6, 33, 33, 15, 23, 7, 4, + 4, 7, 14, 12, 17, 2, 3, 9, 6, 17, + 50, 7, 2, 44, 44, 60, 54, 38, 47, 3, + 7, 1, 7, 39, 9, 24, 9, 11, 18, 56, + 100, 68, 18, 47, 45, 0, 70, 78, 89, 11, + 8, 49, 22, 5, 0, 20, 66, 110, 90, 36, + 49, 57, 6, 42, 58, 65, 38, 42, 40, 38, + 42, 36, 24, 38, 38, 6, 20, 22, 8, 6, + 9, 16, 8, 4, 12, 4, 12, 18, 4, 41, + 4, 4, 11, 6, 21, 66, 86, 56, 40, 52, + 62, 72, 68, 40, 7, 29, 5, 4, 4, 79, + 4, 12, 35, 42, 14, 14, 14, 16, 4, 18, + 20, 23, 59, 23, 27, 19, 61, 37, 24, 28, + 20, 8, 3, 15, 25, 25, 47, 15, 52, 26, + 16, 0, 14, 5, 17, 27, 25, 9, 52, 26, + 16, 6, 16, 3, 13, 23, 39, 19, 50, 20, + 16, 8, 6, 13, 23, 35, 0, 68, 38, 28, + 14, 24, 2, 9, 13, 19, 124, 65, 57, 43, + 51, 47, 43, 39, 35, 33, 31, 27, 19, 33, + 35, 43, 49, 10, 43, 33, 31, 25, 21, 21, + 15, 15, 11, 15, 13, 27, 9, 9, 20, 11, + 27, 5, 9, 15, 1, 3, 7, 19, 1, 5, + 17, 19, 4, 45, 13, 20, 13, 2, 11, 7, + 3, 6, 0, 7, 5, 21, 21, 25, 106, 100, + 104, 94, 86, 94, 94, 88, 84, 80, 86, 82, + 62, 46, 16, 70, 64, 56, 38, 44, 34, 20, + 32, 18, 10, 3, 11, 21, 31, 60, 54, 60, + 36, 4, 18, 10, 9, 10, 5, 13, 45, 27, + 41, 53, 16, 22, 19, 14, 20, 0, 15, 15, + 17, 37, 17, 31, 37, 75, 35, 45, 63, 4, + 25, 41, 19, 0, 8, 10, 26, 18, 32, 0, + 24, 22, 36, 42, 48, 48, 22, 106, 64, 50, + 38, 22, 18, 19, 33, 55, 10, 72, 60, 60, + 42, 42, 22, 18, 8, 3, 19, 0, 8, 10, + 26, 18, 32, 0, 24, 22, 36, 42, 48, 48, + 22, 106, 64, 50, 38, 22, 18, 19, 33, 55, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 23 */ + + 100, 14, 29, 100, 14, 29, 45, 13, 38, 22, + 4, 12, 56, 86, 54, 12, 6, 20, 31, 7, + 1, 3, 30, 27, 9, 50, 44, 27, 63, 101, + 35, 43, 15, 31, 7, 1, 41, 7, 56, 6, + 19, 35, 43, 41, 45, 51, 81, 1, 27, 47, + 13, 39, 43, 71, 8, 19, 17, 31, 48, 4, + 44, 0, 0, 0, 13, 55, 67, 4, 8, 5, + 38, 23, 69, 23, 5, 21, 30, 26, 15, 5, + 37, 16, 27, 15, 19, 63, 39, 45, 43, 34, + 5, 2, 19, 15, 49, 23, 39, 3, 25, 17, + 45, 32, 17, 8, 31, 33, 13, 21, 7, 4, + 6, 5, 14, 12, 19, 2, 3, 9, 6, 17, + 50, 7, 
2, 46, 44, 60, 54, 38, 47, 3, + 7, 1, 7, 37, 9, 26, 7, 9, 22, 60, + 102, 70, 22, 47, 47, 2, 72, 78, 91, 11, + 10, 49, 24, 5, 0, 22, 66, 110, 90, 40, + 51, 53, 4, 36, 52, 63, 38, 42, 38, 38, + 42, 36, 22, 38, 38, 4, 18, 22, 8, 6, + 9, 16, 8, 2, 10, 4, 12, 16, 4, 43, + 2, 2, 11, 4, 21, 62, 82, 52, 38, 50, + 58, 66, 62, 38, 11, 33, 7, 0, 0, 81, + 2, 10, 37, 40, 12, 12, 10, 14, 0, 14, + 18, 25, 59, 25, 29, 23, 57, 33, 26, 28, + 20, 8, 1, 13, 23, 25, 43, 11, 54, 28, + 16, 2, 18, 3, 13, 25, 19, 7, 54, 28, + 18, 8, 18, 1, 13, 21, 37, 19, 52, 22, + 16, 8, 8, 13, 23, 33, 2, 68, 40, 28, + 14, 26, 4, 7, 13, 17, 124, 63, 55, 41, + 49, 45, 39, 37, 31, 29, 27, 23, 13, 31, + 33, 41, 47, 16, 41, 35, 29, 23, 19, 21, + 15, 15, 11, 13, 15, 29, 9, 9, 22, 11, + 27, 5, 9, 13, 3, 5, 7, 21, 3, 7, + 17, 19, 6, 47, 15, 22, 13, 0, 11, 7, + 3, 4, 1, 7, 5, 21, 21, 27, 104, 98, + 102, 92, 80, 90, 88, 82, 78, 74, 80, 76, + 56, 42, 12, 62, 58, 48, 28, 38, 30, 16, + 28, 12, 6, 5, 13, 23, 31, 56, 48, 54, + 32, 0, 14, 8, 13, 6, 9, 17, 49, 31, + 43, 57, 14, 20, 23, 12, 18, 1, 19, 17, + 19, 39, 19, 33, 37, 73, 39, 47, 65, 0, + 27, 43, 19, 2, 8, 12, 28, 20, 32, 2, + 26, 24, 38, 44, 50, 48, 22, 106, 60, 46, + 34, 18, 14, 25, 37, 57, 10, 74, 62, 60, + 44, 44, 24, 18, 8, 1, 19, 2, 8, 12, + 28, 20, 32, 2, 26, 24, 38, 44, 50, 48, + 22, 106, 60, 46, 34, 18, 14, 25, 37, 57, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 24 */ + + 96, 12, 29, 96, 12, 29, 43, 11, 38, 22, + 4, 8, 52, 84, 54, 12, 10, 16, 31, 5, + 1, 5, 32, 29, 13, 46, 40, 35, 67, 103, + 31, 41, 15, 31, 5, 1, 41, 7, 56, 4, + 17, 33, 41, 43, 49, 53, 83, 1, 27, 45, + 13, 41, 43, 71, 8, 19, 17, 31, 48, 4, + 44, 0, 0, 0, 11, 57, 67, 4, 6, 5, + 38, 21, 67, 19, 3, 21, 34, 30, 15, 3, + 33, 16, 25, 13, 15, 65, 39, 47, 43, 34, + 7, 2, 15, 15, 47, 21, 37, 3, 25, 17, + 45, 32, 17, 8, 29, 33, 13, 21, 7, 4, + 6, 3, 16, 12, 19, 0, 3, 9, 4, 17, + 50, 7, 2, 46, 44, 60, 54, 38, 49, 1, + 5, 3, 7, 37, 9, 28, 7, 9, 24, 62, + 104, 74, 
26, 49, 49, 2, 72, 78, 93, 11, + 10, 51, 24, 5, 0, 24, 66, 110, 90, 42, + 53, 51, 2, 30, 44, 61, 36, 42, 38, 36, + 40, 34, 22, 38, 36, 2, 16, 20, 8, 4, + 11, 14, 6, 0, 10, 2, 10, 14, 2, 45, + 0, 0, 11, 0, 23, 58, 78, 48, 34, 46, + 52, 58, 58, 36, 15, 39, 11, 5, 3, 85, + 1, 8, 41, 38, 10, 10, 8, 10, 3, 10, + 14, 29, 59, 27, 31, 27, 55, 31, 28, 30, + 20, 8, 1, 11, 21, 23, 39, 9, 54, 28, + 18, 2, 20, 1, 11, 23, 15, 7, 54, 28, + 18, 8, 18, 1, 11, 21, 35, 19, 54, 24, + 16, 8, 10, 11, 23, 31, 4, 68, 42, 28, + 14, 28, 4, 5, 11, 17, 124, 61, 51, 37, + 45, 43, 37, 33, 29, 27, 25, 19, 9, 31, + 31, 41, 45, 22, 41, 37, 29, 21, 19, 21, + 15, 15, 13, 13, 15, 31, 9, 9, 22, 11, + 27, 7, 9, 13, 3, 5, 9, 23, 5, 7, + 19, 19, 8, 49, 15, 22, 15, 0, 11, 9, + 3, 4, 1, 7, 3, 21, 21, 31, 102, 96, + 100, 88, 76, 84, 82, 76, 74, 68, 72, 70, + 48, 36, 8, 54, 50, 40, 18, 32, 26, 12, + 22, 8, 2, 9, 17, 25, 31, 50, 44, 50, + 28, 5, 10, 4, 19, 2, 13, 21, 53, 37, + 47, 59, 10, 18, 29, 8, 14, 5, 23, 21, + 23, 41, 21, 35, 37, 71, 41, 51, 69, 1, + 29, 45, 19, 2, 10, 12, 30, 20, 34, 2, + 28, 24, 40, 44, 50, 50, 24, 104, 56, 42, + 30, 14, 10, 29, 41, 61, 12, 74, 62, 62, + 44, 44, 24, 20, 8, 1, 19, 2, 10, 12, + 30, 20, 34, 2, 28, 24, 40, 44, 50, 50, + 24, 104, 56, 42, 30, 14, 10, 29, 41, 61, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 25 */ + + 94, 12, 29, 94, 12, 29, 39, 9, 40, 22, + 4, 4, 48, 84, 56, 12, 14, 14, 29, 1, + 1, 5, 34, 29, 15, 44, 36, 41, 71, 103, + 27, 37, 13, 29, 1, 1, 39, 5, 58, 4, + 15, 31, 39, 45, 51, 53, 83, 1, 27, 43, + 13, 41, 43, 71, 8, 17, 15, 29, 48, 4, + 44, 0, 0, 0, 9, 57, 67, 4, 4, 5, + 38, 19, 63, 13, 1, 19, 38, 34, 13, 1, + 29, 18, 21, 11, 11, 67, 39, 47, 43, 36, + 7, 4, 11, 15, 45, 19, 33, 3, 23, 17, + 43, 34, 17, 10, 27, 31, 11, 21, 7, 6, + 8, 1, 18, 12, 19, 0, 1, 7, 4, 17, + 52, 7, 2, 46, 46, 62, 54, 40, 49, 0, + 3, 5, 7, 37, 9, 30, 7, 7, 26, 64, + 106, 78, 30, 51, 51, 4, 72, 80, 95, 11, + 10, 51, 26, 5, 0, 28, 66, 110, 90, 44, + 55, 
49, 0, 26, 38, 57, 36, 42, 38, 34, + 40, 34, 22, 38, 36, 0, 14, 20, 8, 4, + 11, 14, 4, 0, 10, 2, 10, 12, 0, 47, + 0, 0, 11, 1, 23, 54, 74, 46, 32, 42, + 48, 52, 54, 34, 19, 43, 13, 9, 5, 87, + 3, 6, 45, 36, 8, 8, 6, 6, 5, 6, + 10, 31, 59, 27, 33, 29, 51, 27, 32, 32, + 20, 8, 0, 9, 19, 21, 35, 7, 56, 30, + 20, 4, 22, 0, 9, 21, 11, 5, 56, 30, + 20, 8, 20, 0, 9, 19, 31, 17, 58, 26, + 18, 8, 12, 9, 21, 29, 6, 70, 44, 30, + 16, 30, 6, 3, 9, 15, 124, 57, 47, 33, + 41, 41, 35, 29, 25, 23, 21, 15, 5, 29, + 29, 39, 43, 30, 41, 39, 29, 19, 17, 19, + 15, 15, 13, 13, 15, 33, 9, 9, 24, 11, + 27, 7, 9, 13, 3, 5, 9, 23, 7, 7, + 19, 17, 12, 49, 15, 24, 17, 0, 11, 9, + 1, 4, 1, 5, 1, 19, 19, 35, 100, 94, + 98, 84, 72, 78, 78, 70, 70, 64, 66, 66, + 42, 30, 4, 46, 44, 34, 8, 26, 22, 10, + 18, 4, 0, 11, 19, 27, 31, 46, 40, 46, + 24, 9, 6, 2, 23, 1, 17, 25, 57, 41, + 51, 61, 8, 16, 33, 6, 10, 7, 27, 23, + 25, 43, 23, 35, 37, 69, 43, 53, 73, 3, + 31, 47, 19, 4, 12, 14, 34, 22, 36, 2, + 30, 26, 42, 46, 52, 52, 26, 102, 54, 38, + 26, 10, 6, 33, 45, 63, 14, 74, 64, 64, + 46, 46, 26, 22, 10, 0, 19, 4, 12, 14, + 34, 22, 36, 2, 30, 26, 42, 46, 52, 52, + 26, 102, 54, 38, 26, 10, 6, 33, 45, 63, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 26 */ + + 92, 12, 29, 92, 12, 29, 35, 5, 40, 22, + 2, 0, 46, 82, 58, 12, 16, 10, 29, 0, + 3, 5, 36, 31, 17, 42, 32, 47, 77, 105, + 23, 35, 13, 29, 0, 3, 39, 3, 58, 4, + 15, 29, 35, 47, 53, 53, 83, 1, 25, 41, + 15, 41, 43, 71, 8, 17, 15, 27, 50, 4, + 44, 0, 0, 0, 9, 57, 67, 6, 2, 5, + 36, 19, 61, 9, 0, 17, 40, 36, 11, 0, + 25, 20, 19, 9, 5, 67, 41, 47, 45, 36, + 7, 6, 5, 13, 45, 19, 29, 3, 23, 17, + 43, 34, 17, 10, 25, 31, 9, 19, 7, 6, + 8, 0, 18, 12, 21, 0, 1, 7, 4, 17, + 52, 7, 2, 48, 46, 62, 54, 40, 51, 0, + 3, 5, 7, 35, 9, 32, 5, 7, 30, 68, + 108, 80, 34, 51, 53, 4, 74, 80, 97, 11, + 12, 53, 28, 5, 0, 30, 66, 110, 90, 48, + 57, 45, 1, 20, 32, 55, 36, 42, 36, 34, + 38, 34, 20, 38, 34, 1, 12, 20, 8, 4, + 11, 12, 4, 1, 8, 2, 
10, 10, 0, 49, + 1, 1, 11, 3, 25, 50, 70, 42, 28, 40, + 44, 46, 48, 32, 23, 47, 17, 13, 9, 89, + 5, 4, 47, 34, 6, 6, 2, 4, 9, 2, + 8, 35, 59, 29, 35, 33, 49, 25, 34, 32, + 20, 8, 2, 7, 17, 21, 31, 3, 58, 32, + 20, 6, 26, 2, 5, 19, 5, 3, 56, 30, + 20, 10, 22, 2, 9, 17, 29, 17, 60, 28, + 18, 8, 14, 9, 21, 27, 8, 70, 46, 30, + 16, 32, 8, 1, 9, 13, 124, 55, 45, 31, + 39, 39, 31, 27, 21, 21, 17, 11, 0, 27, + 27, 37, 41, 36, 39, 41, 27, 17, 15, 19, + 15, 15, 13, 11, 17, 35, 9, 9, 26, 11, + 27, 7, 9, 11, 5, 7, 9, 25, 9, 9, + 19, 17, 14, 51, 17, 26, 17, 1, 11, 9, + 1, 2, 3, 5, 1, 19, 19, 37, 98, 92, + 96, 82, 66, 74, 72, 64, 64, 58, 60, 60, + 36, 26, 0, 38, 36, 26, 1, 20, 18, 6, + 14, 1, 3, 13, 21, 29, 31, 42, 34, 40, + 20, 13, 2, 1, 27, 5, 21, 29, 61, 45, + 53, 65, 6, 14, 37, 2, 8, 11, 31, 25, + 27, 45, 25, 37, 37, 67, 47, 55, 75, 7, + 33, 49, 19, 4, 12, 16, 36, 24, 36, 4, + 32, 28, 44, 46, 54, 52, 26, 102, 50, 34, + 22, 6, 2, 39, 49, 65, 14, 76, 64, 64, + 48, 48, 26, 22, 10, 2, 19, 4, 12, 16, + 36, 24, 36, 4, 32, 28, 44, 46, 54, 52, + 26, 102, 50, 34, 22, 6, 2, 39, 49, 65, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 27 */ + + 90, 12, 31, 90, 12, 31, 31, 3, 42, 22, + 2, 1, 42, 80, 58, 14, 20, 8, 29, 4, + 3, 7, 38, 33, 19, 40, 30, 55, 81, 107, + 19, 31, 13, 29, 4, 3, 37, 1, 60, 4, + 13, 27, 33, 51, 55, 53, 85, 1, 25, 41, + 15, 43, 41, 71, 6, 17, 15, 25, 50, 4, + 44, 0, 0, 0, 7, 57, 67, 6, 2, 7, + 36, 17, 59, 5, 2, 15, 44, 40, 9, 2, + 21, 22, 17, 9, 1, 69, 41, 47, 45, 38, + 7, 8, 1, 13, 43, 17, 25, 3, 23, 17, + 43, 36, 19, 12, 25, 31, 7, 19, 5, 6, + 10, 2, 20, 12, 21, 0, 1, 7, 4, 17, + 52, 7, 2, 48, 46, 64, 56, 42, 51, 2, + 1, 7, 7, 35, 9, 34, 5, 5, 32, 70, + 108, 84, 36, 53, 55, 6, 74, 80, 97, 13, + 12, 53, 28, 7, 2, 32, 66, 110, 92, 50, + 59, 43, 3, 14, 26, 51, 36, 42, 36, 32, + 38, 32, 20, 36, 34, 1, 12, 20, 6, 4, + 13, 12, 2, 3, 8, 2, 10, 10, 1, 53, + 1, 3, 11, 5, 25, 44, 66, 38, 26, 36, + 40, 40, 44, 28, 27, 51, 19, 17, 13, 93, + 7, 
0, 51, 30, 4, 4, 0, 0, 13, 1, + 4, 37, 59, 31, 37, 35, 45, 21, 38, 34, + 20, 8, 4, 7, 17, 19, 29, 1, 60, 34, + 22, 6, 28, 4, 3, 17, 1, 3, 58, 32, + 22, 10, 24, 2, 7, 15, 27, 17, 64, 30, + 18, 8, 14, 7, 21, 27, 8, 72, 46, 32, + 16, 34, 8, 1, 7, 13, 124, 51, 41, 27, + 35, 35, 29, 23, 19, 17, 13, 7, 4, 27, + 25, 37, 41, 42, 39, 43, 27, 17, 13, 19, + 15, 15, 13, 11, 17, 37, 9, 9, 26, 11, + 27, 7, 9, 11, 5, 7, 9, 27, 11, 9, + 19, 17, 16, 53, 17, 28, 19, 1, 11, 9, + 0, 2, 3, 3, 0, 19, 19, 41, 96, 90, + 94, 78, 62, 68, 68, 58, 60, 52, 54, 54, + 30, 20, 3, 32, 30, 18, 11, 16, 12, 2, + 10, 5, 7, 15, 23, 31, 31, 38, 30, 36, + 16, 17, 0, 3, 31, 9, 27, 33, 65, 49, + 57, 67, 4, 10, 41, 0, 4, 13, 35, 29, + 31, 47, 27, 39, 39, 65, 49, 59, 79, 9, + 37, 51, 17, 6, 14, 16, 38, 26, 38, 4, + 34, 30, 46, 48, 56, 54, 28, 100, 46, 30, + 18, 2, 1, 43, 53, 67, 16, 76, 66, 66, + 48, 50, 28, 24, 12, 2, 17, 6, 14, 16, + 38, 26, 38, 4, 34, 30, 46, 48, 56, 54, + 28, 100, 46, 30, 18, 2, 1, 43, 53, 67, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 28 */ + + 86, 12, 31, 86, 12, 31, 29, 0, 42, 22, + 0, 5, 40, 78, 60, 14, 24, 4, 29, 6, + 3, 7, 40, 35, 23, 36, 26, 61, 85, 109, + 15, 29, 13, 29, 6, 3, 37, 0, 60, 2, + 11, 25, 31, 53, 59, 55, 85, 1, 25, 39, + 15, 43, 41, 71, 6, 17, 15, 25, 52, 4, + 44, 0, 0, 0, 7, 59, 67, 8, 0, 7, + 34, 17, 57, 1, 4, 13, 46, 42, 9, 4, + 17, 24, 15, 7, 2, 71, 41, 47, 45, 38, + 9, 10, 2, 13, 41, 15, 21, 3, 23, 17, + 43, 36, 19, 12, 23, 31, 7, 19, 5, 6, + 10, 4, 20, 12, 21, 1, 1, 7, 4, 17, + 52, 7, 2, 50, 46, 64, 56, 42, 53, 2, + 0, 7, 7, 35, 9, 36, 5, 5, 34, 72, + 110, 86, 40, 53, 57, 6, 74, 80, 99, 13, + 14, 55, 30, 7, 2, 34, 66, 110, 92, 52, + 61, 39, 5, 8, 20, 49, 34, 42, 34, 30, + 36, 32, 18, 36, 32, 3, 10, 18, 6, 4, + 13, 10, 0, 5, 6, 0, 8, 8, 1, 55, + 3, 5, 11, 9, 27, 40, 62, 34, 22, 32, + 36, 32, 38, 26, 31, 57, 23, 21, 17, 95, + 11, 1, 55, 28, 2, 2, 1, 1, 17, 5, + 0, 41, 59, 33, 39, 39, 43, 19, 40, 34, + 20, 8, 6, 5, 15, 17, 25, 0, 
62, 34, + 24, 8, 30, 6, 0, 15, 2, 1, 58, 32, + 22, 12, 26, 4, 7, 15, 25, 17, 66, 32, + 18, 8, 16, 7, 21, 25, 10, 72, 48, 32, + 16, 36, 10, 0, 7, 11, 124, 49, 39, 25, + 31, 33, 25, 21, 15, 15, 11, 3, 8, 25, + 23, 35, 39, 48, 39, 45, 27, 15, 13, 19, + 15, 15, 15, 9, 17, 39, 9, 9, 28, 11, + 27, 7, 9, 11, 7, 7, 9, 29, 13, 9, + 19, 17, 18, 55, 17, 28, 21, 1, 11, 9, + 0, 2, 5, 3, 2, 19, 19, 43, 94, 88, + 92, 74, 58, 64, 62, 52, 54, 46, 46, 48, + 22, 14, 7, 24, 22, 10, 21, 10, 8, 1, + 6, 11, 11, 19, 25, 33, 31, 32, 26, 32, + 12, 21, 3, 7, 35, 13, 31, 37, 69, 55, + 61, 69, 2, 8, 47, 3, 0, 17, 39, 31, + 33, 49, 29, 41, 39, 63, 51, 61, 81, 11, + 39, 53, 17, 6, 14, 18, 40, 26, 40, 4, + 36, 32, 48, 48, 58, 54, 30, 100, 42, 26, + 14, 1, 5, 47, 57, 71, 16, 78, 66, 68, + 50, 50, 28, 24, 12, 4, 17, 6, 14, 18, + 40, 26, 40, 4, 36, 32, 48, 48, 58, 54, + 30, 100, 42, 26, 14, 1, 5, 47, 57, 71, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 29 */ + + 84, 12, 31, 84, 12, 31, 25, 2, 42, 22, + 0, 9, 36, 76, 62, 14, 26, 0, 27, 10, + 5, 7, 42, 37, 25, 34, 22, 67, 91, 109, + 11, 27, 13, 27, 10, 5, 37, 2, 60, 2, + 11, 23, 27, 55, 61, 55, 85, 1, 23, 37, + 17, 43, 41, 71, 6, 15, 15, 23, 52, 4, + 44, 0, 0, 0, 5, 59, 67, 8, 1, 7, + 34, 15, 53, 4, 6, 11, 50, 46, 7, 6, + 13, 26, 13, 5, 8, 71, 43, 47, 47, 38, + 9, 12, 8, 11, 41, 15, 17, 3, 23, 17, + 41, 36, 19, 14, 21, 29, 5, 17, 5, 8, + 12, 6, 22, 12, 23, 1, 0, 7, 4, 17, + 54, 7, 2, 50, 48, 64, 56, 42, 53, 4, + 0, 9, 7, 33, 9, 38, 3, 3, 38, 76, + 112, 90, 44, 55, 59, 8, 76, 80, 101, 13, + 14, 55, 32, 7, 2, 38, 66, 110, 92, 56, + 63, 37, 7, 4, 14, 47, 34, 42, 34, 30, + 36, 32, 18, 36, 32, 5, 8, 18, 6, 4, + 13, 10, 0, 7, 6, 0, 8, 6, 3, 57, + 5, 5, 11, 11, 27, 36, 58, 32, 20, 30, + 32, 26, 34, 24, 35, 61, 25, 25, 21, 97, + 13, 3, 57, 26, 0, 0, 5, 5, 21, 9, + 1, 43, 59, 35, 41, 43, 39, 15, 42, 36, + 20, 8, 8, 3, 13, 17, 21, 4, 64, 36, + 24, 10, 34, 8, 2, 13, 8, 0, 60, 34, + 24, 12, 28, 6, 5, 13, 23, 15, 68, 34, + 18, 8, 
18, 5, 19, 23, 12, 72, 50, 32, + 16, 38, 12, 2, 5, 9, 124, 47, 35, 21, + 29, 31, 23, 17, 11, 11, 7, 0, 14, 23, + 21, 33, 37, 54, 37, 47, 25, 13, 11, 19, + 15, 15, 15, 9, 19, 41, 9, 9, 30, 11, + 27, 7, 9, 9, 7, 9, 9, 31, 15, 11, + 19, 17, 20, 55, 19, 30, 21, 3, 11, 9, + 0, 0, 5, 3, 2, 19, 19, 47, 92, 86, + 90, 72, 52, 58, 56, 46, 50, 40, 40, 42, + 16, 10, 11, 16, 16, 2, 31, 4, 4, 3, + 2, 15, 15, 21, 27, 35, 31, 28, 20, 26, + 8, 25, 7, 9, 39, 17, 35, 41, 73, 59, + 63, 73, 0, 6, 51, 5, 1, 19, 43, 33, + 35, 51, 31, 43, 39, 61, 55, 63, 85, 15, + 41, 55, 17, 8, 16, 20, 42, 28, 40, 6, + 38, 34, 50, 50, 60, 56, 30, 98, 38, 22, + 10, 5, 9, 53, 61, 73, 18, 78, 68, 68, + 52, 52, 30, 26, 12, 6, 17, 8, 16, 20, + 42, 28, 40, 6, 38, 34, 50, 50, 60, 56, + 30, 98, 38, 22, 10, 5, 9, 53, 61, 73, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 30 */ + + 82, 12, 31, 82, 12, 31, 21, 6, 44, 22, + 1, 13, 34, 74, 62, 14, 30, 1, 27, 12, + 5, 9, 44, 39, 27, 32, 18, 75, 95, 111, + 7, 23, 13, 27, 12, 5, 35, 4, 62, 2, + 9, 21, 25, 57, 63, 55, 87, 1, 23, 35, + 17, 45, 41, 71, 6, 15, 15, 21, 54, 4, + 44, 0, 0, 0, 5, 59, 67, 10, 3, 7, + 32, 15, 51, 8, 8, 9, 52, 48, 5, 8, + 9, 28, 11, 3, 12, 73, 43, 47, 47, 40, + 9, 14, 12, 11, 39, 13, 13, 3, 23, 17, + 41, 38, 19, 14, 19, 29, 3, 17, 5, 8, + 12, 8, 22, 12, 23, 1, 0, 7, 4, 17, + 54, 7, 2, 52, 48, 66, 56, 44, 55, 4, + 2, 9, 7, 33, 9, 40, 3, 3, 40, 78, + 114, 92, 48, 55, 61, 8, 76, 80, 103, 13, + 16, 57, 32, 7, 2, 40, 66, 110, 92, 58, + 65, 33, 9, 1, 8, 43, 34, 42, 32, 28, + 34, 30, 16, 36, 30, 7, 6, 18, 6, 4, + 15, 8, 1, 9, 4, 0, 8, 4, 3, 59, + 5, 7, 11, 13, 29, 32, 54, 28, 16, 26, + 28, 20, 28, 22, 39, 65, 29, 29, 25, 101, + 15, 5, 61, 24, 1, 1, 7, 7, 25, 13, + 5, 47, 59, 37, 43, 45, 37, 13, 46, 36, + 20, 8, 10, 1, 11, 15, 17, 6, 66, 38, + 26, 10, 36, 10, 6, 11, 12, 0, 60, 34, + 24, 14, 30, 6, 5, 11, 21, 15, 72, 36, + 18, 8, 20, 5, 19, 21, 14, 74, 52, 34, + 16, 40, 12, 4, 5, 9, 124, 43, 33, 19, + 25, 29, 19, 15, 9, 9, 
3, 4, 18, 23, + 19, 33, 35, 60, 37, 49, 25, 11, 9, 19, + 15, 15, 15, 7, 19, 43, 9, 9, 30, 11, + 27, 7, 9, 9, 9, 9, 9, 33, 17, 11, + 19, 17, 22, 57, 19, 32, 23, 3, 11, 9, + 2, 0, 7, 1, 4, 19, 19, 49, 90, 84, + 88, 68, 48, 54, 52, 40, 44, 34, 34, 36, + 10, 4, 15, 8, 8, 5, 41, 1, 0, 7, + 1, 21, 19, 23, 29, 37, 31, 24, 16, 22, + 4, 29, 11, 13, 43, 21, 39, 45, 77, 63, + 67, 75, 1, 4, 55, 9, 5, 23, 47, 37, + 39, 53, 33, 45, 39, 59, 57, 67, 87, 17, + 43, 57, 17, 8, 16, 20, 44, 30, 42, 6, + 40, 36, 52, 50, 62, 56, 32, 98, 34, 18, + 6, 9, 13, 57, 65, 75, 18, 80, 68, 70, + 52, 54, 30, 26, 14, 6, 17, 8, 16, 20, + 44, 30, 42, 6, 40, 36, 52, 50, 62, 56, + 32, 98, 34, 18, 6, 9, 13, 57, 65, 75, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 31 */ + + 80, 12, 31, 80, 12, 31, 17, 8, 44, 22, + 1, 17, 30, 72, 64, 14, 34, 5, 27, 16, + 5, 9, 46, 41, 29, 30, 14, 81, 99, 113, + 3, 21, 13, 27, 16, 5, 35, 6, 62, 2, + 7, 19, 23, 59, 65, 55, 87, 1, 23, 33, + 17, 45, 41, 71, 6, 15, 15, 19, 54, 4, + 44, 0, 0, 0, 3, 59, 67, 10, 5, 7, + 32, 13, 49, 12, 10, 7, 56, 52, 3, 10, + 5, 30, 9, 1, 16, 75, 43, 47, 47, 40, + 9, 16, 16, 11, 37, 11, 9, 3, 23, 17, + 41, 38, 19, 16, 17, 29, 1, 17, 5, 8, + 14, 10, 24, 12, 23, 1, 0, 7, 4, 17, + 54, 7, 2, 52, 48, 66, 56, 44, 55, 6, + 4, 11, 7, 33, 9, 42, 3, 1, 42, 80, + 116, 96, 52, 57, 63, 10, 76, 80, 105, 13, + 16, 57, 34, 7, 2, 42, 66, 110, 92, 60, + 67, 31, 11, 7, 2, 41, 34, 42, 32, 26, + 34, 30, 16, 36, 30, 9, 4, 18, 6, 4, + 15, 8, 3, 11, 4, 0, 8, 2, 5, 61, + 7, 9, 11, 15, 29, 28, 50, 24, 14, 22, + 24, 14, 24, 20, 43, 69, 31, 33, 29, 103, + 17, 7, 65, 22, 3, 3, 9, 11, 29, 17, + 9, 49, 59, 39, 45, 49, 33, 9, 48, 38, + 20, 8, 12, 0, 9, 13, 13, 8, 68, 40, + 28, 12, 38, 12, 8, 9, 16, 2, 62, 36, + 26, 14, 32, 8, 3, 9, 19, 15, 74, 38, + 18, 8, 22, 3, 19, 19, 16, 74, 54, 34, + 16, 42, 14, 6, 3, 7, 124, 41, 29, 15, + 21, 27, 17, 11, 5, 5, 0, 8, 22, 21, + 17, 31, 33, 66, 37, 51, 25, 9, 7, 19, + 15, 15, 15, 7, 19, 45, 9, 9, 32, 11, + 
27, 7, 9, 9, 9, 9, 9, 35, 19, 11, + 19, 17, 24, 59, 19, 34, 25, 3, 11, 9, + 2, 0, 7, 1, 6, 19, 19, 53, 88, 82, + 86, 64, 44, 48, 46, 34, 40, 28, 28, 30, + 4, 1, 19, 0, 2, 13, 51, 7, 3, 11, + 5, 25, 23, 25, 31, 39, 31, 20, 12, 18, + 0, 33, 15, 15, 47, 25, 43, 49, 81, 67, + 71, 77, 3, 2, 59, 11, 9, 25, 51, 39, + 41, 55, 35, 47, 39, 57, 59, 69, 91, 19, + 45, 59, 17, 10, 18, 22, 46, 32, 44, 6, + 42, 38, 54, 52, 64, 58, 34, 96, 30, 14, + 2, 13, 17, 61, 69, 77, 20, 80, 70, 72, + 54, 56, 32, 28, 14, 8, 17, 10, 18, 22, + 46, 32, 44, 6, 42, 38, 54, 52, 64, 58, + 34, 96, 30, 14, 2, 13, 17, 61, 69, 77, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 32 */ + + 76, 10, 33, 76, 10, 33, 15, 10, 44, 22, + 3, 21, 26, 70, 64, 14, 36, 9, 27, 18, + 7, 11, 48, 43, 33, 26, 10, 89, 105, 115, + 0, 19, 13, 27, 18, 7, 35, 6, 62, 0, + 7, 19, 21, 63, 69, 57, 89, 3, 23, 33, + 19, 47, 41, 71, 4, 15, 15, 19, 54, 2, + 44, 0, 0, 0, 3, 61, 67, 10, 7, 9, + 30, 13, 47, 16, 12, 7, 58, 54, 3, 12, + 3, 30, 7, 1, 20, 77, 45, 49, 49, 40, + 11, 16, 20, 11, 37, 11, 7, 3, 23, 17, + 41, 38, 21, 16, 17, 29, 1, 17, 5, 8, + 14, 12, 24, 12, 25, 3, 0, 7, 2, 17, + 54, 7, 2, 52, 48, 66, 56, 44, 57, 6, + 4, 13, 7, 33, 11, 42, 3, 1, 44, 82, + 116, 98, 54, 59, 67, 10, 76, 80, 107, 15, + 16, 59, 34, 9, 2, 44, 66, 108, 92, 62, + 69, 29, 15, 13, 5, 39, 32, 42, 30, 24, + 32, 28, 14, 34, 28, 11, 2, 16, 4, 2, + 17, 6, 5, 13, 2, 1, 6, 0, 7, 65, + 9, 11, 11, 19, 31, 22, 44, 20, 10, 18, + 18, 6, 18, 16, 47, 75, 35, 39, 33, 107, + 21, 11, 69, 18, 5, 5, 13, 15, 33, 21, + 13, 53, 59, 41, 47, 53, 31, 7, 50, 38, + 20, 8, 12, 0, 9, 13, 11, 10, 68, 40, + 28, 12, 40, 14, 10, 9, 20, 2, 62, 36, + 26, 14, 32, 8, 3, 9, 17, 15, 76, 40, + 18, 6, 22, 3, 19, 19, 16, 74, 54, 34, + 16, 44, 14, 6, 3, 7, 124, 39, 27, 13, + 19, 25, 15, 9, 3, 3, 2, 10, 26, 21, + 17, 31, 33, 72, 37, 55, 25, 9, 7, 19, + 15, 15, 17, 7, 21, 47, 11, 9, 32, 13, + 27, 9, 9, 9, 11, 11, 11, 37, 21, 13, + 21, 17, 26, 61, 21, 34, 27, 5, 11, 11, + 
2, 1, 9, 1, 6, 19, 19, 57, 84, 80, + 82, 60, 38, 42, 40, 28, 34, 22, 20, 24, + 3, 7, 23, 7, 5, 21, 63, 13, 9, 15, + 11, 31, 27, 29, 35, 41, 31, 14, 6, 12, + 3, 39, 19, 19, 53, 29, 49, 53, 87, 73, + 75, 81, 7, 1, 65, 15, 13, 29, 55, 43, + 45, 57, 37, 49, 41, 55, 63, 73, 95, 23, + 49, 63, 17, 10, 18, 22, 48, 32, 44, 6, + 44, 38, 56, 52, 64, 58, 34, 94, 26, 10, + 3, 19, 21, 67, 73, 81, 20, 80, 70, 72, + 54, 56, 32, 28, 14, 8, 17, 10, 18, 22, + 48, 32, 44, 6, 44, 38, 56, 52, 64, 58, + 34, 94, 26, 10, 3, 19, 21, 67, 73, 81, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 33 */ + + 74, 10, 33, 74, 10, 33, 11, 14, 46, 24, + 3, 23, 24, 70, 66, 16, 40, 11, 25, 22, + 7, 11, 52, 43, 35, 24, 8, 95, 109, 115, + 4, 15, 11, 25, 22, 7, 33, 8, 64, 0, + 5, 17, 17, 65, 71, 57, 89, 3, 21, 31, + 19, 47, 39, 69, 4, 13, 13, 17, 56, 2, + 44, 0, 0, 0, 1, 61, 67, 12, 7, 9, + 30, 11, 43, 22, 16, 5, 62, 58, 1, 16, + 0, 32, 3, 0, 26, 77, 45, 49, 49, 42, + 11, 18, 26, 9, 35, 9, 3, 3, 21, 17, + 39, 40, 21, 18, 15, 27, 0, 15, 3, 10, + 16, 14, 26, 14, 25, 3, 2, 5, 2, 15, + 56, 5, 2, 54, 50, 68, 58, 46, 57, 8, + 6, 13, 7, 31, 11, 44, 1, 0, 48, 86, + 118, 102, 58, 59, 69, 12, 78, 82, 107, 15, + 18, 59, 36, 9, 4, 48, 66, 108, 94, 66, + 71, 25, 17, 17, 11, 35, 32, 42, 30, 24, + 32, 28, 14, 34, 28, 11, 2, 16, 4, 2, + 17, 6, 5, 13, 2, 1, 6, 0, 7, 67, + 9, 11, 11, 21, 31, 18, 40, 18, 8, 16, + 14, 0, 14, 14, 49, 79, 37, 43, 35, 109, + 23, 13, 71, 16, 5, 5, 15, 17, 35, 23, + 15, 55, 59, 41, 47, 55, 27, 3, 54, 40, + 20, 8, 14, 2, 7, 11, 7, 14, 70, 42, + 30, 14, 44, 16, 14, 7, 26, 4, 64, 38, + 28, 16, 34, 10, 1, 7, 13, 13, 80, 42, + 20, 6, 24, 1, 17, 17, 18, 76, 56, 36, + 18, 46, 16, 8, 1, 5, 124, 35, 23, 9, + 15, 21, 11, 5, 0, 0, 6, 14, 32, 19, + 15, 29, 31, 80, 35, 57, 23, 7, 5, 17, + 15, 13, 17, 5, 21, 47, 11, 9, 34, 13, + 27, 9, 9, 7, 11, 11, 11, 37, 21, 13, + 21, 15, 30, 61, 21, 36, 27, 5, 11, 11, + 4, 1, 9, 0, 8, 17, 17, 59, 82, 78, + 80, 58, 34, 38, 36, 22, 30, 18, 14, 
20, + 9, 11, 27, 13, 11, 27, 73, 17, 13, 17, + 15, 35, 29, 31, 37, 41, 31, 10, 2, 8, + 7, 43, 21, 21, 57, 31, 53, 55, 91, 77, + 77, 83, 9, 3, 69, 17, 15, 31, 57, 45, + 47, 59, 37, 49, 41, 53, 65, 75, 97, 25, + 51, 65, 15, 12, 20, 24, 52, 34, 46, 8, + 48, 40, 58, 54, 66, 60, 36, 94, 24, 8, + 7, 23, 23, 71, 75, 83, 22, 82, 72, 74, + 56, 58, 34, 30, 16, 10, 15, 12, 20, 24, + 52, 34, 46, 8, 48, 40, 58, 54, 66, 60, + 36, 94, 24, 8, 7, 23, 23, 71, 75, 83, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 34 */ + + 72, 10, 33, 72, 10, 33, 7, 16, 46, 24, + 3, 27, 20, 68, 68, 16, 44, 15, 25, 24, + 7, 11, 54, 45, 37, 22, 4, 101, 113, 117, + 8, 13, 11, 25, 24, 7, 33, 10, 64, 0, + 3, 15, 15, 67, 73, 57, 89, 3, 21, 29, + 19, 47, 39, 69, 4, 13, 13, 15, 56, 2, + 44, 0, 0, 0, 0, 61, 67, 12, 9, 9, + 30, 9, 41, 26, 18, 3, 66, 62, 0, 18, + 4, 34, 1, 2, 30, 79, 45, 49, 49, 42, + 11, 20, 30, 9, 33, 7, 0, 3, 21, 17, + 39, 40, 21, 18, 13, 27, 2, 15, 3, 10, + 16, 16, 28, 14, 25, 3, 2, 5, 2, 15, + 56, 5, 2, 54, 50, 68, 58, 46, 59, 10, + 8, 15, 7, 31, 11, 46, 1, 0, 50, 88, + 120, 106, 62, 61, 71, 12, 78, 82, 109, 15, + 18, 61, 38, 9, 4, 50, 66, 108, 94, 68, + 73, 23, 19, 23, 17, 33, 32, 42, 30, 22, + 30, 28, 14, 34, 26, 13, 0, 16, 4, 2, + 17, 4, 7, 15, 2, 1, 6, 1, 9, 69, + 11, 13, 11, 23, 33, 14, 36, 14, 4, 12, + 10, 5, 10, 12, 53, 83, 41, 47, 39, 111, + 25, 15, 75, 14, 7, 7, 17, 21, 39, 27, + 19, 59, 59, 43, 49, 59, 25, 1, 56, 42, + 20, 8, 16, 4, 5, 9, 3, 16, 72, 44, + 32, 16, 46, 18, 16, 5, 30, 6, 64, 38, + 28, 16, 36, 12, 0, 5, 11, 13, 82, 44, + 20, 6, 26, 0, 17, 15, 20, 76, 58, 36, + 18, 48, 18, 10, 0, 3, 124, 33, 19, 5, + 11, 19, 9, 1, 4, 2, 10, 18, 36, 17, + 13, 27, 29, 86, 35, 59, 23, 5, 3, 17, + 15, 13, 17, 5, 21, 49, 11, 9, 36, 13, + 27, 9, 9, 7, 11, 11, 11, 39, 23, 13, + 21, 15, 32, 63, 21, 38, 29, 5, 11, 11, + 4, 1, 9, 0, 10, 17, 17, 63, 80, 76, + 78, 54, 30, 32, 30, 16, 26, 12, 8, 14, + 15, 17, 31, 21, 19, 35, 83, 23, 17, 21, + 19, 39, 33, 33, 39, 
43, 31, 6, 1, 4, + 11, 47, 25, 25, 61, 35, 57, 59, 95, 81, + 81, 85, 11, 5, 73, 21, 19, 35, 61, 47, + 49, 61, 39, 51, 41, 51, 67, 77, 101, 27, + 53, 67, 15, 12, 22, 26, 54, 36, 48, 8, + 50, 42, 60, 54, 68, 62, 38, 92, 20, 4, + 11, 27, 27, 75, 79, 85, 24, 82, 72, 76, + 58, 60, 34, 32, 16, 12, 15, 12, 22, 26, + 54, 36, 48, 8, 50, 42, 60, 54, 68, 62, + 38, 92, 20, 4, 11, 27, 27, 75, 79, 85, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 35 */ + + 70, 10, 33, 70, 10, 33, 3, 20, 48, 24, + 5, 31, 18, 66, 68, 16, 48, 17, 25, 28, + 7, 13, 56, 47, 39, 20, 0, 109, 117, 119, + 12, 9, 11, 25, 28, 7, 31, 12, 66, 0, + 1, 13, 13, 69, 75, 57, 91, 3, 21, 27, + 19, 49, 39, 69, 4, 13, 13, 13, 58, 2, + 44, 0, 0, 0, 0, 61, 67, 14, 11, 9, + 28, 9, 39, 30, 20, 1, 68, 64, 2, 20, + 8, 36, 0, 4, 34, 81, 45, 49, 49, 44, + 11, 22, 34, 9, 31, 5, 4, 3, 21, 17, + 39, 42, 21, 20, 11, 27, 4, 15, 3, 10, + 18, 18, 28, 14, 25, 3, 2, 5, 2, 15, + 56, 5, 2, 56, 50, 70, 58, 48, 59, 10, + 10, 15, 7, 31, 11, 48, 1, 2, 52, 90, + 122, 108, 66, 61, 73, 14, 78, 82, 111, 15, + 20, 61, 38, 9, 4, 52, 66, 108, 94, 70, + 75, 19, 21, 29, 23, 29, 32, 42, 28, 20, + 30, 26, 12, 34, 26, 15, 1, 16, 4, 2, + 19, 4, 9, 17, 0, 1, 6, 3, 9, 71, + 11, 15, 11, 25, 33, 10, 32, 10, 2, 8, + 6, 11, 4, 10, 57, 87, 43, 51, 43, 115, + 27, 17, 79, 12, 9, 9, 19, 23, 43, 31, + 23, 61, 59, 45, 51, 61, 21, 2, 60, 42, + 20, 8, 18, 6, 3, 7, 0, 18, 74, 46, + 34, 16, 48, 20, 20, 3, 34, 6, 66, 40, + 30, 18, 38, 12, 0, 3, 9, 13, 86, 46, + 20, 6, 28, 0, 17, 13, 22, 78, 60, 38, + 18, 50, 18, 12, 0, 3, 124, 29, 17, 3, + 7, 17, 5, 0, 6, 6, 14, 22, 40, 17, + 11, 27, 27, 92, 35, 61, 23, 3, 1, 17, + 15, 13, 17, 3, 21, 51, 11, 9, 36, 13, + 27, 9, 9, 7, 13, 11, 11, 41, 25, 13, + 21, 15, 34, 65, 21, 40, 31, 5, 11, 11, + 6, 1, 11, 2, 12, 17, 17, 65, 78, 74, + 76, 50, 26, 28, 26, 10, 20, 6, 2, 8, + 21, 23, 35, 29, 25, 43, 93, 29, 21, 25, + 23, 45, 37, 35, 41, 45, 31, 2, 5, 0, + 15, 51, 29, 27, 65, 39, 61, 63, 99, 85, + 85, 87, 
13, 7, 77, 23, 23, 37, 65, 51, + 53, 63, 41, 53, 41, 49, 69, 81, 103, 29, + 55, 69, 15, 14, 22, 26, 56, 38, 50, 8, + 52, 44, 62, 56, 70, 62, 40, 92, 16, 0, + 15, 31, 31, 79, 83, 87, 24, 84, 74, 78, + 58, 62, 36, 32, 18, 12, 15, 14, 22, 26, + 56, 38, 50, 8, 52, 44, 62, 56, 70, 62, + 40, 92, 16, 0, 15, 31, 31, 79, 83, 87, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 36 */ + + 66, 10, 33, 66, 10, 33, 1, 22, 48, 24, + 5, 35, 14, 64, 70, 16, 50, 21, 25, 30, + 9, 13, 58, 49, 43, 16, 3, 115, 123, 121, + 16, 7, 11, 25, 30, 9, 31, 14, 66, 1, + 1, 11, 9, 71, 79, 59, 91, 3, 19, 25, + 21, 49, 39, 69, 4, 13, 13, 13, 58, 2, + 44, 0, 0, 0, 2, 63, 67, 14, 13, 9, + 28, 7, 37, 34, 22, 0, 72, 68, 2, 22, + 12, 38, 2, 6, 40, 81, 47, 49, 51, 44, + 13, 24, 40, 7, 31, 5, 8, 3, 21, 17, + 39, 42, 21, 20, 9, 27, 4, 13, 3, 10, + 18, 20, 30, 14, 27, 5, 2, 5, 2, 15, + 56, 5, 2, 56, 50, 70, 58, 48, 61, 12, + 10, 17, 7, 29, 11, 50, 0, 2, 56, 94, + 124, 112, 70, 63, 75, 14, 80, 82, 113, 15, + 20, 63, 40, 9, 4, 54, 66, 108, 94, 74, + 77, 17, 23, 35, 29, 27, 30, 42, 28, 20, + 28, 26, 12, 34, 24, 17, 3, 14, 4, 2, + 19, 2, 9, 19, 0, 3, 4, 5, 11, 73, + 13, 17, 11, 29, 35, 6, 28, 6, 1, 6, + 2, 19, 0, 8, 61, 93, 47, 55, 47, 117, + 31, 19, 81, 10, 11, 11, 23, 27, 47, 35, + 25, 65, 59, 47, 53, 65, 19, 4, 62, 44, + 20, 8, 20, 8, 1, 7, 4, 22, 76, 46, + 34, 18, 52, 22, 22, 1, 40, 8, 66, 40, + 30, 18, 40, 14, 2, 3, 7, 13, 88, 48, + 20, 6, 30, 2, 17, 11, 24, 78, 62, 38, + 18, 52, 20, 14, 2, 1, 124, 27, 13, 0, + 5, 15, 3, 4, 10, 8, 16, 26, 46, 15, + 9, 25, 25, 98, 33, 63, 21, 1, 1, 17, + 15, 13, 19, 3, 23, 53, 11, 9, 38, 13, + 27, 9, 9, 5, 13, 13, 11, 43, 27, 15, + 21, 15, 36, 67, 23, 40, 31, 7, 11, 11, + 6, 3, 11, 2, 12, 17, 17, 69, 76, 72, + 74, 48, 20, 22, 20, 4, 16, 0, 5, 2, + 29, 27, 39, 37, 33, 51, 103, 35, 25, 29, + 27, 49, 41, 39, 43, 47, 31, 3, 11, 5, + 19, 55, 33, 31, 69, 43, 65, 67, 103, 91, + 87, 91, 15, 9, 83, 27, 25, 41, 69, 53, + 55, 65, 43, 55, 41, 47, 73, 83, 
107, 33, + 57, 71, 15, 14, 24, 28, 58, 38, 50, 10, + 54, 46, 64, 56, 72, 64, 40, 90, 12, 3, + 19, 35, 35, 85, 87, 91, 26, 84, 74, 78, + 60, 62, 36, 34, 18, 14, 15, 14, 24, 28, + 58, 38, 50, 10, 54, 46, 64, 56, 72, 64, + 40, 90, 12, 3, 19, 35, 35, 85, 87, 91, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 37 */ + + 64, 10, 33, 64, 10, 33, 2, 26, 48, 24, + 7, 39, 12, 62, 72, 16, 54, 25, 23, 34, + 9, 13, 60, 51, 45, 14, 7, 121, 125, 121, + 20, 5, 11, 23, 34, 9, 31, 16, 66, 1, + 0, 9, 7, 73, 81, 59, 91, 3, 19, 23, + 21, 49, 39, 69, 4, 11, 13, 11, 60, 2, + 44, 0, 0, 0, 2, 63, 67, 16, 15, 9, + 26, 7, 33, 40, 24, 2, 74, 70, 4, 24, + 16, 40, 4, 8, 44, 83, 47, 49, 51, 44, + 13, 26, 44, 7, 29, 3, 12, 3, 21, 17, + 37, 42, 21, 22, 7, 25, 6, 13, 3, 12, + 20, 22, 30, 14, 27, 5, 4, 5, 2, 15, + 58, 5, 2, 58, 52, 70, 58, 48, 61, 12, + 12, 17, 7, 29, 11, 52, 0, 4, 58, 96, + 124, 114, 74, 63, 77, 16, 80, 82, 115, 15, + 22, 63, 42, 9, 4, 58, 66, 108, 94, 76, + 79, 13, 25, 39, 35, 25, 30, 42, 26, 18, + 28, 26, 10, 34, 24, 19, 5, 14, 4, 2, + 19, 2, 11, 21, 1, 3, 4, 7, 11, 75, + 15, 17, 11, 31, 35, 2, 24, 4, 3, 2, + 1, 25, 5, 6, 65, 97, 49, 59, 51, 119, + 33, 21, 85, 8, 13, 13, 25, 29, 51, 39, + 29, 67, 59, 49, 55, 69, 15, 8, 64, 44, + 20, 8, 22, 10, 0, 5, 8, 24, 78, 48, + 36, 20, 54, 24, 26, 0, 44, 10, 68, 42, + 32, 20, 42, 16, 2, 1, 5, 11, 90, 50, + 20, 6, 32, 2, 15, 9, 26, 78, 64, 38, + 18, 54, 22, 16, 2, 0, 124, 25, 11, 2, + 1, 13, 0, 6, 14, 12, 20, 30, 50, 13, + 7, 23, 23, 104, 33, 65, 21, 0, 0, 17, + 15, 13, 19, 1, 23, 55, 11, 9, 40, 13, + 27, 9, 9, 5, 15, 13, 11, 45, 29, 15, + 21, 15, 38, 67, 23, 42, 33, 7, 11, 11, + 6, 3, 13, 2, 14, 17, 17, 71, 74, 70, + 72, 44, 16, 18, 14, 1, 10, 5, 11, 3, + 35, 33, 43, 45, 39, 59, 113, 41, 29, 31, + 31, 55, 45, 41, 45, 49, 31, 7, 15, 9, + 23, 59, 37, 33, 73, 47, 69, 71, 107, 95, + 91, 93, 17, 11, 87, 29, 29, 43, 73, 55, + 57, 67, 45, 57, 41, 45, 75, 85, 109, 35, + 59, 73, 15, 16, 24, 30, 60, 40, 52, 10, + 56, 48, 
66, 58, 74, 64, 42, 90, 8, 7, + 23, 39, 39, 89, 91, 93, 26, 86, 76, 80, + 62, 64, 38, 34, 18, 16, 15, 16, 24, 30, + 60, 40, 52, 10, 56, 48, 66, 58, 74, 64, + 42, 90, 8, 7, 23, 39, 39, 89, 91, 93, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 38 */ + + 62, 10, 35, 62, 10, 35, 6, 28, 50, 24, + 7, 41, 8, 60, 72, 18, 58, 27, 23, 36, + 9, 15, 62, 53, 47, 12, 9, 125, 125, 123, + 24, 1, 11, 23, 36, 9, 29, 18, 68, 1, + 2, 7, 5, 77, 83, 59, 93, 3, 19, 23, + 21, 51, 37, 69, 2, 11, 13, 9, 60, 2, + 44, 0, 0, 0, 4, 63, 67, 16, 15, 11, + 26, 5, 31, 44, 26, 4, 78, 74, 6, 26, + 20, 42, 6, 8, 48, 85, 47, 49, 51, 46, + 13, 28, 48, 7, 27, 1, 16, 3, 21, 17, + 37, 44, 23, 22, 7, 25, 8, 13, 1, 12, + 20, 24, 32, 14, 27, 5, 4, 5, 2, 15, + 58, 5, 2, 58, 52, 72, 60, 50, 63, 14, + 14, 19, 7, 29, 11, 54, 0, 4, 60, 98, + 124, 118, 76, 65, 79, 16, 80, 82, 115, 17, + 22, 65, 42, 11, 6, 60, 66, 108, 96, 78, + 81, 11, 27, 45, 41, 21, 30, 42, 26, 16, + 26, 24, 10, 32, 22, 19, 5, 14, 2, 2, + 21, 0, 13, 23, 1, 3, 4, 7, 13, 79, + 15, 19, 11, 33, 37, 3, 20, 0, 7, 1, + 5, 31, 9, 2, 69, 101, 53, 63, 55, 123, + 35, 25, 89, 4, 15, 15, 27, 33, 55, 43, + 33, 71, 59, 51, 57, 71, 13, 10, 68, 46, + 20, 8, 24, 10, 0, 3, 10, 26, 80, 50, + 38, 20, 56, 26, 28, 2, 48, 10, 68, 42, + 32, 20, 44, 16, 4, 0, 3, 11, 94, 52, + 20, 6, 32, 4, 15, 9, 26, 80, 64, 40, + 18, 56, 22, 16, 4, 0, 124, 21, 7, 6, + 2, 9, 2, 10, 16, 14, 24, 34, 54, 13, + 5, 23, 23, 110, 33, 67, 21, 0, 2, 17, + 15, 13, 19, 1, 23, 57, 11, 9, 40, 13, + 27, 9, 9, 5, 15, 13, 11, 47, 31, 15, + 21, 15, 40, 69, 23, 44, 35, 7, 11, 11, + 8, 3, 13, 4, 16, 17, 17, 75, 72, 68, + 70, 40, 12, 12, 10, 7, 6, 11, 17, 9, + 41, 39, 47, 51, 47, 67, 123, 45, 35, 35, + 35, 59, 49, 43, 47, 51, 31, 11, 19, 13, + 27, 63, 39, 37, 77, 51, 75, 75, 111, 99, + 95, 95, 19, 15, 91, 33, 33, 47, 77, 59, + 61, 69, 47, 59, 43, 43, 77, 89, 113, 37, + 63, 75, 13, 16, 26, 30, 62, 42, 54, 10, + 58, 50, 68, 58, 76, 66, 44, 88, 4, 11, + 27, 43, 43, 93, 95, 95, 
28, 86, 76, 82, + 62, 66, 38, 36, 20, 16, 13, 16, 26, 30, + 62, 42, 54, 10, 58, 50, 68, 58, 76, 66, + 44, 88, 4, 11, 27, 43, 43, 93, 95, 95, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 39 */ + + 60, 10, 35, 60, 10, 35, 10, 32, 50, 24, + 9, 45, 6, 58, 74, 18, 60, 31, 23, 40, + 11, 15, 64, 55, 49, 10, 13, 125, 125, 125, + 28, 0, 11, 23, 40, 11, 29, 20, 68, 1, + 2, 5, 1, 79, 85, 59, 93, 3, 17, 21, + 23, 51, 37, 69, 2, 11, 13, 7, 62, 2, + 44, 0, 0, 0, 4, 63, 67, 18, 17, 11, + 24, 5, 29, 48, 28, 6, 80, 76, 8, 28, + 24, 44, 8, 10, 54, 85, 49, 49, 53, 46, + 13, 30, 54, 5, 27, 1, 20, 3, 21, 17, + 37, 44, 23, 24, 5, 25, 10, 11, 1, 12, + 22, 26, 32, 14, 29, 5, 4, 5, 2, 15, + 58, 5, 2, 60, 52, 72, 60, 50, 63, 14, + 14, 19, 7, 27, 11, 56, 2, 6, 64, 102, + 124, 120, 80, 65, 81, 18, 82, 82, 117, 17, + 24, 65, 44, 11, 6, 62, 66, 108, 96, 82, + 83, 7, 29, 51, 47, 19, 30, 42, 24, 16, + 26, 24, 8, 32, 22, 21, 7, 14, 2, 2, + 21, 0, 13, 25, 3, 3, 4, 9, 13, 81, + 17, 21, 11, 35, 37, 7, 16, 3, 9, 3, + 9, 37, 15, 0, 73, 105, 55, 67, 59, 125, + 37, 27, 91, 2, 17, 17, 31, 35, 59, 47, + 35, 73, 59, 53, 59, 75, 9, 14, 70, 46, + 20, 8, 26, 12, 2, 3, 14, 30, 82, 52, + 38, 22, 60, 28, 32, 4, 54, 12, 70, 44, + 34, 22, 46, 18, 4, 2, 1, 11, 96, 54, + 20, 6, 34, 4, 15, 7, 28, 80, 66, 40, + 18, 58, 24, 18, 4, 2, 124, 19, 5, 8, + 4, 7, 6, 12, 20, 18, 28, 38, 60, 11, + 3, 21, 21, 116, 31, 69, 19, 2, 4, 17, + 15, 13, 19, 0, 25, 59, 11, 9, 42, 13, + 27, 9, 9, 3, 17, 15, 11, 49, 33, 17, + 21, 15, 42, 71, 25, 46, 35, 9, 11, 11, + 8, 5, 15, 4, 16, 17, 17, 77, 70, 66, + 68, 38, 6, 8, 4, 13, 0, 17, 23, 15, + 47, 43, 51, 59, 53, 75, 125, 51, 39, 39, + 39, 65, 53, 45, 49, 53, 31, 15, 25, 19, + 31, 67, 43, 39, 81, 55, 79, 79, 115, 103, + 97, 99, 21, 17, 95, 35, 35, 49, 81, 61, + 63, 71, 49, 61, 43, 41, 81, 91, 115, 41, + 65, 77, 13, 18, 26, 32, 64, 44, 54, 12, + 60, 52, 70, 60, 78, 66, 44, 88, 0, 15, + 31, 47, 47, 99, 99, 97, 28, 88, 78, 82, + 64, 68, 40, 36, 20, 18, 13, 18, 
26, 32, + 64, 44, 54, 12, 60, 52, 70, 60, 78, 66, + 44, 88, 0, 15, 31, 47, 47, 99, 99, 97, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 40 */ + + 56, 8, 35, 56, 8, 35, 12, 34, 50, 24, + 9, 49, 2, 56, 74, 18, 64, 35, 23, 42, + 11, 17, 66, 57, 53, 6, 17, 125, 125, 125, + 32, 2, 11, 23, 42, 11, 29, 20, 68, 3, + 4, 3, 0, 81, 89, 61, 95, 3, 17, 19, + 23, 53, 37, 69, 2, 11, 13, 7, 62, 2, + 44, 0, 0, 0, 6, 65, 67, 18, 19, 11, + 24, 3, 27, 52, 30, 6, 84, 80, 8, 30, + 28, 44, 10, 12, 58, 87, 49, 51, 53, 46, + 15, 30, 58, 5, 25, 0, 22, 3, 21, 17, + 37, 44, 23, 24, 3, 25, 10, 11, 1, 12, + 22, 28, 34, 14, 29, 7, 4, 5, 0, 15, + 58, 5, 2, 60, 52, 72, 60, 50, 65, 16, + 16, 21, 7, 27, 11, 58, 2, 6, 66, 104, + 124, 124, 84, 67, 83, 18, 82, 82, 119, 17, + 24, 67, 44, 11, 6, 64, 66, 108, 96, 84, + 85, 5, 31, 57, 55, 17, 28, 42, 24, 14, + 24, 22, 8, 32, 20, 23, 9, 12, 2, 0, + 23, 1, 15, 27, 3, 5, 2, 11, 15, 83, + 19, 23, 11, 39, 39, 11, 12, 7, 13, 7, + 15, 45, 19, 1, 77, 111, 59, 73, 63, 125, + 41, 29, 95, 0, 19, 19, 33, 39, 63, 51, + 39, 77, 59, 55, 61, 79, 7, 16, 72, 48, + 20, 8, 26, 14, 4, 1, 18, 32, 82, 52, + 40, 22, 62, 30, 34, 6, 58, 12, 70, 44, + 34, 22, 46, 18, 6, 2, 0, 11, 98, 56, + 20, 6, 36, 6, 15, 5, 30, 80, 68, 40, + 18, 60, 24, 20, 6, 2, 124, 17, 1, 12, + 8, 5, 8, 16, 22, 20, 30, 42, 64, 11, + 1, 21, 19, 122, 31, 71, 19, 4, 4, 17, + 15, 13, 21, 0, 25, 61, 11, 9, 42, 13, + 27, 11, 9, 3, 17, 15, 13, 51, 35, 17, + 23, 15, 44, 73, 25, 46, 37, 9, 11, 13, + 8, 5, 15, 4, 18, 17, 17, 81, 68, 64, + 66, 34, 2, 2, 1, 19, 3, 23, 31, 21, + 55, 49, 55, 67, 61, 83, 125, 57, 43, 43, + 45, 69, 57, 49, 53, 55, 31, 21, 29, 23, + 35, 73, 47, 43, 87, 59, 83, 83, 119, 109, + 101, 101, 25, 19, 101, 39, 39, 53, 85, 65, + 67, 73, 51, 63, 43, 39, 83, 95, 119, 43, + 67, 79, 13, 18, 28, 32, 66, 44, 56, 12, + 62, 52, 72, 60, 78, 68, 46, 86, 3, 19, + 35, 51, 51, 103, 103, 101, 30, 88, 78, 84, + 64, 68, 40, 38, 20, 18, 13, 18, 28, 32, + 66, 44, 56, 12, 62, 52, 72, 60, 
78, 68, + 46, 86, 3, 19, 35, 51, 51, 103, 103, 101, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 41 */ + + 54, 8, 35, 54, 8, 35, 16, 36, 52, 24, + 9, 53, 1, 56, 76, 18, 68, 37, 21, 46, + 11, 17, 68, 57, 55, 4, 21, 125, 125, 125, + 36, 6, 9, 21, 46, 11, 27, 22, 70, 3, + 6, 1, 2, 83, 91, 61, 95, 3, 17, 17, + 23, 53, 37, 69, 2, 9, 11, 5, 62, 2, + 44, 0, 0, 0, 8, 65, 67, 18, 21, 11, + 24, 1, 23, 58, 32, 8, 88, 84, 10, 32, + 32, 46, 14, 14, 62, 89, 49, 51, 53, 48, + 15, 32, 62, 5, 23, 2, 26, 3, 19, 17, + 35, 46, 23, 26, 1, 23, 12, 11, 1, 14, + 24, 30, 36, 14, 29, 7, 6, 3, 0, 15, + 60, 5, 2, 60, 54, 74, 60, 52, 65, 18, + 18, 23, 7, 27, 11, 60, 2, 8, 68, 106, + 124, 124, 88, 69, 85, 20, 82, 84, 121, 17, + 24, 67, 46, 11, 6, 68, 66, 108, 96, 86, + 87, 3, 33, 61, 61, 13, 28, 42, 24, 12, + 24, 22, 8, 32, 20, 25, 11, 12, 2, 0, + 23, 1, 17, 27, 3, 5, 2, 13, 17, 85, + 19, 23, 11, 41, 39, 15, 8, 9, 15, 11, + 19, 51, 23, 3, 81, 115, 61, 77, 65, 125, + 43, 31, 99, 1, 21, 21, 35, 43, 65, 55, + 43, 79, 59, 55, 63, 81, 3, 20, 76, 50, + 20, 8, 28, 16, 6, 0, 22, 34, 84, 54, + 42, 24, 64, 32, 36, 8, 62, 14, 72, 46, + 36, 22, 48, 20, 8, 4, 4, 9, 102, 58, + 22, 6, 38, 8, 13, 3, 32, 82, 70, 42, + 20, 62, 26, 22, 8, 4, 124, 13, 2, 16, + 12, 3, 10, 20, 26, 24, 34, 46, 68, 9, + 0, 19, 17, 124, 31, 73, 19, 6, 6, 15, + 15, 13, 21, 0, 25, 63, 11, 9, 44, 13, + 27, 11, 9, 3, 17, 15, 13, 51, 37, 17, + 23, 13, 48, 73, 25, 48, 39, 9, 11, 13, + 10, 5, 15, 6, 20, 15, 15, 85, 66, 62, + 64, 30, 1, 3, 5, 25, 7, 27, 37, 25, + 61, 55, 59, 75, 67, 89, 125, 63, 47, 45, + 49, 73, 59, 51, 55, 57, 31, 25, 33, 27, + 39, 77, 51, 45, 91, 63, 87, 87, 123, 113, + 105, 103, 27, 21, 105, 41, 43, 55, 89, 67, + 69, 75, 53, 63, 43, 37, 85, 97, 123, 45, + 69, 81, 13, 20, 30, 34, 70, 46, 58, 12, + 64, 54, 74, 62, 80, 70, 48, 84, 5, 23, + 39, 55, 55, 107, 107, 103, 32, 88, 80, 86, + 66, 70, 42, 40, 22, 20, 13, 20, 30, 34, + 70, 46, 58, 12, 64, 54, 74, 62, 80, 70, + 48, 84, 5, 23, 39, 55, 55, 
107, 107, 103, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 42 */ + + 52, 8, 35, 52, 8, 35, 20, 40, 52, 24, + 11, 57, 3, 54, 78, 18, 70, 41, 21, 48, + 13, 17, 70, 59, 57, 2, 25, 125, 125, 125, + 40, 8, 9, 21, 48, 13, 27, 24, 70, 3, + 6, 0, 6, 85, 93, 61, 95, 3, 15, 15, + 25, 53, 37, 69, 2, 9, 11, 3, 64, 2, + 44, 0, 0, 0, 8, 65, 67, 20, 23, 11, + 22, 1, 21, 62, 34, 10, 90, 86, 12, 34, + 36, 48, 16, 16, 68, 89, 51, 51, 55, 48, + 15, 34, 68, 3, 23, 2, 30, 3, 19, 17, + 35, 46, 23, 26, 0, 23, 14, 9, 1, 14, + 24, 32, 36, 14, 31, 7, 6, 3, 0, 15, + 60, 5, 2, 62, 54, 74, 60, 52, 67, 18, + 18, 23, 7, 25, 11, 62, 4, 8, 72, 110, + 124, 124, 92, 69, 87, 20, 84, 84, 123, 17, + 26, 69, 48, 11, 6, 70, 66, 108, 96, 90, + 89, 0, 35, 67, 67, 11, 28, 42, 22, 12, + 22, 22, 6, 32, 18, 27, 13, 12, 2, 0, + 23, 3, 17, 29, 5, 5, 2, 15, 17, 87, + 21, 25, 11, 43, 41, 19, 4, 13, 19, 13, + 23, 57, 29, 5, 85, 119, 65, 81, 69, 125, + 45, 33, 101, 3, 23, 23, 39, 45, 69, 59, + 45, 83, 59, 57, 65, 85, 1, 22, 78, 50, + 20, 8, 30, 18, 8, 0, 26, 38, 86, 56, + 42, 26, 68, 34, 40, 10, 68, 16, 72, 46, + 36, 24, 50, 22, 8, 6, 6, 9, 104, 60, + 22, 6, 40, 8, 13, 1, 34, 82, 72, 42, + 20, 64, 28, 24, 8, 6, 124, 11, 4, 18, + 14, 1, 14, 22, 30, 26, 38, 50, 74, 7, + 2, 17, 15, 124, 29, 75, 17, 8, 8, 15, + 15, 13, 21, 2, 27, 65, 11, 9, 46, 13, + 27, 11, 9, 1, 19, 17, 13, 53, 39, 19, + 23, 13, 50, 75, 27, 50, 39, 11, 11, 13, + 10, 7, 17, 6, 20, 15, 15, 87, 64, 60, + 62, 28, 7, 7, 11, 31, 13, 33, 43, 31, + 67, 59, 63, 83, 75, 97, 125, 69, 51, 49, + 53, 79, 63, 53, 57, 59, 31, 29, 39, 33, + 43, 81, 55, 49, 95, 67, 91, 91, 125, 117, + 107, 107, 29, 23, 109, 45, 45, 59, 93, 69, + 71, 77, 55, 65, 43, 35, 89, 99, 125, 49, + 71, 83, 13, 20, 30, 36, 72, 48, 58, 14, + 66, 56, 76, 62, 82, 70, 48, 84, 9, 27, + 43, 59, 59, 113, 111, 105, 32, 90, 80, 86, + 68, 72, 42, 40, 22, 22, 13, 20, 30, 36, + 72, 48, 58, 14, 66, 56, 76, 62, 82, 70, + 48, 84, 9, 27, 43, 59, 59, 113, 111, 105, + }, + + { + 
/* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 43 */ + + 50, 8, 37, 50, 8, 37, 24, 42, 54, 24, + 11, 59, 7, 52, 78, 20, 74, 43, 21, 52, + 13, 19, 72, 61, 59, 0, 27, 125, 125, 125, + 44, 12, 9, 21, 52, 13, 25, 26, 72, 3, + 8, 2, 8, 89, 95, 61, 97, 3, 15, 15, + 25, 55, 35, 69, 0, 9, 11, 1, 64, 2, + 44, 0, 0, 0, 10, 65, 67, 20, 23, 13, + 22, 0, 19, 66, 36, 12, 94, 90, 14, 36, + 40, 50, 18, 16, 72, 91, 51, 51, 55, 50, + 15, 36, 72, 3, 21, 4, 34, 3, 19, 17, + 35, 48, 25, 28, 0, 23, 16, 9, 0, 14, + 26, 34, 38, 14, 31, 7, 6, 3, 0, 15, + 60, 5, 2, 62, 54, 76, 62, 54, 67, 20, + 20, 25, 7, 25, 11, 64, 4, 10, 74, 112, + 124, 124, 94, 71, 89, 22, 84, 84, 123, 19, + 26, 69, 48, 13, 8, 72, 66, 108, 98, 92, + 91, 2, 37, 73, 73, 7, 28, 42, 22, 10, + 22, 20, 6, 30, 18, 27, 13, 12, 0, 0, + 25, 3, 19, 31, 5, 5, 2, 15, 19, 91, + 21, 27, 11, 45, 41, 25, 0, 17, 21, 17, + 27, 63, 33, 9, 89, 123, 67, 85, 73, 125, + 47, 37, 105, 7, 25, 25, 41, 49, 73, 63, + 49, 85, 59, 59, 67, 87, 2, 26, 82, 52, + 20, 8, 32, 18, 8, 2, 28, 40, 88, 58, + 44, 26, 70, 36, 42, 12, 72, 16, 74, 48, + 38, 24, 52, 22, 10, 8, 8, 9, 108, 62, + 22, 6, 40, 10, 13, 1, 34, 84, 72, 44, + 20, 66, 28, 24, 10, 6, 124, 7, 8, 22, + 18, 2, 16, 26, 32, 30, 42, 54, 78, 7, + 4, 17, 15, 124, 29, 77, 17, 8, 10, 15, + 15, 13, 21, 2, 27, 67, 11, 9, 46, 13, + 27, 11, 9, 1, 19, 17, 13, 55, 41, 19, + 23, 13, 52, 77, 27, 52, 41, 11, 11, 13, + 12, 7, 17, 8, 22, 15, 15, 91, 62, 58, + 60, 24, 11, 13, 15, 37, 17, 39, 49, 37, + 73, 65, 67, 89, 81, 105, 125, 73, 57, 53, + 57, 83, 67, 55, 59, 61, 31, 33, 43, 37, + 47, 85, 57, 51, 99, 71, 97, 95, 125, 121, + 111, 109, 31, 27, 113, 47, 49, 61, 97, 73, + 75, 79, 57, 67, 45, 33, 91, 103, 125, 51, + 75, 85, 11, 22, 32, 36, 74, 50, 60, 14, + 68, 58, 78, 64, 84, 72, 50, 82, 13, 31, + 47, 63, 63, 117, 115, 107, 34, 90, 82, 88, + 68, 74, 44, 42, 24, 22, 11, 22, 32, 36, + 74, 50, 60, 14, 68, 58, 78, 64, 84, 72, + 50, 82, 13, 31, 47, 63, 63, 117, 115, 107, + }, + + { + /* Context Tables 
for P, SP, B Slices :: cabac_init_idc = 2, qp = 44 */ + + 46, 8, 37, 46, 8, 37, 26, 46, 54, 24, + 13, 63, 9, 50, 80, 20, 78, 47, 21, 54, + 13, 19, 74, 63, 63, 3, 31, 125, 125, 125, + 48, 14, 9, 21, 54, 13, 25, 28, 72, 5, + 10, 4, 10, 91, 99, 63, 97, 3, 15, 13, + 25, 55, 35, 69, 0, 9, 11, 1, 66, 2, + 44, 0, 0, 0, 10, 67, 67, 22, 25, 13, + 20, 0, 17, 70, 38, 14, 96, 92, 14, 38, + 44, 52, 20, 18, 76, 93, 51, 51, 55, 50, + 17, 38, 76, 3, 19, 6, 38, 3, 19, 17, + 35, 48, 25, 28, 2, 23, 16, 9, 0, 14, + 26, 36, 38, 14, 31, 9, 6, 3, 0, 15, + 60, 5, 2, 64, 54, 76, 62, 54, 69, 20, + 22, 25, 7, 25, 11, 66, 4, 10, 76, 114, + 124, 124, 98, 71, 91, 22, 84, 84, 125, 19, + 28, 71, 50, 13, 8, 74, 66, 108, 98, 94, + 93, 6, 39, 79, 79, 5, 26, 42, 20, 8, + 20, 20, 4, 30, 16, 29, 15, 10, 0, 0, + 25, 5, 21, 33, 7, 7, 0, 17, 19, 93, + 23, 29, 11, 49, 43, 29, 3, 21, 25, 21, + 31, 71, 39, 11, 93, 125, 71, 89, 77, 125, + 51, 39, 109, 9, 27, 27, 43, 51, 77, 67, + 53, 89, 59, 61, 69, 91, 4, 28, 84, 52, + 20, 8, 34, 20, 10, 4, 32, 42, 90, 58, + 46, 28, 72, 38, 46, 14, 76, 18, 74, 48, + 38, 26, 54, 24, 10, 8, 10, 9, 110, 64, + 22, 6, 42, 10, 13, 0, 36, 84, 74, 44, + 20, 68, 30, 26, 10, 8, 124, 5, 10, 24, + 22, 4, 20, 28, 36, 32, 44, 58, 82, 5, + 6, 15, 13, 124, 29, 79, 17, 10, 10, 15, + 15, 13, 23, 4, 27, 69, 11, 9, 48, 13, + 27, 11, 9, 1, 21, 17, 13, 57, 43, 19, + 23, 13, 54, 79, 27, 52, 43, 11, 11, 13, + 12, 7, 19, 8, 24, 15, 15, 93, 60, 56, + 58, 20, 15, 17, 21, 43, 23, 45, 57, 43, + 81, 71, 71, 97, 89, 113, 125, 79, 61, 57, + 61, 89, 71, 59, 61, 63, 31, 39, 47, 41, + 51, 89, 61, 55, 103, 75, 101, 99, 125, 125, + 115, 111, 33, 29, 119, 51, 53, 65, 101, 75, + 77, 81, 59, 69, 45, 31, 93, 105, 125, 53, + 77, 87, 11, 22, 32, 38, 76, 50, 62, 14, + 70, 60, 80, 64, 86, 72, 52, 82, 17, 35, + 51, 67, 67, 121, 119, 111, 34, 92, 82, 90, + 70, 74, 44, 42, 24, 24, 11, 22, 32, 38, + 76, 50, 62, 14, 70, 60, 80, 64, 86, 72, + 52, 82, 17, 35, 51, 67, 67, 121, 119, 111, + }, + + { + /* Context Tables for P, 
SP, B Slices :: cabac_init_idc = 2, qp = 45 */ + + 44, 8, 37, 44, 8, 37, 30, 48, 54, 24, + 13, 67, 13, 48, 82, 20, 80, 51, 19, 58, + 15, 19, 76, 65, 65, 5, 35, 125, 125, 125, + 52, 16, 9, 19, 58, 15, 25, 30, 72, 5, + 10, 6, 14, 93, 101, 63, 97, 3, 13, 11, + 27, 55, 35, 69, 0, 7, 11, 0, 66, 2, + 44, 0, 0, 0, 12, 67, 67, 22, 27, 13, + 20, 2, 13, 76, 40, 16, 100, 96, 16, 40, + 48, 54, 22, 20, 82, 93, 53, 51, 57, 50, + 17, 40, 82, 1, 19, 6, 42, 3, 19, 17, + 33, 48, 25, 30, 4, 21, 18, 7, 0, 16, + 28, 38, 40, 14, 33, 9, 8, 3, 0, 15, + 62, 5, 2, 64, 56, 76, 62, 54, 69, 22, + 22, 27, 7, 23, 11, 68, 6, 12, 80, 118, + 124, 124, 102, 73, 93, 24, 86, 84, 125, 19, + 28, 71, 52, 13, 8, 78, 66, 108, 98, 98, + 95, 8, 41, 83, 85, 3, 26, 42, 20, 8, + 20, 20, 4, 30, 16, 31, 17, 10, 0, 0, + 25, 5, 21, 35, 7, 7, 0, 19, 21, 95, + 25, 29, 11, 51, 43, 33, 7, 23, 27, 23, + 35, 77, 43, 13, 97, 125, 73, 93, 81, 125, + 53, 41, 111, 11, 29, 29, 47, 55, 81, 71, + 55, 91, 59, 63, 71, 95, 8, 32, 86, 54, + 20, 8, 36, 22, 12, 4, 36, 46, 92, 60, + 46, 30, 76, 40, 48, 16, 82, 20, 76, 50, + 40, 26, 56, 26, 12, 10, 12, 7, 112, 66, + 22, 6, 44, 12, 11, 2, 38, 84, 76, 44, + 20, 70, 32, 28, 12, 10, 124, 3, 14, 28, + 24, 6, 22, 32, 40, 36, 48, 62, 88, 3, + 8, 13, 11, 124, 27, 81, 15, 12, 12, 15, + 15, 13, 23, 4, 29, 71, 11, 9, 50, 13, + 27, 11, 9, 0, 21, 19, 13, 59, 45, 21, + 23, 13, 56, 79, 29, 54, 43, 13, 11, 13, + 12, 9, 19, 8, 24, 15, 15, 97, 58, 54, + 56, 18, 21, 23, 27, 49, 27, 51, 63, 49, + 87, 75, 75, 105, 95, 121, 125, 85, 65, 59, + 65, 93, 75, 61, 63, 65, 31, 43, 53, 47, + 55, 93, 65, 57, 107, 79, 105, 103, 125, 125, + 117, 115, 35, 31, 123, 53, 55, 67, 105, 77, + 79, 83, 61, 71, 45, 29, 97, 107, 125, 57, + 79, 89, 11, 24, 34, 40, 78, 52, 62, 16, + 72, 62, 82, 66, 88, 74, 52, 80, 21, 39, + 55, 71, 71, 125, 123, 113, 36, 92, 84, 90, + 72, 76, 46, 44, 24, 26, 11, 24, 34, 40, + 78, 52, 62, 16, 72, 62, 82, 66, 88, 74, + 52, 80, 21, 39, 55, 71, 71, 125, 123, 113, + }, + + { + /* Context Tables for P, 
SP, B Slices :: cabac_init_idc = 2, qp = 46 */ + + 42, 8, 37, 42, 8, 37, 34, 52, 56, 24, + 15, 71, 15, 46, 82, 20, 84, 53, 19, 60, + 15, 21, 78, 67, 67, 7, 39, 125, 125, 125, + 56, 20, 9, 19, 60, 15, 23, 32, 74, 5, + 12, 8, 16, 95, 103, 63, 99, 3, 13, 9, + 27, 57, 35, 69, 0, 7, 11, 2, 68, 2, + 44, 0, 0, 0, 12, 67, 67, 24, 29, 13, + 18, 2, 11, 80, 42, 18, 102, 98, 18, 42, + 52, 56, 24, 22, 86, 95, 53, 51, 57, 52, + 17, 42, 86, 1, 17, 8, 46, 3, 19, 17, + 33, 50, 25, 30, 6, 21, 20, 7, 0, 16, + 28, 40, 40, 14, 33, 9, 8, 3, 0, 15, + 62, 5, 2, 66, 56, 78, 62, 56, 71, 22, + 24, 27, 7, 23, 11, 70, 6, 12, 82, 120, + 124, 124, 106, 73, 95, 24, 86, 84, 125, 19, + 30, 73, 52, 13, 8, 80, 66, 108, 98, 100, + 97, 12, 43, 89, 91, 0, 26, 42, 18, 6, + 18, 18, 2, 30, 14, 33, 19, 10, 0, 0, + 27, 7, 23, 37, 9, 7, 0, 21, 21, 97, + 25, 31, 11, 53, 45, 37, 11, 27, 31, 27, + 39, 83, 49, 15, 101, 125, 77, 97, 85, 125, + 55, 43, 115, 13, 31, 31, 49, 57, 85, 75, + 59, 95, 59, 65, 73, 97, 10, 34, 90, 54, + 20, 8, 38, 24, 14, 6, 40, 48, 94, 62, + 48, 30, 78, 42, 52, 18, 86, 20, 76, 50, + 40, 28, 58, 26, 12, 12, 14, 7, 116, 68, + 22, 6, 46, 12, 11, 4, 40, 86, 78, 46, + 20, 72, 32, 30, 12, 10, 124, 0, 16, 30, + 28, 8, 26, 34, 42, 38, 52, 66, 92, 3, + 10, 13, 9, 124, 27, 83, 15, 14, 14, 15, + 15, 13, 23, 6, 29, 73, 11, 9, 50, 13, + 27, 11, 9, 0, 23, 19, 13, 61, 47, 21, + 23, 13, 58, 81, 29, 56, 45, 13, 11, 13, + 14, 9, 21, 10, 26, 15, 15, 99, 56, 52, + 54, 14, 25, 27, 31, 55, 33, 57, 69, 55, + 93, 81, 79, 113, 103, 125, 125, 91, 69, 63, + 69, 99, 79, 63, 65, 67, 31, 47, 57, 51, + 59, 97, 69, 61, 111, 83, 109, 107, 125, 125, + 121, 117, 37, 33, 125, 57, 59, 71, 109, 81, + 83, 85, 63, 73, 45, 27, 99, 111, 125, 59, + 81, 91, 11, 24, 34, 40, 80, 54, 64, 16, + 74, 64, 84, 66, 90, 74, 54, 80, 25, 43, + 59, 75, 75, 125, 125, 115, 36, 94, 84, 92, + 72, 78, 46, 44, 26, 26, 11, 24, 34, 40, + 80, 54, 64, 16, 74, 64, 84, 66, 90, 74, + 54, 80, 25, 43, 59, 75, 75, 125, 125, 115, + }, + + { + /* Context Tables 
for P, SP, B Slices :: cabac_init_idc = 2, qp = 47 */ + + 40, 8, 37, 40, 8, 37, 38, 54, 56, 24, + 15, 75, 19, 44, 84, 20, 88, 57, 19, 64, + 15, 21, 80, 69, 69, 9, 43, 125, 125, 125, + 60, 22, 9, 19, 64, 15, 23, 34, 74, 5, + 14, 10, 18, 97, 105, 63, 99, 3, 13, 7, + 27, 57, 35, 69, 0, 7, 11, 4, 68, 2, + 44, 0, 0, 0, 14, 67, 67, 24, 31, 13, + 18, 4, 9, 84, 44, 20, 106, 102, 20, 44, + 56, 58, 26, 24, 90, 97, 53, 51, 57, 52, + 17, 44, 90, 1, 15, 10, 50, 3, 19, 17, + 33, 50, 25, 32, 8, 21, 22, 7, 0, 16, + 30, 42, 42, 14, 33, 9, 8, 3, 0, 15, + 62, 5, 2, 66, 56, 78, 62, 56, 71, 24, + 26, 29, 7, 23, 11, 72, 6, 14, 84, 122, + 124, 124, 110, 75, 97, 26, 86, 84, 125, 19, + 30, 73, 54, 13, 8, 82, 66, 108, 98, 102, + 99, 14, 45, 95, 97, 2, 26, 42, 18, 4, + 18, 18, 2, 30, 14, 35, 21, 10, 0, 0, + 27, 7, 25, 39, 9, 7, 0, 23, 23, 99, + 27, 33, 11, 55, 45, 41, 15, 31, 33, 31, + 43, 89, 53, 17, 105, 125, 79, 101, 89, 125, + 57, 45, 119, 15, 33, 33, 51, 61, 89, 79, + 63, 97, 59, 67, 75, 101, 14, 38, 92, 56, + 20, 8, 40, 26, 16, 8, 44, 50, 96, 64, + 50, 32, 80, 44, 54, 20, 90, 22, 78, 52, + 42, 28, 60, 28, 14, 14, 16, 7, 118, 70, + 22, 6, 48, 14, 11, 6, 42, 86, 80, 46, + 20, 74, 34, 32, 14, 12, 124, 2, 20, 34, + 32, 10, 28, 38, 46, 42, 56, 70, 96, 1, + 12, 11, 7, 124, 27, 85, 15, 16, 16, 15, + 15, 13, 23, 6, 29, 75, 11, 9, 52, 13, + 27, 11, 9, 0, 23, 19, 13, 63, 49, 21, + 23, 13, 60, 83, 29, 58, 47, 13, 11, 13, + 14, 9, 21, 10, 28, 15, 15, 103, 54, 50, + 52, 10, 29, 33, 37, 61, 37, 63, 75, 61, + 99, 87, 83, 121, 109, 125, 125, 97, 73, 67, + 73, 103, 83, 65, 67, 69, 31, 51, 61, 55, + 63, 101, 73, 63, 115, 87, 113, 111, 125, 125, + 125, 119, 39, 35, 125, 59, 63, 73, 113, 83, + 85, 87, 65, 75, 45, 25, 101, 113, 125, 61, + 83, 93, 11, 26, 36, 42, 82, 56, 66, 16, + 76, 66, 86, 68, 92, 76, 56, 78, 29, 47, + 63, 79, 79, 125, 125, 117, 38, 94, 86, 94, + 74, 80, 48, 46, 26, 28, 11, 26, 36, 42, + 82, 56, 66, 16, 76, 66, 86, 68, 92, 76, + 56, 78, 29, 47, 63, 79, 79, 125, 125, 117, + }, + + { + /* 
Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 48 */ + + 36, 6, 39, 36, 6, 39, 40, 56, 56, 24, + 17, 79, 23, 42, 84, 20, 90, 61, 19, 66, + 17, 23, 82, 71, 73, 13, 47, 125, 125, 125, + 64, 24, 9, 19, 66, 17, 23, 34, 74, 7, + 14, 10, 20, 101, 109, 65, 101, 5, 13, 7, + 29, 59, 35, 69, 1, 7, 11, 4, 68, 0, + 44, 0, 0, 0, 14, 69, 67, 24, 33, 15, + 16, 4, 7, 88, 46, 20, 108, 104, 20, 46, + 58, 58, 28, 24, 94, 99, 55, 53, 59, 52, + 19, 44, 94, 1, 15, 10, 52, 3, 19, 17, + 33, 50, 27, 32, 8, 21, 22, 7, 0, 16, + 30, 44, 42, 14, 35, 11, 8, 3, 1, 15, + 62, 5, 2, 66, 56, 78, 62, 56, 73, 24, + 26, 31, 7, 23, 13, 72, 6, 14, 86, 124, + 124, 124, 112, 77, 101, 26, 86, 84, 125, 21, + 30, 75, 54, 15, 8, 84, 66, 106, 98, 104, + 101, 16, 49, 101, 105, 4, 24, 42, 16, 2, + 16, 16, 0, 28, 12, 37, 23, 8, 1, 1, + 29, 9, 27, 41, 11, 9, 1, 25, 25, 103, + 29, 35, 11, 59, 47, 47, 21, 35, 37, 35, + 49, 97, 59, 21, 109, 125, 83, 107, 93, 125, + 61, 49, 123, 19, 35, 35, 55, 65, 93, 83, + 67, 101, 59, 69, 77, 105, 16, 40, 94, 56, + 20, 8, 40, 26, 16, 8, 46, 52, 96, 64, + 50, 32, 82, 46, 56, 20, 94, 22, 78, 52, + 42, 28, 60, 28, 14, 14, 18, 7, 120, 72, + 22, 4, 48, 14, 11, 6, 42, 86, 80, 46, + 20, 76, 34, 32, 14, 12, 124, 4, 22, 36, + 34, 12, 30, 40, 48, 44, 58, 72, 100, 1, + 12, 11, 7, 124, 27, 89, 15, 16, 16, 15, + 15, 13, 25, 6, 31, 77, 13, 9, 52, 15, + 27, 13, 9, 0, 25, 21, 15, 65, 51, 23, + 25, 13, 62, 85, 31, 58, 49, 15, 11, 15, + 14, 11, 23, 10, 28, 15, 15, 107, 50, 48, + 48, 6, 35, 39, 43, 67, 43, 69, 83, 67, + 107, 93, 87, 125, 117, 125, 125, 103, 79, 71, + 79, 109, 87, 69, 71, 71, 31, 57, 67, 61, + 67, 107, 77, 67, 121, 91, 119, 115, 125, 125, + 125, 123, 43, 39, 125, 63, 67, 77, 117, 87, + 89, 89, 67, 77, 47, 23, 105, 117, 125, 65, + 87, 97, 11, 26, 36, 42, 84, 56, 66, 16, + 78, 66, 88, 68, 92, 76, 56, 76, 33, 51, + 69, 85, 83, 125, 125, 121, 38, 94, 86, 94, + 74, 80, 48, 46, 26, 28, 11, 26, 36, 42, + 84, 56, 66, 16, 78, 66, 88, 68, 92, 76, + 56, 76, 33, 51, 69, 85, 83, 
125, 125, 121, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 49 */ + + 34, 6, 39, 34, 6, 39, 44, 60, 58, 26, + 17, 81, 25, 42, 86, 22, 94, 63, 17, 70, + 17, 23, 86, 71, 75, 15, 49, 125, 125, 125, + 68, 28, 7, 17, 70, 17, 21, 36, 76, 7, + 16, 12, 24, 103, 111, 65, 101, 5, 11, 5, + 29, 59, 33, 67, 1, 5, 9, 6, 70, 0, + 44, 0, 0, 0, 16, 69, 67, 26, 33, 15, + 16, 6, 3, 94, 50, 22, 112, 108, 22, 50, + 62, 60, 32, 26, 100, 99, 55, 53, 59, 54, + 19, 46, 100, 0, 13, 12, 56, 3, 17, 17, + 31, 52, 27, 34, 10, 19, 24, 5, 2, 18, + 32, 46, 44, 16, 35, 11, 10, 1, 1, 13, + 64, 3, 2, 68, 58, 80, 64, 58, 73, 26, + 28, 31, 7, 21, 13, 74, 8, 16, 90, 124, + 124, 124, 116, 77, 103, 28, 88, 86, 125, 21, + 32, 75, 56, 15, 10, 88, 66, 106, 100, 108, + 103, 20, 51, 105, 111, 8, 24, 42, 16, 2, + 16, 16, 0, 28, 12, 37, 23, 8, 1, 1, + 29, 9, 27, 41, 11, 9, 1, 25, 25, 105, + 29, 35, 11, 61, 47, 51, 25, 37, 39, 37, + 53, 103, 63, 23, 111, 125, 85, 111, 95, 125, + 63, 51, 125, 21, 35, 35, 57, 67, 95, 85, + 69, 103, 59, 69, 77, 107, 20, 44, 98, 58, + 20, 8, 42, 28, 18, 10, 50, 56, 98, 66, + 52, 34, 86, 48, 60, 22, 100, 24, 80, 54, + 44, 30, 62, 30, 16, 16, 22, 5, 124, 74, + 24, 4, 50, 16, 9, 8, 44, 88, 82, 48, + 22, 78, 36, 34, 16, 14, 124, 8, 26, 40, + 38, 16, 34, 44, 52, 48, 62, 76, 106, 0, + 14, 9, 5, 124, 25, 91, 13, 18, 18, 13, + 15, 11, 25, 8, 31, 77, 13, 9, 54, 15, + 27, 13, 9, 2, 25, 21, 15, 65, 51, 23, + 25, 11, 66, 85, 31, 60, 49, 15, 11, 15, + 16, 11, 23, 12, 30, 13, 13, 109, 48, 46, + 46, 4, 39, 43, 47, 73, 47, 73, 89, 71, + 113, 97, 91, 125, 123, 125, 125, 107, 83, 73, + 83, 113, 89, 71, 73, 71, 31, 61, 71, 65, + 71, 111, 79, 69, 125, 93, 123, 117, 125, 125, + 125, 125, 45, 41, 125, 65, 69, 79, 119, 89, + 91, 91, 67, 77, 47, 21, 107, 119, 125, 67, + 89, 99, 9, 28, 38, 44, 88, 58, 68, 18, + 82, 68, 90, 70, 94, 78, 58, 76, 35, 53, + 73, 89, 85, 125, 125, 123, 40, 96, 88, 96, + 76, 82, 50, 48, 28, 30, 9, 28, 38, 44, + 88, 58, 68, 18, 82, 68, 90, 70, 94, 
78, + 58, 76, 35, 53, 73, 89, 85, 125, 125, 123, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 50 */ + + 32, 6, 39, 32, 6, 39, 48, 62, 58, 26, + 17, 85, 29, 40, 88, 22, 98, 67, 17, 72, + 17, 23, 88, 73, 77, 17, 53, 125, 125, 125, + 72, 30, 7, 17, 72, 17, 21, 38, 76, 7, + 18, 14, 26, 105, 113, 65, 101, 5, 11, 3, + 29, 59, 33, 67, 1, 5, 9, 8, 70, 0, + 44, 0, 0, 0, 18, 69, 67, 26, 35, 15, + 16, 8, 1, 98, 52, 24, 116, 112, 24, 52, + 66, 62, 34, 28, 104, 101, 55, 53, 59, 54, + 19, 48, 104, 0, 11, 14, 60, 3, 17, 17, + 31, 52, 27, 34, 12, 19, 26, 5, 2, 18, + 32, 48, 46, 16, 35, 11, 10, 1, 1, 13, + 64, 3, 2, 68, 58, 80, 64, 58, 75, 28, + 30, 33, 7, 21, 13, 76, 8, 16, 92, 124, + 124, 124, 120, 79, 105, 28, 88, 86, 125, 21, + 32, 77, 58, 15, 10, 90, 66, 106, 100, 110, + 105, 22, 53, 111, 117, 10, 24, 42, 16, 0, + 14, 16, 0, 28, 10, 39, 25, 8, 1, 1, + 29, 11, 29, 43, 11, 9, 1, 27, 27, 107, + 31, 37, 11, 63, 49, 55, 29, 41, 43, 41, + 57, 109, 67, 25, 115, 125, 89, 115, 99, 125, + 65, 53, 125, 23, 37, 37, 59, 71, 99, 89, + 73, 107, 59, 71, 79, 111, 22, 46, 100, 60, + 20, 8, 44, 30, 20, 12, 54, 58, 100, 68, + 54, 36, 88, 50, 62, 24, 104, 26, 80, 54, + 44, 30, 64, 32, 18, 18, 24, 5, 124, 76, + 24, 4, 52, 18, 9, 10, 46, 88, 84, 48, + 22, 80, 38, 36, 18, 16, 124, 10, 30, 44, + 42, 18, 36, 48, 56, 50, 66, 80, 110, 2, + 16, 7, 3, 124, 25, 93, 13, 20, 20, 13, + 15, 11, 25, 8, 31, 79, 13, 9, 56, 15, + 27, 13, 9, 2, 25, 21, 15, 67, 53, 23, + 25, 11, 68, 87, 31, 62, 51, 15, 11, 15, + 16, 11, 23, 12, 32, 13, 13, 113, 46, 44, + 44, 0, 43, 49, 53, 79, 51, 79, 95, 77, + 119, 103, 95, 125, 125, 125, 125, 113, 87, 77, + 87, 117, 93, 73, 75, 73, 31, 65, 75, 69, + 75, 115, 83, 73, 125, 97, 125, 121, 125, 125, + 125, 125, 47, 43, 125, 69, 73, 83, 123, 91, + 93, 93, 69, 79, 47, 19, 109, 121, 125, 69, + 91, 101, 9, 28, 40, 46, 90, 60, 70, 18, + 84, 70, 92, 70, 96, 80, 60, 74, 39, 57, + 77, 93, 89, 125, 125, 125, 42, 96, 88, 98, + 78, 84, 50, 50, 28, 32, 9, 28, 40, 
46, + 90, 60, 70, 18, 84, 70, 92, 70, 96, 80, + 60, 74, 39, 57, 77, 93, 89, 125, 125, 125, + }, + + { + /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 51 */ + + 30, 6, 39, 30, 6, 39, 52, 66, 60, 26, + 19, 89, 31, 38, 88, 22, 102, 69, 17, 76, + 17, 25, 90, 75, 79, 19, 57, 125, 125, 125, + 76, 34, 7, 17, 76, 17, 19, 40, 78, 7, + 20, 16, 28, 107, 115, 65, 103, 5, 11, 1, + 29, 61, 33, 67, 1, 5, 9, 10, 72, 0, + 44, 0, 0, 0, 18, 69, 67, 28, 37, 15, + 14, 8, 0, 102, 54, 26, 118, 114, 26, 54, + 70, 64, 36, 30, 108, 103, 55, 53, 59, 56, + 19, 50, 108, 0, 9, 16, 64, 3, 17, 17, + 31, 54, 27, 36, 14, 19, 28, 5, 2, 18, + 34, 50, 46, 16, 35, 11, 10, 1, 1, 13, + 64, 3, 2, 70, 58, 82, 64, 60, 75, 28, + 32, 33, 7, 21, 13, 78, 8, 18, 94, 124, + 124, 124, 124, 79, 107, 30, 88, 86, 125, 21, + 34, 77, 58, 15, 10, 92, 66, 106, 100, 112, + 107, 26, 55, 117, 123, 14, 24, 42, 14, 1, + 14, 14, 1, 28, 10, 41, 27, 8, 1, 1, + 31, 11, 31, 45, 13, 9, 1, 29, 27, 109, + 31, 39, 11, 65, 49, 59, 33, 45, 45, 45, + 61, 115, 73, 27, 119, 125, 91, 119, 103, 125, + 67, 55, 125, 25, 39, 39, 61, 73, 103, 93, + 77, 109, 59, 73, 81, 113, 26, 50, 104, 60, + 20, 8, 46, 32, 22, 14, 58, 60, 102, 70, + 56, 36, 90, 52, 66, 26, 108, 26, 82, 56, + 46, 32, 66, 32, 18, 20, 26, 5, 124, 78, + 24, 4, 54, 18, 9, 12, 48, 90, 86, 50, + 22, 82, 38, 38, 18, 16, 124, 14, 32, 46, + 46, 20, 40, 50, 58, 54, 70, 84, 114, 2, + 18, 7, 1, 124, 25, 95, 13, 22, 22, 13, + 15, 11, 25, 10, 31, 81, 13, 9, 56, 15, + 27, 13, 9, 2, 27, 21, 15, 69, 55, 23, + 25, 11, 70, 89, 31, 64, 53, 15, 11, 15, + 18, 11, 25, 14, 34, 13, 13, 115, 44, 42, + 42, 3, 47, 53, 57, 85, 57, 85, 101, 83, + 125, 109, 99, 125, 125, 125, 125, 119, 91, 81, + 91, 123, 97, 75, 77, 75, 31, 69, 79, 73, + 79, 119, 87, 75, 125, 101, 125, 125, 125, 125, + 125, 125, 49, 45, 125, 71, 77, 85, 125, 95, + 97, 95, 71, 81, 47, 17, 111, 125, 125, 71, + 93, 103, 9, 30, 40, 46, 92, 62, 72, 18, + 86, 72, 94, 72, 98, 80, 62, 74, 43, 61, + 81, 97, 93, 125, 125, 125, 42, 
98, 90, 100, + 78, 86, 52, 50, 30, 32, 9, 30, 40, 46, + 92, 62, 72, 18, 86, 72, 94, 72, 98, 80, + 62, 74, 43, 61, 81, 97, 93, 125, 125, 125, + }, + + }, + + { + + { + /* Context Tables for I, SI Slices :: qp = 0 */ + + 124, 18, 21, 124, 18, 21, 125, 81, 20, 18, + 24, 60, 122, 124, 108, 28, 109, 12, 29, 3, + 2, 28, 19, 26, 1, 40, 124, 7, 53, 81, + 125, 81, 7, 29, 3, 2, 45, 63, 4, 36, + 11, 35, 65, 16, 7, 45, 49, 10, 25, 61, + 18, 11, 35, 49, 7, 21, 21, 33, 17, 10, + 44, 0, 0, 0, 39, 45, 67, 17, 44, 2, + 104, 16, 11, 125, 77, 37, 21, 87, 125, 125, + 125, 63, 125, 101, 125, 119, 103, 117, 103, 0, + 9, 41, 81, 13, 59, 53, 125, 21, 67, 55, + 125, 14, 37, 25, 123, 59, 47, 27, 15, 0, + 9, 41, 2, 3, 4, 14, 5, 1, 4, 29, + 26, 22, 56, 38, 50, 36, 34, 38, 92, 24, + 26, 88, 60, 2, 89, 73, 75, 55, 61, 49, + 41, 45, 39, 47, 61, 13, 17, 21, 8, 77, + 73, 63, 23, 17, 23, 15, 34, 11, 2, 3, + 52, 17, 12, 18, 2, 17, 124, 108, 76, 90, + 108, 88, 52, 90, 68, 60, 66, 36, 10, 2, + 4, 50, 36, 48, 42, 38, 36, 44, 28, 58, + 42, 16, 24, 34, 51, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 124, 92, + 124, 120, 82, 124, 124, 124, 124, 120, 116, 124, + 94, 82, 30, 52, 6, 9, 67, 15, 42, 26, + 18, 2, 10, 0, 17, 21, 55, 7, 72, 48, + 38, 34, 1, 9, 29, 27, 45, 57, 16, 6, + 2, 3, 19, 25, 33, 49, 93, 67, 41, 31, + 19, 21, 45, 65, 67, 107, 29, 60, 30, 20, + 2, 15, 31, 45, 53, 67, 124, 59, 41, 31, + 5, 15, 2, 6, 8, 23, 2, 10, 5, 31, + 15, 9, 38, 2, 54, 46, 72, 68, 38, 54, + 62, 42, 30, 2, 34, 1, 81, 67, 65, 49, + 43, 43, 43, 49, 5, 27, 25, 25, 10, 25, + 39, 71, 63, 63, 25, 21, 13, 23, 9, 3, + 19, 2, 2, 9, 23, 16, 1, 13, 114, 88, + 94, 98, 100, 104, 96, 94, 80, 80, 86, 74, + 38, 46, 32, 92, 84, 82, 72, 68, 56, 26, + 12, 0, 27, 37, 61, 11, 91, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 122, 100, + 56, 10, 124, 124, 66, 124, 124, 124, 120, 124, + 116, 104, 116, 102, 104, 68, 74, 48, 5, 84, + 64, 26, 113, 97, 101, 43, 57, 51, 15, 35, + 33, 9, 13, 14, 9, 26, 21, 124, 124, 
124, + 124, 120, 114, 58, 18, 37, 23, 80, 58, 40, + 18, 16, 4, 1, 9, 57, 85, 67, 53, 53, + 49, 19, 31, 45, 19, 13, 11, 5, 1, 10, + 8, 124, 124, 124, 124, 120, 108, 86, 54, 7, + }, + + { + /* Context Tables for I, SI Slices :: qp = 1 */ + + 124, 18, 21, 124, 18, 21, 123, 77, 22, 20, + 24, 58, 120, 124, 108, 28, 103, 12, 27, 1, + 2, 28, 17, 24, 3, 40, 124, 9, 55, 81, + 121, 77, 7, 27, 1, 2, 43, 59, 6, 36, + 9, 33, 63, 16, 7, 43, 49, 10, 23, 59, + 18, 11, 33, 49, 5, 19, 19, 31, 15, 10, + 44, 0, 0, 0, 37, 45, 67, 15, 44, 2, + 104, 16, 11, 123, 75, 37, 19, 83, 123, 123, + 123, 59, 123, 97, 123, 115, 101, 115, 101, 2, + 7, 39, 79, 11, 57, 51, 123, 19, 65, 53, + 123, 16, 35, 23, 119, 57, 45, 25, 13, 2, + 7, 39, 4, 1, 4, 14, 3, 1, 4, 27, + 26, 22, 56, 38, 50, 36, 34, 38, 90, 24, + 26, 86, 58, 2, 87, 71, 73, 53, 59, 47, + 39, 43, 37, 45, 57, 13, 17, 19, 6, 75, + 71, 63, 21, 17, 21, 13, 34, 9, 2, 3, + 50, 15, 12, 16, 2, 17, 124, 108, 76, 90, + 108, 88, 52, 90, 68, 58, 66, 36, 10, 2, + 4, 50, 36, 48, 42, 38, 34, 44, 28, 56, + 40, 16, 22, 32, 51, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 120, 88, + 124, 118, 80, 124, 124, 124, 124, 116, 112, 122, + 90, 78, 30, 50, 4, 9, 67, 13, 44, 28, + 20, 4, 10, 0, 15, 19, 53, 5, 74, 50, + 40, 34, 0, 7, 27, 25, 43, 55, 18, 8, + 4, 1, 17, 23, 31, 47, 89, 65, 37, 29, + 17, 19, 43, 63, 65, 103, 27, 62, 32, 22, + 4, 13, 29, 43, 51, 65, 124, 57, 39, 29, + 5, 13, 2, 8, 10, 21, 4, 12, 3, 29, + 15, 9, 38, 4, 54, 46, 70, 68, 38, 52, + 60, 42, 30, 2, 32, 1, 79, 65, 63, 47, + 41, 41, 41, 47, 5, 25, 23, 23, 10, 23, + 37, 69, 61, 63, 25, 19, 13, 21, 9, 3, + 17, 2, 2, 7, 21, 16, 1, 13, 114, 88, + 94, 98, 98, 104, 96, 94, 80, 80, 86, 74, + 38, 44, 30, 90, 82, 80, 70, 66, 54, 26, + 12, 0, 25, 35, 59, 11, 89, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 122, 118, 96, + 54, 10, 124, 124, 64, 124, 124, 124, 116, 124, + 112, 100, 112, 98, 100, 66, 70, 46, 7, 82, + 62, 24, 109, 93, 97, 41, 55, 49, 11, 33, + 31, 9, 11, 18, 5, 30, 
19, 124, 124, 124, + 124, 116, 110, 54, 14, 39, 21, 82, 58, 40, + 18, 18, 4, 1, 9, 55, 83, 65, 51, 51, + 45, 17, 29, 43, 17, 11, 9, 3, 0, 12, + 8, 124, 124, 124, 124, 118, 106, 82, 52, 7, + }, + + { + /* Context Tables for I, SI Slices :: qp = 2 */ + + 124, 18, 21, 124, 18, 21, 119, 75, 22, 20, + 24, 56, 118, 122, 108, 28, 99, 12, 25, 0, + 2, 26, 17, 22, 5, 38, 120, 13, 57, 83, + 115, 75, 7, 25, 0, 2, 43, 57, 6, 34, + 9, 33, 61, 16, 7, 43, 49, 10, 23, 57, + 18, 11, 33, 49, 5, 19, 19, 31, 15, 10, + 44, 0, 0, 0, 35, 45, 67, 15, 42, 2, + 104, 16, 11, 121, 73, 37, 19, 81, 119, 119, + 121, 57, 119, 95, 119, 113, 99, 113, 99, 4, + 7, 37, 77, 11, 57, 49, 119, 19, 65, 53, + 121, 16, 35, 23, 117, 57, 43, 25, 13, 2, + 7, 37, 4, 1, 2, 14, 3, 1, 4, 27, + 26, 22, 54, 38, 48, 36, 34, 38, 86, 24, + 26, 82, 56, 0, 85, 69, 71, 51, 57, 45, + 37, 41, 37, 43, 55, 13, 17, 19, 4, 75, + 69, 63, 21, 17, 19, 13, 32, 7, 2, 3, + 48, 13, 10, 14, 2, 19, 120, 106, 74, 88, + 106, 86, 50, 88, 68, 56, 64, 36, 10, 2, + 4, 48, 34, 46, 40, 36, 32, 42, 26, 52, + 38, 14, 20, 30, 51, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 116, 82, + 124, 114, 76, 124, 124, 124, 124, 112, 108, 116, + 86, 74, 28, 46, 2, 11, 67, 13, 44, 28, + 20, 4, 10, 0, 15, 19, 51, 5, 74, 50, + 40, 34, 2, 7, 25, 25, 41, 53, 20, 10, + 4, 1, 15, 23, 31, 45, 87, 63, 35, 27, + 17, 19, 41, 61, 63, 101, 27, 62, 32, 22, + 4, 11, 27, 41, 49, 63, 124, 57, 39, 29, + 5, 13, 2, 8, 10, 21, 4, 12, 1, 29, + 15, 9, 36, 4, 52, 44, 68, 66, 38, 50, + 58, 42, 30, 0, 30, 3, 77, 63, 61, 47, + 41, 41, 39, 45, 5, 25, 23, 23, 8, 23, + 37, 69, 59, 63, 25, 19, 13, 19, 9, 3, + 15, 2, 2, 7, 19, 14, 1, 15, 112, 88, + 94, 96, 96, 102, 94, 92, 78, 78, 84, 72, + 36, 42, 28, 86, 80, 76, 66, 64, 52, 24, + 10, 0, 25, 35, 59, 13, 87, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 118, 114, 92, + 52, 8, 124, 120, 62, 124, 124, 124, 112, 120, + 108, 96, 108, 94, 96, 62, 66, 42, 9, 78, + 58, 20, 107, 91, 95, 39, 53, 47, 7, 31, + 29, 9, 9, 20, 
3, 32, 17, 124, 124, 124, + 124, 110, 104, 48, 10, 41, 21, 82, 58, 40, + 18, 18, 4, 1, 9, 53, 81, 63, 49, 49, + 43, 15, 27, 41, 15, 9, 7, 3, 2, 12, + 8, 124, 124, 124, 122, 114, 102, 78, 48, 9, + }, + + { + /* Context Tables for I, SI Slices :: qp = 3 */ + + 124, 18, 21, 124, 18, 21, 115, 71, 24, 20, + 22, 52, 114, 120, 108, 28, 95, 12, 23, 2, + 2, 24, 17, 20, 7, 38, 116, 15, 59, 83, + 109, 73, 7, 23, 2, 2, 41, 55, 8, 34, + 9, 31, 59, 14, 9, 43, 49, 10, 23, 57, + 18, 11, 33, 49, 3, 19, 19, 31, 13, 10, + 44, 0, 0, 0, 35, 45, 67, 13, 40, 2, + 104, 16, 11, 119, 71, 37, 17, 79, 115, 115, + 117, 55, 115, 93, 115, 111, 97, 111, 97, 6, + 7, 35, 75, 11, 55, 49, 115, 19, 63, 51, + 119, 16, 35, 21, 113, 55, 41, 25, 13, 2, + 7, 35, 6, 0, 2, 14, 3, 1, 4, 27, + 26, 20, 54, 38, 46, 36, 34, 38, 82, 24, + 24, 78, 54, 1, 83, 67, 69, 49, 55, 45, + 35, 41, 35, 41, 53, 13, 17, 19, 2, 73, + 67, 63, 21, 17, 17, 13, 30, 5, 2, 3, + 46, 11, 10, 12, 2, 21, 118, 104, 74, 86, + 104, 84, 50, 86, 66, 54, 62, 36, 10, 2, + 2, 46, 32, 44, 38, 34, 30, 40, 26, 48, + 36, 14, 18, 28, 51, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 124, 110, 78, + 124, 110, 74, 124, 122, 124, 118, 108, 102, 112, + 82, 68, 26, 42, 0, 13, 67, 13, 46, 28, + 20, 4, 10, 0, 15, 19, 51, 5, 74, 50, + 40, 34, 4, 5, 25, 23, 41, 51, 22, 10, + 6, 1, 13, 21, 29, 45, 85, 61, 33, 25, + 15, 19, 39, 59, 61, 99, 25, 62, 32, 22, + 4, 9, 27, 39, 47, 61, 124, 55, 37, 27, + 5, 13, 2, 8, 10, 21, 4, 12, 1, 29, + 15, 9, 36, 6, 50, 42, 66, 64, 38, 48, + 56, 42, 30, 0, 28, 3, 75, 61, 59, 45, + 39, 39, 39, 43, 5, 25, 23, 21, 8, 23, + 37, 67, 57, 63, 25, 19, 13, 17, 9, 3, + 13, 2, 2, 7, 17, 12, 1, 17, 110, 86, + 92, 94, 94, 100, 92, 90, 76, 76, 82, 70, + 34, 40, 26, 84, 78, 74, 62, 60, 50, 22, + 10, 1, 25, 35, 59, 13, 85, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 114, 108, 88, + 48, 6, 122, 118, 58, 124, 124, 120, 108, 116, + 104, 92, 104, 90, 90, 58, 62, 38, 11, 74, + 54, 18, 105, 89, 93, 37, 51, 45, 5, 29, + 27, 9, 7, 
24, 0, 36, 15, 124, 124, 124, + 124, 104, 98, 42, 6, 43, 21, 82, 58, 40, + 18, 18, 4, 1, 9, 53, 79, 61, 47, 47, + 41, 15, 27, 39, 15, 9, 7, 3, 2, 12, + 8, 124, 124, 124, 118, 110, 98, 74, 44, 11, + }, + + { + /* Context Tables for I, SI Slices :: qp = 4 */ + + 124, 18, 21, 124, 18, 21, 113, 69, 24, 20, + 22, 50, 112, 116, 108, 28, 89, 10, 21, 2, + 2, 22, 17, 18, 9, 36, 112, 19, 61, 85, + 103, 71, 7, 21, 2, 2, 41, 53, 8, 32, + 9, 31, 59, 14, 9, 41, 49, 10, 23, 55, + 16, 13, 33, 49, 3, 17, 19, 29, 13, 10, + 44, 0, 0, 0, 33, 47, 67, 13, 38, 2, + 104, 16, 11, 117, 69, 37, 17, 75, 113, 111, + 115, 53, 113, 89, 111, 109, 97, 109, 97, 6, + 7, 33, 73, 11, 55, 47, 111, 19, 63, 51, + 117, 16, 33, 21, 111, 55, 41, 25, 11, 2, + 7, 35, 6, 0, 0, 12, 3, 1, 4, 27, + 26, 20, 52, 38, 46, 36, 34, 36, 78, 24, + 24, 74, 52, 3, 81, 65, 67, 47, 55, 43, + 33, 39, 35, 39, 51, 13, 17, 17, 0, 73, + 65, 63, 21, 17, 17, 13, 28, 3, 2, 3, + 42, 9, 8, 10, 2, 23, 114, 102, 72, 84, + 102, 82, 48, 84, 66, 50, 60, 34, 10, 2, + 2, 44, 32, 42, 38, 32, 28, 38, 24, 44, + 34, 12, 16, 26, 51, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 118, 106, 72, + 124, 108, 70, 124, 118, 124, 114, 102, 98, 106, + 78, 64, 24, 40, 3, 15, 67, 13, 46, 30, + 20, 4, 10, 0, 15, 19, 49, 3, 76, 50, + 40, 34, 6, 5, 23, 23, 39, 51, 24, 12, + 6, 1, 13, 21, 29, 43, 83, 61, 31, 25, + 15, 19, 37, 57, 61, 97, 25, 64, 32, 22, + 4, 7, 25, 39, 45, 59, 124, 55, 37, 27, + 5, 13, 2, 8, 10, 19, 4, 12, 0, 29, + 15, 9, 34, 6, 48, 40, 64, 62, 38, 44, + 54, 40, 30, 1, 26, 5, 75, 61, 57, 45, + 39, 39, 37, 41, 7, 25, 23, 21, 6, 23, + 37, 67, 55, 63, 25, 17, 13, 17, 9, 3, + 11, 2, 0, 7, 15, 12, 3, 19, 108, 86, + 92, 92, 92, 98, 90, 88, 74, 74, 80, 68, + 32, 38, 24, 80, 74, 70, 58, 58, 48, 20, + 8, 1, 25, 35, 59, 15, 85, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 120, 110, 104, 84, + 46, 4, 118, 114, 56, 124, 124, 116, 104, 110, + 100, 88, 100, 86, 86, 54, 58, 34, 13, 70, + 50, 14, 103, 87, 91, 37, 49, 43, 1, 27, + 25, 9, 5, 
26, 2, 38, 15, 124, 124, 124, + 124, 98, 92, 36, 2, 45, 21, 82, 58, 40, + 18, 18, 4, 1, 9, 51, 77, 59, 45, 47, + 39, 13, 25, 37, 13, 7, 5, 1, 4, 14, + 8, 124, 124, 124, 114, 106, 94, 70, 40, 13, + }, + + { + /* Context Tables for I, SI Slices :: qp = 5 */ + + 124, 18, 21, 124, 18, 21, 109, 65, 24, 20, + 20, 46, 108, 114, 108, 28, 85, 10, 19, 4, + 2, 22, 15, 16, 11, 36, 108, 23, 63, 85, + 97, 67, 7, 19, 4, 2, 41, 51, 8, 32, + 9, 31, 57, 14, 11, 41, 49, 10, 23, 53, + 16, 13, 33, 49, 1, 17, 17, 29, 11, 10, + 44, 0, 0, 0, 33, 47, 67, 11, 36, 2, + 104, 16, 11, 115, 67, 37, 15, 73, 109, 107, + 111, 51, 109, 87, 107, 107, 95, 107, 95, 8, + 7, 31, 71, 11, 53, 45, 107, 19, 63, 49, + 113, 18, 33, 19, 109, 53, 39, 25, 11, 4, + 5, 33, 8, 2, 0, 12, 3, 1, 4, 27, + 26, 18, 50, 38, 44, 36, 34, 36, 74, 24, + 22, 72, 50, 5, 79, 63, 65, 45, 53, 41, + 31, 37, 33, 37, 49, 13, 17, 17, 1, 71, + 63, 63, 19, 17, 15, 13, 26, 1, 2, 3, + 40, 7, 8, 8, 2, 23, 112, 100, 72, 82, + 100, 80, 46, 84, 66, 48, 58, 34, 10, 2, + 0, 44, 30, 40, 36, 30, 26, 38, 22, 40, + 32, 10, 14, 24, 51, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 114, 102, 68, + 120, 104, 66, 124, 114, 120, 110, 98, 94, 100, + 74, 58, 22, 36, 5, 15, 67, 13, 46, 30, + 20, 4, 10, 0, 15, 19, 49, 3, 76, 50, + 40, 34, 8, 3, 21, 23, 37, 49, 26, 14, + 6, 0, 11, 19, 27, 43, 81, 59, 27, 23, + 15, 17, 35, 55, 59, 95, 23, 64, 34, 22, + 4, 5, 23, 37, 43, 57, 124, 55, 37, 25, + 5, 13, 2, 8, 10, 19, 4, 14, 0, 29, + 15, 9, 32, 8, 46, 38, 62, 62, 38, 42, + 52, 40, 30, 3, 24, 5, 73, 59, 55, 43, + 37, 37, 37, 39, 7, 25, 23, 21, 4, 23, + 37, 65, 53, 63, 25, 17, 13, 15, 9, 3, + 9, 2, 0, 7, 13, 10, 3, 19, 106, 86, + 90, 92, 90, 96, 88, 86, 74, 72, 78, 66, + 30, 36, 22, 78, 72, 68, 54, 56, 46, 18, + 6, 3, 25, 33, 59, 15, 83, 124, 124, 124, + 124, 124, 124, 124, 124, 120, 116, 106, 100, 80, + 42, 2, 114, 110, 54, 122, 124, 112, 100, 106, + 96, 84, 96, 82, 80, 50, 54, 30, 15, 66, + 46, 12, 101, 83, 89, 35, 47, 41, 2, 25, + 23, 9, 3, 30, 6, 
42, 13, 124, 124, 124, + 124, 94, 86, 32, 1, 47, 21, 82, 58, 40, + 18, 18, 4, 1, 9, 51, 75, 57, 43, 45, + 37, 11, 25, 35, 11, 5, 3, 1, 4, 14, + 8, 124, 124, 124, 112, 102, 90, 66, 36, 15, + }, + + { + /* Context Tables for I, SI Slices :: qp = 6 */ + + 124, 18, 23, 124, 18, 23, 105, 63, 26, 20, + 20, 44, 106, 112, 108, 28, 81, 10, 19, 6, + 2, 20, 15, 14, 13, 34, 106, 25, 65, 87, + 91, 65, 7, 19, 6, 2, 39, 49, 10, 30, + 7, 29, 55, 12, 11, 41, 49, 10, 21, 53, + 16, 13, 31, 49, 1, 17, 17, 29, 11, 10, + 44, 0, 0, 0, 31, 47, 67, 11, 36, 0, + 104, 16, 11, 113, 67, 37, 15, 71, 105, 103, + 109, 49, 105, 85, 103, 105, 93, 105, 93, 10, + 7, 29, 71, 9, 53, 45, 103, 19, 61, 49, + 111, 18, 33, 19, 105, 53, 37, 23, 11, 4, + 5, 31, 8, 2, 1, 12, 3, 1, 4, 27, + 26, 18, 50, 38, 42, 36, 34, 36, 70, 24, + 22, 68, 48, 7, 79, 61, 65, 45, 51, 41, + 29, 37, 33, 37, 45, 13, 17, 17, 3, 71, + 61, 63, 19, 17, 13, 11, 24, 1, 2, 3, + 38, 5, 6, 6, 2, 25, 108, 98, 70, 82, + 98, 80, 46, 82, 64, 46, 56, 34, 10, 2, + 0, 42, 28, 38, 34, 30, 24, 36, 22, 36, + 30, 10, 12, 22, 51, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 124, 124, 108, 96, 62, + 116, 100, 64, 124, 108, 114, 104, 94, 88, 96, + 68, 54, 20, 32, 7, 17, 67, 11, 48, 30, + 22, 4, 10, 0, 15, 19, 47, 3, 76, 52, + 40, 34, 10, 3, 21, 21, 37, 47, 28, 14, + 8, 0, 9, 19, 27, 41, 79, 57, 25, 21, + 13, 17, 35, 55, 57, 91, 23, 64, 34, 22, + 6, 5, 23, 35, 43, 55, 124, 53, 35, 25, + 5, 13, 2, 8, 10, 19, 6, 14, 2, 29, + 15, 11, 32, 8, 44, 36, 60, 60, 38, 40, + 50, 40, 30, 3, 22, 7, 71, 57, 53, 43, + 37, 37, 35, 39, 7, 23, 21, 19, 4, 23, + 37, 65, 51, 63, 25, 17, 13, 13, 9, 3, + 7, 0, 0, 7, 13, 8, 3, 21, 104, 84, + 90, 90, 88, 96, 88, 84, 72, 72, 76, 64, + 28, 34, 20, 74, 70, 64, 50, 52, 42, 16, + 6, 3, 25, 33, 57, 17, 81, 124, 124, 124, + 124, 124, 124, 124, 124, 116, 110, 102, 94, 76, + 40, 2, 112, 108, 50, 118, 124, 108, 96, 102, + 92, 80, 90, 78, 76, 46, 50, 28, 19, 62, + 42, 8, 99, 81, 87, 33, 45, 39, 4, 23, + 21, 9, 1, 32, 8, 44, 11, 124, 
124, 124, + 118, 88, 82, 26, 5, 51, 19, 82, 58, 40, + 18, 18, 4, 1, 9, 49, 73, 57, 41, 43, + 35, 11, 23, 33, 11, 5, 3, 1, 6, 14, + 8, 124, 124, 122, 108, 100, 88, 60, 34, 17, + }, + + { + /* Context Tables for I, SI Slices :: qp = 7 */ + + 124, 18, 23, 124, 18, 23, 101, 59, 26, 20, + 18, 40, 102, 108, 108, 28, 75, 8, 17, 6, + 2, 18, 15, 12, 15, 34, 102, 29, 67, 87, + 85, 63, 7, 17, 6, 2, 39, 47, 10, 30, + 7, 29, 55, 12, 13, 39, 49, 10, 21, 51, + 14, 13, 31, 49, 0, 15, 17, 27, 9, 10, + 44, 0, 0, 0, 31, 47, 67, 9, 34, 0, + 104, 16, 11, 111, 65, 37, 13, 67, 103, 99, + 105, 47, 103, 81, 99, 103, 91, 103, 93, 12, + 7, 27, 69, 9, 51, 43, 99, 19, 61, 47, + 109, 18, 31, 17, 103, 51, 37, 23, 9, 4, + 5, 29, 10, 4, 1, 10, 3, 1, 4, 27, + 26, 16, 48, 38, 42, 36, 34, 34, 66, 24, + 20, 64, 46, 9, 77, 59, 63, 43, 49, 39, + 27, 35, 31, 35, 43, 13, 17, 15, 5, 69, + 59, 63, 19, 17, 13, 11, 22, 0, 2, 3, + 34, 3, 6, 4, 2, 27, 106, 96, 70, 80, + 96, 78, 44, 80, 64, 44, 54, 34, 10, 2, + 1, 40, 28, 36, 34, 28, 22, 34, 20, 32, + 28, 8, 10, 20, 51, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 124, 120, 122, 104, 92, 58, + 112, 98, 60, 124, 104, 110, 100, 88, 84, 90, + 64, 48, 18, 30, 11, 19, 67, 11, 48, 32, + 22, 4, 10, 0, 15, 19, 47, 1, 78, 52, + 40, 34, 12, 1, 19, 21, 35, 45, 30, 16, + 8, 0, 7, 17, 25, 41, 77, 57, 23, 21, + 13, 17, 33, 53, 57, 89, 21, 66, 34, 22, + 6, 3, 21, 33, 41, 53, 124, 53, 35, 23, + 5, 13, 2, 8, 10, 17, 6, 14, 2, 29, + 15, 11, 30, 10, 42, 34, 58, 58, 38, 38, + 48, 38, 30, 5, 20, 7, 69, 57, 51, 41, + 35, 35, 35, 37, 7, 23, 21, 19, 2, 23, + 37, 63, 49, 63, 25, 15, 13, 13, 9, 3, + 5, 0, 0, 7, 11, 8, 5, 23, 102, 84, + 88, 88, 86, 94, 86, 82, 70, 70, 74, 62, + 26, 32, 18, 72, 66, 62, 46, 50, 40, 14, + 4, 5, 25, 33, 57, 17, 79, 124, 124, 124, + 124, 124, 124, 124, 122, 112, 106, 98, 90, 72, + 36, 0, 108, 104, 48, 114, 124, 104, 92, 98, + 88, 76, 86, 74, 70, 42, 46, 24, 21, 58, + 38, 6, 97, 79, 85, 33, 43, 37, 8, 21, + 19, 9, 0, 36, 12, 48, 11, 124, 124, 122, + 112, 82, 
76, 20, 9, 53, 19, 82, 58, 40, + 18, 18, 4, 1, 9, 49, 71, 55, 39, 41, + 33, 9, 23, 31, 9, 3, 1, 0, 6, 16, + 8, 124, 124, 118, 104, 96, 84, 56, 30, 19, + }, + + { + /* Context Tables for I, SI Slices :: qp = 8 */ + + 124, 16, 23, 124, 16, 23, 99, 57, 26, 20, + 18, 38, 100, 106, 108, 28, 71, 8, 15, 8, + 2, 16, 15, 10, 19, 32, 98, 33, 69, 89, + 81, 61, 7, 15, 8, 2, 39, 45, 10, 28, + 7, 29, 53, 10, 13, 39, 51, 10, 21, 51, + 14, 15, 31, 49, 0, 15, 17, 27, 9, 10, + 44, 0, 0, 0, 29, 49, 67, 9, 32, 0, + 104, 16, 11, 109, 63, 37, 13, 65, 99, 95, + 103, 45, 99, 79, 97, 101, 91, 101, 91, 12, + 7, 25, 67, 9, 51, 43, 97, 19, 61, 47, + 107, 18, 31, 17, 101, 51, 35, 23, 9, 4, + 5, 29, 10, 4, 3, 10, 3, 1, 4, 27, + 26, 16, 46, 38, 40, 36, 34, 34, 62, 24, + 20, 60, 44, 11, 75, 57, 61, 41, 49, 39, + 25, 35, 31, 33, 41, 13, 17, 15, 9, 69, + 57, 63, 19, 19, 11, 11, 20, 2, 2, 3, + 32, 1, 4, 2, 2, 29, 102, 94, 68, 78, + 94, 76, 42, 78, 62, 40, 52, 32, 10, 2, + 1, 38, 26, 34, 32, 26, 20, 32, 18, 28, + 24, 6, 8, 18, 51, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 122, 116, 116, 98, 86, 52, + 108, 94, 56, 122, 100, 104, 94, 84, 78, 84, + 60, 44, 16, 26, 13, 21, 69, 11, 48, 32, + 22, 4, 10, 0, 15, 19, 45, 1, 78, 52, + 40, 34, 14, 1, 19, 21, 35, 45, 32, 16, + 8, 0, 7, 17, 25, 39, 75, 55, 21, 19, + 13, 17, 31, 51, 55, 87, 21, 66, 34, 22, + 6, 1, 21, 33, 39, 53, 124, 53, 35, 23, + 5, 13, 2, 8, 10, 17, 6, 14, 4, 29, + 15, 11, 28, 10, 40, 32, 56, 56, 38, 34, + 44, 38, 30, 7, 18, 9, 69, 55, 49, 41, + 35, 35, 33, 35, 9, 23, 21, 19, 0, 23, + 37, 63, 49, 65, 25, 15, 13, 11, 9, 3, + 5, 0, 1, 7, 9, 6, 5, 25, 100, 82, + 88, 86, 82, 92, 84, 80, 68, 68, 72, 60, + 24, 30, 16, 68, 64, 58, 42, 46, 38, 12, + 2, 5, 25, 33, 57, 19, 79, 124, 124, 124, + 124, 124, 124, 122, 116, 108, 102, 94, 84, 68, + 34, 1, 104, 100, 44, 110, 122, 98, 86, 92, + 82, 72, 82, 68, 66, 38, 40, 20, 23, 54, + 34, 2, 95, 77, 83, 31, 41, 37, 10, 19, + 19, 9, 0, 38, 14, 50, 9, 124, 124, 116, + 106, 76, 70, 14, 13, 55, 19, 82, 58, 40, 
+ 18, 18, 4, 1, 9, 47, 71, 53, 37, 41, + 31, 9, 21, 31, 9, 3, 1, 0, 8, 16, + 6, 124, 124, 114, 100, 92, 80, 52, 26, 21, + }, + + { + /* Context Tables for I, SI Slices :: qp = 9 */ + + 124, 16, 23, 124, 16, 23, 95, 55, 28, 20, + 18, 36, 98, 104, 108, 28, 67, 8, 13, 10, + 2, 16, 13, 8, 21, 30, 94, 35, 71, 91, + 75, 57, 7, 13, 10, 2, 37, 43, 12, 26, + 7, 27, 51, 10, 13, 39, 51, 10, 21, 49, + 14, 15, 31, 49, 0, 15, 15, 27, 9, 10, + 44, 0, 0, 0, 27, 49, 67, 9, 30, 0, + 104, 16, 11, 107, 61, 37, 13, 63, 95, 91, + 99, 41, 95, 77, 93, 99, 89, 99, 89, 14, + 5, 23, 65, 9, 49, 41, 93, 19, 59, 47, + 103, 20, 31, 17, 97, 51, 33, 23, 9, 6, + 3, 27, 10, 4, 3, 10, 1, 1, 4, 25, + 26, 16, 46, 38, 38, 36, 34, 34, 58, 24, + 20, 58, 42, 11, 73, 55, 59, 39, 47, 37, + 23, 33, 31, 31, 39, 13, 17, 15, 11, 67, + 55, 63, 17, 19, 9, 11, 18, 4, 2, 3, + 30, 0, 2, 0, 2, 29, 100, 92, 68, 76, + 92, 74, 42, 78, 62, 38, 50, 32, 10, 2, + 1, 38, 24, 32, 30, 24, 18, 32, 18, 26, + 22, 6, 6, 16, 51, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 118, 112, 112, 92, 82, 46, + 106, 90, 54, 118, 96, 100, 90, 80, 74, 80, + 56, 40, 16, 22, 15, 21, 69, 11, 50, 32, + 22, 6, 10, 0, 13, 19, 43, 1, 78, 52, + 42, 34, 16, 0, 17, 19, 33, 43, 34, 18, + 10, 2, 5, 15, 25, 37, 73, 53, 17, 17, + 11, 15, 29, 49, 53, 85, 19, 66, 36, 24, + 6, 0, 19, 31, 37, 51, 124, 51, 33, 21, + 5, 13, 2, 10, 12, 17, 6, 16, 6, 29, + 15, 11, 28, 10, 38, 32, 54, 56, 38, 32, + 42, 38, 30, 7, 16, 11, 67, 53, 47, 41, + 33, 35, 31, 33, 9, 23, 21, 17, 0, 23, + 37, 63, 47, 65, 25, 15, 13, 9, 9, 3, + 3, 0, 1, 7, 7, 4, 5, 25, 98, 82, + 88, 86, 80, 90, 82, 78, 68, 66, 70, 60, + 24, 28, 14, 66, 62, 54, 38, 44, 36, 12, + 2, 5, 23, 31, 57, 21, 77, 124, 124, 124, + 124, 124, 124, 118, 112, 104, 98, 90, 80, 64, + 32, 3, 100, 98, 42, 106, 118, 94, 82, 88, + 78, 68, 78, 64, 62, 36, 36, 16, 25, 50, + 30, 1, 93, 73, 79, 29, 39, 35, 14, 17, + 17, 9, 2, 42, 16, 54, 7, 124, 124, 112, + 100, 72, 64, 10, 17, 57, 19, 82, 58, 40, + 18, 20, 4, 1, 9, 45, 69, 51, 35, 
39, + 27, 7, 19, 29, 7, 1, 0, 0, 10, 16, + 6, 124, 122, 112, 98, 88, 76, 48, 22, 21, + }, + + { + /* Context Tables for I, SI Slices :: qp = 10 */ + + 124, 16, 23, 124, 16, 23, 91, 51, 28, 20, + 16, 32, 94, 100, 108, 28, 61, 6, 11, 10, + 2, 14, 13, 6, 23, 30, 90, 39, 73, 91, + 69, 55, 7, 11, 10, 2, 37, 41, 12, 26, + 7, 27, 51, 10, 15, 37, 51, 10, 21, 47, + 12, 15, 31, 49, 2, 13, 15, 25, 7, 10, + 44, 0, 0, 0, 27, 49, 67, 7, 28, 0, + 104, 16, 11, 105, 59, 37, 11, 59, 93, 87, + 97, 39, 93, 73, 89, 97, 87, 97, 89, 16, + 5, 21, 63, 9, 49, 39, 89, 19, 59, 45, + 101, 20, 29, 15, 95, 49, 33, 23, 7, 6, + 3, 25, 12, 6, 5, 8, 1, 1, 4, 25, + 26, 14, 44, 38, 38, 36, 34, 32, 54, 24, + 18, 54, 40, 13, 71, 53, 57, 37, 45, 35, + 21, 31, 29, 29, 37, 13, 17, 13, 13, 67, + 53, 63, 17, 19, 9, 11, 16, 6, 2, 3, + 26, 2, 2, 1, 2, 31, 96, 90, 66, 74, + 90, 72, 40, 76, 62, 36, 48, 32, 10, 2, + 3, 36, 24, 30, 30, 22, 16, 30, 16, 22, + 20, 4, 4, 14, 51, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 114, 108, 106, 88, 78, 42, + 102, 88, 50, 112, 92, 96, 86, 74, 70, 74, + 52, 34, 14, 20, 19, 23, 69, 11, 50, 34, + 22, 6, 10, 0, 13, 19, 43, 0, 80, 52, + 42, 34, 18, 0, 15, 19, 31, 41, 36, 20, + 10, 2, 3, 15, 23, 37, 71, 53, 15, 17, + 11, 15, 27, 47, 53, 83, 19, 68, 36, 24, + 6, 2, 17, 29, 35, 49, 124, 51, 33, 21, + 5, 13, 2, 10, 12, 15, 6, 16, 6, 29, + 15, 11, 26, 12, 36, 30, 52, 54, 38, 30, + 40, 36, 30, 9, 14, 11, 65, 53, 45, 39, + 33, 33, 31, 31, 9, 23, 21, 17, 1, 23, + 37, 61, 45, 65, 25, 13, 13, 9, 9, 3, + 1, 0, 1, 7, 5, 4, 7, 27, 96, 82, + 86, 84, 78, 88, 80, 76, 66, 64, 68, 58, + 22, 26, 12, 62, 58, 52, 34, 42, 34, 10, + 0, 7, 23, 31, 57, 21, 75, 124, 124, 124, + 124, 124, 120, 114, 106, 100, 94, 86, 76, 60, + 28, 5, 96, 94, 40, 102, 114, 90, 78, 84, + 74, 64, 74, 60, 56, 32, 32, 12, 27, 46, + 26, 3, 91, 71, 77, 29, 37, 33, 18, 15, + 15, 9, 4, 44, 20, 56, 7, 124, 120, 106, + 94, 66, 58, 4, 21, 59, 19, 82, 58, 40, + 18, 20, 4, 1, 9, 45, 67, 49, 33, 37, + 25, 5, 19, 27, 5, 0, 2, 2, 10, 18, + 
6, 120, 118, 108, 94, 84, 72, 44, 18, 23, + }, + + { + /* Context Tables for I, SI Slices :: qp = 11 */ + + 124, 16, 25, 124, 16, 25, 87, 49, 30, 20, + 16, 30, 92, 98, 108, 28, 57, 6, 11, 12, + 2, 12, 13, 4, 25, 28, 88, 41, 75, 93, + 63, 53, 7, 11, 12, 2, 35, 39, 14, 24, + 5, 25, 49, 8, 15, 37, 51, 10, 19, 47, + 12, 15, 29, 49, 2, 13, 15, 25, 7, 10, + 44, 0, 0, 0, 25, 49, 67, 7, 28, 1, + 104, 16, 11, 103, 59, 37, 11, 57, 89, 83, + 93, 37, 89, 71, 85, 95, 85, 95, 87, 18, + 5, 19, 63, 7, 47, 39, 85, 19, 57, 45, + 99, 20, 29, 15, 91, 49, 31, 21, 7, 6, + 3, 23, 12, 6, 5, 8, 1, 1, 4, 25, + 26, 14, 44, 38, 36, 36, 34, 32, 50, 24, + 18, 50, 38, 15, 71, 51, 57, 37, 43, 35, + 19, 31, 29, 29, 33, 13, 17, 13, 15, 65, + 51, 63, 17, 19, 7, 9, 14, 6, 2, 3, + 24, 4, 0, 3, 2, 33, 94, 88, 66, 74, + 88, 72, 40, 74, 60, 34, 46, 32, 10, 2, + 3, 34, 22, 28, 28, 22, 14, 28, 16, 18, + 18, 4, 2, 12, 51, 124, 124, 124, 124, 124, + 124, 124, 124, 124, 108, 104, 102, 82, 72, 36, + 98, 84, 48, 108, 86, 90, 80, 70, 64, 70, + 46, 30, 12, 16, 21, 25, 69, 9, 52, 34, + 24, 6, 10, 0, 13, 19, 41, 0, 80, 54, + 42, 34, 20, 2, 15, 17, 31, 39, 38, 20, + 12, 2, 1, 13, 23, 35, 69, 51, 13, 15, + 9, 15, 27, 47, 51, 79, 17, 68, 36, 24, + 8, 2, 17, 27, 35, 47, 124, 49, 31, 19, + 5, 13, 2, 10, 12, 15, 8, 16, 8, 29, + 15, 13, 26, 12, 34, 28, 50, 52, 38, 28, + 38, 36, 30, 9, 12, 13, 63, 51, 43, 39, + 31, 33, 29, 31, 9, 21, 19, 15, 1, 23, + 37, 61, 43, 65, 25, 13, 13, 7, 9, 3, + 0, 1, 1, 7, 5, 2, 7, 29, 94, 80, + 86, 82, 76, 88, 80, 74, 64, 64, 66, 56, + 20, 24, 10, 60, 56, 48, 30, 38, 30, 8, + 0, 7, 23, 31, 55, 23, 73, 124, 124, 124, + 124, 124, 116, 110, 102, 96, 88, 82, 70, 56, + 26, 5, 94, 92, 36, 98, 108, 86, 74, 80, + 70, 60, 68, 56, 52, 28, 28, 10, 31, 42, + 22, 7, 89, 69, 75, 27, 35, 31, 20, 13, + 13, 9, 6, 48, 22, 60, 5, 122, 118, 102, + 88, 60, 54, 1, 25, 63, 17, 82, 58, 40, + 18, 20, 4, 1, 9, 43, 65, 49, 31, 35, + 23, 5, 17, 25, 5, 0, 2, 2, 12, 18, + 6, 118, 116, 104, 90, 82, 70, 38, 16, 25, + }, + + { 
+ /* Context Tables for I, SI Slices :: qp = 12 */ + + 124, 16, 25, 124, 16, 25, 85, 45, 30, 20, + 14, 26, 88, 96, 108, 28, 53, 6, 9, 14, + 2, 10, 13, 2, 27, 28, 84, 45, 77, 93, + 57, 51, 7, 9, 14, 2, 35, 37, 14, 24, + 5, 25, 47, 8, 17, 37, 51, 10, 19, 45, + 12, 17, 29, 49, 4, 13, 15, 25, 5, 10, + 44, 0, 0, 0, 25, 51, 67, 5, 26, 1, + 104, 16, 11, 101, 57, 37, 9, 55, 85, 79, + 91, 35, 85, 69, 81, 93, 85, 93, 85, 18, + 5, 17, 61, 7, 47, 37, 81, 19, 57, 43, + 97, 20, 29, 13, 89, 47, 29, 21, 7, 6, + 3, 23, 14, 8, 7, 8, 1, 1, 4, 25, + 26, 12, 42, 38, 34, 36, 34, 32, 46, 24, + 16, 46, 36, 17, 69, 49, 55, 35, 43, 33, + 17, 29, 27, 27, 31, 13, 17, 13, 17, 65, + 49, 63, 17, 19, 5, 9, 12, 8, 2, 3, + 22, 6, 0, 5, 2, 35, 90, 86, 64, 72, + 86, 70, 38, 72, 60, 30, 44, 30, 10, 2, + 5, 32, 20, 26, 26, 20, 12, 26, 14, 14, + 16, 2, 0, 10, 51, 124, 124, 122, 124, 124, + 124, 124, 124, 122, 104, 100, 96, 78, 68, 32, + 94, 80, 44, 104, 82, 86, 76, 66, 60, 64, + 42, 24, 10, 12, 23, 27, 69, 9, 52, 34, + 24, 6, 10, 0, 13, 19, 41, 0, 80, 54, + 42, 34, 22, 2, 13, 17, 29, 39, 40, 22, + 12, 2, 1, 13, 21, 35, 67, 49, 11, 13, + 9, 15, 25, 45, 49, 77, 17, 68, 36, 24, + 8, 4, 15, 27, 33, 45, 124, 49, 31, 19, + 5, 13, 2, 10, 12, 15, 8, 16, 8, 29, + 15, 13, 24, 14, 32, 26, 48, 50, 38, 24, + 36, 36, 30, 11, 10, 13, 63, 49, 41, 37, + 31, 31, 29, 29, 11, 21, 19, 15, 3, 23, + 37, 59, 41, 65, 25, 13, 13, 5, 9, 3, + 2, 1, 3, 7, 3, 0, 7, 31, 92, 80, + 84, 80, 74, 86, 78, 72, 62, 62, 64, 54, + 18, 22, 8, 56, 54, 46, 26, 36, 28, 6, + 1, 9, 23, 31, 55, 23, 73, 124, 124, 124, + 124, 124, 112, 106, 96, 92, 84, 78, 66, 52, + 22, 7, 90, 88, 34, 94, 104, 82, 70, 74, + 66, 56, 64, 52, 46, 24, 24, 6, 33, 38, + 18, 9, 87, 67, 73, 25, 33, 29, 24, 11, + 11, 9, 8, 50, 26, 62, 3, 118, 114, 96, + 82, 54, 48, 7, 29, 65, 17, 82, 58, 40, + 18, 20, 4, 1, 9, 43, 63, 47, 29, 35, + 21, 3, 17, 23, 3, 2, 4, 2, 12, 18, + 6, 116, 112, 100, 86, 78, 66, 34, 12, 27, + }, + + { + /* Context Tables for I, SI Slices :: qp = 13 */ + + 
124, 16, 25, 124, 16, 25, 81, 43, 30, 20, + 14, 24, 86, 92, 108, 28, 47, 4, 7, 14, + 2, 10, 11, 0, 29, 26, 80, 49, 79, 95, + 51, 47, 7, 7, 14, 2, 35, 35, 14, 22, + 5, 25, 47, 8, 17, 35, 51, 10, 19, 43, + 10, 17, 29, 49, 4, 11, 13, 23, 5, 10, + 44, 0, 0, 0, 23, 51, 67, 5, 24, 1, + 104, 16, 11, 99, 55, 37, 9, 51, 83, 75, + 87, 33, 83, 65, 77, 91, 83, 91, 85, 20, + 5, 15, 59, 7, 45, 35, 77, 19, 57, 43, + 93, 22, 27, 13, 87, 47, 29, 21, 5, 8, + 1, 21, 14, 8, 7, 6, 1, 1, 4, 25, + 26, 12, 40, 38, 34, 36, 34, 30, 42, 24, + 16, 44, 34, 19, 67, 47, 53, 33, 41, 31, + 15, 27, 27, 25, 29, 13, 17, 11, 19, 63, + 47, 63, 15, 19, 5, 9, 10, 10, 2, 3, + 18, 8, 1, 7, 2, 35, 88, 84, 64, 70, + 84, 68, 36, 72, 60, 28, 42, 30, 10, 2, + 5, 32, 20, 24, 26, 18, 10, 26, 12, 10, + 14, 0, 1, 8, 51, 122, 124, 118, 124, 122, + 120, 120, 120, 118, 100, 96, 92, 72, 64, 26, + 90, 78, 40, 98, 78, 82, 72, 60, 56, 58, + 38, 20, 8, 10, 27, 27, 69, 9, 52, 36, + 24, 6, 10, 0, 13, 19, 39, 2, 82, 54, + 42, 34, 24, 4, 11, 17, 27, 37, 42, 24, + 12, 4, 0, 11, 21, 33, 65, 49, 7, 13, + 9, 13, 23, 43, 49, 75, 15, 70, 38, 24, + 8, 6, 13, 25, 31, 43, 124, 49, 31, 17, + 5, 13, 2, 10, 12, 13, 8, 18, 10, 29, + 15, 13, 22, 14, 30, 24, 46, 50, 38, 22, + 34, 34, 30, 13, 8, 15, 61, 49, 39, 37, + 29, 31, 27, 27, 11, 21, 19, 15, 5, 23, + 37, 59, 39, 65, 25, 11, 13, 5, 9, 3, + 4, 1, 3, 7, 1, 0, 9, 31, 90, 80, + 84, 80, 72, 84, 76, 70, 62, 60, 62, 52, + 16, 20, 6, 54, 50, 42, 22, 34, 26, 4, + 3, 9, 23, 29, 55, 25, 71, 124, 124, 124, + 124, 120, 108, 102, 92, 88, 80, 74, 62, 48, + 20, 9, 86, 84, 32, 90, 100, 78, 66, 70, + 62, 52, 60, 48, 42, 20, 20, 2, 35, 34, + 14, 13, 85, 63, 71, 25, 31, 27, 28, 9, + 9, 9, 10, 54, 28, 66, 3, 116, 110, 92, + 76, 50, 42, 11, 33, 67, 17, 82, 58, 40, + 18, 20, 4, 1, 9, 41, 61, 45, 27, 33, + 19, 1, 15, 21, 1, 4, 6, 4, 14, 20, + 6, 112, 110, 98, 84, 74, 62, 30, 8, 29, + }, + + { + /* Context Tables for I, SI Slices :: qp = 14 */ + + 122, 16, 25, 122, 16, 25, 77, 39, 32, 20, + 12, 20, 82, 90, 108, 
28, 43, 4, 5, 16, + 2, 8, 11, 1, 31, 26, 76, 51, 81, 95, + 45, 45, 7, 5, 16, 2, 33, 33, 16, 22, + 5, 23, 45, 6, 19, 35, 51, 10, 19, 43, + 10, 17, 29, 49, 6, 11, 13, 23, 3, 10, + 44, 0, 0, 0, 23, 51, 67, 3, 22, 1, + 104, 16, 11, 97, 53, 37, 7, 49, 79, 71, + 85, 31, 79, 63, 73, 89, 81, 89, 83, 22, + 5, 13, 57, 7, 45, 35, 73, 19, 55, 41, + 91, 22, 27, 11, 83, 45, 27, 21, 5, 8, + 1, 19, 16, 10, 9, 6, 1, 1, 4, 25, + 26, 10, 40, 38, 32, 36, 34, 30, 38, 24, + 14, 40, 32, 21, 65, 45, 51, 31, 39, 31, + 13, 27, 25, 23, 27, 13, 17, 11, 21, 63, + 45, 63, 15, 19, 3, 9, 8, 12, 2, 3, + 16, 10, 1, 9, 2, 37, 84, 82, 62, 68, + 82, 66, 36, 70, 58, 26, 40, 30, 10, 2, + 7, 30, 18, 22, 24, 16, 8, 24, 12, 6, + 12, 0, 3, 6, 51, 120, 122, 116, 124, 118, + 116, 116, 116, 112, 94, 92, 86, 68, 58, 22, + 86, 74, 38, 94, 74, 76, 66, 56, 50, 54, + 34, 14, 6, 6, 29, 29, 69, 9, 54, 36, + 24, 6, 10, 0, 13, 19, 39, 2, 82, 54, + 42, 34, 26, 4, 11, 15, 27, 35, 44, 24, + 14, 4, 2, 11, 19, 33, 63, 47, 5, 11, + 7, 13, 21, 41, 47, 73, 15, 70, 38, 24, + 8, 8, 13, 23, 29, 41, 124, 47, 29, 17, + 5, 13, 2, 10, 12, 13, 8, 18, 10, 29, + 15, 13, 22, 16, 28, 22, 44, 48, 38, 20, + 32, 34, 30, 13, 6, 15, 59, 47, 37, 35, + 29, 29, 27, 25, 11, 21, 19, 13, 5, 23, + 37, 57, 37, 65, 25, 11, 13, 3, 9, 3, + 6, 1, 3, 7, 0, 1, 9, 33, 88, 78, + 82, 78, 70, 82, 74, 68, 60, 58, 60, 50, + 14, 18, 4, 50, 48, 40, 18, 30, 24, 2, + 3, 11, 23, 29, 55, 25, 69, 124, 124, 122, + 122, 114, 104, 98, 86, 84, 76, 70, 56, 44, + 16, 11, 82, 82, 28, 86, 96, 74, 62, 66, + 58, 48, 56, 44, 36, 16, 16, 1, 37, 30, + 10, 15, 83, 61, 69, 23, 29, 25, 30, 7, + 7, 9, 12, 56, 32, 68, 1, 112, 108, 86, + 70, 44, 36, 17, 37, 69, 17, 82, 58, 40, + 18, 20, 4, 1, 9, 41, 59, 43, 25, 31, + 17, 1, 15, 19, 1, 4, 6, 4, 14, 20, + 6, 110, 106, 94, 80, 70, 58, 26, 4, 31, + }, + + { + /* Context Tables for I, SI Slices :: qp = 15 */ + + 120, 16, 25, 120, 16, 25, 73, 37, 32, 20, + 12, 18, 80, 88, 108, 28, 39, 4, 3, 18, + 2, 6, 11, 3, 33, 24, 72, 55, 83, 97, + 39, 43, 
7, 3, 18, 2, 33, 31, 16, 20, + 5, 23, 43, 6, 19, 35, 51, 10, 19, 41, + 10, 17, 29, 49, 6, 11, 13, 23, 3, 10, + 44, 0, 0, 0, 21, 51, 67, 3, 20, 1, + 104, 16, 11, 95, 51, 37, 7, 47, 75, 67, + 81, 29, 75, 61, 69, 87, 79, 87, 81, 24, + 5, 11, 55, 7, 43, 33, 69, 19, 55, 41, + 89, 22, 27, 11, 81, 45, 25, 21, 5, 8, + 1, 17, 16, 10, 9, 6, 1, 1, 4, 25, + 26, 10, 38, 38, 30, 36, 34, 30, 34, 24, + 14, 36, 30, 23, 63, 43, 49, 29, 37, 29, + 11, 25, 25, 21, 25, 13, 17, 11, 23, 61, + 43, 63, 15, 19, 1, 9, 6, 14, 2, 3, + 14, 12, 3, 11, 2, 39, 82, 80, 62, 66, + 80, 64, 34, 68, 58, 24, 38, 30, 10, 2, + 7, 28, 16, 20, 22, 14, 6, 22, 10, 2, + 10, 1, 5, 4, 51, 116, 120, 112, 120, 114, + 112, 112, 112, 108, 90, 88, 82, 62, 54, 16, + 82, 70, 34, 90, 70, 72, 62, 52, 46, 48, + 30, 10, 4, 2, 31, 31, 69, 9, 54, 36, + 24, 6, 10, 0, 13, 19, 37, 2, 82, 54, + 42, 34, 28, 6, 9, 15, 25, 33, 46, 26, + 14, 4, 4, 9, 19, 31, 61, 45, 3, 9, + 7, 13, 19, 39, 45, 71, 13, 70, 38, 24, + 8, 10, 11, 21, 27, 39, 124, 47, 29, 15, + 5, 13, 2, 10, 12, 13, 8, 18, 12, 29, + 15, 13, 20, 16, 26, 20, 42, 46, 38, 18, + 30, 34, 30, 15, 4, 17, 57, 45, 35, 35, + 27, 29, 25, 23, 11, 21, 19, 13, 7, 23, + 37, 57, 35, 65, 25, 11, 13, 1, 9, 3, + 8, 1, 3, 7, 2, 3, 9, 35, 86, 78, + 82, 76, 68, 80, 72, 66, 58, 56, 58, 48, + 12, 16, 2, 48, 46, 36, 14, 28, 22, 0, + 5, 11, 23, 29, 55, 27, 67, 124, 124, 118, + 118, 108, 100, 94, 82, 80, 72, 66, 52, 40, + 14, 13, 78, 78, 26, 82, 92, 70, 58, 62, + 54, 44, 52, 40, 32, 12, 12, 5, 39, 26, + 6, 19, 81, 59, 67, 21, 27, 23, 34, 5, + 5, 9, 14, 60, 34, 72, 0, 110, 104, 82, + 64, 38, 30, 23, 41, 71, 17, 82, 58, 40, + 18, 20, 4, 1, 9, 39, 57, 41, 23, 29, + 15, 0, 13, 17, 0, 6, 8, 4, 16, 20, + 6, 108, 104, 90, 76, 66, 54, 22, 0, 33, + }, + + { + /* Context Tables for I, SI Slices :: qp = 16 */ + + 116, 14, 27, 116, 14, 27, 71, 35, 32, 20, + 10, 14, 76, 84, 106, 28, 35, 2, 3, 18, + 0, 4, 11, 7, 37, 22, 68, 59, 85, 99, + 35, 41, 9, 3, 18, 0, 33, 29, 16, 18, + 5, 23, 43, 4, 21, 35, 53, 10, 19, 41, + 
8, 19, 29, 49, 6, 11, 13, 23, 3, 8, + 44, 0, 0, 0, 21, 53, 67, 3, 18, 3, + 104, 14, 11, 93, 51, 37, 7, 45, 73, 65, + 79, 27, 73, 59, 67, 85, 79, 85, 81, 24, + 5, 11, 55, 7, 43, 33, 67, 19, 55, 41, + 87, 22, 27, 11, 79, 45, 25, 21, 5, 8, + 1, 17, 16, 10, 11, 4, 1, 3, 4, 25, + 24, 8, 36, 38, 28, 34, 34, 28, 30, 22, + 12, 32, 28, 25, 63, 43, 49, 29, 37, 29, + 9, 25, 25, 21, 23, 15, 17, 11, 27, 61, + 43, 63, 15, 21, 1, 9, 4, 14, 2, 3, + 10, 12, 5, 13, 2, 41, 78, 78, 60, 64, + 78, 62, 32, 66, 56, 20, 36, 28, 8, 2, + 9, 26, 14, 18, 20, 12, 4, 20, 8, 1, + 6, 3, 9, 0, 51, 112, 116, 108, 116, 110, + 106, 106, 106, 102, 84, 82, 76, 56, 48, 10, + 78, 66, 30, 84, 64, 66, 56, 46, 40, 42, + 24, 4, 2, 1, 35, 33, 71, 9, 54, 36, + 24, 6, 10, 1, 13, 19, 37, 2, 82, 54, + 42, 34, 30, 6, 9, 15, 25, 33, 46, 26, + 14, 4, 4, 9, 19, 31, 59, 45, 1, 9, + 7, 13, 19, 39, 45, 69, 13, 70, 38, 24, + 8, 10, 11, 21, 27, 39, 124, 47, 29, 15, + 5, 13, 2, 10, 12, 13, 8, 18, 12, 29, + 15, 15, 18, 16, 24, 18, 40, 44, 36, 14, + 26, 32, 28, 17, 0, 19, 57, 45, 33, 35, + 27, 29, 25, 23, 13, 21, 19, 13, 9, 23, + 37, 57, 35, 67, 25, 11, 13, 1, 11, 3, + 8, 3, 5, 7, 2, 5, 11, 37, 84, 76, + 80, 74, 64, 78, 70, 64, 56, 54, 56, 46, + 10, 12, 1, 44, 42, 32, 10, 24, 18, 1, + 7, 13, 23, 29, 55, 29, 67, 124, 122, 114, + 112, 102, 94, 88, 76, 74, 66, 60, 46, 34, + 10, 15, 74, 74, 22, 78, 86, 64, 52, 56, + 48, 40, 46, 34, 26, 8, 6, 9, 43, 22, + 2, 23, 79, 57, 65, 21, 27, 23, 36, 5, + 5, 9, 14, 62, 36, 74, 0, 106, 100, 76, + 56, 32, 24, 29, 47, 75, 17, 82, 56, 38, + 18, 20, 4, 3, 9, 39, 57, 41, 23, 29, + 13, 0, 13, 17, 0, 6, 8, 4, 16, 20, + 4, 104, 100, 86, 72, 62, 50, 16, 3, 35, + }, + + { + /* Context Tables for I, SI Slices :: qp = 17 */ + + 114, 14, 27, 114, 14, 27, 67, 31, 34, 22, + 10, 12, 74, 82, 106, 28, 29, 2, 1, 20, + 0, 4, 9, 9, 39, 22, 66, 61, 87, 99, + 29, 37, 9, 1, 20, 0, 31, 25, 18, 18, + 3, 21, 41, 4, 21, 33, 53, 10, 17, 39, + 8, 19, 27, 49, 8, 9, 11, 21, 1, 8, + 44, 0, 0, 0, 19, 53, 67, 1, 18, 3, + 104, 
14, 11, 89, 49, 37, 5, 41, 69, 61, + 75, 23, 69, 55, 63, 81, 77, 83, 79, 26, + 3, 9, 53, 5, 41, 31, 63, 17, 53, 39, + 83, 24, 25, 9, 75, 43, 23, 19, 3, 10, + 0, 15, 18, 12, 11, 4, 0, 3, 4, 23, + 24, 8, 36, 38, 28, 34, 34, 28, 28, 22, + 12, 30, 26, 25, 61, 41, 47, 27, 35, 27, + 7, 23, 23, 19, 19, 15, 17, 9, 29, 59, + 41, 63, 13, 21, 0, 7, 4, 16, 2, 3, + 8, 14, 5, 15, 2, 41, 76, 78, 60, 64, + 78, 62, 32, 66, 56, 18, 36, 28, 8, 2, + 9, 26, 14, 18, 20, 12, 2, 20, 8, 3, + 4, 3, 11, 1, 51, 110, 114, 106, 114, 108, + 102, 102, 102, 98, 80, 78, 72, 52, 44, 6, + 76, 64, 28, 80, 60, 62, 52, 42, 36, 38, + 20, 0, 2, 3, 37, 33, 71, 7, 56, 38, + 26, 8, 10, 1, 11, 17, 35, 4, 84, 56, + 44, 34, 32, 8, 7, 13, 23, 31, 48, 28, + 16, 6, 6, 7, 17, 29, 55, 43, 2, 7, + 5, 11, 17, 37, 43, 65, 11, 72, 40, 26, + 10, 12, 9, 19, 25, 37, 124, 45, 27, 13, + 5, 11, 2, 12, 14, 11, 10, 20, 14, 27, + 15, 15, 18, 18, 24, 18, 38, 44, 36, 12, + 24, 32, 28, 17, 1, 19, 55, 43, 31, 33, + 25, 27, 23, 21, 13, 19, 17, 11, 9, 21, + 35, 55, 33, 67, 25, 9, 13, 0, 11, 3, + 10, 3, 5, 5, 4, 5, 11, 37, 84, 76, + 80, 74, 62, 78, 70, 64, 56, 54, 56, 46, + 10, 10, 3, 42, 40, 30, 8, 22, 16, 1, + 7, 13, 21, 27, 53, 29, 65, 120, 118, 110, + 108, 98, 90, 84, 72, 70, 62, 56, 42, 30, + 8, 15, 72, 72, 20, 76, 82, 60, 48, 52, + 44, 36, 42, 30, 22, 6, 2, 11, 45, 20, + 0, 25, 75, 53, 61, 19, 25, 21, 40, 3, + 3, 9, 16, 66, 40, 78, 2, 104, 98, 72, + 50, 28, 20, 33, 51, 77, 15, 84, 56, 38, + 18, 22, 4, 3, 9, 37, 55, 39, 21, 27, + 9, 2, 11, 15, 2, 8, 10, 6, 18, 22, + 4, 102, 98, 84, 70, 60, 48, 12, 5, 35, + }, + + { + /* Context Tables for I, SI Slices :: qp = 18 */ + + 112, 14, 27, 112, 14, 27, 63, 29, 34, 22, + 10, 10, 72, 80, 106, 28, 25, 2, 0, 22, + 0, 2, 9, 11, 41, 20, 62, 65, 89, 101, + 23, 35, 9, 0, 22, 0, 31, 23, 18, 16, + 3, 21, 39, 4, 21, 33, 53, 10, 17, 37, + 8, 19, 27, 49, 8, 9, 11, 21, 1, 8, + 44, 0, 0, 0, 17, 53, 67, 1, 16, 3, + 104, 14, 11, 87, 47, 37, 5, 39, 65, 57, + 73, 21, 65, 53, 59, 79, 75, 81, 77, 28, + 3, 7, 
51, 5, 41, 29, 59, 17, 53, 39, + 81, 24, 25, 9, 73, 43, 21, 19, 3, 10, + 0, 13, 18, 12, 13, 4, 0, 3, 4, 23, + 24, 8, 34, 38, 26, 34, 34, 28, 24, 22, + 12, 26, 24, 27, 59, 39, 45, 25, 33, 25, + 5, 21, 23, 17, 17, 15, 17, 9, 31, 59, + 39, 63, 13, 21, 2, 7, 2, 18, 2, 3, + 6, 16, 7, 17, 2, 43, 72, 76, 58, 62, + 76, 60, 30, 64, 56, 16, 34, 28, 8, 2, + 9, 24, 12, 16, 18, 10, 0, 18, 6, 7, + 2, 5, 13, 3, 51, 106, 112, 102, 110, 104, + 98, 98, 98, 92, 76, 74, 66, 46, 40, 0, + 72, 60, 24, 76, 56, 58, 48, 38, 32, 32, + 16, 3, 0, 7, 39, 35, 71, 7, 56, 38, + 26, 8, 10, 1, 11, 17, 33, 4, 84, 56, + 44, 34, 34, 8, 5, 13, 21, 29, 50, 30, + 16, 6, 8, 7, 17, 27, 53, 41, 4, 5, + 5, 11, 15, 35, 41, 63, 11, 72, 40, 26, + 10, 14, 7, 17, 23, 35, 124, 45, 27, 13, + 5, 11, 2, 12, 14, 11, 10, 20, 16, 27, + 15, 15, 16, 18, 22, 16, 36, 42, 36, 10, + 22, 32, 28, 19, 3, 21, 53, 41, 29, 33, + 25, 27, 21, 19, 13, 19, 17, 11, 11, 21, + 35, 55, 31, 67, 25, 9, 13, 2, 11, 3, + 12, 3, 5, 5, 6, 7, 11, 39, 82, 76, + 80, 72, 60, 76, 68, 62, 54, 52, 54, 44, + 8, 8, 5, 38, 38, 26, 4, 20, 14, 3, + 9, 13, 21, 27, 53, 31, 63, 116, 114, 106, + 104, 92, 86, 80, 66, 66, 58, 52, 38, 26, + 6, 17, 68, 68, 18, 72, 78, 56, 44, 48, + 40, 32, 38, 26, 18, 2, 1, 15, 47, 16, + 3, 29, 73, 51, 59, 17, 23, 19, 44, 1, + 1, 9, 18, 68, 42, 80, 4, 102, 94, 66, + 44, 22, 14, 39, 55, 79, 15, 84, 56, 38, + 18, 22, 4, 3, 9, 35, 53, 37, 19, 25, + 7, 4, 9, 13, 4, 10, 12, 6, 20, 22, + 4, 100, 94, 80, 66, 56, 44, 8, 9, 37, + }, + + { + /* Context Tables for I, SI Slices :: qp = 19 */ + + 110, 14, 27, 110, 14, 27, 59, 25, 36, 22, + 8, 6, 68, 78, 106, 28, 21, 2, 2, 24, + 0, 0, 9, 13, 43, 20, 58, 67, 91, 101, + 17, 33, 9, 2, 24, 0, 29, 21, 20, 16, + 3, 19, 37, 2, 23, 33, 53, 10, 17, 37, + 8, 19, 27, 49, 10, 9, 11, 21, 0, 8, + 44, 0, 0, 0, 17, 53, 67, 0, 14, 3, + 104, 14, 11, 85, 45, 37, 3, 37, 61, 53, + 69, 19, 61, 51, 55, 77, 73, 79, 75, 30, + 3, 5, 49, 5, 39, 29, 55, 17, 51, 37, + 79, 24, 25, 7, 69, 41, 19, 19, 3, 10, + 0, 11, 20, 14, 13, 
4, 0, 3, 4, 23, + 24, 6, 34, 38, 24, 34, 34, 28, 20, 22, + 10, 22, 22, 29, 57, 37, 43, 23, 31, 25, + 3, 21, 21, 15, 15, 15, 17, 9, 33, 57, + 37, 63, 13, 21, 4, 7, 0, 20, 2, 3, + 4, 18, 7, 19, 2, 45, 70, 74, 58, 60, + 74, 58, 30, 62, 54, 14, 32, 28, 8, 2, + 11, 22, 10, 14, 16, 8, 1, 16, 6, 11, + 0, 5, 15, 5, 51, 104, 108, 100, 106, 100, + 94, 94, 94, 88, 70, 70, 62, 42, 34, 3, + 68, 56, 22, 72, 52, 52, 42, 34, 26, 28, + 12, 9, 1, 11, 41, 37, 71, 7, 58, 38, + 26, 8, 10, 1, 11, 17, 33, 4, 84, 56, + 44, 34, 36, 10, 5, 11, 21, 27, 52, 30, + 18, 6, 10, 5, 15, 27, 51, 39, 6, 3, + 3, 11, 13, 33, 39, 61, 9, 72, 40, 26, + 10, 16, 7, 15, 21, 33, 124, 43, 25, 11, + 5, 11, 2, 12, 14, 11, 10, 20, 16, 27, + 15, 15, 16, 20, 20, 14, 34, 40, 36, 8, + 20, 32, 28, 19, 5, 21, 51, 39, 27, 31, + 23, 25, 21, 17, 13, 19, 17, 9, 11, 21, + 35, 53, 29, 67, 25, 9, 13, 4, 11, 3, + 14, 3, 5, 5, 8, 9, 11, 41, 80, 74, + 78, 70, 58, 74, 66, 60, 52, 50, 52, 42, + 6, 6, 7, 36, 36, 24, 0, 16, 12, 5, + 9, 15, 21, 27, 53, 31, 61, 112, 110, 102, + 100, 86, 82, 76, 62, 62, 54, 48, 32, 22, + 2, 19, 64, 66, 14, 68, 74, 52, 40, 44, + 36, 28, 34, 22, 12, 1, 5, 19, 49, 12, + 7, 31, 71, 49, 57, 15, 21, 17, 46, 0, + 0, 9, 20, 72, 46, 84, 6, 98, 92, 62, + 38, 16, 8, 45, 59, 81, 15, 84, 56, 38, + 18, 22, 4, 3, 9, 35, 51, 35, 17, 23, + 5, 4, 9, 11, 4, 10, 12, 6, 20, 22, + 4, 98, 92, 76, 62, 52, 40, 4, 13, 39, + }, + + { + /* Context Tables for I, SI Slices :: qp = 20 */ + + 106, 14, 27, 106, 14, 27, 57, 23, 36, 22, + 8, 4, 66, 74, 106, 28, 15, 0, 4, 24, + 0, 1, 9, 15, 45, 18, 54, 71, 93, 103, + 11, 31, 9, 4, 24, 0, 29, 19, 20, 14, + 3, 19, 37, 2, 23, 31, 53, 10, 17, 35, + 6, 21, 27, 49, 10, 7, 11, 19, 0, 8, + 44, 0, 0, 0, 15, 55, 67, 0, 12, 3, + 104, 14, 11, 83, 43, 37, 3, 33, 59, 49, + 67, 17, 59, 47, 51, 75, 73, 77, 75, 30, + 3, 3, 47, 5, 39, 27, 51, 17, 51, 37, + 77, 24, 23, 7, 67, 41, 19, 19, 1, 10, + 0, 11, 20, 14, 15, 2, 0, 3, 4, 23, + 24, 6, 32, 38, 24, 34, 34, 26, 16, 22, + 10, 18, 20, 31, 55, 35, 41, 21, 
31, 23, + 1, 19, 21, 13, 13, 15, 17, 7, 35, 57, + 35, 63, 13, 21, 4, 7, 1, 22, 2, 3, + 0, 20, 9, 21, 2, 47, 66, 72, 56, 58, + 72, 56, 28, 60, 54, 10, 30, 26, 8, 2, + 11, 20, 10, 12, 16, 6, 3, 14, 4, 15, + 1, 7, 17, 7, 51, 100, 106, 96, 102, 96, + 90, 88, 90, 82, 66, 66, 56, 36, 30, 9, + 64, 54, 18, 66, 48, 48, 38, 28, 22, 22, + 8, 13, 3, 13, 45, 39, 71, 7, 58, 40, + 26, 8, 10, 1, 11, 17, 31, 6, 86, 56, + 44, 34, 38, 10, 3, 11, 19, 27, 54, 32, + 18, 6, 10, 5, 15, 25, 49, 39, 8, 3, + 3, 11, 11, 31, 39, 59, 9, 74, 40, 26, + 10, 18, 5, 15, 19, 31, 124, 43, 25, 11, + 5, 11, 2, 12, 14, 9, 10, 20, 18, 27, + 15, 15, 14, 20, 18, 12, 32, 38, 36, 4, + 18, 30, 28, 21, 7, 23, 51, 39, 25, 31, + 23, 25, 19, 15, 15, 19, 17, 9, 13, 21, + 35, 53, 27, 67, 25, 7, 13, 4, 11, 3, + 16, 3, 7, 5, 10, 9, 13, 43, 78, 74, + 78, 68, 56, 72, 64, 58, 50, 48, 50, 40, + 4, 4, 9, 32, 32, 20, 3, 14, 10, 7, + 11, 15, 21, 27, 53, 33, 61, 106, 104, 98, + 94, 80, 78, 72, 56, 58, 50, 44, 28, 18, + 0, 21, 60, 62, 12, 64, 70, 48, 36, 38, + 32, 24, 30, 18, 8, 5, 9, 23, 51, 8, + 11, 35, 69, 47, 55, 15, 19, 15, 50, 2, + 2, 9, 22, 74, 48, 86, 6, 96, 88, 56, + 32, 10, 2, 51, 63, 83, 15, 84, 56, 38, + 18, 22, 4, 3, 9, 33, 49, 33, 15, 23, + 3, 6, 7, 9, 6, 12, 14, 8, 22, 24, + 4, 94, 88, 72, 58, 48, 36, 0, 17, 41, + }, + + { + /* Context Tables for I, SI Slices :: qp = 21 */ + + 104, 14, 27, 104, 14, 27, 53, 19, 36, 22, + 6, 0, 62, 72, 106, 28, 11, 0, 6, 26, + 0, 1, 7, 17, 47, 18, 50, 75, 95, 103, + 5, 27, 9, 6, 26, 0, 29, 17, 20, 14, + 3, 19, 35, 2, 25, 31, 53, 10, 17, 33, + 6, 21, 27, 49, 12, 7, 9, 19, 2, 8, + 44, 0, 0, 0, 15, 55, 67, 2, 10, 3, + 104, 14, 11, 81, 41, 37, 1, 31, 55, 45, + 63, 15, 55, 45, 47, 73, 71, 75, 73, 32, + 3, 1, 45, 5, 37, 25, 47, 17, 51, 35, + 73, 26, 23, 5, 65, 39, 17, 19, 1, 12, + 2, 9, 22, 16, 15, 2, 0, 3, 4, 23, + 24, 4, 30, 38, 22, 34, 34, 26, 12, 22, + 8, 16, 18, 33, 53, 33, 39, 19, 29, 21, + 0, 17, 19, 11, 11, 15, 17, 7, 37, 55, + 33, 63, 11, 21, 6, 7, 3, 24, 2, 3, + 1, 22, 9, 23, 
2, 47, 64, 70, 56, 56, + 70, 54, 26, 60, 54, 8, 28, 26, 8, 2, + 13, 20, 8, 10, 14, 4, 5, 14, 2, 19, + 3, 9, 19, 9, 51, 96, 104, 92, 98, 94, + 86, 84, 86, 78, 62, 62, 52, 32, 26, 13, + 60, 50, 14, 62, 44, 44, 34, 24, 18, 16, + 4, 19, 5, 17, 47, 39, 71, 7, 58, 40, + 26, 8, 10, 1, 11, 17, 31, 6, 86, 56, + 44, 34, 40, 12, 1, 11, 17, 25, 56, 34, + 18, 8, 12, 3, 13, 25, 47, 37, 12, 1, + 3, 9, 9, 29, 37, 57, 7, 74, 42, 26, + 10, 20, 3, 13, 17, 29, 124, 43, 25, 9, + 5, 11, 2, 12, 14, 9, 10, 22, 18, 27, + 15, 15, 12, 22, 16, 10, 30, 38, 36, 2, + 16, 30, 28, 23, 9, 23, 49, 37, 23, 29, + 21, 23, 19, 13, 15, 19, 17, 9, 15, 21, + 35, 51, 25, 67, 25, 7, 13, 6, 11, 3, + 18, 3, 7, 5, 12, 11, 13, 43, 76, 74, + 76, 68, 54, 70, 62, 56, 50, 46, 48, 38, + 2, 2, 11, 30, 30, 18, 7, 12, 8, 9, + 13, 17, 21, 25, 53, 33, 59, 102, 100, 94, + 90, 76, 74, 68, 52, 54, 46, 40, 24, 14, + 3, 23, 56, 58, 10, 60, 66, 44, 32, 34, + 28, 20, 26, 14, 2, 9, 13, 27, 53, 4, + 15, 37, 67, 43, 53, 13, 17, 13, 54, 4, + 4, 9, 24, 78, 52, 90, 8, 92, 84, 52, + 26, 6, 3, 55, 67, 85, 15, 84, 56, 38, + 18, 22, 4, 3, 9, 33, 47, 31, 13, 21, + 1, 8, 7, 7, 8, 14, 16, 8, 22, 24, + 4, 92, 86, 70, 56, 44, 32, 3, 21, 43, + }, + + { + /* Context Tables for I, SI Slices :: qp = 22 */ + + 102, 14, 29, 102, 14, 29, 49, 17, 38, 22, + 6, 1, 60, 70, 106, 28, 7, 0, 6, 28, + 0, 3, 7, 19, 49, 16, 48, 77, 97, 105, + 0, 25, 9, 6, 28, 0, 27, 15, 22, 12, + 1, 17, 33, 0, 25, 31, 53, 10, 15, 33, + 6, 21, 25, 49, 12, 7, 9, 19, 2, 8, + 44, 0, 0, 0, 13, 55, 67, 2, 10, 5, + 104, 14, 11, 79, 41, 37, 1, 29, 51, 41, + 61, 13, 51, 43, 43, 71, 69, 73, 71, 34, + 3, 0, 45, 3, 37, 25, 43, 17, 49, 35, + 71, 26, 23, 5, 61, 39, 15, 17, 1, 12, + 2, 7, 22, 16, 17, 2, 0, 3, 4, 23, + 24, 4, 30, 38, 20, 34, 34, 26, 8, 22, + 8, 12, 16, 35, 53, 31, 39, 19, 27, 21, + 2, 17, 19, 11, 7, 15, 17, 7, 39, 55, + 31, 63, 11, 21, 8, 5, 5, 24, 2, 3, + 3, 24, 11, 25, 2, 49, 60, 68, 54, 56, + 68, 54, 26, 58, 52, 6, 26, 26, 8, 2, + 13, 18, 6, 8, 12, 4, 7, 12, 2, 23, + 5, 9, 
21, 11, 51, 94, 100, 90, 94, 90, + 82, 80, 82, 72, 56, 58, 46, 26, 20, 19, + 56, 46, 12, 58, 38, 38, 28, 20, 12, 12, + 1, 23, 7, 21, 49, 41, 71, 5, 60, 40, + 28, 8, 10, 1, 11, 17, 29, 6, 86, 58, + 44, 34, 42, 12, 1, 9, 17, 23, 58, 34, + 20, 8, 14, 3, 13, 23, 45, 35, 14, 0, + 1, 9, 9, 29, 35, 53, 7, 74, 42, 26, + 12, 20, 3, 11, 17, 27, 124, 41, 23, 9, + 5, 11, 2, 12, 14, 9, 12, 22, 20, 27, + 15, 17, 12, 22, 14, 8, 28, 36, 36, 0, + 14, 30, 28, 23, 11, 25, 47, 35, 21, 29, + 21, 23, 17, 13, 15, 17, 15, 7, 15, 21, + 35, 51, 23, 67, 25, 7, 13, 8, 11, 3, + 20, 5, 7, 5, 12, 13, 13, 45, 74, 72, + 76, 66, 52, 70, 62, 54, 48, 46, 46, 36, + 0, 0, 13, 26, 28, 14, 11, 8, 4, 11, + 13, 17, 21, 25, 51, 35, 57, 98, 96, 90, + 86, 70, 70, 64, 46, 50, 40, 36, 18, 10, + 5, 23, 54, 56, 6, 56, 60, 40, 28, 30, + 24, 16, 20, 10, 1, 13, 17, 29, 57, 0, + 19, 41, 65, 41, 51, 11, 15, 11, 56, 6, + 6, 9, 26, 80, 54, 92, 10, 90, 82, 46, + 20, 0, 7, 61, 71, 89, 13, 84, 56, 38, + 18, 22, 4, 3, 9, 31, 45, 31, 11, 19, + 0, 8, 5, 5, 8, 14, 16, 8, 24, 24, + 4, 90, 82, 66, 52, 42, 30, 9, 23, 45, + }, + + { + /* Context Tables for I, SI Slices :: qp = 23 */ + + 100, 14, 29, 100, 14, 29, 45, 13, 38, 22, + 4, 5, 56, 66, 106, 28, 1, 1, 8, 28, + 0, 5, 7, 21, 51, 16, 44, 81, 99, 105, + 6, 23, 9, 8, 28, 0, 27, 13, 22, 12, + 1, 17, 33, 0, 27, 29, 53, 10, 15, 31, + 4, 21, 25, 49, 14, 5, 9, 17, 4, 8, + 44, 0, 0, 0, 13, 55, 67, 4, 8, 5, + 104, 14, 11, 77, 39, 37, 0, 25, 49, 37, + 57, 11, 49, 39, 39, 69, 67, 71, 71, 36, + 3, 2, 43, 3, 35, 23, 39, 17, 49, 33, + 69, 26, 21, 3, 59, 37, 15, 17, 0, 12, + 2, 5, 24, 18, 17, 0, 0, 3, 4, 23, + 24, 2, 28, 38, 20, 34, 34, 24, 4, 22, + 6, 8, 14, 37, 51, 29, 37, 17, 25, 19, + 4, 15, 17, 9, 5, 15, 17, 5, 41, 53, + 29, 63, 11, 21, 8, 5, 7, 26, 2, 3, + 7, 26, 11, 27, 2, 51, 58, 66, 54, 54, + 66, 52, 24, 56, 52, 4, 24, 26, 8, 2, + 15, 16, 6, 6, 12, 2, 9, 10, 0, 27, + 7, 11, 23, 13, 51, 90, 98, 86, 90, 86, + 78, 74, 78, 68, 52, 54, 42, 22, 16, 23, + 52, 44, 8, 52, 34, 34, 24, 14, 8, 
6, + 5, 29, 9, 23, 53, 43, 71, 5, 60, 42, + 28, 8, 10, 1, 11, 17, 29, 8, 88, 58, + 44, 34, 44, 14, 0, 9, 15, 21, 60, 36, + 20, 8, 16, 1, 11, 23, 43, 35, 16, 0, + 1, 9, 7, 27, 35, 51, 5, 76, 42, 26, + 12, 22, 1, 9, 15, 25, 124, 41, 23, 7, + 5, 11, 2, 12, 14, 7, 12, 22, 20, 27, + 15, 17, 10, 24, 12, 6, 26, 34, 36, 1, + 12, 28, 28, 25, 13, 25, 45, 35, 19, 27, + 19, 21, 17, 11, 15, 17, 15, 7, 17, 21, + 35, 49, 21, 67, 25, 5, 13, 8, 11, 3, + 22, 5, 7, 5, 14, 13, 15, 47, 72, 72, + 74, 64, 50, 68, 60, 52, 46, 44, 44, 34, + 1, 1, 15, 24, 24, 12, 15, 6, 2, 13, + 15, 19, 21, 25, 51, 35, 55, 94, 92, 86, + 80, 64, 66, 60, 42, 46, 36, 32, 14, 6, + 9, 25, 50, 52, 4, 52, 56, 36, 24, 26, + 20, 12, 16, 6, 7, 17, 21, 33, 59, 3, + 23, 43, 63, 39, 49, 11, 13, 9, 60, 8, + 8, 9, 28, 84, 58, 96, 10, 86, 78, 42, + 14, 5, 13, 67, 75, 91, 13, 84, 56, 38, + 18, 22, 4, 3, 9, 31, 43, 29, 9, 17, + 2, 10, 5, 3, 10, 16, 18, 10, 24, 26, + 4, 86, 80, 62, 48, 38, 26, 13, 27, 47, + }, + + { + /* Context Tables for I, SI Slices :: qp = 24 */ + + 96, 12, 29, 96, 12, 29, 43, 11, 38, 22, + 4, 7, 54, 64, 106, 28, 2, 1, 10, 30, + 0, 7, 7, 23, 55, 14, 40, 85, 101, 107, + 10, 21, 9, 10, 30, 0, 27, 11, 22, 10, + 1, 17, 31, 1, 27, 29, 55, 10, 15, 31, + 4, 23, 25, 49, 14, 5, 9, 17, 4, 8, + 44, 0, 0, 0, 11, 57, 67, 4, 6, 5, + 104, 14, 11, 75, 37, 37, 0, 23, 45, 33, + 55, 9, 45, 37, 37, 67, 67, 69, 69, 36, + 3, 4, 41, 3, 35, 23, 37, 17, 49, 33, + 67, 26, 21, 3, 57, 37, 13, 17, 0, 12, + 2, 5, 24, 18, 19, 0, 0, 3, 4, 23, + 24, 2, 26, 38, 18, 34, 34, 24, 0, 22, + 6, 4, 12, 39, 49, 27, 35, 15, 25, 19, + 6, 15, 17, 7, 3, 15, 17, 5, 45, 53, + 27, 63, 11, 23, 10, 5, 9, 28, 2, 3, + 9, 28, 13, 29, 2, 53, 54, 64, 52, 52, + 64, 50, 22, 54, 50, 0, 22, 24, 8, 2, + 15, 14, 4, 4, 10, 0, 11, 8, 1, 31, + 11, 13, 25, 15, 51, 86, 94, 82, 86, 82, + 74, 70, 74, 62, 46, 50, 36, 16, 10, 29, + 48, 40, 4, 48, 30, 28, 18, 10, 2, 0, + 9, 33, 11, 27, 55, 45, 73, 5, 60, 42, + 28, 8, 10, 1, 11, 17, 27, 8, 88, 58, + 44, 34, 46, 14, 0, 9, 15, 
21, 62, 36, + 20, 8, 16, 1, 11, 21, 41, 33, 18, 2, + 1, 9, 5, 25, 33, 49, 5, 76, 42, 26, + 12, 24, 1, 9, 13, 25, 124, 41, 23, 7, + 5, 11, 2, 12, 14, 7, 12, 22, 22, 27, + 15, 17, 8, 24, 10, 4, 24, 32, 36, 5, + 8, 28, 28, 27, 15, 27, 45, 33, 17, 27, + 19, 21, 15, 9, 17, 17, 15, 7, 19, 21, + 35, 49, 21, 69, 25, 5, 13, 10, 11, 3, + 22, 5, 9, 5, 16, 15, 15, 49, 70, 70, + 74, 62, 46, 66, 58, 50, 44, 42, 42, 32, + 3, 3, 17, 20, 22, 8, 19, 2, 0, 15, + 17, 19, 21, 25, 51, 37, 55, 88, 86, 82, + 76, 58, 60, 54, 36, 42, 32, 28, 8, 2, + 11, 27, 46, 48, 0, 48, 52, 30, 18, 20, + 14, 8, 12, 0, 11, 21, 27, 37, 61, 7, + 27, 47, 61, 37, 47, 9, 11, 9, 62, 10, + 8, 9, 28, 86, 60, 98, 12, 84, 74, 36, + 8, 11, 19, 73, 79, 93, 13, 84, 56, 38, + 18, 22, 4, 3, 9, 29, 43, 27, 7, 17, + 4, 10, 3, 3, 10, 16, 18, 10, 26, 26, + 2, 84, 76, 58, 44, 34, 22, 17, 31, 49, + }, + + { + /* Context Tables for I, SI Slices :: qp = 25 */ + + 94, 12, 29, 94, 12, 29, 39, 9, 40, 22, + 4, 9, 52, 62, 106, 28, 6, 1, 12, 32, + 0, 7, 5, 25, 57, 12, 36, 87, 103, 109, + 16, 17, 9, 12, 32, 0, 25, 9, 24, 8, + 1, 15, 29, 1, 27, 29, 55, 10, 15, 29, + 4, 23, 25, 49, 14, 5, 7, 17, 4, 8, + 44, 0, 0, 0, 9, 57, 67, 4, 4, 5, + 104, 14, 11, 73, 35, 37, 0, 21, 41, 29, + 51, 5, 41, 35, 33, 65, 65, 67, 67, 38, + 1, 6, 39, 3, 33, 21, 33, 17, 47, 33, + 63, 28, 21, 3, 53, 37, 11, 17, 0, 14, + 4, 3, 24, 18, 19, 0, 2, 3, 4, 21, + 24, 2, 26, 38, 16, 34, 34, 24, 3, 22, + 6, 2, 10, 39, 47, 25, 33, 13, 23, 17, + 8, 13, 17, 5, 1, 15, 17, 5, 47, 51, + 25, 63, 9, 23, 12, 5, 11, 30, 2, 3, + 11, 30, 15, 31, 2, 53, 52, 62, 52, 50, + 62, 48, 22, 54, 50, 1, 20, 24, 8, 2, + 15, 14, 2, 2, 8, 1, 13, 8, 1, 33, + 13, 13, 27, 17, 51, 84, 92, 80, 84, 80, + 70, 66, 70, 58, 42, 46, 32, 10, 6, 35, + 46, 36, 2, 44, 26, 24, 14, 6, 1, 3, + 13, 37, 11, 31, 57, 45, 73, 5, 62, 42, + 28, 10, 10, 1, 9, 17, 25, 8, 88, 58, + 46, 34, 48, 16, 2, 7, 13, 19, 64, 38, + 22, 10, 18, 0, 11, 19, 39, 31, 22, 4, + 0, 7, 3, 23, 31, 47, 3, 76, 44, 28, + 12, 26, 0, 7, 11, 23, 124, 
39, 21, 5, + 5, 11, 2, 14, 16, 7, 12, 24, 24, 27, + 15, 17, 8, 24, 8, 4, 22, 32, 36, 7, + 6, 28, 28, 27, 17, 29, 43, 31, 15, 27, + 17, 21, 13, 7, 17, 17, 15, 5, 19, 21, + 35, 49, 19, 69, 25, 5, 13, 12, 11, 3, + 24, 5, 9, 5, 18, 17, 15, 49, 68, 70, + 74, 62, 44, 64, 56, 48, 44, 40, 40, 32, + 3, 5, 19, 18, 20, 4, 23, 0, 1, 15, + 17, 19, 19, 23, 51, 39, 53, 84, 82, 78, + 72, 54, 56, 50, 32, 38, 28, 24, 4, 1, + 13, 29, 42, 46, 1, 44, 48, 26, 14, 16, + 10, 4, 8, 3, 15, 23, 31, 41, 63, 11, + 31, 51, 59, 33, 43, 7, 9, 7, 66, 12, + 10, 9, 30, 90, 62, 102, 14, 82, 72, 32, + 2, 15, 25, 77, 83, 95, 13, 84, 56, 38, + 18, 24, 4, 3, 9, 27, 41, 25, 5, 15, + 8, 12, 1, 1, 12, 18, 20, 10, 28, 26, + 2, 82, 74, 56, 42, 30, 18, 21, 35, 49, + }, + + { + /* Context Tables for I, SI Slices :: qp = 26 */ + + 92, 12, 29, 92, 12, 29, 35, 5, 40, 22, + 2, 13, 48, 58, 106, 28, 12, 3, 14, 32, + 0, 9, 5, 27, 59, 12, 32, 91, 105, 109, + 22, 15, 9, 14, 32, 0, 25, 7, 24, 8, + 1, 15, 29, 1, 29, 27, 55, 10, 15, 27, + 2, 23, 25, 49, 16, 3, 7, 15, 6, 8, + 44, 0, 0, 0, 9, 57, 67, 6, 2, 5, + 104, 14, 11, 71, 33, 37, 2, 17, 39, 25, + 49, 3, 39, 31, 29, 63, 63, 65, 67, 40, + 1, 8, 37, 3, 33, 19, 29, 17, 47, 31, + 61, 28, 19, 1, 51, 35, 11, 17, 2, 14, + 4, 1, 26, 20, 21, 1, 2, 3, 4, 21, + 24, 0, 24, 38, 16, 34, 34, 22, 7, 22, + 4, 1, 8, 41, 45, 23, 31, 11, 21, 15, + 10, 11, 15, 3, 0, 15, 17, 3, 49, 51, + 23, 63, 9, 23, 12, 5, 13, 32, 2, 3, + 15, 32, 15, 33, 2, 55, 48, 60, 50, 48, + 60, 46, 20, 52, 50, 3, 18, 24, 8, 2, + 17, 12, 2, 0, 8, 3, 15, 6, 3, 37, + 15, 15, 29, 19, 51, 80, 90, 76, 80, 76, + 66, 60, 66, 52, 38, 42, 26, 6, 2, 39, + 42, 34, 1, 38, 22, 20, 10, 0, 5, 9, + 17, 43, 13, 33, 61, 47, 73, 5, 62, 44, + 28, 10, 10, 1, 9, 17, 25, 10, 90, 58, + 46, 34, 50, 16, 4, 7, 11, 17, 66, 40, + 22, 10, 20, 0, 9, 19, 37, 31, 24, 4, + 0, 7, 1, 21, 31, 45, 3, 78, 44, 28, + 12, 28, 2, 5, 9, 21, 124, 39, 21, 5, + 5, 11, 2, 14, 16, 5, 12, 24, 24, 27, + 15, 17, 6, 26, 6, 2, 20, 30, 36, 9, + 4, 26, 28, 29, 19, 29, 41, 
31, 13, 25, + 17, 19, 13, 5, 17, 17, 15, 5, 21, 21, + 35, 47, 17, 69, 25, 3, 13, 12, 11, 3, + 26, 5, 9, 5, 20, 17, 17, 51, 66, 70, + 72, 60, 42, 62, 54, 46, 42, 38, 38, 30, + 5, 7, 21, 14, 16, 2, 27, 1, 3, 17, + 19, 21, 19, 23, 51, 39, 51, 80, 78, 74, + 66, 48, 52, 46, 26, 34, 24, 20, 0, 5, + 17, 31, 38, 42, 3, 40, 44, 22, 10, 12, + 6, 0, 4, 7, 21, 27, 35, 45, 65, 15, + 35, 53, 57, 31, 41, 7, 7, 5, 70, 14, + 12, 9, 32, 92, 66, 104, 14, 78, 68, 26, + 3, 21, 31, 83, 87, 97, 13, 84, 56, 38, + 18, 24, 4, 3, 9, 27, 39, 23, 3, 13, + 10, 14, 1, 0, 14, 20, 22, 12, 28, 28, + 2, 78, 70, 52, 38, 26, 14, 25, 39, 51, + }, + + { + /* Context Tables for I, SI Slices :: qp = 27 */ + + 90, 12, 31, 90, 12, 31, 31, 3, 42, 22, + 2, 15, 46, 56, 106, 28, 16, 3, 14, 34, + 0, 11, 5, 29, 61, 10, 30, 93, 107, 111, + 28, 13, 9, 14, 34, 0, 23, 5, 26, 6, + 0, 13, 27, 3, 29, 27, 55, 10, 13, 27, + 2, 23, 23, 49, 16, 3, 7, 15, 6, 8, + 44, 0, 0, 0, 7, 57, 67, 6, 2, 7, + 104, 14, 11, 69, 33, 37, 2, 15, 35, 21, + 45, 1, 35, 29, 25, 61, 61, 63, 65, 42, + 1, 10, 37, 1, 31, 19, 25, 17, 45, 31, + 59, 28, 19, 1, 47, 35, 9, 15, 2, 14, + 4, 0, 26, 20, 21, 1, 2, 3, 4, 21, + 24, 0, 24, 38, 14, 34, 34, 22, 11, 22, + 4, 5, 6, 43, 45, 21, 31, 11, 19, 15, + 12, 11, 15, 3, 4, 15, 17, 3, 51, 49, + 21, 63, 9, 23, 14, 3, 15, 32, 2, 3, + 17, 34, 17, 35, 2, 57, 46, 58, 50, 48, + 58, 46, 20, 50, 48, 5, 16, 24, 8, 2, + 17, 10, 0, 1, 6, 3, 17, 4, 3, 41, + 17, 15, 31, 21, 51, 78, 86, 74, 76, 72, + 62, 56, 62, 48, 32, 38, 22, 0, 3, 45, + 38, 30, 3, 34, 16, 14, 4, 3, 11, 13, + 23, 47, 15, 37, 63, 49, 73, 3, 64, 44, + 30, 10, 10, 1, 9, 17, 23, 10, 90, 60, + 46, 34, 52, 18, 4, 5, 11, 15, 68, 40, + 24, 10, 22, 2, 9, 17, 35, 29, 26, 6, + 2, 7, 1, 21, 29, 41, 1, 78, 44, 28, + 14, 28, 2, 3, 9, 19, 124, 37, 19, 3, + 5, 11, 2, 14, 16, 5, 14, 24, 26, 27, + 15, 19, 6, 26, 4, 0, 18, 28, 36, 11, + 2, 26, 28, 29, 21, 31, 39, 29, 11, 25, + 15, 19, 11, 5, 17, 15, 13, 3, 21, 21, + 35, 47, 15, 69, 25, 3, 13, 14, 11, 3, + 28, 7, 9, 5, 20, 
19, 17, 53, 64, 68, + 72, 58, 40, 62, 54, 44, 40, 38, 36, 28, + 7, 9, 23, 12, 14, 1, 31, 5, 7, 19, + 19, 21, 19, 23, 49, 41, 49, 76, 74, 70, + 62, 42, 48, 42, 22, 30, 18, 16, 5, 9, + 19, 31, 36, 40, 7, 36, 38, 18, 6, 8, + 2, 3, 1, 11, 25, 31, 39, 47, 69, 19, + 39, 57, 55, 29, 39, 5, 5, 3, 72, 16, + 14, 9, 34, 96, 68, 108, 16, 76, 66, 22, + 9, 27, 35, 89, 91, 101, 11, 84, 56, 38, + 18, 24, 4, 3, 9, 25, 37, 23, 1, 11, + 12, 14, 0, 2, 14, 20, 22, 12, 30, 28, + 2, 76, 68, 48, 34, 24, 12, 31, 41, 53, + }, + + { + /* Context Tables for I, SI Slices :: qp = 28 */ + + 86, 12, 31, 86, 12, 31, 29, 0, 42, 22, + 0, 19, 42, 54, 106, 28, 20, 3, 16, 36, + 0, 13, 5, 31, 63, 10, 26, 97, 109, 111, + 34, 11, 9, 16, 36, 0, 23, 3, 26, 6, + 0, 13, 25, 3, 31, 27, 55, 10, 13, 25, + 2, 25, 23, 49, 18, 3, 7, 15, 8, 8, + 44, 0, 0, 0, 7, 59, 67, 8, 0, 7, + 104, 14, 11, 67, 31, 37, 4, 13, 31, 17, + 43, 0, 31, 27, 21, 59, 61, 61, 63, 42, + 1, 12, 35, 1, 31, 17, 21, 17, 45, 29, + 57, 28, 19, 0, 45, 33, 7, 15, 2, 14, + 4, 0, 28, 22, 23, 1, 2, 3, 4, 21, + 24, 1, 22, 38, 12, 34, 34, 22, 15, 22, + 2, 9, 4, 45, 43, 19, 29, 9, 19, 13, + 14, 9, 13, 1, 6, 15, 17, 3, 53, 49, + 19, 63, 9, 23, 16, 3, 17, 34, 2, 3, + 19, 36, 17, 37, 2, 59, 42, 56, 48, 46, + 56, 44, 18, 48, 48, 9, 14, 22, 8, 2, + 19, 8, 1, 3, 4, 5, 19, 2, 5, 45, + 19, 17, 33, 23, 51, 74, 84, 70, 72, 68, + 58, 52, 58, 42, 28, 34, 16, 3, 7, 49, + 34, 26, 7, 30, 12, 10, 0, 7, 15, 19, + 27, 53, 17, 41, 65, 51, 73, 3, 64, 44, + 30, 10, 10, 1, 9, 17, 23, 10, 90, 60, + 46, 34, 54, 18, 6, 5, 9, 15, 70, 42, + 24, 10, 22, 2, 7, 17, 33, 27, 28, 8, + 2, 7, 0, 19, 27, 39, 1, 78, 44, 28, + 14, 30, 4, 3, 7, 17, 124, 37, 19, 3, + 5, 11, 2, 14, 16, 5, 14, 24, 26, 27, + 15, 19, 4, 28, 2, 1, 16, 26, 36, 15, + 0, 26, 28, 31, 23, 31, 39, 27, 9, 23, + 15, 17, 11, 3, 19, 15, 13, 3, 23, 21, + 35, 45, 13, 69, 25, 3, 13, 16, 11, 3, + 30, 7, 11, 5, 22, 21, 17, 55, 62, 68, + 70, 56, 38, 60, 52, 42, 38, 36, 34, 26, + 9, 11, 25, 8, 12, 3, 35, 7, 9, 21, + 21, 23, 19, 23, 
49, 41, 49, 70, 68, 66, + 58, 36, 44, 38, 16, 26, 14, 12, 9, 13, + 23, 33, 32, 36, 9, 32, 34, 14, 2, 2, + 1, 7, 5, 15, 31, 35, 43, 51, 71, 23, + 43, 59, 53, 27, 37, 3, 3, 1, 76, 18, + 16, 9, 36, 98, 72, 110, 18, 72, 62, 16, + 15, 33, 41, 95, 95, 103, 11, 84, 56, 38, + 18, 24, 4, 3, 9, 25, 35, 21, 0, 11, + 14, 16, 0, 4, 16, 22, 24, 12, 30, 28, + 2, 74, 64, 44, 30, 20, 8, 35, 45, 55, + }, + + { + /* Context Tables for I, SI Slices :: qp = 29 */ + + 84, 12, 31, 84, 12, 31, 25, 2, 42, 22, + 0, 21, 40, 50, 106, 28, 26, 5, 18, 36, + 0, 13, 3, 33, 65, 8, 22, 101, 111, 113, + 40, 7, 9, 18, 36, 0, 23, 1, 26, 4, + 0, 13, 25, 3, 31, 25, 55, 10, 13, 23, + 0, 25, 23, 49, 18, 1, 5, 13, 8, 8, + 44, 0, 0, 0, 5, 59, 67, 8, 1, 7, + 104, 14, 11, 65, 29, 37, 4, 9, 29, 13, + 39, 2, 29, 23, 17, 57, 59, 59, 63, 44, + 1, 14, 33, 1, 29, 15, 17, 17, 45, 29, + 53, 30, 17, 0, 43, 33, 7, 15, 4, 16, + 6, 2, 28, 22, 23, 3, 2, 3, 4, 21, + 24, 1, 20, 38, 12, 34, 34, 20, 19, 22, + 2, 11, 2, 47, 41, 17, 27, 7, 17, 11, + 16, 7, 13, 0, 8, 15, 17, 1, 55, 47, + 17, 63, 7, 23, 16, 3, 19, 36, 2, 3, + 23, 38, 19, 39, 2, 59, 40, 54, 48, 44, + 54, 42, 16, 48, 48, 11, 12, 22, 8, 2, + 19, 8, 1, 5, 4, 7, 21, 2, 7, 49, + 21, 19, 35, 25, 51, 70, 82, 66, 68, 66, + 54, 46, 54, 38, 24, 30, 12, 9, 11, 55, + 30, 24, 11, 24, 8, 6, 3, 13, 19, 25, + 31, 57, 19, 43, 69, 51, 73, 3, 64, 46, + 30, 10, 10, 1, 9, 17, 21, 12, 92, 60, + 46, 34, 56, 20, 8, 5, 7, 13, 72, 44, + 24, 12, 24, 4, 7, 15, 31, 27, 32, 8, + 2, 5, 2, 17, 27, 37, 0, 80, 46, 28, + 14, 32, 6, 1, 5, 15, 124, 37, 19, 1, + 5, 11, 2, 14, 16, 3, 14, 26, 28, 27, + 15, 19, 2, 28, 0, 3, 14, 26, 36, 17, + 1, 24, 28, 33, 25, 33, 37, 27, 7, 23, + 13, 17, 9, 1, 19, 15, 13, 3, 25, 21, + 35, 45, 11, 69, 25, 1, 13, 16, 11, 3, + 32, 7, 11, 5, 24, 21, 19, 55, 60, 68, + 70, 56, 36, 58, 50, 40, 38, 34, 32, 24, + 11, 13, 27, 6, 8, 7, 39, 9, 11, 23, + 23, 23, 19, 21, 49, 43, 47, 66, 64, 62, + 52, 32, 40, 34, 12, 22, 10, 8, 13, 17, + 25, 35, 28, 32, 11, 28, 30, 10, 1, 1, + 5, 11, 
9, 19, 35, 39, 47, 55, 73, 27, + 47, 63, 51, 23, 35, 3, 1, 0, 80, 20, + 18, 9, 38, 102, 74, 114, 18, 70, 58, 12, + 21, 37, 47, 99, 99, 105, 11, 84, 56, 38, + 18, 24, 4, 3, 9, 23, 33, 19, 2, 9, + 16, 18, 2, 6, 18, 24, 26, 14, 32, 30, + 2, 70, 62, 42, 28, 16, 4, 39, 49, 57, + }, + + { + /* Context Tables for I, SI Slices :: qp = 30 */ + + 82, 12, 31, 82, 12, 31, 21, 6, 44, 22, + 1, 25, 36, 48, 106, 28, 30, 5, 20, 38, + 0, 15, 3, 35, 67, 8, 18, 103, 113, 113, + 46, 5, 9, 20, 38, 0, 21, 0, 28, 4, + 0, 11, 23, 5, 33, 25, 55, 10, 13, 23, + 0, 25, 23, 49, 20, 1, 5, 13, 10, 8, + 44, 0, 0, 0, 5, 59, 67, 10, 3, 7, + 104, 14, 11, 63, 27, 37, 6, 7, 25, 9, + 37, 4, 25, 21, 13, 55, 57, 57, 61, 46, + 1, 16, 31, 1, 29, 15, 13, 17, 43, 27, + 51, 30, 17, 2, 39, 31, 5, 15, 4, 16, + 6, 4, 30, 24, 25, 3, 2, 3, 4, 21, + 24, 3, 20, 38, 10, 34, 34, 20, 23, 22, + 0, 15, 0, 49, 39, 15, 25, 5, 15, 11, + 18, 7, 11, 2, 10, 15, 17, 1, 57, 47, + 15, 63, 7, 23, 18, 3, 21, 38, 2, 3, + 25, 40, 19, 41, 2, 61, 36, 52, 46, 42, + 52, 40, 16, 46, 46, 13, 10, 22, 8, 2, + 21, 6, 3, 7, 2, 9, 23, 0, 7, 53, + 23, 19, 37, 27, 51, 68, 78, 64, 64, 62, + 50, 42, 50, 32, 18, 26, 6, 13, 17, 59, + 26, 20, 13, 20, 4, 0, 9, 17, 25, 29, + 35, 63, 21, 47, 71, 53, 73, 3, 66, 46, + 30, 10, 10, 1, 9, 17, 21, 12, 92, 60, + 46, 34, 58, 20, 8, 3, 7, 11, 74, 44, + 26, 12, 26, 4, 5, 15, 29, 25, 34, 10, + 4, 5, 4, 15, 25, 35, 0, 80, 46, 28, + 14, 34, 6, 0, 3, 13, 124, 35, 17, 1, + 5, 11, 2, 14, 16, 3, 14, 26, 28, 27, + 15, 19, 2, 30, 1, 5, 12, 24, 36, 19, + 3, 24, 28, 33, 27, 33, 35, 25, 5, 21, + 13, 15, 9, 0, 19, 15, 13, 1, 25, 21, + 35, 43, 9, 69, 25, 1, 13, 18, 11, 3, + 34, 7, 11, 5, 26, 23, 19, 57, 58, 66, + 68, 54, 34, 56, 48, 38, 36, 32, 30, 22, + 13, 15, 29, 2, 6, 9, 43, 13, 13, 25, + 23, 25, 19, 21, 49, 43, 45, 62, 60, 58, + 48, 26, 36, 30, 6, 18, 6, 4, 19, 21, + 29, 37, 24, 30, 15, 24, 26, 6, 5, 5, + 9, 15, 13, 23, 41, 43, 51, 59, 75, 31, + 51, 65, 49, 21, 33, 1, 0, 2, 82, 22, + 20, 9, 40, 104, 78, 116, 20, 66, 56, 6, 
+ 27, 43, 53, 105, 103, 107, 11, 84, 56, 38, + 18, 24, 4, 3, 9, 23, 31, 17, 4, 7, + 18, 18, 2, 8, 18, 24, 26, 14, 32, 30, + 2, 68, 58, 38, 24, 12, 0, 43, 53, 59, + }, + + { + /* Context Tables for I, SI Slices :: qp = 31 */ + + 80, 12, 31, 80, 12, 31, 17, 8, 44, 22, + 1, 27, 34, 46, 106, 28, 34, 5, 22, 40, + 0, 17, 3, 37, 69, 6, 14, 107, 115, 115, + 52, 3, 9, 22, 40, 0, 21, 2, 28, 2, + 0, 11, 21, 5, 33, 25, 55, 10, 13, 21, + 0, 25, 23, 49, 20, 1, 5, 13, 10, 8, + 44, 0, 0, 0, 3, 59, 67, 10, 5, 7, + 104, 14, 11, 61, 25, 37, 6, 5, 21, 5, + 33, 6, 21, 19, 9, 53, 55, 55, 59, 48, + 1, 18, 29, 1, 27, 13, 9, 17, 43, 27, + 49, 30, 17, 2, 37, 31, 3, 15, 4, 16, + 6, 6, 30, 24, 25, 3, 2, 3, 4, 21, + 24, 3, 18, 38, 8, 34, 34, 20, 27, 22, + 0, 19, 1, 51, 37, 13, 23, 3, 13, 9, + 20, 5, 11, 4, 12, 15, 17, 1, 59, 45, + 13, 63, 7, 23, 20, 3, 23, 40, 2, 3, + 27, 42, 21, 43, 2, 63, 34, 50, 46, 40, + 50, 38, 14, 44, 46, 15, 8, 22, 8, 2, + 21, 4, 5, 9, 0, 11, 25, 1, 9, 57, + 25, 21, 39, 29, 51, 64, 76, 60, 60, 58, + 46, 38, 46, 28, 14, 22, 2, 19, 21, 65, + 22, 16, 17, 16, 0, 3, 13, 21, 29, 35, + 39, 67, 23, 51, 73, 55, 73, 3, 66, 46, + 30, 10, 10, 1, 9, 17, 19, 12, 92, 60, + 46, 34, 60, 22, 10, 3, 5, 9, 76, 46, + 26, 12, 28, 6, 5, 13, 27, 23, 36, 12, + 4, 5, 6, 13, 23, 33, 2, 80, 46, 28, + 14, 36, 8, 2, 1, 11, 124, 35, 17, 0, + 5, 11, 2, 14, 16, 3, 14, 26, 30, 27, + 15, 19, 0, 30, 3, 7, 10, 22, 36, 21, + 5, 24, 28, 35, 29, 35, 33, 23, 3, 21, + 11, 15, 7, 2, 19, 15, 13, 1, 27, 21, + 35, 43, 7, 69, 25, 1, 13, 20, 11, 3, + 36, 7, 11, 5, 28, 25, 19, 59, 56, 66, + 68, 52, 32, 54, 46, 36, 34, 30, 28, 20, + 15, 17, 31, 0, 4, 13, 47, 15, 15, 27, + 25, 25, 19, 21, 49, 45, 43, 58, 56, 54, + 44, 20, 32, 26, 2, 14, 2, 0, 23, 25, + 31, 39, 20, 26, 17, 20, 22, 2, 9, 9, + 13, 19, 17, 27, 45, 47, 55, 63, 77, 35, + 55, 69, 47, 19, 31, 0, 2, 4, 86, 24, + 22, 9, 42, 108, 80, 120, 22, 64, 52, 2, + 33, 49, 59, 111, 107, 109, 11, 84, 56, 38, + 18, 24, 4, 3, 9, 21, 29, 15, 6, 5, + 20, 20, 4, 10, 20, 26, 28, 
14, 34, 30, + 2, 66, 56, 34, 20, 8, 3, 47, 57, 61, + }, + + { + /* Context Tables for I, SI Slices :: qp = 32 */ + + 76, 10, 33, 76, 10, 33, 15, 10, 44, 22, + 3, 31, 30, 42, 104, 28, 38, 7, 22, 40, + 1, 19, 3, 41, 73, 4, 10, 111, 117, 117, + 56, 1, 11, 22, 40, 1, 21, 4, 28, 0, + 0, 11, 21, 7, 35, 25, 57, 10, 13, 21, + 1, 27, 23, 49, 20, 1, 5, 13, 10, 6, + 44, 0, 0, 0, 3, 61, 67, 10, 7, 9, + 104, 12, 11, 59, 25, 37, 6, 3, 19, 3, + 31, 8, 19, 17, 7, 51, 55, 53, 59, 48, + 1, 18, 29, 1, 27, 13, 7, 17, 43, 27, + 47, 30, 17, 2, 35, 31, 3, 15, 4, 16, + 6, 6, 30, 24, 27, 5, 2, 5, 4, 21, + 22, 5, 16, 38, 6, 32, 34, 18, 31, 20, + 1, 23, 3, 53, 37, 13, 23, 3, 13, 9, + 22, 5, 11, 4, 14, 17, 17, 1, 63, 45, + 13, 63, 7, 25, 20, 3, 25, 40, 2, 3, + 31, 42, 23, 45, 2, 65, 30, 48, 44, 38, + 48, 36, 12, 42, 44, 19, 6, 20, 6, 2, + 23, 2, 7, 11, 1, 13, 27, 3, 11, 61, + 29, 23, 43, 33, 51, 60, 72, 56, 56, 54, + 40, 32, 40, 22, 8, 16, 3, 25, 27, 71, + 18, 12, 21, 10, 5, 9, 19, 27, 35, 41, + 45, 73, 25, 55, 77, 57, 75, 3, 66, 46, + 30, 10, 10, 3, 9, 17, 19, 12, 92, 60, + 46, 34, 62, 22, 10, 3, 5, 9, 76, 46, + 26, 12, 28, 6, 5, 13, 25, 23, 38, 12, + 4, 5, 6, 13, 23, 31, 2, 80, 46, 28, + 14, 36, 8, 2, 1, 11, 124, 35, 17, 0, + 5, 11, 2, 14, 16, 3, 14, 26, 30, 27, + 15, 21, 1, 30, 5, 9, 8, 20, 34, 25, + 9, 22, 26, 37, 33, 37, 33, 23, 1, 21, + 11, 15, 7, 2, 21, 15, 13, 1, 29, 21, + 35, 43, 7, 71, 25, 1, 13, 20, 13, 3, + 36, 9, 13, 5, 28, 27, 21, 61, 54, 64, + 66, 50, 28, 52, 44, 34, 32, 28, 26, 18, + 17, 21, 35, 3, 0, 17, 51, 19, 19, 29, + 27, 27, 19, 21, 49, 47, 43, 52, 50, 50, + 38, 14, 26, 20, 3, 8, 3, 5, 29, 31, + 35, 41, 16, 22, 21, 16, 16, 3, 15, 15, + 19, 23, 23, 33, 51, 51, 61, 67, 81, 39, + 59, 73, 45, 17, 29, 0, 2, 4, 88, 24, + 22, 9, 42, 110, 82, 122, 22, 60, 48, 3, + 41, 55, 65, 117, 113, 113, 11, 84, 54, 36, + 18, 24, 4, 5, 9, 21, 29, 15, 6, 5, + 22, 20, 4, 10, 20, 26, 28, 14, 34, 30, + 0, 62, 52, 30, 16, 4, 7, 53, 61, 63, + }, + + { + /* Context Tables for I, SI Slices :: qp = 33 
*/ + + 74, 10, 33, 74, 10, 33, 11, 14, 46, 24, + 3, 33, 28, 40, 104, 28, 44, 7, 24, 42, + 1, 19, 1, 43, 75, 4, 8, 113, 119, 117, + 62, 2, 11, 24, 42, 1, 19, 8, 30, 0, + 2, 9, 19, 7, 35, 23, 57, 10, 11, 19, + 1, 27, 21, 49, 22, 0, 3, 11, 12, 6, + 44, 0, 0, 0, 1, 61, 67, 12, 7, 9, + 104, 12, 11, 55, 23, 37, 8, 0, 15, 0, + 27, 12, 15, 13, 3, 47, 53, 51, 57, 50, + 0, 20, 27, 0, 25, 11, 3, 15, 41, 25, + 43, 32, 15, 4, 31, 29, 1, 13, 6, 18, + 8, 8, 32, 26, 27, 5, 4, 5, 4, 19, + 22, 5, 16, 38, 6, 32, 34, 18, 33, 20, + 1, 25, 5, 53, 35, 11, 21, 1, 11, 7, + 24, 3, 9, 6, 18, 17, 17, 0, 65, 43, + 11, 63, 5, 25, 22, 1, 25, 42, 2, 3, + 33, 44, 23, 47, 2, 65, 28, 48, 44, 38, + 48, 36, 12, 42, 44, 21, 6, 20, 6, 2, + 23, 2, 7, 11, 1, 13, 29, 3, 11, 63, + 31, 23, 45, 35, 51, 58, 70, 54, 54, 52, + 36, 28, 36, 18, 4, 12, 7, 29, 31, 75, + 16, 10, 23, 6, 9, 13, 23, 31, 39, 45, + 49, 77, 25, 57, 79, 57, 75, 1, 68, 48, + 32, 12, 10, 3, 7, 15, 17, 14, 94, 62, + 48, 34, 64, 24, 12, 1, 3, 7, 78, 48, + 28, 14, 30, 8, 3, 11, 21, 21, 42, 14, + 6, 3, 8, 11, 21, 27, 4, 82, 48, 30, + 16, 38, 10, 4, 0, 9, 124, 33, 15, 2, + 5, 9, 2, 16, 18, 1, 16, 28, 32, 25, + 15, 21, 1, 32, 5, 9, 6, 20, 34, 27, + 11, 22, 26, 37, 35, 37, 31, 21, 0, 19, + 9, 13, 5, 4, 21, 13, 11, 0, 29, 19, + 33, 41, 5, 71, 25, 0, 13, 22, 13, 3, + 38, 9, 13, 3, 30, 27, 21, 61, 54, 64, + 66, 50, 26, 52, 44, 34, 32, 28, 26, 18, + 17, 23, 37, 5, 1, 19, 53, 21, 21, 29, + 27, 27, 17, 19, 47, 47, 41, 48, 46, 46, + 34, 10, 22, 16, 7, 4, 7, 9, 33, 35, + 37, 41, 14, 20, 23, 14, 12, 7, 19, 19, + 23, 27, 27, 37, 55, 53, 65, 69, 83, 41, + 61, 75, 41, 13, 25, 2, 4, 6, 92, 26, + 24, 9, 44, 114, 86, 124, 24, 58, 46, 7, + 47, 59, 69, 121, 117, 115, 9, 86, 54, 36, + 18, 26, 4, 5, 9, 19, 27, 13, 8, 3, + 26, 22, 6, 12, 22, 28, 30, 16, 36, 32, + 0, 60, 50, 28, 14, 2, 9, 57, 63, 63, + }, + + { + /* Context Tables for I, SI Slices :: qp = 34 */ + + 72, 10, 33, 72, 10, 33, 7, 16, 46, 24, + 3, 35, 26, 38, 104, 28, 48, 7, 26, 44, + 1, 21, 1, 45, 77, 2, 4, 
117, 121, 119, + 68, 4, 11, 26, 44, 1, 19, 10, 30, 1, + 2, 9, 17, 7, 35, 23, 57, 10, 11, 17, + 1, 27, 21, 49, 22, 0, 3, 11, 12, 6, + 44, 0, 0, 0, 0, 61, 67, 12, 9, 9, + 104, 12, 11, 53, 21, 37, 8, 2, 11, 4, + 25, 14, 11, 11, 0, 45, 51, 49, 55, 52, + 0, 22, 25, 0, 25, 9, 0, 15, 41, 25, + 41, 32, 15, 4, 29, 29, 0, 13, 6, 18, + 8, 10, 32, 26, 29, 5, 4, 5, 4, 19, + 22, 5, 14, 38, 4, 32, 34, 18, 37, 20, + 1, 29, 7, 55, 33, 9, 19, 0, 9, 5, + 26, 1, 9, 8, 20, 17, 17, 0, 67, 43, + 9, 63, 5, 25, 24, 1, 27, 44, 2, 3, + 35, 46, 25, 49, 2, 67, 24, 46, 42, 36, + 46, 34, 10, 40, 44, 23, 4, 20, 6, 2, + 23, 0, 9, 13, 3, 15, 31, 5, 13, 67, + 33, 25, 47, 37, 51, 54, 68, 50, 50, 48, + 32, 24, 32, 12, 0, 8, 13, 35, 35, 81, + 12, 6, 27, 2, 13, 17, 27, 35, 43, 51, + 53, 81, 27, 61, 81, 59, 75, 1, 68, 48, + 32, 12, 10, 3, 7, 15, 15, 14, 94, 62, + 48, 34, 66, 24, 14, 1, 1, 5, 80, 50, + 28, 14, 32, 8, 3, 9, 19, 19, 44, 16, + 6, 3, 10, 9, 19, 25, 4, 82, 48, 30, + 16, 40, 12, 6, 2, 7, 124, 33, 15, 2, + 5, 9, 2, 16, 18, 1, 16, 28, 34, 25, + 15, 21, 3, 32, 7, 11, 4, 18, 34, 29, + 13, 22, 26, 39, 37, 39, 29, 19, 2, 19, + 9, 13, 3, 6, 21, 13, 11, 0, 31, 19, + 33, 41, 3, 71, 25, 0, 13, 24, 13, 3, + 40, 9, 13, 3, 32, 29, 21, 63, 52, 64, + 66, 48, 24, 50, 42, 32, 30, 26, 24, 16, + 19, 25, 39, 9, 3, 23, 57, 23, 23, 31, + 29, 27, 17, 19, 47, 49, 39, 44, 42, 42, + 30, 4, 18, 12, 13, 0, 11, 13, 37, 39, + 39, 43, 10, 16, 25, 10, 8, 11, 23, 23, + 27, 31, 31, 41, 59, 57, 69, 73, 85, 45, + 65, 79, 39, 11, 23, 4, 6, 8, 96, 28, + 26, 9, 46, 116, 88, 124, 26, 56, 42, 13, + 53, 65, 75, 125, 121, 117, 9, 86, 54, 36, + 18, 26, 4, 5, 9, 17, 25, 11, 10, 1, + 28, 24, 8, 14, 24, 30, 32, 16, 38, 32, + 0, 58, 46, 24, 10, 1, 13, 61, 67, 65, + }, + + { + /* Context Tables for I, SI Slices :: qp = 35 */ + + 70, 10, 33, 70, 10, 33, 3, 20, 48, 24, + 5, 39, 22, 36, 104, 28, 52, 7, 28, 46, + 1, 23, 1, 47, 79, 2, 0, 119, 123, 119, + 74, 6, 11, 28, 46, 1, 17, 12, 32, 1, + 2, 7, 15, 9, 37, 23, 57, 10, 11, 17, + 1, 27, 21, 49, 
24, 0, 3, 11, 14, 6, + 44, 0, 0, 0, 0, 61, 67, 14, 11, 9, + 104, 12, 11, 51, 19, 37, 10, 4, 7, 8, + 21, 16, 7, 9, 4, 43, 49, 47, 53, 54, + 0, 24, 23, 0, 23, 9, 4, 15, 39, 23, + 39, 32, 15, 6, 25, 27, 2, 13, 6, 18, + 8, 12, 34, 28, 29, 5, 4, 5, 4, 19, + 22, 7, 14, 38, 2, 32, 34, 18, 41, 20, + 3, 33, 9, 57, 31, 7, 17, 2, 7, 5, + 28, 1, 7, 10, 22, 17, 17, 0, 69, 41, + 7, 63, 5, 25, 26, 1, 29, 46, 2, 3, + 37, 48, 25, 51, 2, 69, 22, 44, 42, 34, + 44, 32, 10, 38, 42, 25, 2, 20, 6, 2, + 25, 1, 11, 15, 5, 17, 33, 7, 13, 71, + 35, 25, 49, 39, 51, 52, 64, 48, 46, 44, + 28, 20, 28, 8, 5, 4, 17, 39, 41, 85, + 8, 2, 29, 1, 17, 23, 33, 39, 49, 55, + 57, 87, 29, 65, 83, 61, 75, 1, 70, 48, + 32, 12, 10, 3, 7, 15, 15, 14, 94, 62, + 48, 34, 68, 26, 14, 0, 1, 3, 82, 50, + 30, 14, 34, 10, 1, 9, 17, 17, 46, 18, + 8, 3, 12, 7, 17, 23, 6, 82, 48, 30, + 16, 42, 12, 8, 4, 5, 124, 31, 13, 4, + 5, 9, 2, 16, 18, 1, 16, 28, 34, 25, + 15, 21, 3, 34, 9, 13, 2, 16, 34, 31, + 15, 22, 26, 39, 39, 39, 27, 17, 4, 17, + 7, 11, 3, 8, 21, 13, 11, 2, 31, 19, + 33, 39, 1, 71, 25, 0, 13, 26, 13, 3, + 42, 9, 13, 3, 34, 31, 21, 65, 50, 62, + 64, 46, 22, 48, 40, 30, 28, 24, 22, 14, + 21, 27, 41, 11, 5, 25, 61, 27, 25, 33, + 29, 29, 17, 19, 47, 49, 37, 40, 38, 38, + 26, 1, 14, 8, 17, 3, 15, 17, 43, 43, + 43, 45, 6, 14, 29, 6, 4, 15, 27, 27, + 31, 35, 35, 45, 65, 61, 73, 77, 87, 49, + 69, 81, 37, 9, 21, 6, 8, 10, 98, 30, + 28, 9, 48, 120, 92, 124, 28, 52, 40, 17, + 59, 71, 81, 125, 125, 119, 9, 86, 54, 36, + 18, 26, 4, 5, 9, 17, 23, 9, 12, 0, + 30, 24, 8, 16, 24, 30, 32, 16, 38, 32, + 0, 56, 44, 20, 6, 5, 17, 65, 71, 67, + }, + + { + /* Context Tables for I, SI Slices :: qp = 36 */ + + 66, 10, 33, 66, 10, 33, 1, 22, 48, 24, + 5, 41, 20, 32, 104, 28, 58, 9, 30, 46, + 1, 25, 1, 49, 81, 0, 3, 123, 125, 121, + 80, 8, 11, 30, 46, 1, 17, 14, 32, 3, + 2, 7, 15, 9, 37, 21, 57, 10, 11, 15, + 3, 29, 21, 49, 24, 2, 3, 9, 14, 6, + 44, 0, 0, 0, 2, 63, 67, 14, 13, 9, + 104, 12, 11, 49, 17, 37, 10, 8, 5, 12, + 19, 18, 5, 5, 
8, 41, 49, 45, 53, 54, + 0, 26, 21, 0, 23, 7, 8, 15, 39, 23, + 37, 32, 13, 6, 23, 27, 2, 13, 8, 18, + 8, 12, 34, 28, 31, 7, 4, 5, 4, 19, + 22, 7, 12, 38, 2, 32, 34, 16, 45, 20, + 3, 37, 11, 59, 29, 5, 15, 4, 7, 3, + 30, 0, 7, 12, 24, 17, 17, 2, 71, 41, + 5, 63, 5, 25, 26, 1, 31, 48, 2, 3, + 41, 50, 27, 53, 2, 71, 18, 42, 40, 32, + 42, 30, 8, 36, 42, 29, 0, 18, 6, 2, + 25, 3, 11, 17, 5, 19, 35, 9, 15, 75, + 37, 27, 51, 41, 51, 48, 62, 44, 42, 40, + 24, 14, 24, 2, 9, 0, 23, 45, 45, 91, + 4, 0, 33, 7, 21, 27, 37, 45, 53, 61, + 61, 91, 31, 67, 87, 63, 75, 1, 70, 50, + 32, 12, 10, 3, 7, 15, 13, 16, 96, 62, + 48, 34, 70, 26, 16, 0, 0, 3, 84, 52, + 30, 14, 34, 10, 1, 7, 15, 17, 48, 18, + 8, 3, 14, 5, 17, 21, 6, 84, 48, 30, + 16, 44, 14, 8, 6, 3, 124, 31, 13, 4, + 5, 9, 2, 16, 18, 0, 16, 28, 36, 25, + 15, 21, 5, 34, 11, 15, 0, 14, 34, 35, + 17, 20, 26, 41, 41, 41, 27, 17, 6, 17, + 7, 11, 1, 10, 23, 13, 11, 2, 33, 19, + 33, 39, 0, 71, 25, 2, 13, 26, 13, 3, + 44, 9, 15, 3, 36, 31, 23, 67, 48, 62, + 64, 44, 20, 46, 38, 28, 26, 22, 20, 12, + 23, 29, 43, 15, 9, 29, 65, 29, 27, 35, + 31, 29, 17, 19, 47, 51, 37, 34, 32, 34, + 20, 7, 10, 4, 23, 7, 19, 21, 47, 47, + 45, 47, 2, 10, 31, 2, 0, 19, 31, 33, + 35, 39, 39, 49, 69, 65, 77, 81, 89, 53, + 73, 85, 35, 7, 19, 6, 10, 12, 102, 32, + 30, 9, 50, 122, 94, 124, 28, 50, 36, 23, + 65, 77, 87, 125, 125, 121, 9, 86, 54, 36, + 18, 26, 4, 5, 9, 15, 21, 7, 14, 0, + 32, 26, 10, 18, 26, 32, 34, 18, 40, 34, + 0, 52, 40, 16, 2, 9, 21, 69, 75, 69, + }, + + { + /* Context Tables for I, SI Slices :: qp = 37 */ + + 64, 10, 33, 64, 10, 33, 2, 26, 48, 24, + 7, 45, 16, 30, 104, 28, 62, 9, 32, 48, + 1, 25, 0, 51, 83, 0, 7, 125, 125, 121, + 86, 12, 11, 32, 48, 1, 17, 16, 32, 3, + 2, 7, 13, 9, 39, 21, 57, 10, 11, 13, + 3, 29, 21, 49, 26, 2, 1, 9, 16, 6, + 44, 0, 0, 0, 2, 63, 67, 16, 15, 9, + 104, 12, 11, 47, 15, 37, 12, 10, 1, 16, + 15, 20, 1, 3, 12, 39, 47, 43, 51, 56, + 0, 28, 19, 0, 21, 5, 12, 15, 39, 21, + 33, 34, 13, 8, 21, 25, 4, 13, 8, 20, + 10, 
14, 36, 30, 31, 7, 4, 5, 4, 19, + 22, 9, 10, 38, 0, 32, 34, 16, 49, 20, + 5, 39, 13, 61, 27, 3, 13, 6, 5, 1, + 32, 2, 5, 14, 26, 17, 17, 2, 73, 39, + 3, 63, 3, 25, 28, 1, 33, 50, 2, 3, + 43, 52, 27, 55, 2, 71, 16, 40, 40, 30, + 40, 28, 6, 36, 42, 31, 1, 18, 6, 2, + 27, 3, 13, 19, 7, 21, 37, 9, 17, 79, + 39, 29, 53, 43, 51, 44, 60, 40, 38, 38, + 20, 10, 20, 1, 13, 3, 27, 49, 49, 95, + 0, 3, 37, 11, 25, 31, 41, 49, 57, 67, + 65, 97, 33, 71, 89, 63, 75, 1, 70, 50, + 32, 12, 10, 3, 7, 15, 13, 16, 96, 62, + 48, 34, 72, 28, 18, 0, 2, 1, 86, 54, + 30, 16, 36, 12, 0, 7, 13, 15, 52, 20, + 8, 1, 16, 3, 15, 19, 8, 84, 50, 30, + 16, 46, 16, 10, 8, 1, 124, 31, 13, 6, + 5, 9, 2, 16, 18, 0, 16, 30, 36, 25, + 15, 21, 7, 36, 13, 17, 1, 14, 34, 37, + 19, 20, 26, 43, 43, 41, 25, 15, 8, 15, + 5, 9, 1, 12, 23, 13, 11, 2, 35, 19, + 33, 37, 2, 71, 25, 2, 13, 28, 13, 3, + 46, 9, 15, 3, 38, 33, 23, 67, 46, 62, + 62, 44, 18, 44, 36, 26, 26, 20, 18, 10, + 25, 31, 45, 17, 11, 31, 69, 31, 29, 37, + 33, 31, 17, 17, 47, 51, 35, 30, 28, 30, + 16, 11, 6, 0, 27, 11, 23, 25, 51, 51, + 49, 49, 1, 6, 33, 1, 3, 23, 35, 37, + 39, 43, 43, 53, 75, 69, 81, 85, 91, 57, + 77, 87, 33, 3, 17, 8, 12, 14, 106, 34, + 32, 9, 52, 124, 98, 124, 30, 46, 32, 27, + 71, 81, 93, 125, 125, 123, 9, 86, 54, 36, + 18, 26, 4, 5, 9, 15, 19, 5, 16, 2, + 34, 28, 10, 20, 28, 34, 36, 18, 40, 34, + 0, 50, 38, 14, 0, 13, 25, 73, 79, 71, + }, + + { + /* Context Tables for I, SI Slices :: qp = 38 */ + + 62, 10, 35, 62, 10, 35, 6, 28, 50, 24, + 7, 47, 14, 28, 104, 28, 66, 9, 32, 50, + 1, 27, 0, 53, 85, 1, 9, 125, 125, 123, + 92, 14, 11, 32, 50, 1, 15, 18, 34, 5, + 4, 5, 11, 11, 39, 21, 57, 10, 9, 13, + 3, 29, 19, 49, 26, 2, 1, 9, 16, 6, + 44, 0, 0, 0, 4, 63, 67, 16, 15, 11, + 104, 12, 11, 45, 15, 37, 12, 12, 2, 20, + 13, 22, 2, 1, 16, 37, 45, 41, 49, 58, + 0, 30, 19, 2, 21, 5, 16, 15, 37, 21, + 31, 34, 13, 8, 17, 25, 6, 11, 8, 20, + 10, 16, 36, 30, 33, 7, 4, 5, 4, 19, + 22, 9, 10, 38, 1, 32, 34, 16, 53, 20, + 5, 43, 15, 63, 27, 1, 13, 
6, 3, 1, + 34, 2, 5, 14, 30, 17, 17, 2, 75, 39, + 1, 63, 3, 25, 30, 0, 35, 50, 2, 3, + 45, 54, 29, 57, 2, 73, 12, 38, 38, 30, + 38, 28, 6, 34, 40, 33, 3, 18, 6, 2, + 27, 5, 15, 21, 9, 21, 39, 11, 17, 83, + 41, 29, 55, 45, 51, 42, 56, 38, 34, 34, + 16, 6, 16, 7, 19, 7, 33, 55, 55, 101, + 3, 7, 39, 15, 31, 37, 47, 53, 63, 71, + 71, 101, 35, 75, 91, 65, 75, 0, 72, 50, + 34, 12, 10, 3, 7, 15, 11, 16, 96, 64, + 48, 34, 74, 28, 18, 2, 2, 0, 88, 54, + 32, 16, 38, 12, 0, 5, 11, 13, 54, 22, + 10, 1, 16, 3, 13, 15, 8, 84, 50, 30, + 18, 46, 16, 12, 8, 0, 124, 29, 11, 6, + 5, 9, 2, 16, 18, 0, 18, 30, 38, 25, + 15, 23, 7, 36, 15, 19, 3, 12, 34, 39, + 21, 20, 26, 43, 45, 43, 23, 13, 10, 15, + 5, 9, 0, 12, 23, 11, 9, 4, 35, 19, + 33, 37, 4, 71, 25, 2, 13, 30, 13, 3, + 48, 11, 15, 3, 38, 35, 23, 69, 44, 60, + 62, 42, 16, 44, 36, 24, 24, 20, 16, 8, + 27, 33, 47, 21, 13, 35, 73, 35, 33, 39, + 33, 31, 17, 17, 45, 53, 33, 26, 24, 26, + 12, 17, 2, 3, 33, 15, 29, 29, 57, 55, + 51, 49, 3, 4, 37, 5, 9, 27, 39, 41, + 43, 47, 49, 57, 79, 73, 85, 87, 95, 61, + 81, 91, 31, 1, 15, 10, 14, 16, 108, 36, + 34, 9, 54, 124, 100, 124, 32, 44, 30, 33, + 77, 87, 97, 125, 125, 125, 7, 86, 54, 36, + 18, 26, 4, 5, 9, 13, 17, 5, 18, 4, + 36, 28, 12, 22, 28, 34, 36, 18, 42, 34, + 0, 48, 34, 10, 3, 15, 27, 79, 81, 73, + }, + + { + /* Context Tables for I, SI Slices :: qp = 39 */ + + 60, 10, 35, 60, 10, 35, 10, 32, 50, 24, + 9, 51, 10, 24, 104, 28, 72, 11, 34, 50, + 1, 29, 0, 55, 87, 1, 13, 125, 125, 123, + 98, 16, 11, 34, 50, 1, 15, 20, 34, 5, + 4, 5, 11, 11, 41, 19, 57, 10, 9, 11, + 5, 29, 19, 49, 28, 4, 1, 7, 18, 6, + 44, 0, 0, 0, 4, 63, 67, 18, 17, 11, + 104, 12, 11, 43, 13, 37, 14, 16, 4, 24, + 9, 24, 4, 2, 20, 35, 43, 39, 49, 60, + 0, 32, 17, 2, 19, 3, 20, 15, 37, 19, + 29, 34, 11, 10, 15, 23, 6, 11, 10, 20, + 10, 18, 38, 32, 33, 9, 4, 5, 4, 19, + 22, 11, 8, 38, 1, 32, 34, 14, 57, 20, + 7, 47, 17, 65, 25, 0, 11, 8, 1, 0, + 36, 4, 3, 16, 32, 17, 17, 4, 77, 37, + 0, 63, 3, 25, 30, 0, 37, 52, 2, 3, + 49, 
56, 29, 59, 2, 75, 10, 36, 38, 28, + 36, 26, 4, 32, 40, 35, 5, 18, 6, 2, + 29, 7, 15, 23, 9, 23, 41, 13, 19, 87, + 43, 31, 57, 47, 51, 38, 54, 34, 30, 30, + 12, 0, 12, 11, 23, 11, 37, 59, 59, 105, + 7, 9, 43, 21, 35, 41, 51, 59, 67, 77, + 75, 107, 37, 77, 95, 67, 75, 0, 72, 52, + 34, 12, 10, 3, 7, 15, 11, 18, 98, 64, + 48, 34, 76, 30, 20, 2, 4, 2, 90, 56, + 32, 16, 40, 14, 2, 5, 9, 13, 56, 22, + 10, 1, 18, 1, 13, 13, 10, 86, 50, 30, + 18, 48, 18, 14, 10, 2, 124, 29, 11, 8, + 5, 9, 2, 16, 18, 2, 18, 30, 38, 25, + 15, 23, 9, 38, 17, 21, 5, 10, 34, 41, + 23, 18, 26, 45, 47, 43, 21, 13, 12, 13, + 3, 7, 0, 14, 23, 11, 9, 4, 37, 19, + 33, 35, 6, 71, 25, 4, 13, 30, 13, 3, + 50, 11, 15, 3, 40, 35, 25, 71, 42, 60, + 60, 40, 14, 42, 34, 22, 22, 18, 14, 6, + 29, 35, 49, 23, 17, 37, 77, 37, 35, 41, + 35, 33, 17, 17, 45, 53, 31, 22, 20, 22, + 6, 23, 1, 7, 37, 19, 33, 33, 61, 59, + 55, 51, 7, 0, 39, 9, 13, 31, 43, 45, + 47, 51, 53, 61, 85, 77, 89, 91, 97, 65, + 85, 93, 29, 0, 13, 10, 16, 18, 112, 38, + 36, 9, 56, 124, 104, 124, 32, 40, 26, 37, + 83, 93, 103, 125, 125, 125, 7, 86, 54, 36, + 18, 26, 4, 5, 9, 13, 15, 3, 20, 6, + 38, 30, 12, 24, 30, 36, 38, 20, 42, 36, + 0, 44, 32, 6, 7, 19, 31, 83, 85, 75, + }, + + { + /* Context Tables for I, SI Slices :: qp = 40 */ + + 56, 8, 35, 56, 8, 35, 12, 34, 50, 24, + 9, 53, 8, 22, 104, 28, 76, 11, 36, 52, + 1, 31, 0, 57, 91, 3, 17, 125, 125, 125, + 102, 18, 11, 36, 52, 1, 15, 22, 34, 7, + 4, 5, 9, 13, 41, 19, 59, 10, 9, 11, + 5, 31, 19, 49, 28, 4, 1, 7, 18, 6, + 44, 0, 0, 0, 6, 65, 67, 18, 19, 11, + 104, 12, 11, 41, 11, 37, 14, 18, 8, 28, + 7, 26, 8, 4, 22, 33, 43, 37, 47, 60, + 0, 34, 15, 2, 19, 3, 22, 15, 37, 19, + 27, 34, 11, 10, 13, 23, 8, 11, 10, 20, + 10, 18, 38, 32, 35, 9, 4, 5, 4, 19, + 22, 11, 6, 38, 3, 32, 34, 14, 61, 20, + 7, 51, 19, 67, 23, 2, 9, 10, 1, 0, + 38, 4, 3, 18, 34, 17, 17, 4, 81, 37, + 2, 63, 3, 27, 32, 0, 39, 54, 2, 3, + 51, 58, 31, 61, 2, 77, 6, 34, 36, 26, + 34, 24, 2, 30, 38, 39, 7, 16, 6, 2, + 29, 9, 17, 25, 
11, 25, 43, 15, 21, 91, + 47, 33, 59, 49, 51, 34, 50, 30, 26, 26, + 8, 3, 8, 17, 29, 15, 43, 65, 65, 111, + 11, 13, 47, 25, 39, 47, 57, 63, 73, 83, + 79, 111, 39, 81, 97, 69, 77, 0, 72, 52, + 34, 12, 10, 3, 7, 15, 9, 18, 98, 64, + 48, 34, 78, 30, 20, 2, 4, 2, 92, 56, + 32, 16, 40, 14, 2, 3, 7, 11, 58, 24, + 10, 1, 20, 0, 11, 11, 10, 86, 50, 30, + 18, 50, 18, 14, 12, 2, 124, 29, 11, 8, + 5, 9, 2, 16, 18, 2, 18, 30, 40, 25, + 15, 23, 11, 38, 19, 23, 7, 8, 34, 45, + 27, 18, 26, 47, 49, 45, 21, 11, 14, 13, + 3, 7, 2, 16, 25, 11, 9, 4, 39, 19, + 33, 35, 6, 73, 25, 4, 13, 32, 13, 3, + 50, 11, 17, 3, 42, 37, 25, 73, 40, 58, + 60, 38, 10, 40, 32, 20, 20, 16, 12, 4, + 31, 37, 51, 27, 19, 41, 81, 41, 37, 43, + 37, 33, 17, 17, 45, 55, 31, 16, 14, 18, + 2, 29, 7, 13, 43, 23, 37, 37, 67, 63, + 57, 53, 11, 3, 43, 13, 17, 37, 49, 51, + 53, 55, 57, 67, 89, 81, 95, 95, 99, 69, + 89, 97, 27, 2, 11, 12, 18, 18, 114, 40, + 36, 9, 56, 124, 106, 124, 34, 38, 22, 43, + 89, 99, 109, 125, 125, 125, 7, 86, 54, 36, + 18, 26, 4, 5, 9, 11, 15, 1, 22, 6, + 40, 30, 14, 24, 30, 36, 38, 20, 44, 36, + 1, 42, 28, 2, 11, 23, 35, 87, 89, 77, + }, + + { + /* Context Tables for I, SI Slices :: qp = 41 */ + + 54, 8, 35, 54, 8, 35, 16, 36, 52, 24, + 9, 55, 6, 20, 104, 28, 80, 11, 38, 54, + 1, 31, 2, 59, 93, 5, 21, 125, 125, 125, + 108, 22, 11, 38, 54, 1, 13, 24, 36, 9, + 4, 3, 7, 13, 41, 19, 59, 10, 9, 9, + 5, 31, 19, 49, 28, 4, 0, 7, 18, 6, + 44, 0, 0, 0, 8, 65, 67, 18, 21, 11, + 104, 12, 11, 39, 9, 37, 14, 20, 12, 32, + 3, 30, 12, 6, 26, 31, 41, 35, 45, 62, + 2, 36, 13, 2, 17, 1, 26, 15, 35, 19, + 23, 36, 11, 10, 9, 23, 10, 11, 10, 22, + 12, 20, 38, 32, 35, 9, 6, 5, 4, 17, + 22, 11, 6, 38, 5, 32, 34, 14, 65, 20, + 7, 53, 21, 67, 21, 4, 7, 12, 0, 2, + 40, 6, 3, 20, 36, 17, 17, 4, 83, 35, + 4, 63, 1, 27, 34, 0, 41, 56, 2, 3, + 53, 60, 33, 63, 2, 77, 4, 32, 36, 24, + 32, 22, 2, 30, 38, 41, 9, 16, 6, 2, + 29, 9, 19, 27, 13, 27, 45, 15, 21, 93, + 49, 33, 61, 51, 51, 32, 48, 28, 24, 24, + 4, 7, 4, 21, 33, 19, 
47, 71, 69, 117, + 13, 17, 49, 29, 43, 51, 61, 67, 77, 87, + 83, 115, 39, 85, 99, 69, 77, 0, 74, 52, + 34, 14, 10, 3, 5, 15, 7, 18, 98, 64, + 50, 34, 80, 32, 22, 4, 6, 4, 94, 58, + 34, 18, 42, 16, 2, 1, 5, 9, 62, 26, + 12, 0, 22, 2, 9, 9, 12, 86, 52, 32, + 18, 52, 20, 16, 14, 4, 124, 27, 9, 10, + 5, 9, 2, 18, 20, 2, 18, 32, 42, 25, + 15, 23, 11, 38, 21, 23, 9, 8, 34, 47, + 29, 18, 26, 47, 51, 47, 19, 9, 16, 13, + 1, 7, 4, 18, 25, 11, 9, 6, 39, 19, + 33, 35, 8, 73, 25, 4, 13, 34, 13, 3, + 52, 11, 17, 3, 44, 39, 25, 73, 38, 58, + 60, 38, 8, 38, 30, 18, 20, 14, 10, 4, + 31, 39, 53, 29, 21, 45, 85, 43, 39, 43, + 37, 33, 15, 15, 45, 57, 29, 12, 10, 14, + 1, 33, 11, 17, 47, 27, 41, 41, 71, 67, + 59, 55, 15, 5, 45, 17, 21, 41, 53, 55, + 57, 59, 61, 71, 93, 83, 99, 99, 101, 73, + 93, 101, 25, 6, 7, 14, 20, 20, 118, 42, + 38, 9, 58, 124, 108, 124, 36, 36, 20, 47, + 95, 103, 115, 125, 125, 125, 7, 86, 54, 36, + 18, 28, 4, 5, 9, 9, 13, 0, 24, 8, + 44, 32, 16, 26, 32, 38, 40, 20, 46, 36, + 1, 40, 26, 0, 13, 27, 39, 91, 93, 77, + }, + + { + /* Context Tables for I, SI Slices :: qp = 42 */ + + 52, 8, 35, 52, 8, 35, 20, 40, 52, 24, + 11, 59, 2, 16, 104, 28, 86, 13, 40, 54, + 1, 33, 2, 61, 95, 5, 25, 125, 125, 125, + 114, 24, 11, 40, 54, 1, 13, 26, 36, 9, + 4, 3, 7, 13, 43, 17, 59, 10, 9, 7, + 7, 31, 19, 49, 30, 6, 0, 5, 20, 6, + 44, 0, 0, 0, 8, 65, 67, 20, 23, 11, + 104, 12, 11, 37, 7, 37, 16, 24, 14, 36, + 1, 32, 14, 10, 30, 29, 39, 33, 45, 64, + 2, 38, 11, 2, 17, 0, 30, 15, 35, 17, + 21, 36, 9, 12, 7, 21, 10, 11, 12, 22, + 12, 22, 40, 34, 37, 11, 6, 5, 4, 17, + 22, 13, 4, 38, 5, 32, 34, 12, 69, 20, + 9, 57, 23, 69, 19, 6, 5, 14, 2, 4, + 42, 8, 1, 22, 38, 17, 17, 6, 85, 35, + 6, 63, 1, 27, 34, 0, 43, 58, 2, 3, + 57, 62, 33, 65, 2, 79, 0, 30, 34, 22, + 30, 20, 0, 28, 38, 43, 11, 16, 6, 2, + 31, 11, 19, 29, 13, 29, 47, 17, 23, 97, + 51, 35, 63, 53, 51, 28, 46, 24, 20, 20, + 0, 13, 0, 27, 37, 23, 53, 75, 73, 121, + 17, 19, 53, 35, 47, 55, 65, 73, 81, 93, + 87, 121, 41, 87, 103, 71, 
77, 0, 74, 54, + 34, 14, 10, 3, 5, 15, 7, 20, 100, 64, + 50, 34, 82, 32, 24, 4, 8, 6, 96, 60, + 34, 18, 44, 16, 4, 1, 3, 9, 64, 26, + 12, 0, 24, 4, 9, 7, 12, 88, 52, 32, + 18, 54, 22, 18, 16, 6, 124, 27, 9, 10, + 5, 9, 2, 18, 20, 4, 18, 32, 42, 25, + 15, 23, 13, 40, 23, 25, 11, 6, 34, 49, + 31, 16, 26, 49, 53, 47, 17, 9, 18, 11, + 1, 5, 4, 20, 25, 11, 9, 6, 41, 19, + 33, 33, 10, 73, 25, 6, 13, 34, 13, 3, + 54, 11, 17, 3, 46, 39, 27, 75, 36, 58, + 58, 36, 6, 36, 28, 16, 18, 12, 8, 2, + 33, 41, 55, 33, 25, 47, 89, 45, 41, 45, + 39, 35, 15, 15, 45, 57, 27, 8, 6, 10, + 7, 39, 15, 21, 53, 31, 45, 45, 75, 71, + 63, 57, 19, 9, 47, 21, 25, 45, 57, 59, + 61, 63, 65, 75, 99, 87, 103, 103, 103, 77, + 97, 103, 23, 8, 5, 14, 22, 22, 122, 44, + 40, 9, 60, 124, 112, 124, 36, 32, 16, 53, + 101, 109, 121, 125, 125, 125, 7, 86, 54, 36, + 18, 28, 4, 5, 9, 9, 11, 2, 26, 10, + 46, 34, 16, 28, 34, 40, 42, 22, 46, 38, + 1, 36, 22, 3, 17, 31, 43, 95, 97, 79, + }, + + { + /* Context Tables for I, SI Slices :: qp = 43 */ + + 50, 8, 37, 50, 8, 37, 24, 42, 54, 24, + 11, 61, 0, 14, 104, 28, 90, 13, 40, 56, + 1, 35, 2, 63, 97, 7, 27, 125, 125, 125, + 120, 26, 11, 40, 56, 1, 11, 28, 38, 11, + 6, 1, 5, 15, 43, 17, 59, 10, 7, 7, + 7, 31, 17, 49, 30, 6, 0, 5, 20, 6, + 44, 0, 0, 0, 10, 65, 67, 20, 23, 13, + 104, 12, 11, 35, 7, 37, 16, 26, 18, 40, + 2, 34, 18, 12, 34, 27, 37, 31, 43, 66, + 2, 40, 11, 4, 15, 0, 34, 15, 33, 17, + 19, 36, 9, 12, 3, 21, 12, 9, 12, 22, + 12, 24, 40, 34, 37, 11, 6, 5, 4, 17, + 22, 13, 4, 38, 7, 32, 34, 12, 73, 20, + 9, 61, 25, 71, 19, 8, 5, 14, 4, 4, + 44, 8, 1, 22, 42, 17, 17, 6, 87, 33, + 8, 63, 1, 27, 36, 2, 45, 58, 2, 3, + 59, 64, 35, 67, 2, 81, 1, 28, 34, 22, + 28, 20, 0, 26, 36, 45, 13, 16, 6, 2, + 31, 13, 21, 31, 15, 29, 49, 19, 23, 101, + 53, 35, 65, 55, 51, 26, 42, 22, 16, 16, + 3, 17, 3, 31, 43, 27, 57, 81, 79, 125, + 21, 23, 55, 39, 53, 61, 71, 77, 87, 97, + 93, 125, 43, 91, 105, 73, 77, 2, 76, 54, + 36, 14, 10, 3, 5, 15, 5, 20, 100, 66, + 50, 34, 84, 34, 24, 6, 
8, 8, 98, 60, + 36, 18, 46, 18, 4, 0, 1, 7, 66, 28, + 14, 0, 24, 4, 7, 3, 14, 88, 52, 32, + 20, 54, 22, 20, 16, 8, 124, 25, 7, 12, + 5, 9, 2, 18, 20, 4, 20, 32, 44, 25, + 15, 25, 13, 40, 25, 27, 13, 4, 34, 51, + 33, 16, 26, 49, 55, 49, 15, 7, 20, 11, + 0, 5, 6, 20, 25, 9, 7, 8, 41, 19, + 33, 33, 12, 73, 25, 6, 13, 36, 13, 3, + 56, 13, 17, 3, 46, 41, 27, 77, 34, 56, + 58, 34, 4, 36, 28, 14, 16, 12, 6, 0, + 35, 43, 57, 35, 27, 51, 93, 49, 45, 47, + 39, 35, 15, 15, 43, 59, 25, 4, 2, 6, + 11, 45, 19, 25, 57, 35, 51, 49, 81, 75, + 65, 57, 21, 11, 51, 25, 31, 49, 61, 63, + 65, 67, 71, 79, 103, 91, 107, 105, 107, 81, + 101, 107, 21, 10, 3, 16, 24, 24, 124, 46, + 42, 9, 62, 124, 114, 124, 38, 30, 14, 57, + 107, 115, 125, 125, 125, 125, 5, 86, 54, 36, + 18, 28, 4, 5, 9, 7, 9, 2, 28, 12, + 48, 34, 18, 30, 34, 40, 42, 22, 48, 38, + 1, 34, 20, 7, 21, 33, 45, 101, 99, 81, + }, + + { + /* Context Tables for I, SI Slices :: qp = 44 */ + + 46, 8, 37, 46, 8, 37, 26, 46, 54, 24, + 13, 65, 3, 12, 104, 28, 94, 13, 42, 58, + 1, 37, 2, 65, 99, 7, 31, 125, 125, 125, + 124, 28, 11, 42, 58, 1, 11, 30, 38, 11, + 6, 1, 3, 15, 45, 17, 59, 10, 7, 5, + 7, 33, 17, 49, 32, 6, 0, 5, 22, 6, + 44, 0, 0, 0, 10, 67, 67, 22, 25, 13, + 104, 12, 11, 33, 5, 37, 18, 28, 22, 44, + 4, 36, 22, 14, 38, 25, 37, 29, 41, 66, + 2, 42, 9, 4, 15, 2, 38, 15, 33, 15, + 17, 36, 9, 14, 1, 19, 14, 9, 12, 22, + 12, 24, 42, 36, 39, 11, 6, 5, 4, 17, + 22, 15, 2, 38, 9, 32, 34, 12, 77, 20, + 11, 65, 27, 73, 17, 10, 3, 16, 4, 6, + 46, 10, 0, 24, 44, 17, 17, 6, 89, 33, + 10, 63, 1, 27, 38, 2, 47, 60, 2, 3, + 61, 66, 35, 69, 2, 83, 5, 26, 32, 20, + 26, 18, 1, 24, 36, 49, 15, 14, 6, 2, + 33, 15, 23, 33, 17, 31, 51, 21, 25, 105, + 55, 37, 67, 57, 51, 22, 40, 18, 12, 12, + 7, 21, 7, 37, 47, 31, 63, 85, 83, 125, + 25, 27, 59, 43, 57, 65, 75, 81, 91, 103, + 97, 125, 45, 95, 107, 75, 77, 2, 76, 54, + 36, 14, 10, 3, 5, 15, 5, 20, 100, 66, + 50, 34, 86, 34, 26, 6, 10, 8, 100, 62, + 36, 18, 46, 18, 6, 0, 0, 5, 68, 30, + 14, 0, 26, 6, 5, 
1, 14, 88, 52, 32, + 20, 56, 24, 20, 18, 10, 124, 25, 7, 12, + 5, 9, 2, 18, 20, 4, 20, 32, 44, 25, + 15, 25, 15, 42, 27, 29, 15, 2, 34, 55, + 35, 16, 26, 51, 57, 49, 15, 5, 22, 9, + 0, 3, 6, 22, 27, 9, 7, 8, 43, 19, + 33, 31, 14, 73, 25, 6, 13, 38, 13, 3, + 58, 13, 19, 3, 48, 43, 27, 79, 32, 56, + 56, 32, 2, 34, 26, 12, 14, 10, 4, 1, + 37, 45, 59, 39, 29, 53, 97, 51, 47, 49, + 41, 37, 15, 15, 43, 59, 25, 1, 3, 2, + 15, 51, 23, 29, 63, 39, 55, 53, 85, 79, + 69, 59, 25, 15, 53, 29, 35, 53, 65, 69, + 69, 71, 75, 83, 109, 95, 111, 109, 109, 85, + 105, 109, 19, 12, 1, 18, 26, 26, 124, 48, + 44, 9, 64, 124, 118, 124, 40, 26, 10, 63, + 113, 121, 125, 125, 125, 125, 5, 86, 54, 36, + 18, 28, 4, 5, 9, 7, 7, 4, 30, 12, + 50, 36, 18, 32, 36, 42, 44, 22, 48, 38, + 1, 32, 16, 11, 25, 37, 49, 105, 103, 83, + }, + + { + /* Context Tables for I, SI Slices :: qp = 45 */ + + 44, 8, 37, 44, 8, 37, 30, 48, 54, 24, + 13, 67, 5, 8, 104, 28, 100, 15, 44, 58, + 1, 37, 4, 67, 101, 9, 35, 125, 125, 125, + 124, 32, 11, 44, 58, 1, 11, 32, 38, 13, + 6, 1, 3, 15, 45, 15, 59, 10, 7, 3, + 9, 33, 17, 49, 32, 8, 2, 3, 22, 6, + 44, 0, 0, 0, 12, 67, 67, 22, 27, 13, + 104, 12, 11, 31, 3, 37, 18, 32, 24, 48, + 8, 38, 24, 18, 42, 23, 35, 27, 41, 68, + 2, 44, 7, 4, 13, 4, 42, 15, 33, 15, + 13, 38, 7, 14, 0, 19, 14, 9, 14, 24, + 14, 26, 42, 36, 39, 13, 6, 5, 4, 17, + 22, 15, 0, 38, 9, 32, 34, 10, 81, 20, + 11, 67, 29, 75, 15, 12, 1, 18, 6, 8, + 48, 12, 0, 26, 46, 17, 17, 8, 91, 31, + 12, 63, 0, 27, 38, 2, 49, 62, 2, 3, + 65, 68, 37, 71, 2, 83, 7, 24, 32, 18, + 24, 16, 3, 24, 36, 51, 17, 14, 6, 2, + 33, 15, 23, 35, 17, 33, 53, 21, 27, 109, + 57, 39, 69, 59, 51, 18, 38, 14, 8, 10, + 11, 27, 11, 41, 51, 35, 67, 91, 87, 125, + 29, 29, 63, 49, 61, 69, 79, 87, 95, 109, + 101, 125, 47, 97, 111, 75, 77, 2, 76, 56, + 36, 14, 10, 3, 5, 15, 3, 22, 102, 66, + 50, 34, 88, 36, 28, 6, 12, 10, 102, 64, + 36, 20, 48, 20, 6, 2, 2, 5, 72, 30, + 14, 2, 28, 8, 5, 0, 16, 90, 54, 32, + 20, 58, 26, 22, 20, 12, 124, 25, 7, 14, + 
5, 9, 2, 18, 20, 6, 20, 34, 46, 25, + 15, 25, 17, 42, 29, 31, 17, 2, 34, 57, + 37, 14, 26, 53, 59, 51, 13, 5, 24, 9, + 2, 3, 8, 24, 27, 9, 7, 8, 45, 19, + 33, 31, 16, 73, 25, 8, 13, 38, 13, 3, + 60, 13, 19, 3, 50, 43, 29, 79, 30, 56, + 56, 32, 0, 32, 24, 10, 14, 8, 2, 3, + 39, 47, 61, 41, 33, 57, 101, 53, 49, 51, + 43, 37, 15, 13, 43, 61, 23, 5, 7, 1, + 21, 55, 27, 33, 67, 43, 59, 57, 89, 83, + 71, 61, 29, 19, 55, 33, 39, 57, 69, 73, + 73, 75, 79, 87, 113, 99, 115, 113, 111, 89, + 109, 113, 17, 16, 0, 18, 28, 28, 124, 50, + 46, 9, 66, 124, 120, 124, 40, 24, 6, 67, + 119, 125, 125, 125, 125, 125, 5, 86, 54, 36, + 18, 28, 4, 5, 9, 5, 5, 6, 32, 14, + 52, 38, 20, 34, 38, 44, 46, 24, 50, 40, + 1, 28, 14, 13, 27, 41, 53, 109, 107, 85, + }, + + { + /* Context Tables for I, SI Slices :: qp = 46 */ + + 42, 8, 37, 42, 8, 37, 34, 52, 56, 24, + 15, 71, 9, 6, 104, 28, 104, 15, 46, 60, + 1, 39, 4, 69, 103, 9, 39, 125, 125, 125, + 124, 34, 11, 46, 60, 1, 9, 34, 40, 13, + 6, 0, 1, 17, 47, 15, 59, 10, 7, 3, + 9, 33, 17, 49, 34, 8, 2, 3, 24, 6, + 44, 0, 0, 0, 12, 67, 67, 24, 29, 13, + 104, 12, 11, 29, 1, 37, 20, 34, 28, 52, + 10, 40, 28, 20, 46, 21, 33, 25, 39, 70, + 2, 46, 5, 4, 13, 4, 46, 15, 31, 13, + 11, 38, 7, 16, 4, 17, 16, 9, 14, 24, + 14, 28, 44, 38, 41, 13, 6, 5, 4, 17, + 22, 17, 0, 38, 11, 32, 34, 10, 85, 20, + 13, 71, 31, 77, 13, 14, 0, 20, 8, 8, + 50, 12, 2, 28, 48, 17, 17, 8, 93, 31, + 14, 63, 0, 27, 40, 2, 51, 64, 2, 3, + 67, 70, 37, 73, 2, 85, 11, 22, 30, 16, + 22, 14, 3, 22, 34, 53, 19, 14, 6, 2, + 35, 17, 25, 37, 19, 35, 55, 23, 27, 113, + 59, 39, 71, 61, 51, 16, 34, 12, 4, 6, + 15, 31, 15, 47, 57, 39, 73, 95, 93, 125, + 33, 33, 65, 53, 65, 75, 85, 91, 101, 113, + 105, 125, 49, 101, 113, 77, 77, 2, 78, 56, + 36, 14, 10, 3, 5, 15, 3, 22, 102, 66, + 50, 34, 90, 36, 28, 8, 12, 12, 104, 64, + 38, 20, 50, 20, 8, 2, 4, 3, 74, 32, + 16, 2, 30, 10, 3, 2, 16, 90, 54, 32, + 20, 60, 26, 24, 22, 14, 124, 23, 5, 14, + 5, 9, 2, 18, 20, 6, 20, 34, 46, 25, + 15, 25, 17, 44, 31, 33, 
19, 0, 34, 59, + 39, 14, 26, 53, 61, 51, 11, 3, 26, 7, + 2, 1, 8, 26, 27, 9, 7, 10, 45, 19, + 33, 29, 18, 73, 25, 8, 13, 40, 13, 3, + 62, 13, 19, 3, 52, 45, 29, 81, 28, 54, + 54, 30, 1, 30, 22, 8, 12, 6, 0, 5, + 41, 49, 63, 45, 35, 59, 105, 57, 51, 53, + 43, 39, 15, 13, 43, 61, 21, 9, 11, 5, + 25, 61, 31, 37, 73, 47, 63, 61, 95, 87, + 75, 63, 33, 21, 59, 37, 43, 61, 73, 77, + 77, 79, 83, 91, 119, 103, 119, 117, 113, 93, + 113, 115, 15, 18, 2, 20, 30, 30, 124, 52, + 48, 9, 68, 124, 124, 124, 42, 20, 4, 73, + 125, 125, 125, 125, 125, 125, 5, 86, 54, 36, + 18, 28, 4, 5, 9, 5, 3, 8, 34, 16, + 54, 38, 20, 36, 38, 44, 46, 24, 50, 40, + 1, 26, 10, 17, 31, 45, 57, 113, 111, 87, + }, + + { + /* Context Tables for I, SI Slices :: qp = 47 */ + + 40, 8, 37, 40, 8, 37, 38, 54, 56, 24, + 15, 73, 11, 4, 104, 28, 108, 15, 48, 62, + 1, 41, 4, 71, 105, 11, 43, 125, 125, 125, + 124, 36, 11, 48, 62, 1, 9, 36, 40, 15, + 6, 0, 0, 17, 47, 15, 59, 10, 7, 1, + 9, 33, 17, 49, 34, 8, 2, 3, 24, 6, + 44, 0, 0, 0, 14, 67, 67, 24, 31, 13, + 104, 12, 11, 27, 0, 37, 20, 36, 32, 56, + 14, 42, 32, 22, 50, 19, 31, 23, 37, 72, + 2, 48, 3, 4, 11, 6, 50, 15, 31, 13, + 9, 38, 7, 16, 6, 17, 18, 9, 14, 24, + 14, 30, 44, 38, 41, 13, 6, 5, 4, 17, + 22, 17, 1, 38, 13, 32, 34, 10, 89, 20, + 13, 75, 33, 79, 11, 16, 2, 22, 10, 10, + 52, 14, 2, 30, 50, 17, 17, 8, 95, 29, + 16, 63, 0, 27, 42, 2, 53, 66, 2, 3, + 69, 72, 39, 75, 2, 87, 13, 20, 30, 14, + 20, 12, 5, 20, 34, 55, 21, 14, 6, 2, + 35, 19, 27, 39, 21, 37, 57, 25, 29, 117, + 61, 41, 73, 63, 51, 12, 32, 8, 0, 2, + 19, 35, 19, 51, 61, 43, 77, 101, 97, 125, + 37, 37, 69, 57, 69, 79, 89, 95, 105, 119, + 109, 125, 51, 105, 115, 79, 77, 2, 78, 56, + 36, 14, 10, 3, 5, 15, 1, 22, 102, 66, + 50, 34, 92, 38, 30, 8, 14, 14, 106, 66, + 38, 20, 52, 22, 8, 4, 6, 1, 76, 34, + 16, 2, 32, 12, 1, 4, 18, 90, 54, 32, + 20, 62, 28, 26, 24, 16, 124, 23, 5, 16, + 5, 9, 2, 18, 20, 6, 20, 34, 48, 25, + 15, 25, 19, 44, 33, 35, 21, 1, 34, 61, + 41, 14, 26, 55, 63, 53, 9, 1, 28, 7, + 
4, 1, 10, 28, 27, 9, 7, 10, 47, 19, + 33, 29, 20, 73, 25, 8, 13, 42, 13, 3, + 64, 13, 19, 3, 54, 47, 29, 83, 26, 54, + 54, 28, 3, 28, 20, 6, 10, 4, 1, 7, + 43, 51, 65, 47, 37, 63, 109, 59, 53, 55, + 45, 39, 15, 13, 43, 63, 19, 13, 15, 9, + 29, 67, 35, 41, 77, 51, 67, 65, 99, 91, + 77, 65, 37, 25, 61, 41, 47, 65, 77, 81, + 81, 83, 87, 95, 123, 107, 123, 121, 115, 97, + 117, 119, 13, 20, 4, 22, 32, 32, 124, 54, + 50, 9, 70, 124, 124, 124, 44, 18, 0, 77, + 125, 125, 125, 125, 125, 125, 5, 86, 54, 36, + 18, 28, 4, 5, 9, 3, 1, 10, 36, 18, + 56, 40, 22, 38, 40, 46, 48, 24, 52, 40, + 1, 24, 8, 21, 35, 49, 61, 117, 115, 89, + }, + + { + /* Context Tables for I, SI Slices :: qp = 48 */ + + 36, 6, 39, 36, 6, 39, 40, 56, 56, 24, + 17, 77, 15, 0, 102, 28, 112, 17, 48, 62, + 3, 43, 4, 75, 109, 13, 47, 125, 125, 125, + 124, 38, 13, 48, 62, 3, 9, 38, 40, 17, + 6, 0, 0, 19, 49, 15, 61, 10, 7, 1, + 11, 35, 17, 49, 34, 8, 2, 3, 24, 4, + 44, 0, 0, 0, 14, 69, 67, 24, 33, 15, + 104, 10, 11, 25, 0, 37, 20, 38, 34, 58, + 16, 44, 34, 24, 52, 17, 31, 21, 37, 72, + 2, 48, 3, 4, 11, 6, 52, 15, 31, 13, + 7, 38, 7, 16, 8, 17, 18, 9, 14, 24, + 14, 30, 44, 38, 43, 15, 6, 7, 4, 17, + 20, 19, 3, 38, 15, 30, 34, 8, 93, 18, + 15, 79, 35, 81, 11, 16, 2, 22, 10, 10, + 54, 14, 2, 30, 52, 19, 17, 8, 99, 29, + 16, 63, 0, 29, 42, 2, 55, 66, 2, 3, + 73, 72, 41, 77, 2, 89, 17, 18, 28, 12, + 18, 10, 7, 18, 32, 59, 23, 12, 4, 2, + 37, 21, 29, 41, 23, 39, 59, 27, 31, 121, + 65, 43, 77, 67, 51, 8, 28, 4, 3, 1, + 25, 41, 25, 57, 67, 49, 83, 107, 103, 125, + 41, 41, 73, 63, 75, 85, 95, 101, 111, 125, + 115, 125, 53, 109, 119, 81, 79, 2, 78, 56, + 36, 14, 10, 5, 5, 15, 1, 22, 102, 66, + 50, 34, 94, 38, 30, 8, 14, 14, 106, 66, + 38, 20, 52, 22, 8, 4, 8, 1, 78, 34, + 16, 2, 32, 12, 1, 6, 18, 90, 54, 32, + 20, 62, 28, 26, 24, 16, 124, 23, 5, 16, + 5, 9, 2, 18, 20, 6, 20, 34, 48, 25, + 15, 27, 21, 44, 35, 37, 23, 3, 32, 65, + 45, 12, 24, 57, 67, 55, 9, 1, 30, 7, + 4, 1, 10, 28, 29, 9, 7, 10, 49, 19, + 33, 29, 20, 75, 
25, 8, 13, 42, 15, 3, + 64, 15, 21, 3, 54, 49, 31, 85, 24, 52, + 52, 26, 7, 26, 18, 4, 8, 2, 3, 9, + 45, 55, 69, 51, 41, 67, 113, 63, 57, 57, + 47, 41, 15, 13, 43, 65, 19, 19, 21, 13, + 35, 73, 41, 47, 83, 57, 73, 71, 105, 97, + 81, 67, 41, 29, 65, 45, 53, 71, 83, 87, + 87, 87, 93, 101, 125, 111, 125, 125, 119, 101, + 121, 123, 11, 22, 6, 22, 32, 32, 124, 54, + 50, 9, 70, 124, 124, 124, 44, 14, 3, 83, + 125, 125, 125, 125, 125, 125, 5, 86, 52, 34, + 18, 28, 4, 7, 9, 3, 1, 10, 36, 18, + 58, 40, 22, 38, 40, 46, 48, 24, 52, 40, + 3, 20, 4, 25, 39, 53, 65, 123, 119, 91, + }, + + { + /* Context Tables for I, SI Slices :: qp = 49 */ + + 34, 6, 39, 34, 6, 39, 44, 60, 58, 26, + 17, 79, 17, 1, 102, 28, 118, 17, 50, 64, + 3, 43, 6, 77, 111, 13, 49, 125, 125, 125, + 124, 42, 13, 50, 64, 3, 7, 42, 42, 17, + 8, 2, 2, 19, 49, 13, 61, 10, 5, 0, + 11, 35, 15, 49, 36, 10, 4, 1, 26, 4, + 44, 0, 0, 0, 16, 69, 67, 26, 33, 15, + 104, 10, 11, 21, 2, 37, 22, 42, 38, 62, + 20, 48, 38, 28, 56, 13, 29, 19, 35, 74, + 4, 50, 1, 6, 9, 8, 56, 13, 29, 11, + 3, 40, 5, 18, 12, 15, 20, 7, 16, 26, + 16, 32, 46, 40, 43, 15, 8, 7, 4, 15, + 20, 19, 3, 38, 15, 30, 34, 8, 95, 18, + 15, 81, 37, 81, 9, 18, 4, 24, 12, 12, + 56, 16, 4, 32, 56, 19, 17, 10, 101, 27, + 18, 63, 2, 29, 44, 4, 55, 68, 2, 3, + 75, 74, 41, 79, 2, 89, 19, 18, 28, 12, + 18, 10, 7, 18, 32, 61, 23, 12, 4, 2, + 37, 21, 29, 41, 23, 39, 61, 27, 31, 123, + 67, 43, 79, 69, 51, 6, 26, 2, 5, 3, + 29, 45, 29, 61, 71, 53, 87, 111, 107, 125, + 43, 43, 75, 67, 79, 89, 99, 105, 115, 125, + 119, 125, 53, 111, 121, 81, 79, 4, 80, 58, + 38, 16, 10, 5, 3, 13, 0, 24, 104, 68, + 52, 34, 96, 40, 32, 10, 16, 16, 108, 68, + 40, 22, 54, 24, 10, 6, 12, 0, 82, 36, + 18, 4, 34, 14, 0, 10, 20, 92, 56, 34, + 22, 64, 30, 28, 26, 18, 124, 21, 3, 18, + 5, 7, 2, 20, 22, 8, 22, 36, 50, 23, + 15, 27, 21, 46, 35, 37, 25, 3, 32, 67, + 47, 12, 24, 57, 69, 55, 7, 0, 32, 5, + 6, 0, 12, 30, 29, 7, 5, 12, 49, 17, + 31, 27, 22, 75, 25, 10, 13, 44, 15, 3, + 66, 15, 21, 1, 56, 
49, 31, 85, 24, 52, + 52, 26, 9, 26, 18, 4, 8, 2, 3, 9, + 45, 57, 71, 53, 43, 69, 115, 65, 59, 57, + 47, 41, 13, 11, 41, 65, 17, 23, 25, 17, + 39, 77, 45, 51, 87, 61, 77, 75, 109, 101, + 83, 67, 43, 31, 67, 47, 57, 75, 87, 91, + 91, 91, 97, 105, 125, 113, 125, 125, 121, 103, + 123, 125, 7, 26, 10, 24, 34, 34, 124, 56, + 52, 9, 72, 124, 124, 124, 46, 12, 5, 87, + 125, 125, 125, 125, 125, 125, 3, 88, 52, 34, + 18, 30, 4, 7, 9, 1, 0, 12, 38, 20, + 62, 42, 24, 40, 42, 48, 50, 26, 54, 42, + 3, 18, 2, 27, 41, 55, 67, 125, 121, 91, + }, + + { + /* Context Tables for I, SI Slices :: qp = 50 */ + + 32, 6, 39, 32, 6, 39, 48, 62, 58, 26, + 17, 81, 19, 3, 102, 28, 122, 17, 52, 66, + 3, 45, 6, 79, 113, 15, 53, 125, 125, 125, + 124, 44, 13, 52, 66, 3, 7, 44, 42, 19, + 8, 2, 4, 19, 49, 13, 61, 10, 5, 2, + 11, 35, 15, 49, 36, 10, 4, 1, 26, 4, + 44, 0, 0, 0, 18, 69, 67, 26, 35, 15, + 104, 10, 11, 19, 4, 37, 22, 44, 42, 66, + 22, 50, 42, 30, 60, 11, 27, 17, 33, 76, + 4, 52, 0, 6, 9, 10, 60, 13, 29, 11, + 1, 40, 5, 18, 14, 15, 22, 7, 16, 26, + 16, 34, 46, 40, 45, 15, 8, 7, 4, 15, + 20, 19, 5, 38, 17, 30, 34, 8, 99, 18, + 15, 85, 39, 83, 7, 20, 6, 26, 14, 14, + 58, 18, 4, 34, 58, 19, 17, 10, 103, 27, + 20, 63, 2, 29, 46, 4, 57, 70, 2, 3, + 77, 76, 43, 81, 2, 91, 23, 16, 26, 10, + 16, 8, 9, 16, 32, 63, 25, 12, 4, 2, + 37, 23, 31, 43, 25, 41, 63, 29, 33, 125, + 69, 45, 81, 71, 51, 2, 24, 1, 9, 7, + 33, 49, 33, 67, 75, 57, 93, 117, 111, 125, + 47, 47, 79, 71, 83, 93, 103, 109, 119, 125, + 123, 125, 55, 115, 123, 83, 79, 4, 80, 58, + 38, 16, 10, 5, 3, 13, 2, 24, 104, 68, + 52, 34, 98, 40, 34, 10, 18, 18, 110, 70, + 40, 22, 56, 24, 10, 8, 14, 2, 84, 38, + 18, 4, 36, 16, 2, 12, 20, 92, 56, 34, + 22, 66, 32, 30, 28, 20, 124, 21, 3, 18, + 5, 7, 2, 20, 22, 8, 22, 36, 52, 23, + 15, 27, 23, 46, 37, 39, 27, 5, 32, 69, + 49, 12, 24, 59, 71, 57, 5, 2, 34, 5, + 6, 0, 14, 32, 29, 7, 5, 12, 51, 17, + 31, 27, 24, 75, 25, 10, 13, 46, 15, 3, + 68, 15, 21, 1, 58, 51, 31, 87, 22, 52, + 52, 24, 11, 24, 16, 
2, 6, 0, 5, 11, + 47, 59, 73, 57, 45, 73, 119, 67, 61, 59, + 49, 41, 13, 11, 41, 67, 15, 27, 29, 21, + 43, 83, 49, 55, 93, 65, 81, 79, 113, 105, + 85, 69, 47, 35, 69, 51, 61, 79, 91, 95, + 95, 95, 101, 109, 125, 117, 125, 125, 123, 107, + 125, 125, 5, 28, 12, 26, 36, 36, 124, 58, + 54, 9, 74, 124, 124, 124, 48, 10, 9, 93, + 125, 125, 125, 125, 125, 125, 3, 88, 52, 34, + 18, 30, 4, 7, 9, 0, 2, 14, 40, 22, + 64, 44, 26, 42, 44, 50, 52, 26, 56, 42, + 3, 16, 1, 31, 45, 59, 71, 125, 125, 93, + }, + + { + /* Context Tables for I, SI Slices :: qp = 51 */ + + 30, 6, 39, 30, 6, 39, 52, 66, 60, 26, + 19, 85, 23, 5, 102, 28, 124, 17, 54, 68, + 3, 47, 6, 81, 115, 15, 57, 125, 125, 125, + 124, 46, 13, 54, 68, 3, 5, 46, 44, 19, + 8, 4, 6, 21, 51, 13, 61, 10, 5, 2, + 11, 35, 15, 49, 38, 10, 4, 1, 28, 4, + 44, 0, 0, 0, 18, 69, 67, 28, 37, 15, + 104, 10, 11, 17, 6, 37, 24, 46, 46, 70, + 26, 52, 46, 32, 64, 9, 25, 15, 31, 78, + 4, 54, 2, 6, 7, 10, 64, 13, 27, 9, + 0, 40, 5, 20, 18, 13, 24, 7, 16, 26, + 16, 36, 48, 42, 45, 15, 8, 7, 4, 15, + 20, 21, 5, 38, 19, 30, 34, 8, 103, 18, + 17, 89, 41, 85, 5, 22, 8, 28, 16, 14, + 60, 18, 6, 36, 60, 19, 17, 10, 105, 25, + 22, 63, 2, 29, 48, 4, 59, 72, 2, 3, + 79, 78, 43, 83, 2, 93, 25, 14, 26, 8, + 14, 6, 9, 14, 30, 65, 27, 12, 4, 2, + 39, 25, 33, 45, 27, 43, 65, 31, 33, 125, + 71, 45, 83, 73, 51, 0, 20, 3, 13, 11, + 37, 53, 37, 71, 81, 61, 97, 121, 117, 125, + 51, 51, 81, 75, 87, 99, 109, 113, 125, 125, + 125, 125, 57, 119, 125, 85, 79, 4, 82, 58, + 38, 16, 10, 5, 3, 13, 2, 24, 104, 68, + 52, 34, 100, 42, 34, 12, 18, 20, 112, 70, + 42, 22, 58, 26, 12, 8, 16, 4, 86, 40, + 20, 4, 38, 18, 4, 14, 22, 92, 56, 34, + 22, 68, 32, 32, 30, 22, 124, 19, 1, 20, + 5, 7, 2, 20, 22, 8, 22, 36, 52, 23, + 15, 27, 23, 48, 39, 41, 29, 7, 32, 71, + 51, 12, 24, 59, 73, 57, 3, 4, 36, 3, + 8, 2, 14, 34, 29, 7, 5, 14, 51, 17, + 31, 25, 26, 75, 25, 10, 13, 48, 15, 3, + 70, 15, 21, 1, 60, 53, 31, 89, 20, 50, + 50, 22, 13, 22, 14, 0, 4, 1, 7, 13, + 49, 61, 75, 59, 47, 
75, 123, 71, 63, 61, + 49, 43, 13, 11, 41, 67, 13, 31, 33, 25, + 47, 89, 53, 59, 97, 69, 85, 83, 119, 109, + 89, 71, 51, 37, 73, 55, 65, 83, 95, 99, + 99, 99, 105, 113, 125, 121, 125, 125, 125, 111, + 125, 125, 3, 30, 14, 28, 38, 38, 124, 60, + 56, 9, 76, 124, 124, 124, 50, 6, 11, 97, + 125, 125, 125, 125, 125, 125, 3, 88, 52, 34, + 18, 30, 4, 7, 9, 0, 4, 16, 42, 24, + 66, 44, 26, 44, 44, 50, 52, 26, 56, 42, + 3, 14, 3, 35, 49, 63, 75, 125, 125, 95, + }, + + }, + +}; diff --git a/common/ih264_cabac_tables.h b/common/ih264_cabac_tables.h new file mode 100755 index 0000000..0cef51e --- /dev/null +++ b/common/ih264_cabac_tables.h @@ -0,0 +1,101 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file ih264_cabac_tables.h +* +* @brief +* This file contains enumerations, macros and extern declarations of H264 +* cabac tables +* +* @author +* Ittiam +* +* @remarks +* none +****************************************************************************** +*/ + +#ifndef IH264_CABAC_TABLES_H_ +#define IH264_CABAC_TABLES_H_ + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief maximum range of cabac_init_idc (0-2) +****************************************************************************** + */ +#define IH264_NUM_CABAC_INIT_IDC_PLUS_ONE 4 + +/** +****************************************************************************** + * @brief max range of qps in H264 (0-51) +****************************************************************************** + */ +#define IH264_MAX_QP 52 + +/** +****************************************************************************** + * @brief max range of cabac contexts in H264 (0-459) +****************************************************************************** + */ +#define IH264_NUM_CABAC_CTXTS 460 + +/*****************************************************************************/ +/* Extern global declarations */ +/*****************************************************************************/ + +/** + ****************************************************************************** + * @brief Table for rangeTabLPS depending on pStateIdx and qCodIRangeIdx + * input : pStateIdx(0-63) and qCodIRangeIdx(0-3) [(Range >> 6) & 0x3] + * output : RLps + * + * @remarks See Table 9-35 of H264 spec for rangeTabLPS + ******************************************************************************* + */ +extern 
const UWORD8 gau1_ih264_cabac_rlps[64][4]; + + +/** + ****************************************************************************** + * @brief probability+MPS state transition tables based on cur State and bin + * input : curpState[bits7-2] | curMPS[bit1] | decodedBin[bit0] + * output : nextpState[bits6-1] | nextMPS[bit0] + * @remarks Modified form of Table-9-36 State Transition table in H264 spec + ****************************************************************************** + */ +extern const UWORD8 gau1_ih264_next_state[128*2]; + + +/** + ****************************************************************************** + * @brief Init context tables for all combinations of qp and cabac_init_idc + * @remarks Packing format MPS in lsb and pState in bits[1-6] + ****************************************************************************** + */ +extern const UWORD8 gau1_ih264_cab_ctxts[IH264_NUM_CABAC_INIT_IDC_PLUS_ONE][IH264_MAX_QP][IH264_NUM_CABAC_CTXTS]; + + +#endif /* IH264_CABAC_TABLES_H_ */ diff --git a/common/ih264_cavlc_tables.c b/common/ih264_cavlc_tables.c new file mode 100755 index 0000000..f122ab9 --- /dev/null +++ b/common/ih264_cavlc_tables.c @@ -0,0 +1,282 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + + +/** +****************************************************************************** +* @file +* ih264_cavlc_tables.c +* +* @brief +* This file contains H264 cavlc tables for encoding coeff_tokens, levels, total +* zeros and runs before zeros +* +* @author +* Ittiam +* +* @par List of Tables +* - gu1_code_coeff_token_table +* - gu1_size_coeff_token_table +* - gu1_code_coeff_token_table_chroma +* - gu1_size_coeff_token_table_chroma +* - gu1_threshold_vlc_level +* - gu1_size_zero_table +* - gu1_code_zero_table +* - gu1_size_zero_table_chroma +* - gu1_code_zero_table_chroma +* - gu1_index_zero_table +* - gu1_size_run_table +* - gu1_code_run_table +* - gu4_codeword_level_tables +* - gu1_codesize_level_tables +* +* @remarks +* none +* +****************************************************************************** +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_cavlc_tables.h" + + +/*****************************************************************************/ +/* Extern global definitions */ +/*****************************************************************************/ + +/** + ****************************************************************************** + * @brief Assignment of cbp to a codenum for intra and inter prediction modes + * chroma format idc != 0 + * input : cbp, intra - 0/inter - 1 + * output : codenum + * @remarks Table 9-4 – Assignment of codeNum to values of coded_block_pattern + * for macroblock prediction modes in H264 spec + ****************************************************************************** + */ +const UWORD8 gu1_cbp_map_tables[48][2]= +{ + { 3, 0}, {29, 2}, 
{30, 3}, {17, 7}, {31, 4}, {18, 8}, {37, 17}, { 8, 13}, + {32, 5}, {38, 18}, {19, 9}, { 9, 14}, {20, 10}, {10, 15}, {11, 16}, { 2, 11}, + {16, 1}, {33, 32}, {34, 33}, {21, 36}, {35, 34}, {22, 37}, {39, 44}, { 4, 40}, + {36, 35}, {40, 45}, {23, 38}, { 5, 41}, {24, 39}, { 6, 42}, { 7, 43}, { 1, 19}, + {41, 6}, {42, 24}, {43, 25}, {25, 20}, {44, 26}, {26, 21}, {46, 46}, {12, 28}, + {45, 27}, {47, 47}, {27, 22}, {13, 29}, {28, 23}, {14, 30}, {15, 31}, { 0, 12}, +}; + + +/** + ****************************************************************************** + * @brief total non-zero coefficients and numbers of trailing ones of a residual + * block are mapped to coeff_token using the tables given below. + * input : VLC-Num | Trailing ones | Total coeffs + * output : coeff_token (code word, size of the code word) + * @remarks Table-9-5 coeff_token mapping to TotalCoeff( coeff_token ) + * and TrailingOnes( coeff_token ) in H264 spec + ****************************************************************************** + */ +const UWORD8 gu1_code_coeff_token_table[3][4][16] = +{ + { + { 5, 7, 7, 7, 7, 15, 11, 8, 15, 11, 15, 11, 15, 11, 7, 4, }, + { 1, 4, 6, 6, 6, 6, 14, 10, 14, 10, 14, 10, 1, 14, 10, 6, }, + { 0, 1, 5, 5, 5, 5, 5, 13, 9, 13, 9, 13, 9, 13, 9, 5, }, + { 0, 0, 3, 3, 4, 4, 4, 4, 4, 12, 12, 8, 12, 8, 12, 8, }, + }, + { + {11, 7, 7, 7, 4, 7, 15, 11, 15, 11, 8, 15, 11, 7, 9, 7, }, + { 2, 7, 10, 6, 6, 6, 6, 14, 10, 14, 10, 14, 10, 11, 8, 6, }, + { 0, 3, 9, 5, 5, 5, 5, 13, 9, 13, 9, 13, 9, 6, 10, 5, }, + { 0, 0, 5, 4, 6, 8, 4, 4, 4, 12, 8, 12, 12, 8, 1, 4, }, + }, + { + {15, 11, 8, 15, 11, 9, 8, 15, 11, 15, 11, 8, 13, 9, 5, 1, }, + {14, 15, 12, 10, 8, 14, 10, 14, 14, 10, 14, 10, 7, 12, 8, 4, }, + { 0, 13, 14, 11, 9, 13, 9, 13, 10, 13, 9, 13, 9, 11, 7, 3, }, + { 0, 0, 12, 11, 10, 9, 8, 13, 12, 12, 12, 8, 12, 10, 6, 2, }, + }, +}; + +const UWORD8 gu1_size_coeff_token_table[3][4][16] = +{ + { + { 6, 8, 9, 10, 11, 13, 13, 13, 14, 14, 15, 15, 16, 16, 16, 16, }, + { 2, 6, 8, 9, 
10, 11, 13, 13, 14, 14, 15, 15, 15, 16, 16, 16, }, + { 0, 3, 7, 8, 9, 10, 11, 13, 13, 14, 14, 15, 15, 16, 16, 16, }, + { 0, 0, 5, 6, 7, 8, 9, 10, 11, 13, 14, 14, 15, 15, 16, 16, }, + }, + { + { 6, 6, 7, 8, 8, 9, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14, }, + { 2, 5, 6, 6, 7, 8, 9, 11, 11, 12, 12, 13, 13, 14, 14, 14, }, + { 0, 3, 6, 6, 7, 8, 9, 11, 11, 12, 12, 13, 13, 13, 14, 14, }, + { 0, 0, 4, 4, 5, 6, 6, 7, 9, 11, 11, 12, 13, 13, 13, 14, }, + }, + { + { 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9, 10, 10, 10, 10, }, + { 4, 5, 5, 5, 5, 6, 6, 7, 8, 8, 9, 9, 9, 10, 10, 10, }, + { 0, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 10, }, + { 0, 0, 4, 4, 4, 4, 4, 5, 6, 7, 8, 8, 9, 10, 10, 10, }, + }, +}; +const UWORD8 gu1_code_coeff_token_table_chroma[4][4] = +{ + { 7, 4, 3, 2, }, + { 1, 6, 3, 3, }, + { 0, 1, 2, 2, }, + { 0, 0, 5, 0, }, +}; + +const UWORD8 gu1_size_coeff_token_table_chroma[4][4] = +{ + { 6, 6, 6, 6, }, + { 1, 6, 7, 8, }, + { 0, 3, 7, 8, }, + { 0, 0, 6, 7, }, +}; + +/** + ****************************************************************************** + * @brief After encoding the current Level, to encode the next level, the choice + * of VLC table needs to be updated. The update is carried basing on a set of thresholds. + * These thresholds are listed in the table below for lookup. 
+ * input : suffix_length + * output : threshold + ****************************************************************************** + */ +const UWORD8 gu1_threshold_vlc_level[6] = +{ + 0, 3, 6, 12, 24, 48 +}; + + +/** + ****************************************************************************** + * @brief table for encoding total number of zeros + * input : coeff_token, total zeros + * output : code word, size of the code word + * @remarks Table-9-7, 9-8 total_zeros tables for 4x4 blocks with + * TotalCoeff( coeff_token ) in H264 spec + ****************************************************************************** + */ +const UWORD8 gu1_size_zero_table[135] = +{ + 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9, + 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, + 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6, + 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5, + 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5, + 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6, + 6, 5, 3, 3, 3, 2, 3, 4, 3, 6, + 6, 4, 5, 3, 2, 2, 3, 3, 6, + 6, 6, 4, 2, 2, 3, 2, 5, + 5, 5, 3, 2, 2, 2, 4, + 4, 4, 3, 3, 1, 3, + 4, 4, 2, 1, 3, + 3, 3, 1, 2, + 2, 2, 1, + 1, 1, +}; +const UWORD8 gu1_code_zero_table[135] = +{ + 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1, + 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0, + 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0, + 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0, + 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0, + 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0, + 1, 1, 5, 4, 3, 3, 2, 1, 1, 0, + 1, 1, 1, 3, 3, 2, 2, 1, 0, + 1, 0, 1, 3, 2, 1, 1, 1, + 1, 0, 1, 3, 2, 1, 1, + 0, 1, 1, 2, 1, 3, + 0, 1, 1, 1, 1, + 0, 1, 1, 1, + 0, 1, 1, + 0, 1, +}; +const UWORD8 gu1_size_zero_table_chroma[9] = +{ + 1, 2, 3, 3, + 1, 2, 2, + 1, 1, +}; +const UWORD8 gu1_code_zero_table_chroma[9] = +{ + 1, 1, 1, 0, + 1, 1, 0, + 1, 0, +}; + +/** + ****************************************************************************** + * @brief index to access zero table (look up) + * input : TotalCoeff( coeff_token ) + * output : index to access zero table + 
****************************************************************************** + */ +const UWORD8 gu1_index_zero_table[15] = +{ + 0, 16, 31, 45, 58, 70, 81, 91, 100, 108, 115, 121, 126, 130, 133, +}; + +/** + ****************************************************************************** + * @brief table for encoding runs of zeros before + * input : zeros left, runs of zeros before + * output : code word, size of the code word + * @remarks Table-9-10 table for run_before in H264 spec + ****************************************************************************** + */ +const UWORD8 gu1_size_run_table[42] = +{ + 1, 1, + 1, 2, 2, + 2, 2, 2, 2, + 2, 2, 2, 3, 3, + 2, 2, 3, 3, 3, 3, + 2, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, +}; +const UWORD8 gu1_code_run_table[42] = +{ + 1, 0, + 1, 1, 0, + 3, 2, 1, 0, + 3, 2, 1, 1, 0, + 3, 2, 3, 2, 1, 0, + 3, 0, 1, 3, 2, 5, 4, + 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; +/** + ****************************************************************************** + * @brief index to access run table (look up) + * input : zeros left + * output : index to access run table + ****************************************************************************** + */ +const UWORD8 gu1_index_run_table[7] = +{ + 0, 2, 5, 9, 14, 20, 27, +}; diff --git a/common/ih264_cavlc_tables.h b/common/ih264_cavlc_tables.h new file mode 100755 index 0000000..78057b5 --- /dev/null +++ b/common/ih264_cavlc_tables.h @@ -0,0 +1,133 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file ih264_cavlc_tables.h +* +* @brief +* This file contains enumerations, macros and extern declarations of H264 +* cavlc tables +* +* @author +* Ittiam +* +* @remarks +* none +****************************************************************************** +*/ + +#ifndef IH264_CAVLC_TABLES_H_ +#define IH264_CAVLC_TABLES_H_ + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ +/** +****************************************************************************** + * @brief maximum zeros left +****************************************************************************** + */ +#define MAX_ZERO_LEFT 6 + +/*****************************************************************************/ +/* Extern global declarations */ +/*****************************************************************************/ + +/** + ****************************************************************************** + * @brief Assignment of cbp to a codenum for intra and inter prediction modes + * chroma format idc != 0 + * input : cbp, intra - 0/inter - 1 + * output : codenum + * @remarks Table 9-4 – Assignment of codeNum to values of coded_block_pattern + * for macroblock prediction modes in H264 spec 
+ ****************************************************************************** + */ +extern const UWORD8 gu1_cbp_map_tables[48][2]; + +/** + ****************************************************************************** + * @brief total non-zero coefficients and numbers of trailing ones of a residual + * block are mapped to coefftoken using the tables given below. + * input : VLC-Num | Trailing ones | Total coeffs + * output : coeff_token (code word, size of the code word) + * @remarks Table-9-5 coeff_token mapping to TotalCoeff( coeff_token ) + * and TrailingOnes( coeff_token ) in H264 spec + ****************************************************************************** + */ +extern const UWORD8 gu1_code_coeff_token_table[3][4][16]; +extern const UWORD8 gu1_size_coeff_token_table[3][4][16]; +extern const UWORD8 gu1_code_coeff_token_table_chroma[4][4]; +extern const UWORD8 gu1_size_coeff_token_table_chroma[4][4]; + +/** + ****************************************************************************** + * @brief Thresholds for determining whether to increment Level table number. 
+ * input : suffix_length + * output : threshold + ****************************************************************************** + */ +extern const UWORD8 gu1_threshold_vlc_level[6]; + +/** + ****************************************************************************** + * @brief table for encoding total number of zeros + * input : coeff_token, total zeros + * output : code word, size of the code word + * @remarks Table-9-7, 9-8 total_zeros tables for 4x4 blocks with + * TotalCoeff( coeff_token ) in H264 spec + ****************************************************************************** + */ +extern const UWORD8 gu1_size_zero_table[135]; +extern const UWORD8 gu1_code_zero_table[135]; +extern const UWORD8 gu1_size_zero_table_chroma[9]; +extern const UWORD8 gu1_code_zero_table_chroma[9]; + +/** + ****************************************************************************** + * @brief index to access zero table (for speed) + * input : TotalCoeff( coeff_token ) + * output : index to access zero table + ****************************************************************************** + */ +extern const UWORD8 gu1_index_zero_table[15]; + +/** + ****************************************************************************** + * @brief table for encoding runs of zeros before + * input : zeros left, runs of zeros before + * output : code word, size of the code word + * @remarks Table-9-10 table for run_before in H264 spec + ****************************************************************************** + */ +extern const UWORD8 gu1_size_run_table[42]; +extern const UWORD8 gu1_code_run_table[42]; + +/** + ****************************************************************************** + * @brief index to access run table (look up) + * input : zeros left + * output : index to access run table + ****************************************************************************** + */ +extern const UWORD8 gu1_index_run_table[7]; + +#endif /* IH264_CAVLC_TABLES_H_ */ diff --git 
a/common/ih264_chroma_intra_pred_filters.c b/common/ih264_chroma_intra_pred_filters.c new file mode 100755 index 0000000..ee145e5 --- /dev/null +++ b/common/ih264_chroma_intra_pred_filters.c @@ -0,0 +1,478 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_chroma_intra_pred_filters.c +* +* @brief +* Contains function definitions for chroma intra prediction filters +* +* @author +* Ittiam +* +* @par List of Functions: +* -ih264_intra_pred_chroma_8x8_mode_dc +* -ih264_intra_pred_chroma_8x8_mode_horz +* -ih264_intra_pred_chroma_8x8_mode_vert +* -ih264_intra_pred_chroma_8x8_mode_plane +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <stddef.h> +#include <string.h> + +/* User include files */ +#include "ih264_defs.h" +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_intra_pred_filters.h" + +/* Global variables used only in assembly files*/ +const WORD8 ih264_gai1_intrapred_chroma_plane_coeffs1[] = +{ 0x01,0x00,0x01,0x00, + 0x02,0x00,0x02,0x00, + 0x03,0x00,0x03,0x00, + 0x04,0x00,0x04,0x00 +}; + const WORD8 ih264_gai1_intrapred_chroma_plane_coeffs2[] = + { 0xfd,0xff,0xfe,0xff, + 0xff,0xff,0x00,0x00, + 0x01,0x00,0x02,0x00, + 0x03,0x00,0x04,0x00, + }; + +/*****************************************************************************/ +/* Chroma Intra prediction 8x8 filters */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* ih264_intra_pred_chroma_8x8_mode_dc +* +* @brief +* Perform Intra prediction for chroma_8x8 mode:DC +* +* @par Description: +* Perform Intra prediction for chroma_8x8 mode:DC ,described in sec 8.3.4.1 +* +* @param[in] pu1_src +* UWORD8 pointer to the source containing alternate U and V samples 
+* +* @param[out] pu1_dst +* UWORD8 
pointer to the destination with alternate U and V samples +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +** @param[in] ngbr_avail +* availability of neighbouring pixels +* +* @returns +* +* @remarks +* None +* +****************************************************************************** +*/ +void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + WORD32 left_avail, left_avail1, left_avail2; /* availability of left predictors (only for DC) */ + WORD32 top_avail; /* availability of top predictors (only for DC) */ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + UNUSED(src_strd); + + /* temporary variables to store accumulated first left half,second left half, + * first top half,second top half of U and V values*/ + WORD32 val_u_l1 = 0, val_u_l2 = 0, val_u_t1 = 0, val_u_t2 = 0; + WORD32 val_v_l1 = 0, val_v_l2 = 0, val_v_t1 = 0, val_v_t2 = 0; + + WORD32 val_u1 = 0, val_u2 = 0, val_v1 = 0, val_v2 = 0; + + WORD32 col, row; /*loop variables*/ + + left_avail = ngbr_avail & 0x11; + left_avail1 = ngbr_avail & 1; + left_avail2 = (ngbr_avail >> 4) & 1; + top_avail = (ngbr_avail >> 2) & 1; + + pu1_top = pu1_src + 2 * BLK8x8SIZE + 2; + pu1_left = pu1_src + 2 * BLK8x8SIZE - 2; + + if(left_avail1) + { /* First 4x4 block*/ + val_u_l1 += *pu1_left; + val_v_l1 += *(pu1_left + 1); + pu1_left -= 2; + val_u_l1 += *pu1_left; + val_v_l1 += *(pu1_left + 1); + pu1_left -= 2; + val_u_l1 += *pu1_left; + val_v_l1 += *(pu1_left + 1); + pu1_left -= 2; + val_u_l1 += *pu1_left + 2; + val_v_l1 += *(pu1_left + 1) + 2; + pu1_left -= 2; + } + else + pu1_left -= 2 * 4; + + if(left_avail2) + { + /* Second 4x4 block*/ + val_u_l2 += *pu1_left; + val_v_l2 += *(pu1_left + 1); + pu1_left -= 2; + val_u_l2 += *pu1_left; + val_v_l2 += *(pu1_left + 1); + pu1_left -= 2; + 
val_u_l2 += *pu1_left; + val_v_l2 += *(pu1_left + 1); + pu1_left -= 2; + val_u_l2 += *pu1_left + 2; + val_v_l2 += *(pu1_left + 1) + 2; + pu1_left -= 2; + } + else + pu1_left -= 2 * 4; + + if(top_avail) + { + val_u_t1 += *pu1_top + *(pu1_top + 2) + *(pu1_top + 4) + + *(pu1_top + 6) + 2; + val_u_t2 += *(pu1_top + 8) + *(pu1_top + 10) + *(pu1_top + 12) + + *(pu1_top + 14) + 2; + val_v_t1 += *(pu1_top + 1) + *(pu1_top + 3) + *(pu1_top + 5) + + *(pu1_top + 7) + 2; + val_v_t2 += *(pu1_top + 9) + *(pu1_top + 11) + *(pu1_top + 13) + + *(pu1_top + 15) + 2; + } + + if(left_avail + top_avail) + { + val_u1 = (left_avail1 + top_avail) ? + ((val_u_l1 + val_u_t1) + >> (1 + left_avail1 + top_avail)) :128; + val_v1 = (left_avail1 + top_avail) ? + ((val_v_l1 + val_v_t1) + >> (1 + left_avail1 + top_avail)) :128; + if(top_avail) + { + val_u2 = val_u_t2 >> 2; + val_v2 = val_v_t2 >> 2; + } + else if(left_avail1) + { + val_u2 = val_u_l1 >> 2; + val_v2 = val_v_l1 >> 2; + } + else + { + val_u2 = val_v2 = 128; + } + + for(row = 0; row < 4; row++) + { + /*top left 4x4 block*/ + for(col = 0; col < 8; col += 2) + { + *(pu1_dst + row * dst_strd + col) = val_u1; + *(pu1_dst + row * dst_strd + col + 1) = val_v1; + } + /*top right 4x4 block*/ + for(col = 8; col < 16; col += 2) + { + *(pu1_dst + row * dst_strd + col) = val_u2; + *(pu1_dst + row * dst_strd + col + 1) = val_v2; + } + } + + if(left_avail2) + { + val_u1 = val_u_l2 >> 2; + val_v1 = val_v_l2 >> 2; + } + else if(top_avail) + { + val_u1 = val_u_t1 >> 2; + val_v1 = val_v_t1 >> 2; + } + else + { + val_u1 = val_v1 = 128; + } + val_u2 = (left_avail2 + top_avail) ? + ((val_u_l2 + val_u_t2) + >> (1 + left_avail2 + top_avail)) : 128; + val_v2 = (left_avail2 + top_avail) ? 
+ ((val_v_l2 + val_v_t2) + >> (1 + left_avail2 + top_avail)) : 128; + + for(row = 4; row < 8; row++) + { /*bottom left 4x4 block*/ + for(col = 0; col < 8; col += 2) + { + *(pu1_dst + row * dst_strd + col) = val_u1; + *(pu1_dst + row * dst_strd + col + 1) = val_v1; + } + /*bottom right 4x4 block*/ + for(col = 8; col < 16; col += 2) + { + *(pu1_dst + row * dst_strd + col) = val_u2; + *(pu1_dst + row * dst_strd + col + 1) = val_v2; + } + } + } + else + { + /* Both left and top are unavailable, set the block to 128 */ + for(row = 0; row < 8; row++) + { + memset(pu1_dst + row * dst_strd, 128, 8 * sizeof(UWORD16)); + } + } +} + +/** +******************************************************************************* +* +*ih264_intra_pred_chroma_8x8_mode_horz +* +* @brief +* Perform Intra prediction for chroma_8x8 mode:Horizontal +* +* @par Description: +* Perform Intra prediction for chroma_8x8 mode:Horizontal ,described in sec 8.3.4.2 +* +* @param[in] pu1_src +* UWORD8 pointer to the source containing alternate U and V samples +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination with alternate U and V samples +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] ngbr_avail +* availability of neighbouring pixels(Not used in this function) +* +* @returns +* +* @remarks +* None +* +****************************************************************************** +*/ +void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + WORD32 rows, cols; /* loop variables*/ + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_left = pu1_src + 2 * BLK8x8SIZE - 2; + for(rows = 0; rows < 8; rows++) + { + for(cols = 0; cols < 16; cols += 2) + { + *(pu1_dst + rows * dst_strd + cols) = *pu1_left; + + *(pu1_dst + rows * dst_strd + cols + 1) = *(pu1_left + 1); + } + 
pu1_left -= 2; + } + +} + +/** +******************************************************************************* +* +*ih264_intra_pred_chroma_8x8_mode_vert +* +* @brief +* Perform Intra prediction for chroma_8x8 mode:vertical +* +* @par Description: +* Perform Intra prediction for chroma_8x8 mode:vertical ,described in sec 8.3.4.3 +* +* @param[in] pu1_src +* UWORD8 pointer to the source containing alternate U and V samples +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination with alternate U and V samples +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] ngbr_avail +* availability of neighbouring pixels(Not used in this function) +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ +void ih264_intra_pred_chroma_8x8_mode_vert(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + WORD32 row;/*loop variable*/ + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + 2 * BLK8x8SIZE + 2; + + /* 16 bytes (8 interleaved U/V pairs) of the top row are copied to each of the 8 dst rows */ + for(row = 0; row < 2; row++) + { + memcpy(pu1_dst, pu1_top, 16); + + pu1_dst += dst_strd; + memcpy(pu1_dst, pu1_top, 16); + + pu1_dst += dst_strd; + memcpy(pu1_dst, pu1_top, 16); + + pu1_dst += dst_strd; + memcpy(pu1_dst, pu1_top, 16); + + pu1_dst += dst_strd; + } +} + +/** +******************************************************************************* +* +* ih264_intra_pred_chroma_8x8_mode_plane +* +* @brief +* Perform Intra prediction for chroma_8x8 mode:PLANE +* +* @par Description: +* Perform Intra prediction for chroma_8x8 mode:PLANE ,described in sec 8.3.4.4 +* +* @param[in] pu1_src +* UWORD8 pointer to the source containing alternate U and V samples +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination 
with alternate U and V samples +* +* @param[in] 
src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] ngbr_avail +* availability of neighbouring pixels(Not used in this function) +* +* @returns +* +* @remarks +* None +* +****************************************************************************** +*/ +void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + WORD32 val = 0; + WORD32 rows, cols; /* loop variables*/ + WORD32 a_u, b_u, c_u, h_u, v_u; /* Implementing section 8.3.4.4 . The variables represent the corresponding variables in the section*/ + WORD32 a_v, b_v, c_v, h_v, v_v; + UNUSED(src_strd); + UNUSED(ngbr_avail); + a_u = b_u = c_u = h_u = v_u = 0; + a_v = b_v = c_v = h_v = v_v = 0; + /* As chroma format 4:2:0 is used,xCF = 4 * ( chroma_format_idc = = 3 ) = 0 and + yCF = 4 * ( chroma_format_idc != 1 ) = 0 */ + pu1_top = pu1_src + 2 * BLK8x8SIZE + 2; + pu1_left = pu1_src + 2 * BLK8x8SIZE - 2; + /* Implementing section 8.3.4.4 */ + for(cols = 0; cols < 4; cols++) + { + h_u += (cols + 1) * (pu1_top[8 + 2 * cols] - pu1_top[4 - 2 * cols]);/*section 8.3.4.4 equation (8-144)*/ + h_v += (cols + 1) * (pu1_top[8 + 2 * cols + 1] - pu1_top[4 - 2 * cols+ 1]); + + v_u += (cols + 1) * (pu1_left[(4 + cols) * (-2)] - pu1_left[(2 - cols) * (-2)]); + v_v += (cols + 1) * (pu1_left[(4 + cols) * (-2) + 1] - pu1_left[(2 - cols) * (-2) + 1]);/*section 8.3.4.4 equation (8-145)*/ + } + a_u = 16 * (pu1_left[7 * (-2)] + pu1_top[14]); + a_v = 16 * (pu1_left[7 * (-2) + 1] + pu1_top[15]);/*section 8.3.4.4 equation (8-141)*/ + b_u = (34 * h_u + 32) >> 6;/*section 8.3.4.4 equation (8-142)*/ + b_v = (34 * h_v + 32) >> 6;/*section 8.3.4.4 equation (8-142)*/ + c_u = (34 * v_u + 32) >> 6;/*section 8.3.4.4 equation (8-143)*/ + c_v = (34 * v_v + 32) >> 6;/*section 
8.3.3.4 equation (8-143)*/ + + for(rows = 0; rows < 8; rows++) + { + for(cols = 0; cols < 8; cols++) + { + val = (a_u + b_u * (cols - 3) + c_u * (rows - 3) );/*section 8.3.4.4 equation (8-140)*/ + val = (val + 16) >> 5; + *(pu1_dst + rows * dst_strd + 2 * cols) = CLIP_U8(val); + val = (a_v + b_v * (cols - 3) + c_v * (rows - 3) );/*section 8.3.4.4 equation (8-140)*/ + val = (val + 16) >> 5; + *(pu1_dst + rows * dst_strd + 2 * cols + 1) = CLIP_U8(val); + } + } +} + diff --git a/common/ih264_common_tables.c b/common/ih264_common_tables.c new file mode 100755 index 0000000..c53c276 --- /dev/null +++ b/common/ih264_common_tables.c @@ -0,0 +1,725 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_common_tables.c +* +* @brief +* Contains common global tables +* +* @author +* Harish M +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264_macros.h" +#include "ih264_structs.h" +#include "ih264_common_tables.h" + + +/*****************************************************************************/ +/* Extern global definitions */ +/*****************************************************************************/ + +/** + ****************************************************************************** + * @brief while encoding, based on the input configuration parameters, + * the level of the bitstream is computed from the table below. 
+ * input : table_idx + * output : level_idc or cpb size + * @remarks Table A-1 – level table limits + ****************************************************************************** + */ +const level_tables_t gas_ih264_lvl_tbl[16] = +{ /* columns: level_idc, max_mbps, max_fs, max_dpb_size, max_br, max_cpb_size, max_mv_y */ + { IH264_LEVEL_10, 1485, 99, 297, 64, 175, 64 }, + { IH264_LEVEL_11, 1485, 99, 297, 128, 350, 64 }, /* NOTE(review): 1485/99/128/350 look like the level 1b limits of Table A-1 and 3000/396/192/500 like those of level 1.1, so the labels of this row and the next may be swapped - confirm against the spec and the users of this table */ + { IH264_LEVEL_1B, 3000, 396, 675, 192, 500, 128 }, + { IH264_LEVEL_12, 6000, 396, 1782, 384, 1000, 128 }, + { IH264_LEVEL_13, 11880, 396, 1782, 768, 2000, 128 }, + { IH264_LEVEL_20, 11880, 396, 1782, 2000, 2000, 128 }, + { IH264_LEVEL_21, 19800, 792, 3564, 4000, 4000, 256 }, + { IH264_LEVEL_22, 20250, 1620, 6075, 4000, 4000, 256 }, + { IH264_LEVEL_30, 40500, 1620, 6075, 10000, 10000, 256 }, + { IH264_LEVEL_31, 108000, 3600, 13500, 14000, 14000, 512 }, + { IH264_LEVEL_32, 216000, 5120, 15360, 20000, 20000, 512 }, + { IH264_LEVEL_40, 245760, 8192, 24576, 20000, 25000, 512 }, + { IH264_LEVEL_41, 245760, 8192, 24576, 50000, 62500, 512 }, + { IH264_LEVEL_42, 522240, 8704, 26112, 50000, 62500, 512 }, + { IH264_LEVEL_50, 589824, 22080, 82800, 135000, 135000, 512 }, + { IH264_LEVEL_51, 983040, 36864, 138240, 240000, 240000, 512 }, +}; + + +/** + * Array containing supported levels + */ +const WORD32 gai4_ih264_levels[] = +{ + IH264_LEVEL_10, + IH264_LEVEL_11, + IH264_LEVEL_12, + IH264_LEVEL_13, + IH264_LEVEL_20, + IH264_LEVEL_21, + IH264_LEVEL_22, + IH264_LEVEL_30, + IH264_LEVEL_31, + IH264_LEVEL_32, + IH264_LEVEL_40, + IH264_LEVEL_41, + IH264_LEVEL_42, + IH264_LEVEL_50, + IH264_LEVEL_51, +}; + + +/** + * Array giving size of max luma samples in a picture for a given level + */ +const WORD32 gai4_ih264_max_luma_pic_size[] = +{ + /* Level 1 */ + 25344, + /* Level 1.1 */ + 101376, + /* Level 1.2 */ + 101376, + /* Level 1.3 */ + 101376, + /* Level 2 */ + 101376, + /* Level 2.1 */ + 202752, + /* Level 2.2 */ + 414720, + /* Level 3 */ + 414720, + /* Level 3.1 */ + 921600, + /* Level 3.2 */ + 1310720, + /* Level 4 */ + 2097152, + /* Level 4.1 */ 
+ 2097152, + /* Level 4.2 */ + 2228224, + /* Level 5 */ + 5652480, + /* Level 5.1 */ + 9437184 +}; + + +/** Max width and height allowed for a given level */ +/** This is derived as SQRT(8 * gai4_ih264_max_luma_pic_size[]) */ +const WORD32 gai4_ih264_max_wd_ht[] = +{ + /* Level 1 */ + 451, + /* Level 1.1 */ + 901, + /* Level 1.2 */ + 901, + /* Level 1.3 */ + 901, + /* Level 2 */ + 901, + /* Level 2.1 */ + 1274, + /* Level 2.2 */ + 1822, + /* Level 3 */ + 1822, + /* Level 3.1 */ + 2716, + /* Level 3.2 */ + 3239, + /* Level 4 */ + 4096, + /* Level 4.1 */ + 4096, + /* Level 4.2 */ + 4223, + /* Level 5 */ + 6725, + /* Level 5.1 */ + 8689 +}; + +/** Min width and height allowed for a given level */ +/** This is derived as gai4_ih264_max_luma_pic_size[]/gai4_ih264_max_wd_ht[] */ +const WORD32 gai4_ih264_min_wd_ht[] = +{ + /* Level 1 */ + 57, + /* Level 1.1 */ + 113, + /* Level 1.2 */ + 113, + /* Level 1.3 */ + 113, + /* Level 2 */ + 113, + /* Level 2.1 */ + 160, + /* Level 2.2 */ + 228, + /* Level 3 */ + 228, + /* Level 3.1 */ + 340, + /* Level 3.2 */ + 405, + /* Level 4 */ + 512, + /* Level 4.1 */ + 512, + /* Level 4.2 */ + 528, + /* Level 5 */ + 841, + /* Level 5.1 */ + 1087 + +}; + + +/** Table 7-11 Macroblock types for I slices */ +intra_mbtype_info_t gas_ih264_i_mbtype_info[] = +{ + /* For first entry, if transform_size_8x8_flag is 1, mode will be MBPART_I8x8 */ + /* This has to be taken care while accessing the table */ + {0, MBPART_I4x4, VERT_I16x16, 0, 0}, + {0, MBPART_I16x16, VERT_I16x16, 0, 0}, + {0, MBPART_I16x16, HORZ_I16x16, 0, 0}, + {0, MBPART_I16x16, DC_I16x16, 0, 0}, + {0, MBPART_I16x16, PLANE_I16x16, 0, 0}, + {0, MBPART_I16x16, VERT_I16x16, 1, 0}, + {0, MBPART_I16x16, HORZ_I16x16, 1, 0}, + {0, MBPART_I16x16, DC_I16x16, 1, 0}, + {0, MBPART_I16x16, PLANE_I16x16, 1, 0}, + {0, MBPART_I16x16, VERT_I16x16, 2, 0}, + {0, MBPART_I16x16, HORZ_I16x16, 2, 0}, + {0, MBPART_I16x16, DC_I16x16, 2, 0}, + {0, MBPART_I16x16, PLANE_I16x16, 2, 0}, + {0, MBPART_I16x16, 
VERT_I16x16, 0, 15}, + {0, MBPART_I16x16, HORZ_I16x16, 0, 15}, + {0, MBPART_I16x16, DC_I16x16, 0, 15}, + {0, MBPART_I16x16, PLANE_I16x16, 0, 15}, + {0, MBPART_I16x16, VERT_I16x16, 1, 15}, + {0, MBPART_I16x16, HORZ_I16x16, 1, 15}, + {0, MBPART_I16x16, DC_I16x16, 1, 15}, + {0, MBPART_I16x16, PLANE_I16x16, 1, 15}, + {0, MBPART_I16x16, VERT_I16x16, 2, 15}, + {0, MBPART_I16x16, HORZ_I16x16, 2, 15}, + {0, MBPART_I16x16, DC_I16x16, 2, 15}, + {0, MBPART_I16x16, PLANE_I16x16, 2, 15}, + {0, MBPART_IPCM, VERT_I16x16, 0, 0} +}; + +/** Table 7-13 Macroblock types for P slices */ +inter_mbtype_info_t gas_ih264_p_mbtype_info[] = +{ + {1, MBPART_L0, MBPART_NA, 16, 16}, + {2, MBPART_L0, MBPART_L0, 16, 8}, + {2, MBPART_L0, MBPART_L0, 8, 16}, + {4, MBPART_NA, MBPART_NA, 8, 8}, + {4, MBPART_NA, MBPART_NA, 8, 8}, +}; + +/** Table 7-14 Macroblock types for B slices */ +inter_mbtype_info_t gas_ih264_b_mbtype_info[] = +{ + {0, MBPART_DIRECT, MBPART_NA, 8, 8, }, + {1, MBPART_L0, MBPART_NA, 16, 16, }, + {1, MBPART_L1, MBPART_NA, 16, 16, }, + {1, MBPART_BI, MBPART_NA, 16, 16, }, + {2, MBPART_L0, MBPART_L0, 16, 8, }, + {2, MBPART_L0, MBPART_L0, 8, 16, }, + {2, MBPART_L1, MBPART_L1, 16, 8, }, + {2, MBPART_L1, MBPART_L1, 8, 16, }, + {2, MBPART_L0, MBPART_L1, 16, 8, }, + {2, MBPART_L0, MBPART_L1, 8, 16, }, + {2, MBPART_L1, MBPART_L0, 16, 8, }, + {2, MBPART_L1, MBPART_L0, 8, 16, }, + {2, MBPART_L0, MBPART_BI, 16, 8, }, + {2, MBPART_L0, MBPART_BI, 8, 16, }, + {2, MBPART_L1, MBPART_BI, 16, 8, }, + {2, MBPART_L1, MBPART_BI, 8, 16, }, + {2, MBPART_BI, MBPART_L0, 16, 8, }, + {2, MBPART_BI, MBPART_L0, 8, 16, }, + {2, MBPART_BI, MBPART_L1, 16, 8, }, + {2, MBPART_BI, MBPART_L1, 8, 16, }, + {2, MBPART_BI, MBPART_BI, 16, 8, }, + {2, MBPART_BI, MBPART_BI, 8, 16, }, + {4, MBPART_NA, MBPART_NA, 8, 8, }, +}; + +/** Table 7-17 – Sub-macroblock types in P macroblocks */ +submbtype_info_t gas_ih264_p_submbtype_info[] = +{ + {1, MBPART_L0, 8, 8}, + {2, MBPART_L0, 8, 4}, + {2, MBPART_L0, 4, 8}, + {4, MBPART_L0, 4, 
4}, +}; + +/** Table 7-18 – Sub-macroblock types in B macroblocks */ +submbtype_info_t gas_ih264_b_submbtype_info[] = +{ + {4, MBPART_DIRECT, 4, 4}, + {1, MBPART_L0, 8, 8}, + {1, MBPART_L1, 8, 8}, + {1, MBPART_BI, 8, 8}, + {2, MBPART_L0, 8, 4}, + {2, MBPART_L0, 4, 8}, + {2, MBPART_L1, 8, 4}, + {2, MBPART_L1, 4, 8}, + {2, MBPART_BI, 8, 4}, + {2, MBPART_BI, 4, 8}, + {4, MBPART_L0, 4, 4}, + {4, MBPART_L1, 4, 4}, + {4, MBPART_BI, 4, 4}, +}; + + + + +const UWORD8 gau1_ih264_inv_scan_prog4x4[] = +{ + 0, 1, 4, 8, + 5, 2, 3, 6, + 9, 12, 13, 10, + 7, 11, 14, 15 +}; + +const UWORD8 gau1_ih264_inv_scan_int4x4[] = +{ + 0, 4, 1, 8, + 12, 5, 9, 13, + 2, 6, 10, 14, + 3, 7, 11, 15 +}; + +/** Inverse scan tables for individual 4x4 blocks of 8x8 transform coeffs of CAVLC */ +/* progressive */ +const UWORD8 gau1_ih264_inv_scan_prog8x8_cavlc[64] = +{ + 0, 9, 17, 18, 12, 40, 27, 7, + 35, 57, 29, 30, 58, 38, 53, 47, + 1, 2, 24, 11, 19, 48, 20, 14, + 42, 50, 22, 37, 59, 31, 60, 55, + 8, 3, 32, 4, 26, 41, 13, 21, + 49, 43, 15, 44, 52, 39, 61, 62, + 16, 10, 25, 5, 33, 34, 6, 28, + 56, 36, 23, 51, 45, 46, 54, 63 +}; + +/* interlace */ +const UWORD8 gau1_ih264_inv_scan_int8x8_cavlc[64] = +{ + 0, 9, 2, 56, 18, 26, 34, 27, + 35, 28, 36, 29, 45, 7, 54, 39, + 8, 24, 25, 33, 41, 11, 42, 12, + 43, 13, 44, 14, 53, 15, 62, 47, + 16, 32, 40, 10, 49, 4, 50, 5, + 51, 6, 52, 22, 61, 38, 23, 55, + 1, 17, 48, 3, 57, 19, 58, 20, + 59, 21, 60, 37, 30, 46, 31, 63 +}; + + + +/*Inverse scan tables for individual 8x8 blocks of 8x8 transform coeffs of CABAC */ +/* progressive */ + +const UWORD8 gau1_ih264_inv_scan_prog8x8_cabac[64] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + + +/* interlace */ + +const UWORD8 gau1_ih264_inv_scan_int8x8_cabac[64] = +{ + 0, 8, 16, 1, 9, 24, 32, 17, + 2, 25, 
40, 48, 56, 33, 10, 3, + 18, 41, 49, 57, 26, 11, 4, 19, + 34, 42, 50, 58, 27, 12, 5, 20, + 35, 43, 51, 59, 28, 13, 6, 21, + 36, 44, 52, 60, 29, 14, 22, 37, + 45, 53, 61, 30, 7, 15, 38, 46, + 54, 62, 23, 31, 39, 47, 55, 63 +}; + + +const UWORD8 *gpau1_ih264_inv_scan8x8[] = +{ + gau1_ih264_inv_scan_prog8x8_cavlc, + gau1_ih264_inv_scan_int8x8_cavlc, + gau1_ih264_inv_scan_prog8x8_cabac, + gau1_ih264_inv_scan_int8x8_cabac +}; + +const UWORD8 *gpau1_ih264_inv_scan4x4[] = +{ + gau1_ih264_inv_scan_prog4x4, + gau1_ih264_inv_scan_int4x4, +}; + +const UWORD8 gau1_ih264_8x8_subblk_idx[] = +{ + 0, 1, 4, 5, + 2, 3, 6, 7, + 8, 9, 12, 13, + 10, 11, 14, 15 +}; + + +/* Table 8-15 Chroma QP offset table */ +const UWORD8 gau1_ih264_chroma_qp[] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 29, 30, + 31, 32, 32, 33, 34, 34, 35, 35, + 36, 36, 37, 37, 37, 38, 38, 38, + 39, 39, 39, 39 +}; + + +/** +****************************************************************************** +* @brief look up table to compute neigbour availability of 4x4 blocks +* input : subblk idx, mb neighbor availability +* output : sub blk neighbor availability +* @remarks +****************************************************************************** +*/ +const UWORD8 gau1_ih264_4x4_ngbr_avbl[16][16] = +{ + { 0x0, 0x1, 0xc, 0x7, 0x1, 0x1, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 }, + { 0x1, 0x1, 0xf, 0x7, 0x1, 0x1, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 }, + { 0x2, 0x1, 0xc, 0x7, 0x1, 0x1, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 }, + { 0x3, 0x1, 0xf, 0x7, 0x1, 0x1, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 }, + + { 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 }, + { 0xd, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 }, + { 0xe, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 }, + { 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7, 
0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 }, + + { 0x0, 0x1, 0xc, 0x7, 0x1, 0x9, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 }, + { 0x1, 0x1, 0xf, 0x7, 0x1, 0x9, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 }, + { 0x2, 0x1, 0xc, 0x7, 0x1, 0x9, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 }, + { 0x3, 0x1, 0xf, 0x7, 0x1, 0x9, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 }, + + { 0xc, 0xf, 0xc, 0x7, 0xf, 0xf, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 }, + { 0xd, 0xf, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 }, + { 0xe, 0xf, 0xc, 0x7, 0xf, 0xf, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 }, + { 0xf, 0xf, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 }, +}; + + +/** +****************************************************************************** +* @brief look up table to compute neigbour availability of 8x8 blocks +* input : subblk idx, mb neighbor availability +* output : sub blk neighbor availability +* @remarks +****************************************************************************** +*/ +const UWORD8 gau1_ih264_8x8_ngbr_avbl[16][4] = +{ + { 0x0, 0x1, 0xc, 0x7 }, + { 0x1, 0x1, 0xf, 0x7 }, + { 0x2, 0x1, 0xc, 0x7 }, + { 0x3, 0x1, 0xf, 0x7 }, + + { 0xc, 0x7, 0xc, 0x7 }, + { 0xd, 0x7, 0xf, 0x7 }, + { 0xe, 0x7, 0xc, 0x7 }, + { 0xf, 0x7, 0xf, 0x7 }, + + { 0x0, 0x9, 0xc, 0x7 }, + { 0x1, 0x9, 0xf, 0x7 }, + { 0x2, 0x9, 0xc, 0x7 }, + { 0x3, 0x9, 0xf, 0x7 }, + + { 0xc, 0xf, 0xc, 0x7 }, + { 0xd, 0xf, 0xf, 0x7 }, + { 0xe, 0xf, 0xc, 0x7 }, + { 0xf, 0xf, 0xf, 0x7 }, +}; + +/** Table 7-3 Default intra 4x4 scaling list */ +const UWORD16 gau2_ih264_default_intra4x4_scaling_list[] = +{ + 6, 13, 13, 20, + 20, 20, 28, 28, + 28, 28, 32, 32, + 32, 37, 37, 42 +}; + +/** Table 7-3 Default inter 4x4 scaling list */ +const UWORD16 gau2_ih264_default_inter4x4_scaling_list[] = +{ + 10, 14, 14, 20, + 20, 20, 24, 24, + 24, 24, 27, 27, + 27, 30, 30, 34 +}; + +/* Inverse scanned output of 
gau2_ih264_default_intra4x4_scaling_list */ +const UWORD16 gau2_ih264_default_intra4x4_weight_scale[] = +{ + 6, 13, 20, 28, + 13, 20, 28, 32, + 20, 28, 32, 37, + 28, 32, 37, 42 +}; + +/* Inverse scanned output of gau2_ih264_default_inter4x4_scaling_list */ +const UWORD16 gau2_ih264_default_inter4x4_weight_scale[] = +{ + 10, 14, 20, 24, + 14, 20, 24, 27, + 20, 24, 27, 30, + 24, 27, 30, 34 +}; + +/** Table 7-4 Default intra 8x8 scaling list */ +const UWORD16 gau2_ih264_default_intra8x8_scaling_list[] = +{ + 6, 10, 10, 13, 11, 13, 16, 16, + 16, 16, 18, 18, 18, 18, 18, 23, + 23, 23, 23, 23, 23, 25, 25, 25, + 25, 25, 25, 25, 27, 27, 27, 27, + 27, 27, 27, 27, 29, 29, 29, 29, + 29, 29, 29, 31, 31, 31, 31, 31, + 31, 33, 33, 33, 33, 33, 36, 36, + 36, 36, 38, 38, 38, 40, 40, 42 +}; + +/** Table 7-4 Default inter 8x8 scaling list */ +const UWORD16 gau2_ih264_default_inter8x8_scaling_list[] = +{ + 9, 13, 13, 15, 13, 15, 17, 17, + 17, 17, 19, 19, 19, 19, 19, 21, + 21, 21, 21, 21, 21, 22, 22, 22, + 22, 22, 22, 22, 24, 24, 24, 24, + 24, 24, 24, 24, 25, 25, 25, 25, + 25, 25, 25, 27, 27, 27, 27, 27, + 27, 28, 28, 28, 28, 28, 30, 30, + 30, 30, 32, 32, 32, 33, 33, 35 +}; + +/* Inverse scanned output of gau2_ih264_default_intra8x8_scaling_list */ +const UWORD16 gau2_ih264_default_intra8x8_weight_scale[] = +{ + 6, 10, 13, 16, 18, 23, 25, 27, + 10, 11, 16, 18, 23, 25, 27, 29, + 13, 16, 18, 23, 25, 27, 29, 31, + 16, 18, 23, 25, 27, 29, 31, 33, + 18, 23, 25, 27, 29, 31, 33, 36, + 23, 25, 27, 29, 31, 33, 36, 38, + 25, 27, 29, 31, 33, 36, 38, 40, + 27, 29, 31, 33, 36, 38, 40, 42 +}; + +/* Inverse scanned output of gau2_ih264_default_inter8x8_scaling_list */ +const UWORD16 gau2_ih264_default_inter8x8_weight_scale[] = +{ + 9, 13, 15, 17, 19, 21, 22, 24, + 13, 13, 17, 19, 21, 22, 24, 25, + 15, 17, 19, 21, 22, 24, 25, 27, + 17, 19, 21, 22, 24, 25, 27, 28, + 19, 21, 22, 24, 25, 27, 28, 30, + 21, 22, 24, 25, 27, 28, 30, 32, + 22, 24, 25, 27, 28, 30, 32, 33, + 24, 25, 27, 28, 30, 32, 33, 35 +}; 
+/* Eq 7-8 Flat scaling matrix for 4x4 */ +const UWORD16 gau2_ih264_flat_4x4_weight_scale[] = +{ + 16, 16, 16, 16, + 16, 16, 16, 16, + 16, 16, 16, 16, + 16, 16, 16, 16 +}; + +/* Eq 7-9 Flat scaling matrix for 8x8 */ +const UWORD16 gau2_ih264_flat_8x8_weight_scale[] = +{ + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16 +}; + + +/** + ****************************************************************************** + * @brief Scale Table for inverse quantizing 4x4 subblock. To inverse quantize + * a given 4x4 quantized block, the coefficient at index location (i,j) is scaled + * by one of the constants in this table and right shift the result by abs (4 - + * floor(qp/6)), here qp is the quantization parameter used to quantize the mb. + * + * input : 16 * qp%6, index location (i,j) + * output : scale constant. + * + * @remarks 16 constants for each index position of the subblock and 6 for each + * qp%6 in the range 0-5 inclusive. + ****************************************************************************** + */ + +const UWORD16 gau2_ih264_iquant_scale_matrix_4x4[96] = +{ + 10, 13, 10, 13, + 13, 16, 13, 16, + 10, 13, 10, 13, + 13, 16, 13, 16, + + 11, 14, 11, 14, + 14, 18, 14, 18, + 11, 14, 11, 14, + 14, 18, 14, 18, + + 13, 16, 13, 16, + 16, 20, 16, 20, + 13, 16, 13, 16, + 16, 20, 16, 20, + + 14, 18, 14, 18, + 18, 23, 18, 23, + 14, 18, 14, 18, + 18, 23, 18, 23, + + 16, 20, 16, 20, + 20, 25, 20, 25, + 16, 20, 16, 20, + 20, 25, 20, 25, + + 18, 23, 18, 23, + 23, 29, 23, 29, + 18, 23, 18, 23, + 23, 29, 23, 29, + +}; + +/** + ****************************************************************************** + * @brief Scale Table for inverse quantizing 8x8 subblock. 
To inverse quantize + * a given 8x8 quantized block, the coefficient at index location (i,j) is scaled + * by one of the constants in this table and right shift the result by abs (4 - + * floor(qp/6)), here qp is the quantization parameter used to quantize the mb. + * + * input : qp%6, index location (i,j) + * output : scale constant. + * + * @remarks 64 constants for each index position of the subblock and 6 for each + * qp%6 in the range 0-5 inclusive. + ****************************************************************************** + */ +const UWORD16 gau2_ih264_iquant_scale_matrix_8x8 [384] = +{ + 20, 19, 25, 19, 20, 19, 25, 19, + 19, 18, 24, 18, 19, 18, 24, 18, + 25, 24, 32, 24, 25, 24, 32, 24, + 19, 18, 24, 18, 19, 18, 24, 18, + 20, 19, 25, 19, 20, 19, 25, 19, + 19, 18, 24, 18, 19, 18, 24, 18, + 25, 24, 32, 24, 25, 24, 32, 24, + 19, 18, 24, 18, 19, 18, 24, 18, + + 22, 21, 28, 21, 22, 21, 28, 21, + 21, 19, 26, 19, 21, 19, 26, 19, + 28, 26, 35, 26, 28, 26, 35, 26, + 21, 19, 26, 19, 21, 19, 26, 19, + 22, 21, 28, 21, 22, 21, 28, 21, + 21, 19, 26, 19, 21, 19, 26, 19, + 28, 26, 35, 26, 28, 26, 35, 26, + 21, 19, 26, 19, 21, 19, 26, 19, + + 26, 24, 33, 24, 26, 24, 33, 24, + 24, 23, 31, 23, 24, 23, 31, 23, + 33, 31, 42, 31, 33, 31, 42, 31, + 24, 23, 31, 23, 24, 23, 31, 23, + 26, 24, 33, 24, 26, 24, 33, 24, + 24, 23, 31, 23, 24, 23, 31, 23, + 33, 31, 42, 31, 33, 31, 42, 31, + 24, 23, 31, 23, 24, 23, 31, 23, + + 28, 26, 35, 26, 28, 26, 35, 26, + 26, 25, 33, 25, 26, 25, 33, 25, + 35, 33, 45, 33, 35, 33, 45, 33, + 26, 25, 33, 25, 26, 25, 33, 25, + 28, 26, 35, 26, 28, 26, 35, 26, + 26, 25, 33, 25, 26, 25, 33, 25, + 35, 33, 45, 33, 35, 33, 45, 33, + 26, 25, 33, 25, 26, 25, 33, 25, + + 32, 30, 40, 30, 32, 30, 40, 30, + 30, 28, 38, 28, 30, 28, 38, 28, + 40, 38, 51, 38, 40, 38, 51, 38, + 30, 28, 38, 28, 30, 28, 38, 28, + 32, 30, 40, 30, 32, 30, 40, 30, + 30, 28, 38, 28, 30, 28, 38, 28, + 40, 38, 51, 38, 40, 38, 51, 38, + 30, 28, 38, 28, 30, 28, 38, 28, + + 36, 34, 46, 34, 36, 
34, 46, 34, + 34, 32, 43, 32, 34, 32, 43, 32, + 46, 43, 58, 43, 46, 43, 58, 43, + 34, 32, 43, 32, 34, 32, 43, 32, + 36, 34, 46, 34, 36, 34, 46, 34, + 34, 32, 43, 32, 34, 32, 43, 32, + 46, 43, 58, 43, 46, 43, 58, 43, + 34, 32, 43, 32, 34, 32, 43, 32, + +}; diff --git a/common/ih264_common_tables.h b/common/ih264_common_tables.h new file mode 100755 index 0000000..3127a2c --- /dev/null +++ b/common/ih264_common_tables.h @@ -0,0 +1,136 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_common_tables.h +* +* @brief +* Common tables +* +* @author +* Harish +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _IH264_COMMON_TABLES_H_ +#define _IH264_COMMON_TABLES_H_ + + +/*****************************************************************************/ +/* Structures */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief level tables +****************************************************************************** + */ +typedef struct +{ + /* level */ + IH264_LEVEL_T u4_level_idc; + + /* max macroblock processing rate */ + UWORD32 u4_max_mbps; + + /* max frame size in mbs */ + UWORD32 u4_max_fs; + + /* max dpb size / 768 */ + UWORD32 u4_max_dpb_size; + + /* max bit rate */ + UWORD32 u4_max_br; + + /* max cpb size */ + UWORD32 u4_max_cpb_size; + + /* max vertical MV component range */ + UWORD32 u4_max_mv_y; + +}level_tables_t; + +/*****************************************************************************/ +/* Extern global declarations */ +/*****************************************************************************/ + +/** + ****************************************************************************** + * @brief while encoding, basing on the input configuration parameters, the + * the level of the bitstream is computed basing on the table below. 
+ * input : table_idx + * output : level_idc or cpb size + * @remarks Table A-1 – level table limits + ****************************************************************************** + */ +extern const level_tables_t gas_ih264_lvl_tbl[16]; + +extern const WORD32 gai4_ih264_levels[]; +extern const WORD32 gai4_ih264_max_luma_pic_size[]; +extern const WORD32 gai4_ih264_max_wd_ht[]; +extern const WORD32 gai4_ih264_min_wd_ht[]; + +extern intra_mbtype_info_t gas_ih264_i_mbtype_info[]; +extern inter_mbtype_info_t gas_ih264_p_mbtype_info[]; +extern inter_mbtype_info_t gas_ih264_b_mbtype_info[]; +extern submbtype_info_t gas_ih264_p_submbtype_info[]; +extern submbtype_info_t gas_ih264_b_submbtype_info[]; + + +extern const UWORD8 gau1_ih264_inv_scan_prog4x4[]; +extern const UWORD8 gau1_ih264_inv_scan_int4x4[]; +extern const UWORD8 gau1_ih264_inv_scan_prog8x8_cavlc[64]; +extern const UWORD8 gau1_ih264_inv_scan_int8x8_cavlc[64]; +extern const UWORD8 gau1_ih264_inv_scan_prog8x8_cabac[64]; +extern const UWORD8 gau1_ih264_inv_scan_int8x8_cabac[64]; + +extern const UWORD8 *gpau1_ih264_inv_scan8x8[]; +extern const UWORD8 *gpau1_ih264_inv_scan4x4[]; + +extern const UWORD8 gau1_ih264_8x8_subblk_idx[]; + +extern const UWORD8 gau1_ih264_chroma_qp[]; + +extern const UWORD8 gau1_ih264_4x4_ngbr_avbl[16][16]; +extern const UWORD8 gau1_ih264_8x8_ngbr_avbl[16][4]; + + +extern const UWORD16 gau2_ih264_default_inter4x4_weight_scale[]; +extern const UWORD16 gau2_ih264_default_intra4x4_weight_scale[]; +extern const UWORD16 gau2_ih264_default_intra4x4_scaling_list[]; +extern const UWORD16 gau2_ih264_default_inter4x4_scaling_list[]; +extern const UWORD16 gau2_ih264_default_intra8x8_scaling_list[]; +extern const UWORD16 gau2_ih264_default_inter8x8_scaling_list[]; +extern const UWORD16 gau2_ih264_default_intra8x8_weight_scale[]; +extern const UWORD16 gau2_ih264_default_inter8x8_weight_scale[]; +extern const UWORD16 gau2_ih264_flat_4x4_weight_scale[]; +extern const UWORD16 
gau2_ih264_flat_8x8_weight_scale[]; + +extern const UWORD16 gau2_ih264_iquant_scale_matrix_4x4 [96]; +extern const UWORD16 gau2_ih264_iquant_scale_matrix_8x8 [384]; + +#endif /*_IH264_COMMON_TABLES_H_*/ diff --git a/common/ih264_deblk_edge_filters.c b/common/ih264_deblk_edge_filters.c new file mode 100755 index 0000000..d2ffefd --- /dev/null +++ b/common/ih264_deblk_edge_filters.c @@ -0,0 +1,2087 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/**************************************************************************** */ +/* */ +/* File Name : ih264_deblk_edge_filters.c */ +/* */ +/* Description : Contains function definitions for deblocking */ +/* */ +/* List of Functions : ih264_deblk_luma_vert_bs4() */ +/* ih264_deblk_luma_horz_bs4() */ +/* ih264_deblk_luma_vert_bslt4() */ +/* ih264_deblk_luma_horz_bslt4() */ +/* ih264_deblk_luma_vert_bs4_mbaff() */ +/* ih264_deblk_luma_vert_bslt4_mbaff() */ +/* ih264_deblk_chroma_vert_bs4_bp() */ +/* ih264_deblk_chroma_horz_bs4_bp() */ +/* ih264_deblk_chroma_vert_bslt4_bp() */ +/* ih264_deblk_chroma_horz_bslt4_bp() */ +/* ih264_deblk_chroma_vert_bs4_mbaff_bp() */ +/* ih264_deblk_chroma_vert_bslt4_mbaff_bp() */ +/* ih264_deblk_chroma_vert_bs4() */ +/* ih264_deblk_chroma_horz_bs4() */ +/* ih264_deblk_chroma_vert_bslt4() */ +/* ih264_deblk_chroma_horz_bslt4() */ +/* ih264_deblk_chroma_vert_bs4_mbaff() */ +/* ih264_deblk_chroma_vert_bslt4_mbaff() */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 11 2013 Ittiam Draft */ +/* 29 12 2014 Kaushik Added double-call vertical */ +/* Senthoor deblocking and high profile */ +/* deblocking functions */ +/* */ +/******************************************************************************/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_platform_macros.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_macros.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + 
+/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_luma_vert_bs4() */ +/* */ +/* Description : This function performs filtering of a luma block */ +/* vertical edge when the boundary strength is set to 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.4 under the */ +/* title "Filtering process for edges for bS equal to 4" in */ +/* ITU T Rec H.264. */ +/* */ +/* Outputs : Filtered samples are written back in place through */ +/* pu1_src (up to 3 samples on each side of the edge) */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 11 2013 Ittiam Draft */ +/* */ +/*****************************************************************************/ +void ih264_deblk_luma_vert_bs4(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta) +{ + UWORD8 p3, p2, p1, p0, q0, q1, q2, q3; + WORD32 pos_p3, pos_p2, pos_p1, pos_p0; + WORD32 pos_q0, pos_q1, pos_q2,pos_q3; + UWORD8 a_p, a_q; /* threshold variables */ + WORD32 blk_strd = src_strd << 2; /* block_increment = src_strd * 4 */ + UWORD8 *pu1_src_temp; + WORD8 i = 0, edge; + + /* Within one row q0..q3 sit at offsets 0..3 and p0..p3 at offsets -1..-4 from the row pointer */ + pos_q0 = 0; + pos_q1 = 1; + pos_q2 = 2; + pos_q3 = 3; + pos_p0 = -1; + pos_p1 = -2; + pos_p2 = -3; + pos_p3 = -4; + + /* 4 groups of 4 rows each: 16 rows are visited in total */ + for(edge = 0; edge < 4; edge++, pu1_src += blk_strd) + { + pu1_src_temp = pu1_src; + for(i = 0; i < 4; ++i, pu1_src_temp += src_strd) + { + q0 = pu1_src_temp[pos_q0]; + q1 = pu1_src_temp[pos_q1]; + p0 = pu1_src_temp[pos_p0]; + p1 = pu1_src_temp[pos_p1]; + + /* Filter Decision: the row is left untouched unless all three differences are below their thresholds */ + if((ABS(p0 - q0) >= alpha) || + (ABS(q1 - q0) >= beta) || + (ABS(p1 - p0) >= beta)) + continue; + + p2 = pu1_src_temp[pos_p2]; + p3 = pu1_src_temp[pos_p3]; + q2 = pu1_src_temp[pos_q2]; + q3 = pu1_src_temp[pos_q3]; + + /* The 3-sample updates below are only considered when |p0 - q0| is also below (alpha >> 2) + 2 */ + if(ABS(p0 - q0) < ((alpha >> 
2) + 2)) + { + /* Threshold Variables */ + a_p = (UWORD8)ABS(p2 - p0); + a_q = (UWORD8)ABS(q2 - q0); + + if(a_p < beta) + { + /* p0', p1', p2' */ + pu1_src_temp[pos_p0] = ((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + + 4) >> 3); + pu1_src_temp[pos_p1] = ((p2 + p1 + p0 + q0 + 2) >> 2); + pu1_src_temp[pos_p2] = + ((X2(p3) + X3(p2) + p1 + p0 + q0 + + 4) >> 3); + } + else + { + /* p0' only */ + pu1_src_temp[pos_p0] = ((X2(p1) + p0 + q1 + 2) >> 2); + } + + if(a_q < beta) + { + /* q0', q1', q2' */ + pu1_src_temp[pos_q0] = (p1 + X2(p0) + X2(q0) + X2(q1) + q2 + + 4) >> 3; + pu1_src_temp[pos_q1] = (p0 + q0 + q1 + q2 + 2) >> 2; + pu1_src_temp[pos_q2] = (X2(q3) + X3(q2) + q1 + q0 + p0 + 4) + >> 3; + } + else + { + /* q0' only */ + pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2; + } + } + else + { + /* only p0' and q0' are modified */ + pu1_src_temp[pos_p0] = ((X2(p1) + p0 + q1 + 2) >> 2); + pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2; + } + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_luma_horz_bs4() */ +/* */ +/* Description : This function performs filtering of a luma block */ +/* horizontal edge when the boundary strength is set to 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.4 under the */ +/* title "Filtering process for edges for bS equal to 4" in */ +/* ITU T Rec H.264. 
*/ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 11 2013 Ittiam Draft */ +/* */ +/*****************************************************************************/ +void ih264_deblk_luma_horz_bs4(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta) +{ + UWORD8 p3, p2, p1, p0, q0, q1, q2, q3; + WORD32 pos_p3, pos_p2, pos_p1, pos_p0, pos_q0, pos_q1, + pos_q2, pos_q3; + UWORD8 a_p, a_q; /* threshold variables */ + UWORD8 *pu1_p3; /* pointer to the src sample p3 */ + UWORD8 *pu1_p3_temp; + UWORD8 *pu1_src_temp; + WORD8 i = 0, edge; + + pu1_p3 = pu1_src - (src_strd << 2); + pos_q0 = 0; + pos_q1 = src_strd; + pos_q2 = X2(src_strd); + pos_q3 = X3(src_strd); + pos_p0 = X3(src_strd); + pos_p1 = X2(src_strd); + pos_p2 = src_strd; + pos_p3 = 0; + + for(edge = 0; edge < 4; edge++, pu1_src += 4, pu1_p3 += 4) + { + pu1_src_temp = pu1_src; + pu1_p3_temp = pu1_p3; + for(i = 0; i < 4; ++i, pu1_src_temp++, pu1_p3_temp++) + { + q0 = pu1_src_temp[pos_q0]; + q1 = pu1_src_temp[pos_q1]; + p0 = pu1_p3_temp[pos_p0]; + p1 = pu1_p3_temp[pos_p1]; + + /* Filter Decision */ + if((ABS(p0 - q0) >= alpha) || + (ABS(q1 - q0) >= beta) || + (ABS(p1 - p0) >= beta)) + continue; + + p2 = pu1_p3_temp[pos_p2]; + p3 = pu1_p3_temp[pos_p3]; + q2 = pu1_src_temp[pos_q2]; + q3 = pu1_src_temp[pos_q3]; + + if(ABS(p0 - q0) < ((alpha >> 2) + 2)) + { + /* Threshold Variables */ + a_p = ABS(p2 - p0); + a_q = ABS(q2 - q0); + + if((a_p < beta)) + { + /* p0', p1', p2' */ + pu1_p3_temp[pos_p0] = (p2 + X2(p1) + X2(p0) + X2(q0) + q1 + + 4) >> 3; + pu1_p3_temp[pos_p1] = (p2 + p1 + p0 + q0 + 2) >> 2; + pu1_p3_temp[pos_p2] = + (X2(p3) + X3(p2) + p1 + p0 + q0 + + 4) >> 3; + } + else + { + /* p0'*/ + pu1_p3_temp[pos_p0] = (X2(p1) + p0 + q1 + 2) >> 2; + } + + if(a_q < beta) + { + /* q0', q1', q2' */ + pu1_src_temp[pos_q0] = (p1 + X2(p0) + X2(q0) + X2(q1) + + q2 + 4) >> 
3; + pu1_src_temp[pos_q1] = (p0 + q0 + q1 + q2 + 2) >> 2; + pu1_src_temp[pos_q2] = (X2(q3) + X3(q2) + q1 + q0 + p0 + + 4) >> 3; + } + else + { + /* q0'*/ + pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2; + } + } + else + { + /* p0', q0'*/ + pu1_p3_temp[pos_p0] = (X2(p1) + p0 + q1 + 2) >> 2; + pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2; + } + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_chroma_vert_bs4_bp() */ +/* */ +/* Description : This function performs filtering of a chroma block */ +/* vertical edge when the boundary strength is set to 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 of U */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.4 under the */ +/* title "Filtering process for edges for bS equal to 4" in */ +/* ITU T Rec H.264. 
*/ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 11 2013 Ittiam Draft */ +/* */ +/*****************************************************************************/ +void ih264_deblk_chroma_vert_bs4_bp(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta) +{ + UWORD8 *pu1_src_u = pu1_src; /* pointer to the src sample q0 of U */ + UWORD8 *pu1_src_v = pu1_src + 1; /* pointer to the src sample q0 of V */ + UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v; + WORD32 blk_strd = src_strd << 1; /* block_increment = src_strd * 2 */ + WORD32 pos_p1, pos_p0, pos_q0, pos_q1; + UWORD8 *pu1_src_temp_u, *pu1_src_temp_v; + WORD8 i = 0, edge; + + pos_q0 = 0; + pos_q1 = 2; + pos_p0 = -2; + pos_p1 = -4; + + for(edge = 0; edge < 4; + edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd) + { + pu1_src_temp_u = pu1_src_u; + pu1_src_temp_v = pu1_src_v; + for(i = 0; i < 2; ++i, pu1_src_temp_u += src_strd, pu1_src_temp_v += + src_strd) + { + q0_u = pu1_src_temp_u[pos_q0]; + q1_u = pu1_src_temp_u[pos_q1]; + p0_u = pu1_src_temp_u[pos_p0]; + p1_u = pu1_src_temp_u[pos_p1]; + q0_v = pu1_src_temp_v[pos_q0]; + q1_v = pu1_src_temp_v[pos_q1]; + p0_v = pu1_src_temp_v[pos_p0]; + p1_v = pu1_src_temp_v[pos_p1]; + + /* Filter Decision */ + if((ABS(p0_u - q0_u) < alpha) && + (ABS(q1_u - q0_u) < beta) && + (ABS(p1_u - p0_u) < beta)) + { + /* p0' */ + pu1_src_temp_u[pos_p0] = ((X2(p1_u) + p0_u + q1_u + 2) >> 2); + /* q0' */ + pu1_src_temp_u[pos_q0] = (X2(q1_u) + q0_u + p1_u + 2) >> 2; + } + + /* Filter Decision */ + if((ABS(p0_v - q0_v) < alpha) && + (ABS(q1_v - q0_v) < beta) && + (ABS(p1_v - p0_v) < beta)) + { + /* p0' */ + pu1_src_temp_v[pos_p0] = ((X2(p1_v) + p0_v + q1_v + 2) >> 2); + /* q0' */ + pu1_src_temp_v[pos_q0] = (X2(q1_v) + q0_v + p1_v + 2) >> 2; + } + } + } +} + 
+/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_chroma_horz_bs4_bp() */ +/* */ +/* Description : This function performs filtering of a chroma block */ +/* horizontal edge when the boundary strength is set to 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 of U */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.4 under the */ +/* title "Filtering process for edges for bS equal to 4" in */ +/* ITU T Rec H.264. */ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 11 2013 Ittiam Draft */ +/* */ +/*****************************************************************************/ +void ih264_deblk_chroma_horz_bs4_bp(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta) +{ + UWORD8 *pu1_src_u = pu1_src; /* pointer to the src sample q0 of U */ + UWORD8 *pu1_src_v = pu1_src + 1; /* pointer to the src sample q0 of V */ + UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v; + WORD32 pos_p1, pos_p0, pos_q0, pos_q1; + UWORD8 *pu1_src_temp_u, *pu1_src_temp_v; + UWORD8 *pu1_p1_u; /* pointer to the src sample p1 of U */ + UWORD8 *pu1_p1_v; /* pointer to the src sample p1 of U */ + UWORD8 *pu1_p1_temp_u, *pu1_p1_temp_v; + WORD8 i = 0, edge; + + pu1_p1_u = pu1_src_u - (src_strd << 1); + pu1_p1_v = pu1_src_v - (src_strd << 1); + pos_q0 = 0; + pos_q1 = src_strd; + pos_p0 = src_strd; + pos_p1 = 0; + + for(edge = 0; edge < 4; edge++, pu1_src_u += 4, pu1_p1_u += 4, + pu1_src_v += 4, pu1_p1_v += 4) + { + pu1_src_temp_u = pu1_src_u; + pu1_p1_temp_u = pu1_p1_u; + pu1_src_temp_v = pu1_src_v; + pu1_p1_temp_v = pu1_p1_v; + for(i = 0; i < 2; ++i, pu1_src_temp_u += 2, pu1_p1_temp_u += 2, + 
pu1_src_temp_v += 2, pu1_p1_temp_v += 2) + { + q0_u = pu1_src_temp_u[pos_q0]; + q1_u = pu1_src_temp_u[pos_q1]; + p0_u = pu1_p1_temp_u[pos_p0]; + p1_u = pu1_p1_temp_u[pos_p1]; + + q0_v = pu1_src_temp_v[pos_q0]; + q1_v = pu1_src_temp_v[pos_q1]; + p0_v = pu1_p1_temp_v[pos_p0]; + p1_v = pu1_p1_temp_v[pos_p1]; + + /* Filter Decision */ + if((ABS(p0_u - q0_u) < alpha) && + (ABS(q1_u - q0_u) < beta) && + (ABS(p1_u - p0_u) < beta)) + { + /* p0' */ + pu1_p1_temp_u[pos_p0] = (X2(p1_u) + p0_u + q1_u + 2) >> 2; + /* q0' */ + pu1_src_temp_u[pos_q0] = (X2(q1_u) + q0_u + p1_u + 2) >> 2; + } + + /* Filter Decision */ + if((ABS(p0_v - q0_v) < alpha) && + (ABS(q1_v - q0_v) < beta) && + (ABS(p1_v - p0_v) < beta)) + { + /* p0' */ + pu1_p1_temp_v[pos_p0] = (X2(p1_v) + p0_v + q1_v + 2) >> 2; + /* q0' */ + pu1_src_temp_v[pos_q0] = (X2(q1_v) + q0_v + p1_v + 2) >> 2; + } + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_luma_vert_bslt4() */ +/* */ +/* Description : This function performs filtering of a luma block */ +/* vertical edge when the boundary strength is less than 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* u4_bs - packed Boundary strength array */ +/* pu1_cliptab - tc0_table */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.3 under the */ +/* title "Filtering process for edges for bS less than 4" */ +/* in ITU T Rec H.264. 
*/ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 11 2013 Ittiam Draft */ +/* */ +/*****************************************************************************/ +void ih264_deblk_luma_vert_bslt4(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta, + UWORD32 u4_bs, + const UWORD8 *pu1_cliptab) +{ + WORD8 i = 0, edge; + UWORD8 p2, p1, p0, q0, q1, q2; + WORD32 pos_p2, pos_p1, pos_p0, pos_q0, pos_q1, pos_q2; + UWORD8 a_p, a_q; /* threshold variables */ + WORD32 blk_strd = src_strd << 2; /* block_increment = src_strd * 4 */ + UWORD8 *pu1_src_temp; + WORD8 delta; + WORD8 tc; + WORD16 val; + UWORD8 tc0, u1_bs; + + pos_q0 = 0; + pos_q1 = 1; + pos_q2 = 2; + pos_p0 = -1; + pos_p1 = -2; + pos_p2 = -3; + + for(edge = 0; edge < 4; edge++, pu1_src += blk_strd) + { + pu1_src_temp = pu1_src; + /* Filter Decision */ + u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff); + if(!u1_bs) + continue; + /* tc0 */ + tc0 = pu1_cliptab[u1_bs]; + for(i = 0; i < 4; ++i, pu1_src_temp += src_strd) + { + q0 = pu1_src_temp[pos_q0]; + q1 = pu1_src_temp[pos_q1]; + p0 = pu1_src_temp[pos_p0]; + p1 = pu1_src_temp[pos_p1]; + + /* Filter Decision */ + if((ABS(p0 - q0) >= alpha) || + (ABS(q1 - q0) >= beta) || + (ABS(p1 - p0) >= beta)) + continue; + + q2 = pu1_src_temp[pos_q2]; + p2 = pu1_src_temp[pos_p2]; + + a_p = ABS(p2 - p0); + a_q = ABS(q2 - q0); + + /* tc */ + tc = tc0 + (a_p < beta) + (a_q < beta); + + val = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3); + delta = CLIP3(-tc, tc, val); + + /* p0' */ + val = p0 + delta; + pu1_src_temp[pos_p0] = CLIP_U8(val); + /* q0' */ + val = q0 - delta; + pu1_src_temp[pos_q0] = CLIP_U8(val); + + /* Luma only */ + if(a_p < beta) + { + /* p1' */ + val = ((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1); + pu1_src_temp[pos_p1] += CLIP3(-tc0, tc0, val); + } + + if(a_q < beta) + { + /* q1' */ + val = ((q2 + ((p0 
+ q0 + 1) >> 1) - (q1 << 1)) >> 1); + pu1_src_temp[pos_q1] += CLIP3(-tc0, tc0, val); + } + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_chroma_vert_bslt4_bp() */ +/* */ +/* Description : This function performs filtering of a chroma block */ +/* vertical edge when the boundary strength is less than 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 of U */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* u4_bs - packed Boundary strength array */ +/* pu1_cliptab - tc0_table */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.3 under the */ +/* title "Filtering process for edges for bS less than 4" */ +/* in ITU T Rec H.264. */ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 11 2013 Ittiam Draft */ +/* */ +/*****************************************************************************/ +void ih264_deblk_chroma_vert_bslt4_bp(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta, + UWORD32 u4_bs, + const UWORD8 *pu1_cliptab) +{ + UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/ + UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/ + UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v; + WORD32 blk_strd = src_strd << 1; /* block_increment = src_strd * (4 >> 1)*/ + WORD32 pos_p1, pos_p0, pos_q0, pos_q1; + UWORD8 *pu1_src_temp_u, *pu1_src_temp_v; + WORD8 i = 0, edge; + WORD8 delta; + WORD8 tc; + WORD16 val; + UWORD8 tc0, u1_bs; + + pos_q0 = 0; + pos_q1 = 2; + pos_p0 = -2; + pos_p1 = -4; + + for(edge = 0; edge < 4; + edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd) + { + pu1_src_temp_u = pu1_src_u; + pu1_src_temp_v = pu1_src_v; + /* Filter 
Decision */ + u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff); + if(!u1_bs) + continue; + /* tc0 */ + tc0 = pu1_cliptab[u1_bs]; + tc = tc0 + 1; + for(i = 0; i < 2; ++i, pu1_src_temp_u += src_strd, pu1_src_temp_v += + src_strd) + { + q0_u = pu1_src_temp_u[pos_q0]; + q1_u = pu1_src_temp_u[pos_q1]; + p0_u = pu1_src_temp_u[pos_p0]; + p1_u = pu1_src_temp_u[pos_p1]; + + q0_v = pu1_src_temp_v[pos_q0]; + q1_v = pu1_src_temp_v[pos_q1]; + p0_v = pu1_src_temp_v[pos_p0]; + p1_v = pu1_src_temp_v[pos_p1]; + + /* Filter Decision */ + if((ABS(p0_u - q0_u) < alpha) && + (ABS(q1_u - q0_u) < beta) && + (ABS(p1_u - p0_u) < beta)) + { + val = ((((q0_u - p0_u) << 2) + (p1_u - q1_u) + 4) >> 3); + delta = CLIP3(-tc, tc, val); + /* p0' */ + val = p0_u + delta; + pu1_src_temp_u[pos_p0] = CLIP_U8(val); + /* q0' */ + val = q0_u - delta; + pu1_src_temp_u[pos_q0] = CLIP_U8(val); + } + + /* Filter Decision */ + if((ABS(p0_v - q0_v) < alpha) && + (ABS(q1_v - q0_v) < beta) && + (ABS(p1_v - p0_v) < beta)) + { + val = ((((q0_v - p0_v) << 2) + (p1_v - q1_v) + 4) >> 3); + delta = CLIP3(-tc, tc, val); + /* p0' */ + val = p0_v + delta; + pu1_src_temp_v[pos_p0] = CLIP_U8(val); + /* q0' */ + val = q0_v - delta; + pu1_src_temp_v[pos_q0] = CLIP_U8(val); + } + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_luma_horz_bslt4() */ +/* */ +/* Description : This function performs filtering of a luma block */ +/* horizontal edge when boundary strength is less than 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* u4_bs - packed Boundary strength array */ +/* pu1_cliptab - tc0_table */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.3 under the */ +/* title "Filtering process for edges for bS less than 4" */ +/* in ITU T Rec H.264. 
*/ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 11 2013 Ittiam Draft */ +/* */ +/*****************************************************************************/ +void ih264_deblk_luma_horz_bslt4(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta, + UWORD32 u4_bs, + const UWORD8 *pu1_cliptab) +{ + UWORD8 p2, p1, p0, q0, q1, q2; + WORD32 pos_p2, pos_p1, pos_p0, pos_q0, pos_q1, pos_q2; + UWORD8 a_p, a_q; /* Threshold variables */ + UWORD8 *pu1_p2; /* Pointer to the src sample p2 */ + UWORD8 *pu1_p2_temp; + UWORD8 *pu1_src_temp; + WORD8 i = 0, edge; + WORD8 delta; + WORD8 tc; + WORD16 val; + UWORD8 tc0, u1_bs; + + pu1_p2 = pu1_src - (src_strd << 2); + pos_q0 = 0; + pos_q1 = src_strd; + pos_q2 = X2(src_strd); + pos_p0 = X3(src_strd); + pos_p1 = X2(src_strd); + pos_p2 = src_strd; + + for(edge = 0; edge < 4; edge++, pu1_src += 4, pu1_p2 += 4) + { + pu1_src_temp = pu1_src; + pu1_p2_temp = pu1_p2; + + /* Filter Decision */ + u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff); + if(!u1_bs) + continue; + /* tc0 */ + tc0 = pu1_cliptab[u1_bs]; + + for(i = 0; i < 4; ++i, pu1_src_temp++, pu1_p2_temp++) + { + q0 = pu1_src_temp[pos_q0]; + q1 = pu1_src_temp[pos_q1]; + p0 = pu1_p2_temp[pos_p0]; + p1 = pu1_p2_temp[pos_p1]; + + /* Filter Decision */ + if((ABS(p0 - q0) >= alpha) || + (ABS(q1 - q0) >= beta) || + (ABS(p1 - p0) >= beta)) + continue; + + q2 = pu1_src_temp[pos_q2]; + p2 = pu1_p2_temp[pos_p2]; + + a_p = ABS(p2 - p0); + a_q = ABS(q2 - q0); + + /* tc */ + tc = tc0 + (a_p < beta) + (a_q < beta); + val = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3); + delta = CLIP3(-tc, tc, val); + /* p0' */ + val = p0 + delta; + pu1_p2_temp[pos_p0] = CLIP_U8(val); + /* q0' */ + val = q0 - delta; + pu1_src_temp[pos_q0] = CLIP_U8(val); + + /* Luma */ + if(a_p < beta) + { + /* p1' */ + val = ((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 
1); + pu1_p2_temp[pos_p1] += CLIP3(-tc0, tc0, val); + } + + if(a_q < beta) + { + /* q1' */ + val = ((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1); + pu1_src_temp[pos_q1] += CLIP3(-tc0, tc0, val); + } + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_chroma_horz_bslt4_bp() */ +/* */ +/* Description : This function performs filtering of a chroma block */ +/* horizontal edge when boundary strength is less than 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 of U */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* u4_bs - packed Boundary strength array */ +/* pu1_cliptab - tc0_table */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.3 under the */ +/* title "Filtering process for edges for bS less than 4" */ +/* in ITU T Rec H.264. */ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 11 2013 Ittiam Draft */ +/* */ +/*****************************************************************************/ +void ih264_deblk_chroma_horz_bslt4_bp(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta, + UWORD32 u4_bs, + const UWORD8 *pu1_cliptab) +{ + UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/ + UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/ + UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v; + WORD32 pos_p1, pos_p0, pos_q0, pos_q1; + UWORD8 *pu1_src_temp_u, *pu1_src_temp_v; + UWORD8 *pu1_p1_u; /* Pointer to the src sample p1 of plane U*/ + UWORD8 *pu1_p1_v; /* Pointer to the src sample p1 of plane V*/ + UWORD8 *pu1_p1_temp_u, *pu1_p1_temp_v; + WORD8 i = 0, edge; + WORD8 delta; + WORD8 tc; + WORD16 val; + UWORD8 u1_bs; + UWORD8 tc0; + + pu1_p1_u = 
pu1_src_u - (src_strd << 1); + pu1_p1_v = pu1_src_v - (src_strd << 1); + pos_q0 = 0; + pos_q1 = src_strd; + pos_p0 = src_strd; + pos_p1 = 0; + + for(edge = 0; edge < 4; edge++, pu1_src_u += 4, pu1_p1_u += 4, + pu1_src_v += 4, pu1_p1_v += 4) + { + pu1_src_temp_u = pu1_src_u; + pu1_p1_temp_u = pu1_p1_u; + pu1_src_temp_v = pu1_src_v; + pu1_p1_temp_v = pu1_p1_v; + + /* Filter Decision */ + u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff); + if(!u1_bs) + continue; + /* tc0 */ + tc0 = pu1_cliptab[u1_bs]; + + for(i = 0; i < 2; ++i, pu1_src_temp_u += 2, pu1_p1_temp_u += 2, + pu1_src_temp_v += 2, pu1_p1_temp_v += 2) + { + q0_u = pu1_src_temp_u[pos_q0]; + q1_u = pu1_src_temp_u[pos_q1]; + p0_u = pu1_p1_temp_u[pos_p0]; + p1_u = pu1_p1_temp_u[pos_p1]; + + q0_v = pu1_src_temp_v[pos_q0]; + q1_v = pu1_src_temp_v[pos_q1]; + p0_v = pu1_p1_temp_v[pos_p0]; + p1_v = pu1_p1_temp_v[pos_p1]; + + /* tc */ + tc = tc0 + 1; + /* Filter Decision */ + if(ABS(p0_u - q0_u) < alpha && ABS(q1_u - q0_u) < beta + && ABS(p1_u - p0_u) < beta) + { + val = ((((q0_u - p0_u) << 2) + (p1_u - q1_u) + 4) >> 3); + delta = CLIP3(-tc, tc, val); + /* p0' */ + val = p0_u + delta; + pu1_p1_temp_u[pos_p0] = CLIP_U8(val); + /* q0' */ + val = q0_u - delta; + pu1_src_temp_u[pos_q0] = CLIP_U8(val); + } + /* Filter Decision */ + if(ABS(p0_v - q0_v) < alpha && ABS(q1_v - q0_v) < beta + && ABS(p1_v - p0_v) < beta) + { + val = ((((q0_v - p0_v) << 2) + (p1_v - q1_v) + 4) >> 3); + delta = CLIP3(-tc, tc, val); + /* p0' */ + val = p0_v + delta; + pu1_p1_temp_v[pos_p0] = CLIP_U8(val); + /* q0' */ + val = q0_v - delta; + pu1_src_temp_v[pos_q0] = CLIP_U8(val); + } + } + } +} + +/*****************************************************************************/ +/* Function Definitions for vertical edge deblocking for double-call */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* */ +/* Function Name : 
ih264_deblk_luma_vert_bs4_mbaff() */ +/* */ +/* Description : This function performs filtering of a luma block */ +/* vertical edge when boundary strength is set to 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* */ +/* Globals : None */ +/* */ +/* Processing : When the function is called twice, this operation is as */ +/* described in Sec. 8.7.2.3 under the title "Filtering */ +/* process for edges for bS equal to 4" in ITU T Rec H.264. */ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 29 12 2014 Kaushik Draft */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_deblk_luma_vert_bs4_mbaff(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta) +{ + UWORD8 p3, p2, p1, p0, q0, q1, q2, q3; + WORD32 pos_p3, pos_p2, pos_p1, pos_p0; + WORD32 pos_q0, pos_q1, pos_q2, pos_q3; + UWORD8 a_p, a_q; /* threshold variables */ + WORD32 blk_strd = src_strd << 1; /* block_increment = src_strd * 2 */ + UWORD8 *pu1_src_temp; + WORD8 i = 0, edge; + + pos_q0 = 0; + pos_q1 = 1; + pos_q2 = 2; + pos_q3 = 3; + pos_p0 = -1; + pos_p1 = -2; + pos_p2 = -3; + pos_p3 = -4; + + for(edge = 0; edge < 4; edge++, pu1_src += blk_strd) + { + pu1_src_temp = pu1_src; + for(i = 0; i < 2; ++i, pu1_src_temp += src_strd) + { + q0 = pu1_src_temp[pos_q0]; + q1 = pu1_src_temp[pos_q1]; + p0 = pu1_src_temp[pos_p0]; + p1 = pu1_src_temp[pos_p1]; + + /* Filter Decision */ + if((ABS(p0 - q0) >= alpha) || + (ABS(q1 - q0) >= beta) || + (ABS(p1 - p0) >= beta)) + continue; + + p2 = pu1_src_temp[pos_p2]; + p3 = pu1_src_temp[pos_p3]; + q2 = pu1_src_temp[pos_q2]; + q3 = pu1_src_temp[pos_q3]; + + if(ABS(p0 - q0) < ((alpha >> 2) + 2)) + { + /* Threshold Variables */ + 
a_p = (UWORD8)ABS(p2 - p0); + a_q = (UWORD8)ABS(q2 - q0); + + if(a_p < beta) + { + /* p0', p1', p2' */ + pu1_src_temp[pos_p0] = ((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + + 4) >> 3); + pu1_src_temp[pos_p1] = ((p2 + p1 + p0 + q0 + 2) >> 2); + pu1_src_temp[pos_p2] = + ((X2(p3) + X3(p2) + p1 + p0 + q0 + + 4) >> 3); + } + else + { + /* p0'*/ + pu1_src_temp[pos_p0] = ((X2(p1) + p0 + q1 + 2) >> 2); + } + + if(a_q < beta) + { + /* q0', q1', q2' */ + pu1_src_temp[pos_q0] = (p1 + X2(p0) + X2(q0) + X2(q1) + q2 + + 4) >> 3; + pu1_src_temp[pos_q1] = (p0 + q0 + q1 + q2 + 2) >> 2; + pu1_src_temp[pos_q2] = (X2(q3) + X3(q2) + q1 + q0 + p0 + 4) + >> 3; + } + else + { + /* q0'*/ + pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2; + } + } + else + { + /* p0', q0'*/ + pu1_src_temp[pos_p0] = ((X2(p1) + p0 + q1 + 2) >> 2); + pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2; + } + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_chroma_vert_bs4_mbaff_bp() */ +/* */ +/* Description : This function performs filtering of a chroma block */ +/* vertical edge when boundary strength is set to 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 of U */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* */ +/* Globals : None */ +/* */ +/* Processing : When the function is called twice, this operation is as */ +/* described in Sec. 8.7.2.3 under the title "Filtering */ +/* process for edges for bS equal to 4" in ITU T Rec H.264. 
*/ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 29 12 2014 Kaushik Draft */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_deblk_chroma_vert_bs4_mbaff_bp(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta) +{ + UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of U */ + UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of V */ + UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v; + WORD32 blk_strd = src_strd; + WORD32 pos_p1, pos_p0, pos_q0, pos_q1; + UWORD8 *pu1_src_temp_u, *pu1_src_temp_v; + WORD8 edge; + + pos_q0 = 0; + pos_q1 = 2; + pos_p0 = -2; + pos_p1 = -4; + + for(edge = 0; edge < 4; + edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd) + { + pu1_src_temp_u = pu1_src_u; + pu1_src_temp_v = pu1_src_v; + + q0_u = pu1_src_temp_u[pos_q0]; + q1_u = pu1_src_temp_u[pos_q1]; + p0_u = pu1_src_temp_u[pos_p0]; + p1_u = pu1_src_temp_u[pos_p1]; + q0_v = pu1_src_temp_v[pos_q0]; + q1_v = pu1_src_temp_v[pos_q1]; + p0_v = pu1_src_temp_v[pos_p0]; + p1_v = pu1_src_temp_v[pos_p1]; + + /* Filter Decision */ + if((ABS(p0_u - q0_u) < alpha) && + (ABS(q1_u - q0_u) < beta) && + (ABS(p1_u - p0_u) < beta)) + { + /* p0' */ + pu1_src_temp_u[pos_p0] = ((X2(p1_u) + p0_u + q1_u + 2) >> 2); + /* q0' */ + pu1_src_temp_u[pos_q0] = (X2(q1_u) + q0_u + p1_u + 2) >> 2; + } + + /* Filter Decision */ + if(ABS(p0_v - q0_v) < alpha && ABS(q1_v - q0_v) < beta + && ABS(p1_v - p0_v) < beta) + { + /* p0' */ + pu1_src_temp_v[pos_p0] = ((X2(p1_v) + p0_v + q1_v + 2) >> 2); + /* q0' */ + pu1_src_temp_v[pos_q0] = (X2(q1_v) + q0_v + p1_v + 2) >> 2; + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_luma_vert_bslt4_mbaff() */ +/* */ +/* Description : This function performs 
filtering of a luma block */ +/* vertical edge when boundary strength is less than 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* u4_bs - packed Boundary strength array */ +/* pu1_cliptab - tc0_table */ +/* */ +/* Globals : None */ +/* */ +/* Processing : When the function is called twice, this operation is as */ +/* described in Sec. 8.7.2.3 under the title "Filtering */ +/* process for edges for bS less than 4" in ITU T Rec H.264.*/ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 29 12 2014 Kaushik Draft */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_deblk_luma_vert_bslt4_mbaff(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta, + UWORD32 u4_bs, + const UWORD8 *pu1_cliptab) +{ + WORD8 i = 0, edge; + UWORD8 p2, p1, p0, q0, q1, q2; + WORD32 pos_p2, pos_p1, pos_p0, pos_q0, pos_q1, pos_q2; + UWORD8 a_p, a_q; /* Threshold variables */ + WORD32 blk_strd = src_strd << 1; /* block_increment = src_strd * 2 */ + UWORD8 *pu1_src_temp; + WORD8 delta; + WORD8 tc; + WORD16 val; + UWORD8 tc0, u1_bs; + + pos_q0 = 0; + pos_q1 = 1; + pos_q2 = 2; + pos_p0 = -1; + pos_p1 = -2; + pos_p2 = -3; + + for(edge = 0; edge < 4; edge++, pu1_src += blk_strd) + { + pu1_src_temp = pu1_src; + /* Filter Decision */ + u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff); + if(!u1_bs) + continue; + /* tc0 */ + tc0 = pu1_cliptab[u1_bs]; + for(i = 0; i < 2; ++i, pu1_src_temp += src_strd) + { + q0 = pu1_src_temp[pos_q0]; + q1 = pu1_src_temp[pos_q1]; + p0 = pu1_src_temp[pos_p0]; + p1 = pu1_src_temp[pos_p1]; + + /* Filter Decision */ + if((ABS(p0 - q0) >= alpha) || + (ABS(q1 - q0) >= beta) || + (ABS(p1 - p0) >= beta)) + continue; + + 
q2 = pu1_src_temp[pos_q2]; + p2 = pu1_src_temp[pos_p2]; + + a_p = ABS(p2 - p0); + a_q = ABS(q2 - q0); + + /* tc */ + tc = tc0 + (a_p < beta) + (a_q < beta); + + val = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3); + delta = CLIP3(-tc, tc, val); + /* p0' */ + val = p0 + delta; + pu1_src_temp[pos_p0] = CLIP_U8(val); + /* q0' */ + val = q0 - delta; + pu1_src_temp[pos_q0] = CLIP_U8(val); + + /* Luma only */ + if(a_p < beta) + { + /* p1' */ + val = ((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1); + pu1_src_temp[pos_p1] += CLIP3(-tc0, tc0, val); + } + + if(a_q < beta) + { + /* q1' */ + val = ((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1); + pu1_src_temp[pos_q1] += CLIP3(-tc0, tc0, val); + } + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_chroma_vert_bslt4_mbaff_bp() */ +/* */ +/* Description : This function performs filtering of a chroma block */ +/* vertical edge when boundary strength is less than 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 of U */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* u4_bs - packed Boundary strength array */ +/* pu1_cliptab - tc0_table */ +/* */ +/* Globals : None */ +/* */ +/* Processing : When the function is called twice, this operation is as */ +/* described in Sec. 
8.7.2.3 under the title "Filtering */ +/* process for edges for bS less than 4" in ITU T Rec H.264.*/ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 29 12 2014 Kaushik Draft */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_deblk_chroma_vert_bslt4_mbaff_bp(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta, + UWORD32 u4_bs, + const UWORD8 *pu1_cliptab) +{ + UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/ + UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/ + UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v; + WORD32 blk_strd = src_strd; + WORD32 pos_p1, pos_p0, pos_q0, pos_q1; + UWORD8 *pu1_src_temp_u, *pu1_src_temp_v; + WORD8 edge; + WORD8 delta; + WORD8 tc; + WORD16 val; + UWORD8 tc0, u1_bs; + + pos_q0 = 0; + pos_q1 = 2; + pos_p0 = -2; + pos_p1 = -4; + + for(edge = 0; edge < 4; + edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd) + { + pu1_src_temp_u = pu1_src_u; + pu1_src_temp_v = pu1_src_v; + /* Filter Decision */ + u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff); + if(!u1_bs) + continue; + /* tc0 */ + tc0 = pu1_cliptab[u1_bs]; + tc = tc0 + 1; + + q0_u = pu1_src_temp_u[pos_q0]; + q1_u = pu1_src_temp_u[pos_q1]; + p0_u = pu1_src_temp_u[pos_p0]; + p1_u = pu1_src_temp_u[pos_p1]; + + q0_v = pu1_src_temp_v[pos_q0]; + q1_v = pu1_src_temp_v[pos_q1]; + p0_v = pu1_src_temp_v[pos_p0]; + p1_v = pu1_src_temp_v[pos_p1]; + + /* Filter Decision */ + if((ABS(p0_u - q0_u) < alpha) && + (ABS(q1_u - q0_u) < beta) && + (ABS(p1_u - p0_u) < beta)) + { + val = ((((q0_u - p0_u) << 2) + (p1_u - q1_u) + 4) >> 3); + delta = CLIP3(-tc, tc, val); + /* p0' */ + val = p0_u + delta; + pu1_src_temp_u[pos_p0] = CLIP_U8(val); + /* q0' */ + val = q0_u - delta; + pu1_src_temp_u[pos_q0] = CLIP_U8(val); + } 
+ + /* Filter Decision */ + if((ABS(p0_v - q0_v) < alpha) && + (ABS(q1_v - q0_v) < beta) && + (ABS(p1_v - p0_v) < beta)) + { + val = ((((q0_v - p0_v) << 2) + (p1_v - q1_v) + 4) >> 3); + delta = CLIP3(-tc, tc, val); + /* p0' */ + val = p0_v + delta; + pu1_src_temp_v[pos_p0] = CLIP_U8(val); + /* q0' */ + val = q0_v - delta; + pu1_src_temp_v[pos_q0] = CLIP_U8(val); + } + } +} + +/*****************************************************************************/ +/* Function Definitions for chroma deblocking in high profile */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_chroma_vert_bs4() */ +/* */ +/* Description : This function performs filtering of a chroma block */ +/* vertical edge when the boundary strength is set to 4 in */ +/* high profile. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 of U */ +/* src_strd - source stride */ +/* alpha_cb - alpha value for the boundary in U */ +/* beta_cb - beta value for the boundary in U */ +/* alpha_cr - alpha value for the boundary in V */ +/* beta_cr - beta value for the boundary in V */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.4 under the */ +/* title "Filtering process for edges for bS equal to 4" in */ +/* ITU T Rec H.264 with alpha and beta values different in */ +/* U and V. 
*/
/*                                                                           */
/*  Outputs       : None                                                     */
/*                                                                           */
/*  Returns       : None                                                     */
/*                                                                           */
/*  Issues        : None                                                     */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
/*         29 12 2014   Kaushik         Draft                                */
/*                      Senthoor                                             */
/*                                                                           */
/*****************************************************************************/
void ih264_deblk_chroma_vert_bs4(UWORD8 *pu1_src,
                                 WORD32 src_strd,
                                 WORD32 alpha_cb,
                                 WORD32 beta_cb,
                                 WORD32 alpha_cr,
                                 WORD32 beta_cr)
{
    /* Eight consecutive rows are visited. On each row the U sample pair   */
    /* (at pu1_row) and the V sample pair (one byte further) straddling    */
    /* the vertical edge are filtered independently with their own         */
    /* alpha/beta thresholds.                                              */
    UWORD8 *pu1_row = pu1_src;
    WORD32 row, plane;

    for(row = 0; row < 8; row++, pu1_row += src_strd)
    {
        for(plane = 0; plane < 2; plane++)
        {
            /* plane 0 is U, plane 1 is V (interleaved, one byte apart) */
            UWORD8 *pu1_q0 = pu1_row + plane;
            WORD32 alpha = plane ? alpha_cr : alpha_cb;
            WORD32 beta = plane ? beta_cr : beta_cb;

            /* Same-plane neighbours are two bytes apart */
            UWORD8 p1 = pu1_q0[-4];
            UWORD8 p0 = pu1_q0[-2];
            UWORD8 q0 = pu1_q0[0];
            UWORD8 q1 = pu1_q0[2];

            /* Filter Decision: skip unless all three gradients are small */
            if((ABS(p0 - q0) >= alpha) ||
               (ABS(q1 - q0) >= beta) ||
               (ABS(p1 - p0) >= beta))
            {
                continue;
            }

            /* p0' */
            pu1_q0[-2] = ((X2(p1) + p0 + q1 + 2) >> 2);
            /* q0' */
            pu1_q0[0] = (X2(q1) + q0 + p1 + 2) >> 2;
        }
    }
}
+ +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_chroma_horz_bs4() */ +/* */ +/* Description : This function performs filtering of a chroma block */ +/* horizontal edge when the boundary strength is set to 4 */ +/* in high profile. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 of U */ +/* src_strd - source stride */ +/* alpha_cb - alpha value for the boundary in U */ +/* beta_cb - beta value for the boundary in U */ +/* alpha_cr - alpha value for the boundary in V */ +/* beta_cr - beta value for the boundary in V */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.4 under the */ +/* title "Filtering process for edges for bS equal to 4" in */ +/* ITU T Rec H.264 with alpha and beta values different in */ +/* U and V. */ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 29 12 2014 Kaushik Draft */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_deblk_chroma_horz_bs4(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha_cb, + WORD32 beta_cb, + WORD32 alpha_cr, + WORD32 beta_cr) +{ + UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of U */ + UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of V */ + UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v; + WORD32 pos_p1, pos_p0, pos_q0, pos_q1; + UWORD8 *pu1_src_temp_u, *pu1_src_temp_v; + UWORD8 *pu1_p1_u; /* Pointer to the src sample p1 of U */ + UWORD8 *pu1_p1_v; /* Pointer to the src sample p1 of U */ + UWORD8 *pu1_p1_temp_u, *pu1_p1_temp_v; + WORD8 i = 0, edge; + + pu1_p1_u = pu1_src_u - (src_strd << 1); + pu1_p1_v = pu1_src_v - (src_strd << 1); + pos_q0 = 0; + pos_q1 = src_strd; + pos_p0 = src_strd; + pos_p1 = 0; + + for(edge = 0; edge < 4; edge++, 
pu1_src_u += 4, pu1_p1_u += 4, pu1_src_v += + 4, pu1_p1_v += 4) + { + pu1_src_temp_u = pu1_src_u; + pu1_p1_temp_u = pu1_p1_u; + pu1_src_temp_v = pu1_src_v; + pu1_p1_temp_v = pu1_p1_v; + for(i = 0; i < 2; ++i, pu1_src_temp_u += 2, pu1_p1_temp_u += 2, + pu1_src_temp_v += 2, pu1_p1_temp_v += 2) + { + q0_u = pu1_src_temp_u[pos_q0]; + q1_u = pu1_src_temp_u[pos_q1]; + p0_u = pu1_p1_temp_u[pos_p0]; + p1_u = pu1_p1_temp_u[pos_p1]; + + q0_v = pu1_src_temp_v[pos_q0]; + q1_v = pu1_src_temp_v[pos_q1]; + p0_v = pu1_p1_temp_v[pos_p0]; + p1_v = pu1_p1_temp_v[pos_p1]; + + /* Filter Decision */ + if(ABS(p0_u - q0_u) < alpha_cb && ABS(q1_u - q0_u) < beta_cb + && ABS(p1_u - p0_u) < beta_cb) + { + /* p0' */ + pu1_p1_temp_u[pos_p0] = (X2(p1_u) + p0_u + q1_u + 2) >> 2; + /* q0' */ + pu1_src_temp_u[pos_q0] = (X2(q1_u) + q0_u + p1_u + 2) >> 2; + } + + /* Filter Decision */ + if(ABS(p0_v - q0_v) < alpha_cr && ABS(q1_v - q0_v) < beta_cr + && ABS(p1_v - p0_v) < beta_cr) + { + /* p0' */ + pu1_p1_temp_v[pos_p0] = (X2(p1_v) + p0_v + q1_v + 2) >> 2; + /* q0' */ + pu1_src_temp_v[pos_q0] = (X2(q1_v) + q0_v + p1_v + 2) >> 2; + } + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_chroma_vert_bslt4() */ +/* */ +/* Description : This function performs filtering of a chroma block */ +/* vertical edge when the boundary strength is less than 4 */ +/* in high profile. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 of U */ +/* src_strd - source stride */ +/* alpha_cb - alpha value for the boundary in U */ +/* beta_cb - beta value for the boundary in U */ +/* alpha_cr - alpha value for the boundary in V */ +/* beta_cr - beta value for the boundary in V */ +/* u4_bs - packed Boundary strength array */ +/* pu1_cliptab_cb - tc0_table for U */ +/* pu1_cliptab_cr - tc0_table for V */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 
8.7.2.3 under the    */
/*                  title "Filtering process for edges for bS less than 4"   */
/*                  in ITU T Rec H.264 with alpha and beta values different  */
/*                  in U and V.                                              */
/*                                                                           */
/*  Outputs       : None                                                     */
/*                                                                           */
/*  Returns       : None                                                     */
/*                                                                           */
/*  Issues        : None                                                     */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
/*         29 12 2014   Kaushik         Draft                                */
/*                      Senthoor                                             */
/*                                                                           */
/*****************************************************************************/
void ih264_deblk_chroma_vert_bslt4(UWORD8 *pu1_src,
                                   WORD32 src_strd,
                                   WORD32 alpha_cb,
                                   WORD32 beta_cb,
                                   WORD32 alpha_cr,
                                   WORD32 beta_cr,
                                   UWORD32 u4_bs,
                                   const UWORD8 *pu1_cliptab_cb,
                                   const UWORD8 *pu1_cliptab_cr)
{
    /* U and V are byte-interleaved: V starts one byte after U, and         */
    /* horizontally neighbouring samples of one plane are two bytes apart.  */
    UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/
    UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/
    UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
    WORD32 blk_strd = src_strd << 1; /* block_increment = src_strd * 2 */
    WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
    UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
    WORD8 i = 0, edge;
    WORD8 delta;
    WORD8 tcb, tcr;
    WORD16 val;
    UWORD8 tcb0, tcr0, u1_bs;

    /* Byte offsets of the four samples across the vertical edge */
    pos_q0 = 0;
    pos_q1 = 2;
    pos_p0 = -2;
    pos_p1 = -4;

    for(edge = 0; edge < 4;
        edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd)
    {
        pu1_src_temp_u = pu1_src_u;
        pu1_src_temp_v = pu1_src_v;

        /* Boundary strength shared by the two rows of this edge segment:  */
        /* edge 0 is the most significant byte of u4_bs. A strength of 0   */
        /* leaves both rows untouched.                                     */
        u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
        if(!u1_bs)
            continue;

        /* tc0 per plane from the clip tables; chroma uses tc = tc0 + 1 */
        tcb0 = pu1_cliptab_cb[u1_bs];
        tcr0 = pu1_cliptab_cr[u1_bs];
        tcb = tcb0 + 1;
        tcr = tcr0 + 1;

        for(i = 0; i < 2; ++i, pu1_src_temp_u += src_strd, pu1_src_temp_v +=
                        src_strd)
        {
            q0_u = pu1_src_temp_u[pos_q0];
            q1_u = pu1_src_temp_u[pos_q1];
            p0_u = pu1_src_temp_u[pos_p0];
            p1_u = pu1_src_temp_u[pos_p1];

            q0_v = pu1_src_temp_v[pos_q0];
            q1_v = pu1_src_temp_v[pos_q1];
            p0_v = pu1_src_temp_v[pos_p0];
            p1_v = pu1_src_temp_v[pos_p1];

            /* Filter Decision (U) */
            if(ABS(p0_u - q0_u) < alpha_cb && ABS(q1_u - q0_u) < beta_cb
                            && ABS(p1_u - p0_u) < beta_cb)
            {
                /* Multiply instead of "<< 2": the difference may be       */
                /* negative and left-shifting a negative int is undefined  */
                /* behaviour.                                              */
                /* NOTE(review): the ">> 3" on a negative sum is           */
                /* implementation-defined; presumably an arithmetic shift  */
                /* is expected - confirm for the supported compilers.      */
                val = ((((q0_u - p0_u) * 4) + (p1_u - q1_u) + 4) >> 3);
                delta = CLIP3(-tcb, tcb, val);
                /* p0' */
                val = p0_u + delta;
                pu1_src_temp_u[pos_p0] = CLIP_U8(val);
                /* q0' */
                val = q0_u - delta;
                pu1_src_temp_u[pos_q0] = CLIP_U8(val);
            }

            /* Filter Decision (V) */
            if(ABS(p0_v - q0_v) < alpha_cr && ABS(q1_v - q0_v) < beta_cr
                            && ABS(p1_v - p0_v) < beta_cr)
            {
                /* See the U branch for why "* 4" is used instead of "<< 2" */
                val = ((((q0_v - p0_v) * 4) + (p1_v - q1_v) + 4) >> 3);
                delta = CLIP3(-tcr, tcr, val);
                /* p0' */
                val = p0_v + delta;
                pu1_src_temp_v[pos_p0] = CLIP_U8(val);
                /* q0' */
                val = q0_v - delta;
                pu1_src_temp_v[pos_q0] = CLIP_U8(val);
            }
        }
    }
}

/*****************************************************************************/
/*                                                                           */
/*  Function Name : ih264_deblk_chroma_horz_bslt4()                          */
/*                                                                           */
/*  Description   : This function performs filtering of a chroma block       */
/*                  horizontal edge when the boundary strength is less than  */
/*                  4 in high profile.                                       */
/*                                                                           */
/*  Inputs        : pu1_src         - pointer to the src sample q0 of U      */
/*                  src_strd        - source stride                          */
/*                  alpha_cb        - alpha value for the boundary in U      */
/*                  beta_cb         - beta value for the boundary in U       */
/*                  alpha_cr        - alpha value for the boundary in V      */
/*                  beta_cr         - beta value for the boundary in V       */
/*                  u4_bs           - packed Boundary strength array         */
/*                  pu1_cliptab_cb  - tc0_table for U                        */
/*                  pu1_cliptab_cr  - tc0_table for V                        */
/*                                                                           */
/*  Globals       : None                                                     */
/*                                                                           */
/*  Processing    : This operation is described in Sec. 8.7.2.3 under the    */
/*                  title "Filtering process for edges for bS less than 4"   */
/*                  in ITU T Rec H.264 with alpha and beta values different  */
/*                  in U and V.
*/
/*                                                                           */
/*  Outputs       : None                                                     */
/*                                                                           */
/*  Returns       : None                                                     */
/*                                                                           */
/*  Issues        : None                                                     */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
/*         29 12 2014   Kaushik         Draft                                */
/*                      Senthoor                                             */
/*                                                                           */
/*****************************************************************************/
void ih264_deblk_chroma_horz_bslt4(UWORD8 *pu1_src,
                                   WORD32 src_strd,
                                   WORD32 alpha_cb,
                                   WORD32 beta_cb,
                                   WORD32 alpha_cr,
                                   WORD32 beta_cr,
                                   UWORD32 u4_bs,
                                   const UWORD8 *pu1_cliptab_cb,
                                   const UWORD8 *pu1_cliptab_cr)
{
    /* U and V are byte-interleaved: V starts one byte after U, and         */
    /* horizontally neighbouring samples of one plane are two bytes apart.  */
    UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/
    UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/
    UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
    WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
    UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
    UWORD8 *pu1_p1_u; /* Pointer to the src sample p1 of plane U*/
    UWORD8 *pu1_p1_v; /* Pointer to the src sample p1 of plane V*/
    UWORD8 *pu1_p1_temp_u, *pu1_p1_temp_v;
    WORD8 i = 0, edge;
    WORD8 delta;
    WORD8 tcb, tcr;
    WORD16 val;
    UWORD8 u1_bs;
    UWORD8 tcb0, tcr0;

    /* p1 is two rows above q0. p0 and q1 are one row below the p1 and q0  */
    /* rows respectively, hence the src_strd offsets.                      */
    pu1_p1_u = pu1_src_u - (src_strd << 1);
    pu1_p1_v = pu1_src_v - (src_strd << 1);
    pos_q0 = 0;
    pos_q1 = src_strd;
    pos_p0 = src_strd;
    pos_p1 = 0;

    for(edge = 0; edge < 4; edge++, pu1_src_u += 4, pu1_p1_u += 4,
                    pu1_src_v += 4, pu1_p1_v += 4)
    {
        pu1_src_temp_u = pu1_src_u;
        pu1_p1_temp_u = pu1_p1_u;
        pu1_src_temp_v = pu1_src_v;
        pu1_p1_temp_v = pu1_p1_v;

        /* Boundary strength shared by the two sample pairs of this edge   */
        /* segment: edge 0 is the most significant byte of u4_bs. A        */
        /* strength of 0 leaves the segment untouched.                     */
        u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
        if(!u1_bs)
            continue;

        /* tc0 per plane from the clip tables; chroma uses tc = tc0 + 1.   */
        /* Both values are constant for the segment, so they are computed  */
        /* once here rather than inside the per-sample loop.               */
        tcb0 = pu1_cliptab_cb[u1_bs];
        tcr0 = pu1_cliptab_cr[u1_bs];
        tcb = tcb0 + 1;
        tcr = tcr0 + 1;

        for(i = 0; i < 2; ++i, pu1_src_temp_u += 2, pu1_p1_temp_u += 2,
                        pu1_src_temp_v += 2, pu1_p1_temp_v += 2)
        {
            q0_u = pu1_src_temp_u[pos_q0];
            q1_u = pu1_src_temp_u[pos_q1];
            p0_u = pu1_p1_temp_u[pos_p0];
            p1_u = pu1_p1_temp_u[pos_p1];

            q0_v = pu1_src_temp_v[pos_q0];
            q1_v = pu1_src_temp_v[pos_q1];
            p0_v = pu1_p1_temp_v[pos_p0];
            p1_v = pu1_p1_temp_v[pos_p1];

            /* Filter Decision (U) */
            if(ABS(p0_u - q0_u) < alpha_cb && ABS(q1_u - q0_u) < beta_cb
                            && ABS(p1_u - p0_u) < beta_cb)
            {
                /* Multiply instead of "<< 2": the difference may be       */
                /* negative and left-shifting a negative int is undefined  */
                /* behaviour.                                              */
                /* NOTE(review): the ">> 3" on a negative sum is           */
                /* implementation-defined; presumably an arithmetic shift  */
                /* is expected - confirm for the supported compilers.      */
                val = ((((q0_u - p0_u) * 4) + (p1_u - q1_u) + 4) >> 3);
                delta = CLIP3(-tcb, tcb, val);
                /* p0' */
                val = p0_u + delta;
                pu1_p1_temp_u[pos_p0] = CLIP_U8(val);
                /* q0' */
                val = q0_u - delta;
                pu1_src_temp_u[pos_q0] = CLIP_U8(val);
            }

            /* Filter Decision (V) */
            if(ABS(p0_v - q0_v) < alpha_cr && ABS(q1_v - q0_v) < beta_cr
                            && ABS(p1_v - p0_v) < beta_cr)
            {
                /* See the U branch for why "* 4" is used instead of "<< 2" */
                val = ((((q0_v - p0_v) * 4) + (p1_v - q1_v) + 4) >> 3);
                delta = CLIP3(-tcr, tcr, val);
                /* p0' */
                val = p0_v + delta;
                pu1_p1_temp_v[pos_p0] = CLIP_U8(val);
                /* q0' */
                val = q0_v - delta;
                pu1_src_temp_v[pos_q0] = CLIP_U8(val);
            }
        }
    }
}

/*****************************************************************************/
/*                                                                           */
/*  Function Name : ih264_deblk_chroma_vert_bs4_mbaff()                      */
/*                                                                           */
/*  Description   : This function performs filtering of a chroma block       */
/*                  vertical edge when boundary strength is set to 4 in high */
/*                  profile.                                                 */
/*                                                                           */
/*  Inputs        : pu1_src  - pointer to the src sample q0 of U             */
/*                  src_strd - source stride                                 */
/*                  alpha_cb - alpha value for the boundary in U             */
/*                  beta_cb  - beta value for the boundary in U              */
/*                  alpha_cr - alpha value for the boundary in V             */
/*                  beta_cr  - beta value for the boundary in V              */
/*                                                                           */
/*  Globals       : None                                                     */
/*                                                                           */
/*  Processing    : When the function is called twice, this operation is as  */
/*                  described in Sec. 8.7.2.4 under the title "Filtering     */
/*                  process for edges for bS equal to 4" in ITU T Rec H.264  */
/*                  with alpha and beta values different in U and V.
*/
/*                                                                           */
/*  Outputs       : None                                                     */
/*                                                                           */
/*  Returns       : None                                                     */
/*                                                                           */
/*  Issues        : None                                                     */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
/*         29 12 2014   Kaushik         Draft                                */
/*                      Senthoor                                             */
/*                                                                           */
/*****************************************************************************/
void ih264_deblk_chroma_vert_bs4_mbaff(UWORD8 *pu1_src,
                                       WORD32 src_strd,
                                       WORD32 alpha_cb,
                                       WORD32 beta_cb,
                                       WORD32 alpha_cr,
                                       WORD32 beta_cr)
{
    /* Four consecutive rows are visited. On each row the U sample pair    */
    /* (at pu1_row) and the V sample pair (one byte further) straddling    */
    /* the vertical edge are filtered independently with their own         */
    /* alpha/beta thresholds.                                              */
    UWORD8 *pu1_row = pu1_src;
    WORD32 row, plane;

    for(row = 0; row < 4; row++, pu1_row += src_strd)
    {
        for(plane = 0; plane < 2; plane++)
        {
            /* plane 0 is U, plane 1 is V (interleaved, one byte apart) */
            UWORD8 *pu1_q0 = pu1_row + plane;
            WORD32 alpha = plane ? alpha_cr : alpha_cb;
            WORD32 beta = plane ? beta_cr : beta_cb;

            /* Same-plane neighbours are two bytes apart */
            UWORD8 p1 = pu1_q0[-4];
            UWORD8 p0 = pu1_q0[-2];
            UWORD8 q0 = pu1_q0[0];
            UWORD8 q1 = pu1_q0[2];

            /* Filter Decision: skip unless all three gradients are small */
            if((ABS(p0 - q0) >= alpha) ||
               (ABS(q1 - q0) >= beta) ||
               (ABS(p1 - p0) >= beta))
            {
                continue;
            }

            /* p0' */
            pu1_q0[-2] = ((X2(p1) + p0 + q1 + 2) >> 2);
            /* q0' */
            pu1_q0[0] = (X2(q1) + q0 + p1 + 2) >> 2;
        }
    }
}

/*****************************************************************************/
/*                                                                           */
/*  Function Name : 
ih264_deblk_chroma_vert_bslt4_mbaff() */ +/* */ +/* Description : This function performs filtering of a chroma block */ +/* vertical edge when boundary strength is less than 4 in */ +/* high profile. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 of U */ +/* src_strd - source stride */ +/* alpha_cb - alpha value for the boundary in U */ +/* beta_cb - beta value for the boundary in U */ +/* alpha_cr - alpha value for the boundary in V */ +/* beta_cr - beta value for the boundary in V */ +/* u4_bs - packed Boundary strength array */ +/* pu1_cliptab_cb - tc0_table for U */ +/* pu1_cliptab_cr - tc0_table for V */ +/* */ +/* Globals : None */ +/* */ +/* Processing : When the function is called twice, this operation is as */ +/* described in Sec. 8.7.2.4 under the title "Filtering */ +/* process for edges for bS less than 4" in ITU T Rec H.264 */ +/* with alpha and beta values different in U and V. */ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 29 12 2014 Kaushik Draft */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_deblk_chroma_vert_bslt4_mbaff(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha_cb, + WORD32 beta_cb, + WORD32 alpha_cr, + WORD32 beta_cr, + UWORD32 u4_bs, + const UWORD8 *pu1_cliptab_cb, + const UWORD8 *pu1_cliptab_cr) +{ + UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/ + UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/ + UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v; + WORD32 blk_strd = src_strd; + WORD32 pos_p1, pos_p0, pos_q0, pos_q1; + UWORD8 *pu1_src_temp_u, *pu1_src_temp_v; + WORD8 edge; + WORD8 delta; + WORD8 tcb, tcr; + WORD16 val; + UWORD8 tcb0, tcr0, u1_bs; + + pos_q0 = 0; + pos_q1 = 2; + pos_p0 = -2; + pos_p1 = -4; + + for(edge = 0; edge < 4; + edge++, 
pu1_src_u += blk_strd, pu1_src_v += blk_strd) + { + pu1_src_temp_u = pu1_src_u; + pu1_src_temp_v = pu1_src_v; + /* Filter Decision */ + u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff); + if(!u1_bs) + continue; + /* tc0 */ + tcb0 = pu1_cliptab_cb[u1_bs]; + tcr0 = pu1_cliptab_cr[u1_bs]; + tcb = tcb0 + 1; + tcr = tcr0 + 1; + q0_u = pu1_src_temp_u[pos_q0]; + q1_u = pu1_src_temp_u[pos_q1]; + p0_u = pu1_src_temp_u[pos_p0]; + p1_u = pu1_src_temp_u[pos_p1]; + + q0_v = pu1_src_temp_v[pos_q0]; + q1_v = pu1_src_temp_v[pos_q1]; + p0_v = pu1_src_temp_v[pos_p0]; + p1_v = pu1_src_temp_v[pos_p1]; + + /* Filter Decision */ + if((ABS(p0_u - q0_u) < alpha_cb) && + (ABS(q1_u - q0_u) < beta_cb) && + (ABS(p1_u - p0_u) < beta_cb)) + { + val = ((((q0_u - p0_u) << 2) + (p1_u - q1_u) + 4) >> 3); + delta = CLIP3(-tcb, tcb, val); + /* p0' */ + val = p0_u + delta; + pu1_src_temp_u[pos_p0] = CLIP_U8(val); + /* q0' */ + val = q0_u - delta; + pu1_src_temp_u[pos_q0] = CLIP_U8(val); + } + + /* Filter Decision */ + if((ABS(p0_v - q0_v) < alpha_cr) && + (ABS(q1_v - q0_v) < beta_cr) && + (ABS(p1_v - p0_v) < beta_cr)) + { + val = ((((q0_v - p0_v) << 2) + (p1_v - q1_v) + 4) >> 3); + delta = CLIP3(-tcr, tcr, val); + /* p0' */ + val = p0_v + delta; + pu1_src_temp_v[pos_p0] = CLIP_U8(val); + /* q0' */ + val = q0_v - delta; + pu1_src_temp_v[pos_q0] = CLIP_U8(val); + } + } +} diff --git a/common/ih264_deblk_edge_filters.h b/common/ih264_deblk_edge_filters.h new file mode 100755 index 0000000..4079dd2 --- /dev/null +++ b/common/ih264_deblk_edge_filters.h @@ -0,0 +1,195 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_deblk_edge_filters.h + * + * @brief + * This file contains declarations of functions used for deblocking + * + * @author + * Ittiam + * + * @remarks + * None + * + ******************************************************************************* + */ + +#ifndef IH264_DEBLK_H_ +#define IH264_DEBLK_H_ + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +typedef void ih264_deblk_edge_bslt4_ft(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta, + UWORD32 u4_bs, + const UWORD8 *pu1_cliptab ); + +typedef void ih264_deblk_edge_bs4_ft(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta ); + +typedef void ih264_deblk_chroma_edge_bslt4_ft(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha_cb, + WORD32 beta_cb, + WORD32 alpha_cr, + WORD32 beta_cr, + UWORD32 u4_bs, + const UWORD8 *pu1_cliptab_cb, + const UWORD8 *pu1_cliptab_cr); + +typedef void ih264_deblk_chroma_edge_bs4_ft(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha_cb, + WORD32 beta_cb, + WORD32 alpha_cr, + WORD32 beta_cr); + + + +ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4; +ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4; +ih264_deblk_edge_bs4_ft 
ih264_deblk_luma_vert_bs4_mbaff; + + +ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp; +ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp; +ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp; + + +ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4; +ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4; +ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff; + + +ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp; +ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp; +ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp; + +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4; +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4; +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff; +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff; + +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4; +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4; +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff; +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff; + + +/*A9*/ +ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_a9; +ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_a9; +ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_a9; + + +ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_a9; +ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_a9; +ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_a9; + + +ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_a9; +ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_a9; +ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_a9; + + +ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_a9; +ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_a9; +ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9; + +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_a9; +ih264_deblk_chroma_edge_bs4_ft 
ih264_deblk_chroma_horz_bs4_a9; +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_a9; +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_a9; + +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_a9; +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_a9; +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_a9; +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_a9; + +/*AV8*/ +ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_av8; +ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_av8; +ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_av8; + + +ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_av8; +ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_av8; +ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_av8; + + +ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_av8; +ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_av8; +ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_av8; + + +ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_av8; +ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_av8; +ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_av8; + +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_av8; +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_av8; +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_av8; +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_av8; + +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_av8; +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_av8; +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_av8; +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_av8; + +/*SSSE3*/ +ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_ssse3; +ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_ssse3; +ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_ssse3; + + 
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_ssse3; +ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_ssse3; +ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_ssse3; + + +ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_ssse3; +ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_ssse3; +ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_ssse3; + + +ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_ssse3; +ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_ssse3; +ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_ssse3; + +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_ssse3; +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_ssse3; +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_ssse3; +ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_ssse3; + +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_ssse3; +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_ssse3; +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_ssse3; +ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_ssse3; + +#endif /* IH264_DEBLK_H_ */ diff --git a/common/ih264_deblk_tables.c b/common/ih264_deblk_tables.c new file mode 100755 index 0000000..91e28e0 --- /dev/null +++ b/common/ih264_deblk_tables.c @@ -0,0 +1,119 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_deblk_tables.c +* +* @brief +* Contains tables used for deblocking +* +* @author +* Ittiam +* +* @par List of Tables: +* - gu1_ih264_alpha_table[] +* - gu1_ih264_beta_table[] +* - gu1_ih264_clip_table[][] +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_deblk_tables.h" + +/*****************************************************************************/ +/* Extern global definitions */ +/*****************************************************************************/ + +/** + ****************************************************************************** + * @brief alpha & beta tables for deblocking + * input : indexA [0-51] & indexB [0-51] + * output : alpha & beta + * + * @remarks Table 8-16 – in H264 Specification, + * Derivation of offset dependent threshold variables + * alpha and beta from indexA and indexB + ****************************************************************************** + */ +const UWORD8 gu1_ih264_alpha_table[52] = +{ + /* indexA :: 0-51 inclusive */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 5, 6, 7, 8, 9, 10, + 12, 13, 15, 17, 20, 22, 25, 28, + 32, 36, 40, 45, 50, 56, 63, 71, + 80, 90, 101, 113, 127, 144, 162, 182, + 203, 226, 255, 255, +}; + +const UWORD8 
gu1_ih264_beta_table[52] = +{ + /* indexB :: 0-51 inclusive */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 3, 3, 3, 3, 4, + 4, 4, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, + 13, 13, 14, 14, 15, 15, 16, 16, + 17, 17, 18, 18, +}; + +/** + ****************************************************************************** + * @brief t'C0 table for deblocking + * input : indexA [0-51] and bS [1,3] + * output : t'C0 + * + * @remarks Table 8-17 – in H264 Specification, + * Value of variable t'C0 as a function of indexA and bS + ****************************************************************************** + */ +const UWORD8 gu1_ih264_clip_table[52][4] = +{ + /* indexA :: 0-51 inclusive */ + { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, + { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, + { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, + { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, + { 0, 0, 0, 0}, { 0, 0, 0, 1}, { 0, 0, 0, 1}, { 0, 0, 0, 1}, + { 0, 0, 0, 1}, { 0, 0, 1, 1}, { 0, 0, 1, 1}, { 0, 1, 1, 1}, + { 0, 1, 1, 1}, { 0, 1, 1, 1}, { 0, 1, 1, 1}, { 0, 1, 1, 2}, + { 0, 1, 1, 2}, { 0, 1, 1, 2}, { 0, 1, 1, 2}, { 0, 1, 2, 3}, + { 0, 1, 2, 3}, { 0, 2, 2, 3}, { 0, 2, 2, 4}, { 0, 2, 3, 4}, + { 0, 2, 3, 4}, { 0, 3, 3, 5}, { 0, 3, 4, 6}, { 0, 3, 4, 6}, + { 0, 4, 5, 7}, { 0, 4, 5, 8}, { 0, 4, 6, 9}, { 0, 5, 7,10}, + { 0, 6, 8,11}, { 0, 6, 8,13}, { 0, 7,10,14}, { 0, 8,11,16}, + { 0, 9,12,18}, { 0,10,13,20}, { 0,11,15,23}, { 0,13,17,25}, +}; diff --git a/common/ih264_deblk_tables.h b/common/ih264_deblk_tables.h new file mode 100755 index 0000000..3935dcb --- /dev/null +++ b/common/ih264_deblk_tables.h @@ -0,0 +1,73 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_deblk_tables.h + * + * @brief + * This file contains declarations of tables used for deblocking + * + * @author + * Ittiam + * + * @par List of Functions: + * + * @remarks + * None + * + ******************************************************************************* + */ + +#ifndef IH264_DEBLK_TABLES_H_ +#define IH264_DEBLK_TABLES_H_ + +/*****************************************************************************/ +/* Extern global declarations */ +/*****************************************************************************/ + +/** + ****************************************************************************** + * @brief alpha & beta tables for deblocking + * input : indexA [0-51] & indexB [0-51] + * output : alpha & beta + * + * @remarks Table 8-16 – in H264 Specification, + * Derivation of offset dependent threshold variables + * alpha and beta from indexA and indexB + ****************************************************************************** + */ +extern const UWORD8 gu1_ih264_alpha_table[52]; + +extern const UWORD8 gu1_ih264_beta_table[52]; + +/** + ****************************************************************************** + * @brief t'C0 table for deblocking + * input : indexA [0-51] and bS [1,3] + * output : t'C0 + * + * @remarks Table 8-17 – in H264 
Specification, + * Value of variable t'C0 as a function of indexA and bS + ****************************************************************************** + */ +extern const UWORD8 gu1_ih264_clip_table[52][4]; + +#endif /* IH264_DEBLK_TABLES_H_ */ diff --git a/common/ih264_debug.h b/common/ih264_debug.h new file mode 100755 index 0000000..96ff2a7 --- /dev/null +++ b/common/ih264_debug.h @@ -0,0 +1,61 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_debug.h +* +* @brief +* Definitions for codec debugging +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _IH264_DEBUG_H_ +#define _IH264_DEBUG_H_ + + +#if DEBUG_PRINT + +#define DEBUG(...) \ +{ \ + printf("\n[H264 DBG] %s/%d:: ", __FUNCTION__, __LINE__); \ + printf(__VA_ARGS__); \ +} + +#else + +#define DEBUG(...) 
{} + +#endif + + +#define ASSERT(x) assert((x)) + + +#endif /* _IH264_DEBUG_H_ */ + diff --git a/common/ih264_defs.h b/common/ih264_defs.h new file mode 100755 index 0000000..8d7e387 --- /dev/null +++ b/common/ih264_defs.h @@ -0,0 +1,690 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_defs.h +* +* @brief +* Definitions used in the codec +* +* @author +* Ittiam +* +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264_DEFS_H_ +#define IH264_DEFS_H_ + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ + + +/*****************************************************************************/ +/* Profile and Levels */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @enum PROFILE_IDC + * @brief Defines the set of possible profiles +****************************************************************************** +*/ +enum +{ + IH264_PROFILE_BASELINE = 66, + IH264_PROFILE_MAIN = 77, + IH264_PROFILE_EXTENDED = 88, + IH264_PROFILE_HIGH = 100, + IH264_PROFILE_HIGH10 = 110, + IH264_PROFILE_HIGH422 = 122, + IH264_PROFILE_HIGH444 = 144, +}; + +/** +****************************************************************************** + * @enum LEVEL_IDC + * @brief Defines the set of possible levels +****************************************************************************** +*/ +typedef enum +{ + IH264_LEVEL_10 = 10, + IH264_LEVEL_1B = 9, + IH264_LEVEL_11 = 11, + IH264_LEVEL_12 = 12, + IH264_LEVEL_13 = 13, + IH264_LEVEL_20 = 20, + IH264_LEVEL_21 = 21, + IH264_LEVEL_22 = 22, + IH264_LEVEL_30 = 30, + IH264_LEVEL_31 = 31, + IH264_LEVEL_32 = 32, + IH264_LEVEL_40 = 40, + IH264_LEVEL_41 = 41, + IH264_LEVEL_42 = 42, + IH264_LEVEL_50 = 50, + IH264_LEVEL_51 = 51, +}IH264_LEVEL_T; + + +/** +****************************************************************************** + * @enum PIC TYPES + * @brief Defines the set of possible picture type - not 
signaled in bitstream +****************************************************************************** +*/ +typedef enum +{ + PIC_NA = 0x7FFFFFFF, + PIC_IDR = 0, + PIC_I = 1, + PIC_P = 2, + PIC_B = 3, + PIC_P_NONREF = 4, + PIC_B_NONREF = 5, + PIC_MAX, +}PIC_TYPE_T; + +/** +****************************************************************************** + * @enum FRAME-FIELD types + * @brief Defines the set of possible field types. +****************************************************************************** +*/ +enum +{ + TOP_FIELD, + BOTTOM_FIELD, + FRAME, +}; + +/** +****************************************************************************** + * @enum SLICE TYPES + * @brief Defines the set of possible SLICE TYPES +****************************************************************************** +*/ +enum +{ + PSLICE = 0, + BSLICE = 1, + ISLICE = 2, + SPSLICE = 3, + SISLICE = 4, + MAXSLICE_TYPE, +}; + +/** +****************************************************************************** + * @enum NAL_UNIT_TYPE + * @brief Defines the set of possible nal unit types +****************************************************************************** +*/ +enum +{ + NAL_UNSPEC_0 = 0, + NAL_SLICE_NON_IDR = 1, + NAL_SLICE_DPA = 2, + NAL_SLICE_DPB = 3, + NAL_SLICE_DPC = 4, + NAL_SLICE_IDR = 5, + NAL_SEI = 6, + NAL_SPS = 7, + NAL_PPS = 8, + NAL_AUD = 9, + NAL_EOSEQ = 10, + NAL_EOSTR = 11, + NAL_FILLER = 12, + NAL_SPSE = 13, + NAL_RES_18 = 14, + NAL_AUX_PIC = 19, + NAL_RES_23 = 20, + NAL_UNSPEC_31 = 24, +}; + +/** +****************************************************************************** + * @enum CHROMA_FORMAT_IDC + * @brief Defines the set of possible chroma formats + * Note Chorma format Do not change enum values +****************************************************************************** +*/ +enum +{ + CHROMA_FMT_IDC_MONOCHROME = 0, + CHROMA_FMT_IDC_YUV420 = 1, + CHROMA_FMT_IDC_YUV422 = 2, + CHROMA_FMT_IDC_YUV444 = 3, + CHROMA_FMT_IDC_YUV444_PLANES = 4, +}; + + +/** 
+****************************************************************************** + * @enum MBMODES_I16x16 + * @brief Defines the set of possible intra 16x16 mb modes +****************************************************************************** +*/ +typedef enum +{ + VERT_I16x16 = 0, + HORZ_I16x16 = 1, + DC_I16x16 = 2, + PLANE_I16x16 = 3, + MAX_I16x16 = 4, +}MBMODES_I16x16; + +/** +****************************************************************************** + * @enum MBMODES_I4x4 + * @brief Defines the set of possible intra 4x4 mb modes +****************************************************************************** +*/ +typedef enum +{ + VERT_I4x4 = 0, + HORZ_I4x4 = 1, + DC_I4x4 = 2, + DIAG_DL_I4x4 = 3, + DIAG_DR_I4x4 = 4, + VERT_R_I4x4 = 5, + HORZ_D_I4x4 = 6, + VERT_L_I4x4 = 7, + HORZ_U_I4x4 = 8, + MAX_I4x4 = 9, +}MBMODES_I4x4; + +/** +****************************************************************************** + * @enum MBMODES_I8x8 + * @brief Defines the set of possible intra 8x8 mb modes +****************************************************************************** +*/ +typedef enum +{ + VERT_I8x8 = 0, + HORZ_I8x8 = 1, + DC_I8x8 = 2, + DIAG_DL_I8x8 = 3, + DIAG_DR_I8x8 = 4, + VERT_R_I8x8 = 5, + HORZ_D_I8x8 = 6, + VERT_L_I8x8 = 7, + HORZ_U_I8x8 = 8, + MAX_I8x8 = 9, +}MBMODES_I8x8; + +/** +****************************************************************************** + * @enum MBMODES_CHROMA_I8x8 (Chroma) + * @brief Defines the set of possible intra 8x8 mb modes for chroma +****************************************************************************** +*/ +typedef enum +{ + DC_CH_I8x8 = 0, + HORZ_CH_I8x8 = 1, + VERT_CH_I8x8 = 2, + PLANE_CH_I8x8 = 3, + MAX_CH_I8x8 = 4, +}MBMODES_CHROMA_I8x8; + +/** +****************************************************************************** + * @enum MBTYPES + * @brief Defines the set of possible macro block types +****************************************************************************** +*/ +typedef enum +{ + I16x16 = 
0, + I4x4 = 1, + I8x8 = 2, + P16x16 = 3, + P16x8 = 4, + P8x16 = 5, + P8x8 = 6, + PSKIP = 7, + IPCM = 8, + MAX_MBTYPES, +}MBTYPES_T; + +/* Prediction list */ +/* Do not change enum values */ +enum +{ + PRED_L0 = 0, + PRED_L1 = 1, + PRED_BI = 2 +}; + + +/** +****************************************************************************** + * @enum ENTROPY_BLK_TYPE + * @brief Defines the nature of blocks employed in entropy coding +****************************************************************************** +*/ +typedef enum +{ + ENTROPY_BLK_INVALID = -1, + CAVLC_LUMA_4x4_DC = 0, + CAVLC_LUMA_4x4_AC = 1, + CAVLC_LUMA_4x4 = 2, + CAVLC_CHROMA_4x4_DC = 3, + CAVLC_CHROMA_4x4_AC = 4, +} ENTROPY_BLK_TYPE; + +/** +****************************************************************************** + * @enum ENTROPY_MODE + * @brief Entropy coding modes +****************************************************************************** +*/ +typedef enum +{ + CAVLC = 0, + CABAC = 1, +} ENTROPY_MODE; + +/** +****************************************************************************** + * @enum COMPONENT_TYPE + * @brief components Y, U & V +****************************************************************************** +*/ +typedef enum +{ + Y, + U, + V, +} COMPONENT_TYPE; + + +/** +****************************************************************************** + * @enum MBPART_PREDMODE_T + * @brief MbPartPredMode Table 7-11 to 7-14 +****************************************************************************** +*/ +typedef enum +{ + MBPART_NA, + MBPART_I4x4, + MBPART_I8x8, + MBPART_I16x16, + MBPART_L0, + MBPART_L1, + MBPART_BI, + MBPART_DIRECT, + MBPART_IPCM, +}MBPART_PREDMODE_T; + + +typedef enum +{ + I_NxN, + I_16x16_0_0_0, + I_16x16_1_0_0, + I_16x16_2_0_0, + I_16x16_3_0_0, + I_16x16_0_1_0, + I_16x16_1_1_0, + I_16x16_2_1_0, + I_16x16_3_1_0, + I_16x16_0_2_0, + I_16x16_1_2_0, + I_16x16_2_2_0, + I_16x16_3_2_0, + I_16x16_0_0_1, + I_16x16_1_0_1, + I_16x16_2_0_1, + I_16x16_3_0_1, + 
I_16x16_0_1_1, + I_16x16_1_1_1, + I_16x16_2_1_1, + I_16x16_3_1_1, + I_16x16_0_2_1, + I_16x16_1_2_1, + I_16x16_2_2_1, + I_16x16_3_2_1, + I_PCM, +}MBTYPE_ISLICE_T; + +typedef enum +{ + P_L0_16x16, + P_L0_L0_16x8, + P_L0_L0_8x16, + P_8x8, + P_8x8REF0, + P_SKIP +}MBTYPE_PSLICE_T; + +typedef enum +{ + B_DIRECT_16x16, + B_L0_16x16, + B_L1_16x16, + B_BI_16x16, + B_L0_L0_16x8, + B_L0_L0_8x16, + B_L1_L1_16x8, + B_L1_L1_8x16, + B_L0_L1_16x8, + B_L0_L1_8x16, + B_L1_L0_16x8, + B_L1_L0_8x16, + B_L0_BI_16x8, + B_L0_BI_8x16, + B_L1_BI_16x8, + B_L1_BI_8x16, + B_BI_L0_16x8, + B_BI_L0_8x16, + B_BI_L1_16x8, + B_BI_L1_8x16, + B_BI_BI_16x8, + B_BI_BI_8x16, + B_8x8, + B_SKIP, +}MBTYPE_BSLICE_T; + + +typedef enum +{ + P_L0_8x8, + P_L0_8x4, + P_L0_4x8, + P_L0_4x4, +}SUBMBTYPE_PSLICE_T; + +typedef enum +{ + B_DIRECT_8x8, + B_L0_8x8, + B_L1_8x8, + B_BI_8x8, + B_L0_8x4, + B_L0_4x8, + B_L1_8x4, + B_L1_4x8, + B_BI_8x4, + B_BI_4x8, + B_L0_4x4, + B_L1_4x4, + B_BI_4x4, +}SUBMBTYPE_BSLICE_T; + +/** + * DC mode pattern for 4 4x4 sub blocks in an MB row: DC_I16x16 in each byte of a 32-bit word (parenthesized so it expands safely inside larger expressions) + */ +#define DC_I16X16_MB_ROW ((DC_I16x16 << 24) | (DC_I16x16 << 16) | \ + (DC_I16x16 << 8) | DC_I16x16) + + + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Reference frame defs */ +/*****************************************************************************/ +/* Maximum DPB size */ +#define MAX_DPB_SIZE 16 + +/* Maximum mmco commands in slice header */ +#define MAX_MMCO_COMMANDS 32 + +/* Maximum reference reorder idc */ +#define MAX_MODICATION_IDC 32 + +/*****************************************************************************/ +/* SPS restrictions */ +/*****************************************************************************/ + +/* Number of SPS allowed */ +/* An extra buffer is allocated to write the parsed data + * 
It is copied to the appropriate location later */ +#define MAX_SPS_CNT (32 + 1) + +/* Maximum long term reference pics */ +#define MAX_LTREF_PICS_SPS 16 + +/* Maximum short term reference pics */ +#define MAX_STREF_PICS_SPS 64 + + +/*****************************************************************************/ +/* PPS restrictions */ +/*****************************************************************************/ + +/* Number of PPS allowed */ +/* An extra buffer is allocated to write the parsed data + * It is copied to the appropriate location later */ +#define MAX_PPS_CNT (256 + 1) + +/*****************************************************************************/ +/* Macro definitions for sizes of MB, PU, TU, CU */ +/*****************************************************************************/ +#define MB_SIZE 16 +#define BLK8x8SIZE 8 +#define BLK_SIZE 4 + + +/* TU Size Range */ +#define MAX_TU_SIZE 8 +#define MIN_TU_SIZE 4 + +/* Max Transform Size */ +#define MAX_TRANS_SIZE (MAX_TU_SIZE*MAX_TU_SIZE) + +/* PU Size Range */ +#define MAX_PU_SIZE 16 +#define MIN_PU_SIZE 4 + +/* Number of max TU in a MB row */ +#define MAX_TU_IN_MB_ROW ((MB_SIZE / MIN_TU_SIZE)) + +/* Number of max PU in a CTb row */ +#define MAX_PU_IN_MB_ROW ((MB_SIZE / MIN_PU_SIZE)) + + +/* Number of max PU in a MB */ +/*****************************************************************************/ +/* Note though for 64 x 64 MB, Max PU in MB is 128, in order to store */ +/* intra pred info, 256 entries are needed */ +/*****************************************************************************/ +#define MAX_PU_IN_MB ((MB_SIZE / MIN_PU_SIZE) * \ + (MB_SIZE / MIN_PU_SIZE)) + +/* Number of max TU in a MB */ +#define MAX_TU_IN_MB ((MB_SIZE / MIN_TU_SIZE) * \ + (MB_SIZE / MIN_TU_SIZE)) + + + +/** + * Maximum transform depths + */ +#define MAX_TRAFO_DEPTH 5 + +#define MAX_DC_4x4_SUBBLK_LUMA 1 +#define MAX_AC_4x4_SUBBLK_LUMA 16 +#define MAX_DC_4x4_SUBBLK_CHROMA 2 +#define MAX_AC_4x4_SUBBLK_CHROMA 8 + 
+#define MAX_4x4_SUBBLKS (MAX_DC_4x4_SUBBLK_LUMA + MAX_DC_4x4_SUBBLK_CHROMA +\ + MAX_AC_4x4_SUBBLK_LUMA + MAX_AC_4x4_SUBBLK_CHROMA) + +/* Max number of deblocking edges */ +#define MAX_VERT_DEBLK_EDGES ((MB_SIZE/8) * (MB_SIZE/4)) +#define MAX_HORZ_DEBLK_EDGES ((MB_SIZE/4) * (MB_SIZE/8)) + +/* Qp can not change below 8x8 level */ +#define MAX_DEBLK_QP_CNT ((MB_SIZE/8) * (MB_SIZE/8)) + +/*****************************************************************************/ +/* Parsing related macros */ +/*****************************************************************************/ +#define SUBBLK_COEFF_CNT 16 + +/* Quant and Trans defs */ + +/*****************************************************************************/ +/* Sizes for Transform functions */ +/*****************************************************************************/ +#define TRANS_SIZE_4 4 +#define TRANS_SIZE_8 8 +#define TRANS_SIZE_16 16 +#define TRANS_SIZE_32 32 + + +#define IT_SHIFT_STAGE_1 7 +#define IT_SHIFT_STAGE_2 12 + +/** + * @brief Maximum transform dynamic range (excluding sign bit) + */ +#define MAX_TR_DYNAMIC_RANGE 15 + +/** + * @brief Q(QP%6) * IQ(QP%6) = 2^20 + */ +#define QUANT_IQUANT_SHIFT 20 + +/** + * @brief Q factor for Qp%6 multiplication + */ +#define QUANT_SHIFT 14 + +/** + * @brief Q shift factor for flat rescale matrix weights + */ +#define FLAT_RESCALE_MAT_Q_SHIFT 11 + +/** + * @brief Scaling matrix is represented in Q15 format + */ +#define SCALING_Q_SHIFT 15 + +/** + * @brief rounding factor for quantization represented in Q9 format + */ +#define QUANT_ROUND_FACTOR_Q 9 + +/** + * @brief Minimum qp supported in H264 spec + */ +#define MIN_H264_QP 0 + +/** + * @brief Maximum qp supported in H264 spec + */ +#define MAX_H264_QP 51 + +/** + * @brief Total number of transform sizes + * used for sizeID while getting scale matrix + */ +#define NUM_UNIQUE_TRANS_SIZE 4 + +/** + * @brief Maximum number of bits in frameNumber signaling + */ +#define MAX_BITS_IN_FRAME_NUM 16 + +/** + * 
@brief Maximum number of bits in POC LSB signaling + */ +#define MAX_BITS_IN_POC_LSB 16 + + +/** + * @brief Maximum PIC Order Count type + */ +#define MAX_PIC_ORDER_COUNT_TYPE 2 + + +/** + * @brief Maximum Weighted bipred idc + */ +#define MAX_WEIGHT_BIPRED_IDC 2 + +/*****************************************************************************/ +/* Number of scaling matrices for each transform size */ +/*****************************************************************************/ +#define SCALE_MAT_CNT_TRANS_SIZE_4 6 +#define SCALE_MAT_CNT_TRANS_SIZE_8 6 +#define SCALE_MAT_CNT_TRANS_SIZE_16 6 +#define SCALE_MAT_CNT_TRANS_SIZE_32 2 + +/* Maximum number of scale matrices for a given transform size */ +#define SCALE_MAT_CNT_MAX_PER_TRANS_SIZE 6 + +/* Total number of scale matrices */ +#define TOTAL_SCALE_MAT_COUNT (SCALE_MAT_CNT_TRANS_SIZE_4 + \ + SCALE_MAT_CNT_TRANS_SIZE_8 + \ + SCALE_MAT_CNT_TRANS_SIZE_16 + \ + SCALE_MAT_CNT_TRANS_SIZE_32) + + +/*****************************************************************************/ +/* Intra pred Macros */ +/*****************************************************************************/ +/** Planar Intra prediction mode */ +#define INTRA_PLANAR 0 + +/** DC Intra prediction mode */ +#define INTRA_DC 1 + +/** Gives angular mode for intra prediction */ +#define INTRA_ANGULAR(x) (x) + +/** Following is used to signal no intra prediction in case of pcm blocks + */ +#define INTRA_PRED_NONE 63 + + +/** Following is used to signal no intra prediction is needed for first three + * 4x4 luma blocks in case of 4x4 TU sizes + * Also used in pcm cases + */ +#define INTRA_PRED_CHROMA_IDX_NONE 7 + + +/** +****************************************************************************** + * @brief neighbor availability masks +****************************************************************************** + */ +#define LEFT_MB_AVAILABLE_MASK 0x01 +#define TOP_LEFT_MB_AVAILABLE_MASK 0x02 +#define TOP_MB_AVAILABLE_MASK 0x04 +#define 
TOP_RIGHT_MB_AVAILABLE_MASK 0x08 + +#endif /* IH264_DEFS_H_ */ diff --git a/common/ih264_disp_mgr.c b/common/ih264_disp_mgr.c new file mode 100755 index 0000000..2bdb524 --- /dev/null +++ b/common/ih264_disp_mgr.c @@ -0,0 +1,186 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_disp_mgr.c +* +* @brief +* Contains function definitions for display management +* +* @author +* Srinivas T +* +* @par List of Functions: +* - ih264_disp_mgr_init() +* - ih264_disp_mgr_add() +* - ih264_disp_mgr_get() +* +* @remarks +* None +* +******************************************************************************* +*/ +#include <stdlib.h> +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_disp_mgr.h" + + +/** +******************************************************************************* +* +* @brief +* Initialization function for display buffer manager +* +* @par Description: +* Initializes the display buffer management structure +* +* @param[in] ps_disp_mgr +* Pointer to the display buffer management structure +* +* @returns none +* +* @remarks +* None +* +******************************************************************************* +*/ +void ih264_disp_mgr_init(disp_mgr_t *ps_disp_mgr) +{ + WORD32 id; + + ps_disp_mgr->u4_last_abs_poc = DEFAULT_POC; + + for(id = 0; id < DISP_MGR_MAX_CNT; id++) + { + ps_disp_mgr->ai4_abs_poc[id] = DEFAULT_POC; + ps_disp_mgr->apv_ptr[id] = NULL; + } +} + + +/** +******************************************************************************* +* +* @brief +* Adds a buffer to the display manager +* +* @par Description: +* Adds a buffer to the display buffer manager +* +* @param[in] ps_disp_mgr +* Pointer to the display buffer management structure +* +* @param[in] buf_id +* ID of the display buffer; valid range is 0 to DISP_MGR_MAX_CNT - 1 +* +* @param[in] abs_poc +* Absolute POC of the display buffer +* +* @param[in] pv_ptr +* Pointer to the display buffer +* +* @returns 0 if success, -1 if buf_id is out of range or its slot is already in use +* +* @remarks +* A negative buf_id is rejected so it can never index before the arrays +* +******************************************************************************* +*/ +WORD32 ih264_disp_mgr_add(disp_mgr_t *ps_disp_mgr, + WORD32 buf_id, + WORD32 abs_poc, + void *pv_ptr) +{ + if((buf_id < 0) || (buf_id >= 
DISP_MGR_MAX_CNT)) + { + return (-1); + } + + if(ps_disp_mgr->apv_ptr[buf_id] != NULL) + { + return (-1); + } + + ps_disp_mgr->apv_ptr[buf_id] = pv_ptr; + ps_disp_mgr->ai4_abs_poc[buf_id] = abs_poc; + return 0; +} + + +/** +******************************************************************************* +* +* @brief +* Gets the next buffer +* +* @par Description: +* Gets the next display buffer +* +* @param[in] ps_disp_mgr +* Pointer to the display buffer structure +* +* @param[out] pi4_buf_id +* Pointer to hold buffer id of the display buffer being returned (-1 if none) +* +* @returns Pointer to the pending buffer with the smallest abs POC, NULL if none +* +* @remarks +* None +* +******************************************************************************* +*/ +void* ih264_disp_mgr_get(disp_mgr_t *ps_disp_mgr, WORD32 *pi4_buf_id) +{ + WORD32 id; + void *pv_ret_ptr; + WORD32 i4_min_poc; + WORD32 min_poc_id; + + + pv_ret_ptr = NULL; + i4_min_poc = 0x7FFFFFFF; + min_poc_id = -1; + + /* Find minimum POC */ + for(id = 0; id < DISP_MGR_MAX_CNT; id++) + { + if((DEFAULT_POC != ps_disp_mgr->ai4_abs_poc[id]) && + (ps_disp_mgr->ai4_abs_poc[id] <= i4_min_poc)) + { + i4_min_poc = ps_disp_mgr->ai4_abs_poc[id]; + min_poc_id = id; + } + } + *pi4_buf_id = min_poc_id; + /* If all pocs are still default_poc then return NULL */ + if(-1 == min_poc_id) + { + return NULL; + } + + pv_ret_ptr = ps_disp_mgr->apv_ptr[min_poc_id]; + + /* Set abs poc to default and apv_ptr to null so that the buffer is not returned again */ + ps_disp_mgr->apv_ptr[min_poc_id] = NULL; + ps_disp_mgr->ai4_abs_poc[min_poc_id] = DEFAULT_POC; + return pv_ret_ptr; +} diff --git a/common/ih264_disp_mgr.h b/common/ih264_disp_mgr.h new file mode 100755 index 0000000..6f56493 --- /dev/null +++ b/common/ih264_disp_mgr.h @@ -0,0 +1,70 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this 
file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_disp_mgr.h +* +* @brief +* Function declarations used for display management +* +* @author +* Srinivas T +* +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _DISP_MGR_H_ +#define _DISP_MGR_H_ + +#define DISP_MGR_MAX_CNT 64 +#define DEFAULT_POC 0x7FFFFFFF + +typedef struct +{ + /** + * last_abs_poc + */ + UWORD32 u4_last_abs_poc; + + /** + * au4_abs_poc[DISP_MGR_MAX_CNT] + */ + WORD32 ai4_abs_poc[DISP_MGR_MAX_CNT]; + + /** + * apv_ptr[DISP_MGR_MAX_CNT] + */ + void *apv_ptr[DISP_MGR_MAX_CNT]; +}disp_mgr_t; + +void ih264_disp_mgr_init(disp_mgr_t *ps_disp_mgr); + +WORD32 ih264_disp_mgr_add(disp_mgr_t *ps_disp_mgr, + WORD32 id, + WORD32 abs_poc, + void *pv_ptr); + +void* ih264_disp_mgr_get(disp_mgr_t *ps_disp_mgr, WORD32 *pi4_buf_id); + +#endif //_DISP_MGR_H_ diff --git a/common/ih264_dpb_mgr.c b/common/ih264_dpb_mgr.c new file mode 100755 index 0000000..8e087d3 --- /dev/null +++ b/common/ih264_dpb_mgr.c @@ -0,0 +1,1176 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_dpb_mgr.c + * + * @brief + * Function definitions used for decoded picture buffer management + * + * @author + * Srinivas T + * + * @par List of Functions: + * - ih264_dpb_mgr_init() + * - ih264_dpb_mgr_sort_short_term_fields_by_frame_num() + * - ih264_dpb_mgr_sort_short_term_fields_by_poc_l0() + * - ih264_dpb_mgr_sort_short_term_fields_by_poc_l1() + * - ih264_dpb_mgr_sort_long_term_fields_by_frame_idx() + * - ih264_dpb_mgr_alternate_ref_fields() + * - ih264_dpb_mgr_insert_ref_field() + * - ih264_dpb_mgr_insert_ref_frame() + * - ih264_dpb_mgr_count_ref_frames() + * - ih264_dpb_mgr_delete_ref_frame() + * - ih264_dpb_mgr_delete_long_ref_fields_max_frame_idx() + * - ih264_dpb_mgr_delete_short_ref_frame() + * - ih264_dpb_mgr_delete_all_ref_frames() + * - ih264_dpb_mgr_reset() + * - ih264_dpb_mgr_release_pics() + * + * @remarks + * None + * + ******************************************************************************* + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> + +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264_macros.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_buf_mgr.h" +#include "ih264_dpb_mgr.h" +#include "ih264_debug.h" + +/** + 
******************************************************************************* + * + * @brief + * DPB manager initializer + * + * @par Description: + * Initialises the DPB manager structure + * + * @param[in] ps_dpb_mgr + * Pointer to the DPB manager structure + * + * @returns none + * + * @remarks + * Marks every field entry INVALID, sets both list heads to NULL and both counts to 0 + * + ******************************************************************************* + */ + +void ih264_dpb_mgr_init(dpb_mgr_t *ps_dpb_mgr) +{ + UWORD32 i; + dpb_info_t *ps_dpb_info = ps_dpb_mgr->as_dpb_info; + for(i = 0; i < MAX_DPB_BUFS; i++) + { + ps_dpb_info[i].ps_prev_dpb = NULL; + ps_dpb_info[i].ps_pic_buf = NULL; + ps_dpb_mgr->as_top_field_pics[i].i4_used_as_ref = INVALID; + ps_dpb_mgr->as_bottom_field_pics[i].i4_used_as_ref = INVALID; + ps_dpb_mgr->as_top_field_pics[i].i1_field_type = INVALID; + ps_dpb_mgr->as_bottom_field_pics[i].i1_field_type = INVALID; + ps_dpb_mgr->as_top_field_pics[i].i4_long_term_frame_idx = -1; + ps_dpb_mgr->as_bottom_field_pics[i].i4_long_term_frame_idx = -1; + } + + ps_dpb_mgr->u1_num_short_term_ref_bufs = 0; + ps_dpb_mgr->u1_num_long_term_ref_bufs = 0; + ps_dpb_mgr->ps_dpb_short_term_head = NULL; + ps_dpb_mgr->ps_dpb_long_term_head = NULL; +} + +/** + ******************************************************************************* + * + * @brief + * Function to sort short term pics by frame_num. + * + * @par Description: + * Sorts short term fields by frame_num. For 2 fields having same frame_num, + * orders them based on requested first field type. 
+ * + * @param[in] ps_dpb_mgr + * Pointer to the DPB manager structure + * + * @param[in] curr_frame_num + * frame_num of the current pic + * + * @param[in] first_field_type + * For complementary fields, required first field + * + * @param[in] max_frame_num + * Maximum frame_num allowed; subtracted from any frame_num greater than curr_frame_num before comparing + * + * @returns 0 on success, -1 if the short term list is empty + * + * @remarks + * List nodes stay linked as they are; only their ps_pic_buf pointers are exchanged + * + ******************************************************************************* + */ +WORD32 ih264_dpb_mgr_sort_short_term_fields_by_frame_num(dpb_mgr_t *ps_dpb_mgr, + WORD32 curr_frame_num, + WORD32 first_field_type, + WORD32 max_frame_num) +{ + dpb_info_t *ps_dpb_node1 = ps_dpb_mgr->ps_dpb_short_term_head; + dpb_info_t *ps_dpb_node2; + WORD32 frame_num_node1; + WORD32 frame_num_node2; + pic_buf_t *ps_pic_buf; + + if(ps_dpb_node1 == NULL) + return -1; + + for (; ps_dpb_node1 != NULL; ps_dpb_node1 = ps_dpb_node1->ps_prev_dpb) + { + for (ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb; ps_dpb_node2 != NULL; ps_dpb_node2 = ps_dpb_node2->ps_prev_dpb) + { + frame_num_node1 = ps_dpb_node1->ps_pic_buf->i4_frame_num; + frame_num_node2 = ps_dpb_node2->ps_pic_buf->i4_frame_num; + + if(frame_num_node1 > curr_frame_num) + frame_num_node1 = frame_num_node1 - max_frame_num; + if(frame_num_node2 > curr_frame_num) + frame_num_node2 = frame_num_node2 - max_frame_num; + + if(frame_num_node1 < frame_num_node2) + { + ps_pic_buf = ps_dpb_node1->ps_pic_buf; + ps_dpb_node1->ps_pic_buf = ps_dpb_node2->ps_pic_buf; + ps_dpb_node2->ps_pic_buf = ps_pic_buf; + } + } + } + + /** + * For frames and complementary field pairs, + * ensure first_field_type appears first in the list + */ + ps_dpb_node1 = ps_dpb_mgr->ps_dpb_short_term_head; + ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb; + while(ps_dpb_node2 != NULL) + { + pic_buf_t *ps_pic_node1 = ps_dpb_node1->ps_pic_buf; + pic_buf_t *ps_pic_node2 = ps_dpb_node2->ps_pic_buf; + frame_num_node1 = ps_pic_node1->i4_frame_num; + frame_num_node2 = ps_pic_node2->i4_frame_num; + if(frame_num_node1 == frame_num_node2) + { + 
ASSERT(ps_pic_node1->i1_field_type != ps_pic_node2->i1_field_type); + if(ps_pic_node1->i1_field_type != first_field_type) + { + ps_dpb_node1->ps_pic_buf = ps_pic_node2; + ps_dpb_node2->ps_pic_buf = ps_pic_node1; + } + } + ps_dpb_node1 = ps_dpb_node2; + ps_dpb_node2 = ps_dpb_node2->ps_prev_dpb; + } + return 0; + +} + +/** + ******************************************************************************* + * + * @brief + * Function to sort short term pics by poc for list 0. + * + * @par Description: + * Orders all the pocs less than current poc in the descending order. + * Then orders all the pocs greater than current poc in the ascending order. + * + * @param[in] ps_dpb_mgr + * Pointer to the DPB manager structure + * + * @param[in] curr_poc + * Poc of the current pic + * + * @param[in] first_field_type + * For complementary fields, required first field + * + * @returns 0 on success, -1 if the short term list is empty + * + * @remarks + * List nodes stay linked as they are; only their ps_pic_buf pointers are exchanged + * + ******************************************************************************* + */ +WORD32 ih264_dpb_mgr_sort_short_term_fields_by_poc_l0(dpb_mgr_t *ps_dpb_mgr, + WORD32 curr_poc, + WORD32 first_field_type) +{ + dpb_info_t *ps_dpb_node1 = ps_dpb_mgr->ps_dpb_short_term_head; + dpb_info_t *ps_dpb_node2; + WORD32 poc_node1; + WORD32 poc_node2; + WORD32 frame_num_node1; + WORD32 frame_num_node2; + pic_buf_t *ps_pic_buf; + + if(ps_dpb_node1 == NULL) + return -1; + + /** + * Sort the fields by poc. + * All POCs less than current poc are first placed in the descending order. + * Then all POCs greater than current poc are placed in the ascending order. 
+ */ + for (; ps_dpb_node1 != NULL; ps_dpb_node1 = ps_dpb_node1->ps_prev_dpb) + { + for (ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb; ps_dpb_node2 != NULL; ps_dpb_node2 = ps_dpb_node2->ps_prev_dpb) + { + poc_node1 = ps_dpb_node1->ps_pic_buf->i4_abs_poc; + poc_node2 = ps_dpb_node2->ps_pic_buf->i4_abs_poc; + ASSERT(poc_node1 != curr_poc); + ASSERT(poc_node2 != curr_poc); + if(((poc_node1 < curr_poc) && (poc_node2 > curr_poc)) || + ((poc_node1 < curr_poc) && (poc_node2 < curr_poc) && (poc_node1 > poc_node2)) || + ((poc_node1 > curr_poc) && (poc_node2 > curr_poc) && (poc_node1 < poc_node2))) + continue; + + ps_pic_buf = ps_dpb_node1->ps_pic_buf; + ps_dpb_node1->ps_pic_buf = ps_dpb_node2->ps_pic_buf; + ps_dpb_node2->ps_pic_buf = ps_pic_buf; + } + } + + ps_dpb_node1 = ps_dpb_mgr->ps_dpb_short_term_head; + ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb; + while(ps_dpb_node2 != NULL) + { + pic_buf_t *ps_pic_node1 = ps_dpb_node1->ps_pic_buf; + pic_buf_t *ps_pic_node2 = ps_dpb_node2->ps_pic_buf; + frame_num_node1 = ps_pic_node1->i4_frame_num; + frame_num_node2 = ps_pic_node2->i4_frame_num; + if(frame_num_node1 == frame_num_node2) + { + ASSERT(ps_pic_node1->i1_field_type != ps_pic_node2->i1_field_type); + if(ps_pic_node1->i1_field_type != first_field_type) + { + ps_dpb_node1->ps_pic_buf = ps_pic_node2; + ps_dpb_node2->ps_pic_buf = ps_pic_node1; + } + } + ps_dpb_node1 = ps_dpb_node2; + ps_dpb_node2 = ps_dpb_node2->ps_prev_dpb; + } + return 0; + +} + +/** + ******************************************************************************* + * + * @brief + * Function to sort short term pics by poc for list 1. + * + * @par Description: + * Orders all the pocs greater than current poc in the ascending order. + * Then orders all the pocs less than current poc in the descending order. 
+ *
+ * @param[in] ps_dpb_mgr
+ *  Pointer to the DPB manager structure
+ *
+ * @param[in] curr_poc
+ *  Poc of the current pic
+ *
+ * @param[in] first_field_type
+ *  For complementary fields, required first field
+ *
+ * @returns
+ *  -1 if the short-term list is empty, 0 otherwise
+ *
+ * @remarks
+ *  Only the ps_pic_buf payloads are exchanged between nodes.
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_sort_short_term_fields_by_poc_l1(dpb_mgr_t *ps_dpb_mgr,
+                                                      WORD32 curr_poc,
+                                                      WORD32 first_field_type)
+{
+    dpb_info_t *ps_outer;
+    dpb_info_t *ps_inner;
+    dpb_info_t *ps_lead;
+    dpb_info_t *ps_follow;
+
+    if(NULL == ps_dpb_mgr->ps_dpb_short_term_head)
+        return -1;
+
+    /**
+     * Pass 1: sort the fields by poc.
+     * POCs above the current poc come first (ascending), POCs below the
+     * current poc follow (descending).
+     */
+    ps_outer = ps_dpb_mgr->ps_dpb_short_term_head;
+    while(ps_outer != NULL)
+    {
+        ps_inner = ps_outer->ps_prev_dpb;
+        while(ps_inner != NULL)
+        {
+            WORD32 poc_node1 = ps_outer->ps_pic_buf->i4_abs_poc;
+            WORD32 poc_node2 = ps_inner->ps_pic_buf->i4_abs_poc;
+            WORD32 in_order = 0;
+
+            ASSERT(poc_node1 != curr_poc);
+            ASSERT(poc_node2 != curr_poc);
+
+            if(poc_node1 > curr_poc)
+            {
+                /* a future pic precedes every past pic and every later future pic */
+                in_order = (poc_node2 < curr_poc) ||
+                           ((poc_node2 > curr_poc) && (poc_node1 < poc_node2));
+            }
+            else if(poc_node1 < curr_poc)
+            {
+                /* a past pic only precedes past pics that are further away */
+                in_order = (poc_node2 < curr_poc) && (poc_node1 > poc_node2);
+            }
+
+            if(!in_order)
+            {
+                pic_buf_t *ps_swap = ps_outer->ps_pic_buf;
+
+                ps_outer->ps_pic_buf = ps_inner->ps_pic_buf;
+                ps_inner->ps_pic_buf = ps_swap;
+            }
+            ps_inner = ps_inner->ps_prev_dpb;
+        }
+        ps_outer = ps_outer->ps_prev_dpb;
+    }
+
+    /* Pass 2: put the requested field type first within same-frame_num pairs */
+    for(ps_lead = ps_dpb_mgr->ps_dpb_short_term_head,
+        ps_follow = ps_lead->ps_prev_dpb;
+        ps_follow != NULL;
+        ps_lead = ps_follow, ps_follow = ps_follow->ps_prev_dpb)
+    {
+        pic_buf_t *ps_pic_node1 = ps_lead->ps_pic_buf;
+        pic_buf_t *ps_pic_node2 = ps_follow->ps_pic_buf;
+
+        if(ps_pic_node1->i4_frame_num != ps_pic_node2->i4_frame_num)
+            continue;
+
+        ASSERT(ps_pic_node1->i1_field_type != ps_pic_node2->i1_field_type);
+        if(ps_pic_node1->i1_field_type != first_field_type)
+        {
+            ps_lead->ps_pic_buf = ps_pic_node2;
+            ps_follow->ps_pic_buf = ps_pic_node1;
+        }
+    }
+    return 0;
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Function to sort long term pics by long term frame idx.
+ *
+ * @par Description:
+ *  Sorts long term fields in ascending order of long term frame idx. For two
+ *  neighbouring fields having the same long term frame idx, orders them based
+ *  on the requested first field type.
+ *
+ * @param[in] ps_dpb_mgr
+ *  Pointer to the DPB manager structure
+ *
+ * @param[in] first_field_type
+ *  For complementary fields, required first field
+ *
+ * @returns
+ *  -1 if the long-term list is empty, 0 otherwise
+ *
+ * @remarks
+ *  Only the ps_pic_buf payloads are exchanged between nodes.
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_sort_long_term_fields_by_frame_idx(dpb_mgr_t *ps_dpb_mgr,
+                                                        WORD32 first_field_type)
+{
+    dpb_info_t *ps_outer;
+    dpb_info_t *ps_inner;
+    dpb_info_t *ps_lead;
+    dpb_info_t *ps_follow;
+
+    if(NULL == ps_dpb_mgr->ps_dpb_long_term_head)
+        return -1;
+
+    /* Pass 1: smallest long term frame idx ends up at the head */
+    ps_outer = ps_dpb_mgr->ps_dpb_long_term_head;
+    while(ps_outer != NULL)
+    {
+        ps_inner = ps_outer->ps_prev_dpb;
+        while(ps_inner != NULL)
+        {
+            pic_buf_t *ps_pic_outer = ps_outer->ps_pic_buf;
+            pic_buf_t *ps_pic_inner = ps_inner->ps_pic_buf;
+
+            if(ps_pic_outer->i4_long_term_frame_idx >
+               ps_pic_inner->i4_long_term_frame_idx)
+            {
+                ps_outer->ps_pic_buf = ps_pic_inner;
+                ps_inner->ps_pic_buf = ps_pic_outer;
+            }
+            ps_inner = ps_inner->ps_prev_dpb;
+        }
+        ps_outer = ps_outer->ps_prev_dpb;
+    }
+
+    /**
+     * Pass 2: for frames and complementary field pairs,
+     * ensure first_field_type appears first in the list
+     */
+    for(ps_lead = ps_dpb_mgr->ps_dpb_long_term_head,
+        ps_follow = ps_lead->ps_prev_dpb;
+        ps_follow != NULL;
+        ps_lead = ps_follow, ps_follow = ps_follow->ps_prev_dpb)
+    {
+        pic_buf_t *ps_pic_node1 = ps_lead->ps_pic_buf;
+        pic_buf_t *ps_pic_node2 = ps_follow->ps_pic_buf;
+
+        if(ps_pic_node1->i4_long_term_frame_idx !=
+           ps_pic_node2->i4_long_term_frame_idx)
+            continue;
+
+        ASSERT(ps_pic_node1->i1_field_type != ps_pic_node2->i1_field_type);
+        if(ps_pic_node1->i1_field_type != first_field_type)
+        {
+            ps_lead->ps_pic_buf = ps_pic_node2;
+            ps_follow->ps_pic_buf = ps_pic_node1;
+        }
+    }
+    return 0;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Function to alternate fields.
+ *
+ * @par Description:
+ *  In the ordered list of fields, alternate fields starting with
+ *  first_field_type. Whenever the next node is not of the expected field
+ *  type, the nearest later node of that type is moved in front of it. The
+ *  pass stops as soon as no such node is left.
+ *
+ * @param[in] ps_dpb_mgr
+ *  Pointer to the DPB manager structure
+ *
+ * @param[in] reference_type
+ *  This is used to select between short-term and long-term linked list.
+ *
+ * @param[in] first_field_type
+ *  For complementary fields, required first field
+ *
+ * @returns
+ *  0 always
+ *
+ * @remarks
+ *  Unlike the sort functions this one relinks nodes, so the list head of the
+ *  selected list may change.
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_alternate_ref_fields(dpb_mgr_t *ps_dpb_mgr,
+                                          WORD32 reference_type,
+                                          WORD32 first_field_type)
+{
+    /* Stack sentinel standing in front of the real head; only its link is used */
+    dpb_info_t s_sentinel;
+    dpb_info_t *ps_tail = &s_sentinel;
+    dpb_info_t *ps_cur;
+    WORD32 expected_field = first_field_type;
+
+    if(reference_type == SHORT_TERM_REF)
+        s_sentinel.ps_prev_dpb = ps_dpb_mgr->ps_dpb_short_term_head;
+    else
+        s_sentinel.ps_prev_dpb = ps_dpb_mgr->ps_dpb_long_term_head;
+
+    /* ps_tail is the last node already in place, ps_cur the candidate after it */
+    for(ps_cur = ps_tail->ps_prev_dpb;
+        ps_cur != NULL;
+        ps_cur = ps_tail->ps_prev_dpb)
+    {
+        if(ps_cur->ps_pic_buf->i1_field_type != expected_field)
+        {
+            /* Search forward for the first node of the expected type */
+            dpb_info_t *ps_before = ps_cur;
+            dpb_info_t *ps_match = ps_cur->ps_prev_dpb;
+
+            while((ps_match != NULL) &&
+                  (ps_match->ps_pic_buf->i1_field_type != expected_field))
+            {
+                ps_before = ps_match;
+                ps_match = ps_match->ps_prev_dpb;
+            }
+
+            /* No node of the expected type remains: nothing more to alternate */
+            if(NULL == ps_match)
+                break;
+
+            /* Unlink the match and splice it in right after ps_tail */
+            ps_tail->ps_prev_dpb = ps_match;
+            ps_before->ps_prev_dpb = ps_match->ps_prev_dpb;
+            ps_match->ps_prev_dpb = ps_cur;
+        }
+
+        ps_tail = ps_tail->ps_prev_dpb;
+        expected_field = (ps_tail->ps_pic_buf->i1_field_type == TOP_FIELD) ?
+                        BOTTOM_FIELD : TOP_FIELD;
+    }
+
+    /* The first real node may have changed; publish it as the new head */
+    if(reference_type == SHORT_TERM_REF)
+    {
+        ps_dpb_mgr->ps_dpb_short_term_head = s_sentinel.ps_prev_dpb;
+    }
+    else
+    {
+        ps_dpb_mgr->ps_dpb_long_term_head = s_sentinel.ps_prev_dpb;
+    }
+
+    return 0;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Add a ref field to short-term or long-term linked list.
+ *
+ * @par Description:
+ *  This function adds a ref field to either short-term or long-term linked
+ *  list. It picks up memory for the link from the array of dpb_info in
+ *  dpb_mgr. The field is added to the beginning of the linked list and the
+ *  head is set to the field.
+ *
+ * @param[in] ps_dpb_mgr
+ *  Pointer to the DPB manager structure
+ *
+ * @param[in] ps_pic_buf
+ *  Pic buf structure for the field being added.
+ *
+ * @param[in] reference_type
+ *  This is used to select between short-term and long-term linked list.
+ *
+ * @param[in] frame_num
+ *  frame_num for the field.
+ *
+ * @param[in] long_term_frame_idx
+ *  If the ref being added is long-term, long_term_frame_idx of the field.
+ *  Otherwise invalid (it is stored as -1 for short-term refs).
+ *
+ * @returns
+ *  0 on success. -1 if ps_pic_buf is NULL, if the buffer is already present
+ *  with the same reference type, or if no free dpb_info slot is available.
+ *
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_insert_ref_field(dpb_mgr_t *ps_dpb_mgr,
+                                      pic_buf_t *ps_pic_buf,
+                                      WORD32 reference_type,
+                                      UWORD32 frame_num,
+                                      WORD32 long_term_frame_idx)
+{
+    WORD32 i;
+    dpb_info_t *ps_dpb_info;
+    dpb_info_t *ps_dpb_head;
+
+    /* A NULL picture would compare equal to every free slot in the */
+    /* duplicate check below and then be dereferenced                */
+    if(NULL == ps_pic_buf)
+    {
+        return (-1);
+    }
+
+    ps_dpb_info = ps_dpb_mgr->as_dpb_info;
+
+    /* Return error if buffer is already present in the DPB */
+    for(i = 0; i < MAX_DPB_BUFS; i++)
+    {
+        if( (ps_dpb_info[i].ps_pic_buf == ps_pic_buf)
+                        && (ps_dpb_info[i].ps_pic_buf->i4_used_as_ref == reference_type) )
+        {
+            return (-1);
+        }
+    }
+
+    /* Find an unused DPB location (a slot is free when its ps_pic_buf is NULL) */
+    for(i = 0; i < MAX_DPB_BUFS; i++)
+    {
+        if(NULL == ps_dpb_info[i].ps_pic_buf)
+        {
+            break;
+        }
+    }
+    if(i == MAX_DPB_BUFS)
+    {
+        return (-1);
+    }
+
+    ps_dpb_head = (reference_type == SHORT_TERM_REF)
+                    ?ps_dpb_mgr->ps_dpb_short_term_head
+                    :ps_dpb_mgr->ps_dpb_long_term_head;
+
+    if(reference_type == SHORT_TERM_REF)
+        long_term_frame_idx = -1;
+
+    /* Create DPB info */
+    ps_dpb_info[i].ps_pic_buf = ps_pic_buf;
+    ps_dpb_info[i].ps_prev_dpb = ps_dpb_head;
+    ps_dpb_info[i].ps_pic_buf->i4_used_as_ref = reference_type;
+    ps_dpb_info[i].ps_pic_buf->i4_frame_num = frame_num;
+    ps_dpb_info[i].ps_pic_buf->i4_long_term_frame_idx = long_term_frame_idx;
+
+    /* update the head node of linked list to point to the current picture */
+    if(reference_type == SHORT_TERM_REF)
+    {
+        ps_dpb_mgr->ps_dpb_short_term_head = ps_dpb_info + i;
+
+        /* Increment Short term buffer count */
+        ps_dpb_mgr->u1_num_short_term_ref_bufs++;
+
+    }
+    else
+    {
+        ps_dpb_mgr->ps_dpb_long_term_head = ps_dpb_info + i;
+
+        /* Increment Long term buffer count */
+        ps_dpb_mgr->u1_num_long_term_ref_bufs++;
+    }
+
+    return 0;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Add a ref frame to short-term or long-term linked list.
+ *
+ * @par Description:
+ *  This function adds a ref frame to either short-term or long-term linked
+ *  list. Internally it calls add ref field twice to add top and bottom field.
+ *
+ * @param[in] ps_dpb_mgr
+ *  Pointer to the DPB manager structure
+ *
+ * @param[in] ps_pic_buf
+ *  Pic buf structure of the top field of the frame being added.
+ *
+ * @param[in] reference_type
+ *  This is used to select between short-term and long-term linked list.
+ *
+ * @param[in] frame_num
+ *  frame_num for the frame.
+ *
+ * @param[in] long_term_frame_idx
+ *  If the ref being added is long-term, long_term_frame_idx of the frame.
+ *  Otherwise invalid.
+ *
+ * @returns
+ *  0 on success, -1 if ps_pic_buf is NULL, if its buf id does not index
+ *  as_bottom_field_pics, or if either field could not be inserted.
+ *
+ * @remarks
+ *  If inserting the bottom field fails, the top field inserted just before
+ *  stays in the list.
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_insert_ref_frame(dpb_mgr_t *ps_dpb_mgr,
+                                      pic_buf_t *ps_pic_buf,
+                                      WORD32 reference_type,
+                                      UWORD32 frame_num,
+                                      WORD32 long_term_frame_idx)
+{
+    WORD32 buf_id;
+    pic_buf_t *ps_pic_top;
+    pic_buf_t *ps_pic_bottom;
+    WORD32 ret;
+
+    if(NULL == ps_pic_buf)
+        return (-1);
+
+    /*
+     * For a frame, since the ps_pic_buf passed to this function is that of top field
+     * obtain bottom field using buf_id.
+     */
+    ps_pic_top = ps_pic_buf;
+    buf_id = ps_pic_top->i4_buf_id;
+
+    /* as_bottom_field_pics holds MAX_DPB_BUFS entries; reject anything else */
+    if((buf_id < 0) || (buf_id >= MAX_DPB_BUFS))
+        return (-1);
+
+    ps_pic_bottom = &ps_dpb_mgr->as_bottom_field_pics[buf_id];
+
+    /* Insert top field */
+    ret = ih264_dpb_mgr_insert_ref_field(ps_dpb_mgr,
+                                         ps_pic_top,
+                                         reference_type,
+                                         frame_num,
+                                         long_term_frame_idx);
+
+    if(ret != 0)
+        return ret;
+
+    /* Insert bottom field */
+    return ih264_dpb_mgr_insert_ref_field(ps_dpb_mgr,
+                                          ps_pic_bottom,
+                                          reference_type,
+                                          frame_num,
+                                          long_term_frame_idx);
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Returns the number of ref frames in both the linked list.
+ *
+ * @par Description:
+ *  Returns the count of number of frames, number of complementary field pairs
+ *  and number of unpaired fields. Neighbouring short-term nodes sharing a
+ *  frame_num, and neighbouring long-term nodes sharing a long term frame idx,
+ *  are counted once.
+ *
+ * @param[in] ps_dpb_mgr
+ *  Pointer to the DPB manager structure
+ *
+ * @param[in] curr_frame_num
+ *  frame_num for the field.
+ *
+ * @param[in] max_frame_num
+ *  Maximum frame_num allowed
+ *
+ * @returns
+ *  Sum of the short-term and long-term counts
+ *
+ * @remarks
+ *  Side effect: both lists are re-sorted (short-term by frame_num, long-term
+ *  by long term frame idx, top field first).
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_count_ref_frames(dpb_mgr_t *ps_dpb_mgr,
+                                      WORD32 curr_frame_num,
+                                      WORD32 max_frame_num)
+{
+    WORD32 numShortTerm = 0;
+    WORD32 numLongTerm = 0;
+    dpb_info_t *ps_dpb_node;
+    WORD32 prev_key;
+
+    /*
+     * Compute the number of short-term frames/complementary field pairs/
+     * unpaired fields
+     */
+    if(ps_dpb_mgr->ps_dpb_short_term_head != NULL)
+    {
+        /* Sort the short-term list by frame_num so that both fields of a */
+        /* frame become neighbours                                        */
+        ih264_dpb_mgr_sort_short_term_fields_by_frame_num(ps_dpb_mgr,
+                                                          curr_frame_num,
+                                                          TOP_FIELD,
+                                                          max_frame_num);
+
+        /* Sorting only swaps payloads, so the head is still non-NULL here */
+        ps_dpb_node = ps_dpb_mgr->ps_dpb_short_term_head;
+        prev_key = ps_dpb_node->ps_pic_buf->i4_frame_num;
+        numShortTerm = 1;
+
+        for(ps_dpb_node = ps_dpb_node->ps_prev_dpb;
+            ps_dpb_node != NULL;
+            ps_dpb_node = ps_dpb_node->ps_prev_dpb)
+        {
+            WORD32 frame_num = ps_dpb_node->ps_pic_buf->i4_frame_num;
+
+            if(frame_num != prev_key)
+                numShortTerm++;
+            prev_key = frame_num;
+        }
+    }
+
+    /*
+     * Compute the number of long-term frames/complementary field pairs/
+     * unpaired fields
+     */
+    if(ps_dpb_mgr->ps_dpb_long_term_head != NULL)
+    {
+        ih264_dpb_mgr_sort_long_term_fields_by_frame_idx(ps_dpb_mgr,
+                                                         TOP_FIELD);
+
+        /*
+         * The long-term list is ordered and paired by long term frame idx,
+         * so that is the key that separates one long-term frame from the
+         * next; frame_num is not ordered in this list.
+         */
+        ps_dpb_node = ps_dpb_mgr->ps_dpb_long_term_head;
+        prev_key = ps_dpb_node->ps_pic_buf->i4_long_term_frame_idx;
+        numLongTerm = 1;
+
+        for(ps_dpb_node = ps_dpb_node->ps_prev_dpb;
+            ps_dpb_node != NULL;
+            ps_dpb_node = ps_dpb_node->ps_prev_dpb)
+        {
+            WORD32 frame_idx = ps_dpb_node->ps_pic_buf->i4_long_term_frame_idx;
+
+            if(frame_idx != prev_key)
+                numLongTerm++;
+            prev_key = frame_idx;
+        }
+    }
+    return (numShortTerm + numLongTerm);
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Deletes the ref frame at the end of the linked list.
+ *
+ * @par Description:
+ *  Deletes the ref frame at the end of the linked list. For unpaired fields,
+ *  it deletes just the last node. For frame or complementary field pair, it
+ *  deletes the last two nodes. The matching reference buffer counter is
+ *  reduced by the number of nodes removed (never below zero) and is reset to
+ *  zero when the list becomes empty.
+ *
+ * @param[in] ps_dpb_mgr
+ *  Pointer to the DPB manager structure
+ *
+ * @param[in] reference_type
+ *  This is used to select between short-term and long-term linked list.
+ *
+ * @returns
+ *  0 always
+ *
+ * @remarks
+ *  The last two nodes are treated as one frame when their frame_num match,
+ *  so the list is expected to be sorted by frame_num.
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_delete_ref_frame(dpb_mgr_t *ps_dpb_mgr,
+                                      WORD32 reference_type)
+{
+    dpb_info_t *ps_dpb_node1;
+    dpb_info_t *ps_dpb_node2;
+    dpb_info_t *ps_dpb_node3;
+    dpb_info_t **pps_dpb_head;
+    UWORD8 *pu1_num_ref_bufs;
+
+    /*
+     * Assumption: The nodes sorted for frame num.
+     */
+
+
+    /* Select between short-term and long-term list and its counter. */
+    if(reference_type == SHORT_TERM_REF)
+    {
+        pps_dpb_head = &ps_dpb_mgr->ps_dpb_short_term_head;
+        pu1_num_ref_bufs = &ps_dpb_mgr->u1_num_short_term_ref_bufs;
+    }
+    else
+    {
+        pps_dpb_head = &ps_dpb_mgr->ps_dpb_long_term_head;
+        pu1_num_ref_bufs = &ps_dpb_mgr->u1_num_long_term_ref_bufs;
+    }
+
+    ps_dpb_node1 = *pps_dpb_head;
+    /* If null, no entries in the list. Hence return. */
+    if(ps_dpb_node1 == NULL)
+        return 0;
+
+    /* If only one node in the list, set as unused for reference and return. */
+    if(ps_dpb_node1->ps_prev_dpb == NULL)
+    {
+        /* Set the picture as unused for reference */
+        ps_dpb_node1->ps_pic_buf->i4_used_as_ref = UNUSED_FOR_REF;
+        ps_dpb_node1->ps_pic_buf = NULL;
+
+        /* List is empty now: clear head and buffer count */
+        *pps_dpb_head = NULL;
+        *pu1_num_ref_bufs = 0;
+        return 0;
+    }
+
+    /**
+     * If there are only 2 nodes in the list, set second node as unused for reference.
+     * If the frame_num of second node and first node is same, set first node also as
+     * unused for reference and set the corresponding head to NULL.
+     */
+    ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb;
+    if(ps_dpb_node2->ps_prev_dpb == NULL)
+    {
+        if(ps_dpb_node2->ps_pic_buf->i4_frame_num == ps_dpb_node1->ps_pic_buf->i4_frame_num)
+        {
+            /* Both nodes belong to one frame: the list becomes empty */
+            ps_dpb_node1->ps_pic_buf->i4_used_as_ref = UNUSED_FOR_REF;
+            ps_dpb_node1->ps_pic_buf = NULL;
+            *pps_dpb_head = NULL;
+            *pu1_num_ref_bufs = 0;
+        }
+        else if(*pu1_num_ref_bufs > 0)
+        {
+            /* Only the last node goes away */
+            (*pu1_num_ref_bufs)--;
+        }
+        ps_dpb_node2->ps_pic_buf->i4_used_as_ref = UNUSED_FOR_REF;
+        ps_dpb_node2->ps_pic_buf = NULL;
+        ps_dpb_node1->ps_prev_dpb = NULL;
+        return 0;
+    }
+    /*
+     * If there are more than 2 nodes, run a loop to get the last 3 nodes.
+     */
+    ps_dpb_node3 = ps_dpb_node2->ps_prev_dpb;
+    while(ps_dpb_node3->ps_prev_dpb != NULL)
+    {
+        ps_dpb_node1 = ps_dpb_node2;
+        ps_dpb_node2 = ps_dpb_node3;
+        ps_dpb_node3 = ps_dpb_node3->ps_prev_dpb;
+    }
+    /*
+     * If node 2 and node 3 frame_nums are same, set node 2 also as unused for
+     * reference and del reference from node1.
+     */
+    if(ps_dpb_node2->ps_pic_buf->i4_frame_num == ps_dpb_node3->ps_pic_buf->i4_frame_num)
+    {
+        ps_dpb_node2->ps_pic_buf->i4_used_as_ref = UNUSED_FOR_REF;
+        ps_dpb_node2->ps_pic_buf = NULL;
+        ps_dpb_node1->ps_prev_dpb = NULL;
+
+        if(*pu1_num_ref_bufs > 0)
+            (*pu1_num_ref_bufs)--;
+    }
+    /* Set the third node as unused for reference */
+    ps_dpb_node3->ps_pic_buf->i4_used_as_ref = UNUSED_FOR_REF;
+    ps_dpb_node3->ps_pic_buf = NULL;
+    ps_dpb_node2->ps_prev_dpb = NULL;
+
+    if(*pu1_num_ref_bufs > 0)
+        (*pu1_num_ref_bufs)--;
+
+    return 0;
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Delete long-term ref fields above max frame idx.
+ *
+ * @par Description:
+ *  Deletes all the long-term ref fields having idx greater than max_frame_idx.
+ *  Every deleted field is marked unused for reference, its dpb_info slot is
+ *  freed and the long-term buffer count is reduced.
+ *
+ * @param[in] ps_dpb_mgr
+ *  Pointer to the DPB manager structure
+ *
+ * @param[in] max_frame_idx
+ *  Max long-term frame idx allowed.
+ *
+ * @returns
+ *  0 always
+ *
+ * @remarks
+ *  The list does not need to be sorted; every node is examined.
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_delete_long_ref_fields_max_frame_idx(dpb_mgr_t *ps_dpb_mgr,
+                                                          WORD32 max_frame_idx)
+{
+    /* Address of the link that points at the node under test: the list */
+    /* head first, then the ps_prev_dpb of the last node that was kept   */
+    dpb_info_t **pps_dpb_link = &ps_dpb_mgr->ps_dpb_long_term_head;
+
+    while(*pps_dpb_link != NULL)
+    {
+        dpb_info_t *ps_dpb_node = *pps_dpb_link;
+
+        if(ps_dpb_node->ps_pic_buf->i4_long_term_frame_idx > max_frame_idx)
+        {
+            /* Set the picture as unused for reference and free the slot */
+            ps_dpb_node->ps_pic_buf->i4_used_as_ref = UNUSED_FOR_REF;
+            ps_dpb_node->ps_pic_buf = NULL;
+
+            /*
+             * Unlink the node but do NOT advance: the successor now sits
+             * behind the same link and still has to be tested. Advancing
+             * here would let the second of two consecutive over-limit
+             * fields survive.
+             */
+            *pps_dpb_link = ps_dpb_node->ps_prev_dpb;
+            ps_dpb_node->ps_prev_dpb = NULL;
+
+            if(ps_dpb_mgr->u1_num_long_term_ref_bufs > 0)
+                ps_dpb_mgr->u1_num_long_term_ref_bufs--;
+        }
+        else
+        {
+            /* Node stays; continue with the one after it */
+            pps_dpb_link = &ps_dpb_node->ps_prev_dpb;
+        }
+    }
+
+    /* An empty list holds no long-term reference buffers */
+    if(ps_dpb_mgr->ps_dpb_long_term_head == NULL)
+        ps_dpb_mgr->u1_num_long_term_ref_bufs = 0;
+
+    return 0;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Deletes the short-term with least frame_num
+ *
+ * @par Description:
+ *  Deletes the short-term with least frame_num.
It sorts the
+ *  short-term linked list by frame_num and then calls the function that
+ *  deletes the last frame in the linked list.
+ *
+ * @param[in] ps_dpb_mgr
+ *  Pointer to the DPB manager structure
+ *
+ * @param[in] curr_frame_num
+ *  frame_num of the current pic
+ *
+ * @param[in] max_frame_num
+ *  Maximum frame_num allowed
+ *
+ * @returns
+ *  Return value of ih264_dpb_mgr_delete_ref_frame()
+ *
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_delete_short_ref_frame(dpb_mgr_t *ps_dpb_mgr,
+                                            WORD32 curr_frame_num,
+                                            WORD32 max_frame_num)
+{
+    WORD32 ret;
+
+    /*
+     * Sort the short-term list by frame_num. The result is intentionally
+     * not used: the sort only reports -1 for an empty list, and deleting
+     * from an empty list below is a no-op.
+     */
+    (void)ih264_dpb_mgr_sort_short_term_fields_by_frame_num(ps_dpb_mgr,
+                                                            curr_frame_num,
+                                                            TOP_FIELD,
+                                                            max_frame_num);
+
+    /* Delete the last reference frame or field */
+    ret = ih264_dpb_mgr_delete_ref_frame(ps_dpb_mgr,SHORT_TERM_REF);
+
+    if(ret != 0)
+    {
+        ASSERT(0);
+    }
+
+    return ret;
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Deletes all the ref frames.
+ *
+ * @par Description:
+ *  Deletes all of the ref frames/fields in the short-term and long-term linked
+ *  list.
+ *
+ * @param[in] ps_dpb_mgr
+ *  Pointer to the DPB manager structure
+ *
+ * @returns
+ *  0 always
+ *
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_delete_all_ref_frames(dpb_mgr_t *ps_dpb_mgr)
+{
+    /* Loop over short-term linked list. */
+    while(ps_dpb_mgr->ps_dpb_short_term_head != NULL)
+    {
+        ih264_dpb_mgr_delete_ref_frame(ps_dpb_mgr,SHORT_TERM_REF);
+    }
+
+    /* Loop over long-term linked list. */
+    while(ps_dpb_mgr->ps_dpb_long_term_head != NULL)
+    {
+        ih264_dpb_mgr_delete_ref_frame(ps_dpb_mgr,LONG_TERM_REF);
+    }
+    return 0;
+}
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Resets the DPB manager
+ *
+ * @par Description:
+ *  Walks over every dpb_info slot. For each slot whose picture is still
+ *  marked as used for reference, marks it unused, releases the buffer from
+ *  the buffer manager with BUF_MGR_REF and frees the slot. Finally clears
+ *  both list heads and both reference buffer counts.
+ *
+ * @param[in] ps_dpb_mgr
+ *  Pointer to the DPB manager structure
+ *
+ * @param[in] ps_buf_mgr
+ *  Pointer to buffer manager structure
+ *
+ * @returns
+ *
+ * @remarks
+ *  NOTE(review): the unconditional ASSERT(0) at the top suggests this
+ *  function is not expected to be called yet - confirm before relying on it.
+ *
+ *******************************************************************************
+ */
+void ih264_dpb_mgr_reset(dpb_mgr_t *ps_dpb_mgr, buf_mgr_t *ps_buf_mgr)
+{
+    WORD32 i;
+    dpb_info_t *ps_dpb_info;
+    ASSERT(0);
+
+
+    ps_dpb_info = ps_dpb_mgr->as_dpb_info;
+
+    for(i = 0; i < MAX_DPB_BUFS; i++)
+    {
+        pic_buf_t *ps_pic_buf = ps_dpb_info[i].ps_pic_buf;
+
+        /* Free slots carry a NULL ps_pic_buf and must not be dereferenced */
+        if(NULL == ps_pic_buf)
+            continue;
+
+        if(ps_pic_buf->i4_used_as_ref)
+        {
+            ps_pic_buf->i4_used_as_ref = UNUSED_FOR_REF;
+            ps_dpb_info[i].ps_prev_dpb = NULL;
+            /* Release physical buffer */
+            ih264_buf_mgr_release(ps_buf_mgr, ps_pic_buf->i4_buf_id,
+                                  BUF_MGR_REF);
+
+            ps_dpb_info[i].ps_pic_buf = NULL;
+        }
+    }
+    ps_dpb_mgr->u1_num_short_term_ref_bufs = 0;
+    ps_dpb_mgr->u1_num_long_term_ref_bufs = 0;
+    ps_dpb_mgr->ps_dpb_short_term_head = NULL;
+    ps_dpb_mgr->ps_dpb_long_term_head = NULL;
+
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  deletes all pictures from DPB
+ *
+ * @par Description:
+ *  For every buffer id below u1_disp_bufs whose status in the buffer manager
+ *  is non-zero, releases the buffer with BUF_MGR_REF.
+ *
+ * @param[in] ps_buf_mgr
+ *  Pointer to buffer manager structure
+ *
+ * @param[in] u1_disp_bufs
+ *  Number of buffers to be deleted
+ *
+ * @returns
+ *
+ * @remarks
+ *  NOTE(review): the unconditional ASSERT(0) at the top suggests this
+ *  function is not expected to be called yet - confirm before relying on it.
+ *
+ *******************************************************************************
+ */
+
+void ih264_dpb_mgr_release_pics(buf_mgr_t *ps_buf_mgr, UWORD8 u1_disp_bufs)
+{
+    /* Wider than the UWORD8 bound so that the counter can always reach it */
+    WORD32 i;
+    UWORD32 buf_status;
+    ASSERT(0);
+
+    for(i = 0; i < u1_disp_bufs; i++)
+    {
+        buf_status = ih264_buf_mgr_get_status(ps_buf_mgr, i);
+        if(0 != buf_status)
+        {
+            ih264_buf_mgr_release(ps_buf_mgr, i, BUF_MGR_REF);
+        }
+    }
+}
diff --git a/common/ih264_dpb_mgr.h b/common/ih264_dpb_mgr.h
new file mode 100755
index 0000000..b0cf0fd
--- /dev/null
+++ b/common/ih264_dpb_mgr.h
@@ -0,0 +1,186 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the
Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ *  ih264_dpb_mgr.h
+ *
+ * @brief
+ *  Type definitions and function declarations for decoded picture buffer management
+ *
+ * @author
+ *  Srinivas T
+ *
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+#ifndef _IH264_DPB_MGR_H_
+#define _IH264_DPB_MGR_H_
+
+/* Temporary definitions. Have to be defined later */
+
+#define MAX_DPB_BUFS    (MAX_DPB_SIZE * 4)
+
+#define MARK_ST_PICNUM_AS_NONREF    1
+#define MARK_LT_INDEX_AS_NONREF     2
+#define MARK_ST_PICNUM_AS_LT_INDEX  3
+#define RESET_REF_PICTURES          5
+
+typedef struct dpb_info_t dpb_info_t;
+
+enum
+{
+    INVALID = -1,
+    UNUSED_FOR_REF = 0  ,
+    LONG_TERM_REF       ,
+    SHORT_TERM_REF      ,
+};
+struct dpb_info_t
+{
+    /**
+     * Pointer to picture buffer structure; NULL marks a free entry
+     */
+    pic_buf_t *ps_pic_buf;
+
+    /**
+     * Link to the next entry of the linked list; NULL ends the list
+     */
+    dpb_info_t *ps_prev_dpb;
+
+};
+
+typedef struct
+{
+    /**
+     * Head of the short-term reference linked list (NULL if empty)
+     */
+    dpb_info_t *ps_dpb_short_term_head;
+
+    /**
+     * Head of the long-term reference linked list (NULL if empty)
+     */
+    dpb_info_t *ps_dpb_long_term_head;
+
+    /**
+     * Physical storage for dpbInfo for ref bufs
+     */
+    dpb_info_t as_dpb_info[MAX_DPB_BUFS];
+
+    /**
+     * Array of structures for top field.
+     */
+    pic_buf_t as_top_field_pics[MAX_DPB_BUFS];
+
+    /**
+     * Array of structures for bottom field.
+     */
+    pic_buf_t as_bottom_field_pics[MAX_DPB_BUFS];
+
+    /**
+     * Number of short-term reference buffers
+     */
+    UWORD8 u1_num_short_term_ref_bufs;
+
+    /**
+     * Number of long-term reference buffers
+     */
+    UWORD8 u1_num_long_term_ref_bufs;
+
+    /**
+     * Buffer ID of the current frame
+     */
+    WORD32 i4_cur_frame_buf_id;
+
+} dpb_mgr_t;
+
+void ih264_dpb_mgr_init(dpb_mgr_t *ps_dpb_mgr);
+
+WORD32 ih264_dpb_mgr_insert_ref_frame(dpb_mgr_t *ps_dpb_mgr,
+                                      pic_buf_t *ps_pic_buf,
+                                      WORD32 reference_type,
+                                      UWORD32 frame_num,
+                                      WORD32 long_term_frame_idx);
+
+WORD32 ih264_dpb_mgr_delete_ref_frame(dpb_mgr_t *ps_dpb_mgr,
+                                      WORD32 reference_type);
+
+WORD32 ih264_dpb_mgr_delete_all_ref_frames(dpb_mgr_t *ps_dpb_mgr);
+
+WORD32 ih264_dpb_mgr_count_ref_frames(dpb_mgr_t *ps_dpb_mgr,
+                                      WORD32 curr_frame_num,
+                                      WORD32 max_frame_num);
+
+WORD32 ih264_dpb_mgr_delete_short_ref_frame(dpb_mgr_t *ps_dpb_mgr,
+                                            WORD32 curr_frame_num,
+                                            WORD32 max_frame_num);
+
+WORD32 ih264_dpb_mgr_insert_ref_field(dpb_mgr_t *ps_dpb_mgr,
+                                      pic_buf_t *ps_pic_buf,
+                                      WORD32 reference_type,
+                                      UWORD32 frame_num,
+                                      WORD32 long_term_frame_idx);
+
+WORD32 ih264_dpb_mgr_delete_ref_field(dpb_mgr_t *ps_dpb_mgr,
+                                      WORD32 reference_type);
+
+WORD32 ih264_dpb_mgr_alternate_ref_fields(dpb_mgr_t *ps_dpb_mgr,
+                                          WORD32 reference_type,
+                                          WORD32 first_field_type);
+
+WORD32 ih264_dpb_mgr_sort_short_term_fields_by_frame_num(dpb_mgr_t *ps_dpb_mgr,
+                                                         WORD32 curr_frame_num,
+                                                         WORD32 first_field_type,
+                                                         WORD32 max_frame_num);
+
+WORD32 ih264_dpb_mgr_sort_short_term_fields_by_poc_l0(dpb_mgr_t *ps_dpb_mgr,
+                                                      WORD32 curr_poc,
+                                                      WORD32 first_field_type);
+
+WORD32 ih264_dpb_mgr_sort_short_term_fields_by_poc_l1(dpb_mgr_t *ps_dpb_mgr,
+                                                      WORD32 curr_poc,
+                                                      WORD32 first_field_type);
+
+WORD32 ih264_dpb_mgr_sort_long_term_fields_by_frame_idx(dpb_mgr_t *ps_dpb_mgr,
+                                                        WORD32 first_field_type);
+
+WORD32 ih264_dpb_mgr_delete_long_ref_fields_max_frame_idx(dpb_mgr_t *ps_dpb_mgr,
+                                                          WORD32 max_frame_idx);
+
+void ih264_dpb_mgr_del_ref(dpb_mgr_t *ps_dpb_mgr,
+                           buf_mgr_t *ps_buf_mgr,
+                           WORD32 u4_abs_poc);
+
+pic_buf_t *ih264_dpb_mgr_get_ref_by_nearest_poc(dpb_mgr_t *ps_dpb_mgr,
+                                                WORD32 cur_abs_poc);
+
+pic_buf_t *ih264_dpb_mgr_get_ref_by_poc(dpb_mgr_t *ps_dpb_mgr, WORD32 abs_poc);
+
+pic_buf_t *ih264_dpb_mgr_get_ref_by_poc_lsb(dpb_mgr_t *ps_dpb_mgr,
+                                            WORD32 poc_lsb);
+
+void ih264_dpb_mgr_reset(dpb_mgr_t *ps_dpb_mgr, buf_mgr_t *ps_buf_mgr);
+
+void ih264_dpb_mgr_release_pics(buf_mgr_t *ps_buf_mgr, UWORD8 u1_disp_bufs);
+
+#endif /*  _IH264_DPB_MGR_H_ */
diff --git a/common/ih264_error.h b/common/ih264_error.h
new file mode 100755
index 0000000..ff1662d
--- /dev/null
+++ b/common/ih264_error.h
@@ -0,0 +1,68 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ih264_error.h
+*
+* @brief
+*  Definitions related to error handling for common modules
+*
+* @author
+*  Harish
+*
+* @par List of Functions:
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _IH264_ERROR_H_
+#define _IH264_ERROR_H_
+
+/**
+ * Enumerations for error codes used in the codec.
+ * Not all of these are expected to be returned to the application;
+ * only a select few will be exported.
+ */
+typedef enum
+{
+    /**
+     *  No error
+     */
+    IH264_SUCCESS                                = 0,
+    /**
+     *  First value of the range used for decoder error codes
+     */
+    IH264_DEC_ERROR_START                        = 0x100,
+
+    /**
+     *  First value of the range used for encoder error codes
+     */
+    IH264_ENC_ERROR_START                        = 0x200,
+    /**
+     * Generic failure
+     */
+    IH264_FAIL                                   = 0x7FFFFFFF
+}IH264_ERROR_T;
+
+#endif /* _IH264_ERROR_H_ */
diff --git a/common/ih264_ihadamard_scaling.c b/common/ih264_ihadamard_scaling.c
new file mode 100755
index 0000000..e4729c8
--- /dev/null
+++ b/common/ih264_ihadamard_scaling.c
@@ -0,0 +1,216 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_ihadamard_scaling.c + * + * @brief + * Contains definition of functions for h264 inverse hadamard 4x4 transform and scaling + * + * @author + * Mohit + * + * @par List of Functions: + * - ih264_ihadamard_scaling_4x4() + * + * @remarks + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264_trans_macros.h" +#include "ih264_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" + +/* + ******************************************************************************** + * + * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients + * of a 16x16 intra prediction macroblock, and then performs scaling. + * prediction buffer + * + * @par Description: + * The DC coefficients pass through a 2-stage inverse hadamard transform. + * This inverse transformed content is scaled to based on Qp value. 
+ * + * @param[in] pi2_src + * input 4x4 block of DC coefficients + * + * @param[out] pi2_out + * output 4x4 block + * + * @param[in] pu2_iscal_mat + * pointer to scaling list + * + * @param[in] pu2_weigh_mat + * pointer to weight matrix + * + * @param[in] u4_qp_div_6 + * Floor (qp/6) + * + * @param[in] pi4_tmp + * temporary buffer of size 1*16 + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void ih264_ihadamard_scaling_4x4(WORD16* pi2_src, + WORD16* pi2_out, + const UWORD16 *pu2_iscal_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, + WORD32* pi4_tmp) +{ + WORD32 i; + WORD32 x0, x1, x2, x3, x4, x5, x6, x7; + WORD16* pi2_src_ptr, *pi2_out_ptr; + WORD32* pi4_tmp_ptr; + WORD32 rnd_fact = (u4_qp_div_6 < 6) ? (1 << (5 - u4_qp_div_6)) : 0; + pi4_tmp_ptr = pi4_tmp; + pi2_src_ptr = pi2_src; + pi2_out_ptr = pi2_out; + // Horizontal transform + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + x4 = pi2_src_ptr[0]; + x5 = pi2_src_ptr[1]; + x6 = pi2_src_ptr[2]; + x7 = pi2_src_ptr[3]; + + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + pi4_tmp_ptr[0] = x0 + x1; + pi4_tmp_ptr[1] = x2 + x3; + pi4_tmp_ptr[2] = x0 - x1; + pi4_tmp_ptr[3] = x3 - x2; + + pi4_tmp_ptr += SUB_BLK_WIDTH_4x4; + pi2_src_ptr += SUB_BLK_WIDTH_4x4; + } + pi4_tmp_ptr = pi4_tmp; + // Vertical Transform + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + x4 = pi4_tmp_ptr[0]; + x5 = pi4_tmp_ptr[4]; + x6 = pi4_tmp_ptr[8]; + x7 = pi4_tmp_ptr[12]; + + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + pi4_tmp_ptr[0] = x0 + x1; + pi4_tmp_ptr[4] = x2 + x3; + pi4_tmp_ptr[8] = x0 - x1; + pi4_tmp_ptr[12] = x3 - x2; + + pi4_tmp_ptr++; + } + pi4_tmp_ptr = pi4_tmp; + //Scaling + for(i = 0; i < (SUB_BLK_WIDTH_4x4 * SUB_BLK_WIDTH_4x4); i++) + { + INV_QUANT(pi4_tmp_ptr[i], pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, + rnd_fact, 6); + pi2_out_ptr[i] = pi4_tmp_ptr[i]; + } +} + +void 
ih264_ihadamard_scaling_2x2_uv(WORD16* pi2_src, + WORD16* pi2_out, + const UWORD16 *pu2_iscal_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, + WORD32* pi4_tmp) +{ + WORD32 i4_x0,i4_x1,i4_x2,i4_x3,i4_x4,i4_x5,i4_x6,i4_x7; + WORD32 i4_y0,i4_y1,i4_y2,i4_y3,i4_y4,i4_y5,i4_y6,i4_y7; + + UNUSED(pi4_tmp); + + i4_x4 = pi2_src[0]; + i4_x5 = pi2_src[1]; + i4_x6 = pi2_src[2]; + i4_x7 = pi2_src[3]; + + i4_x0 = i4_x4 + i4_x5; + i4_x1 = i4_x4 - i4_x5; + i4_x2 = i4_x6 + i4_x7; + i4_x3 = i4_x6 - i4_x7; + + i4_x4 = i4_x0+i4_x2; + i4_x5 = i4_x1+i4_x3; + i4_x6 = i4_x0-i4_x2; + i4_x7 = i4_x1-i4_x3; + + INV_QUANT(i4_x4,pu2_iscal_mat[0],pu2_weigh_mat[0],u4_qp_div_6,0,5); + INV_QUANT(i4_x5,pu2_iscal_mat[0],pu2_weigh_mat[0],u4_qp_div_6,0,5); + INV_QUANT(i4_x6,pu2_iscal_mat[0],pu2_weigh_mat[0],u4_qp_div_6,0,5); + INV_QUANT(i4_x7,pu2_iscal_mat[0],pu2_weigh_mat[0],u4_qp_div_6,0,5); + + pi2_out[0] = i4_x4; + pi2_out[1] = i4_x5; + pi2_out[2] = i4_x6; + pi2_out[3] = i4_x7; + + i4_y4 = pi2_src[4]; + i4_y5 = pi2_src[5]; + i4_y6 = pi2_src[6]; + i4_y7 = pi2_src[7]; + + i4_y0 = i4_y4 + i4_y5; + i4_y1 = i4_y4 - i4_y5; + i4_y2 = i4_y6 + i4_y7; + i4_y3 = i4_y6 - i4_y7; + + i4_y4 = i4_y0+i4_y2; + i4_y5 = i4_y1+i4_y3; + i4_y6 = i4_y0-i4_y2; + i4_y7 = i4_y1-i4_y3; + + INV_QUANT(i4_y4,pu2_iscal_mat[0],pu2_weigh_mat[0],u4_qp_div_6,0,5); + INV_QUANT(i4_y5,pu2_iscal_mat[0],pu2_weigh_mat[0],u4_qp_div_6,0,5); + INV_QUANT(i4_y6,pu2_iscal_mat[0],pu2_weigh_mat[0],u4_qp_div_6,0,5); + INV_QUANT(i4_y7,pu2_iscal_mat[0],pu2_weigh_mat[0],u4_qp_div_6,0,5); + + pi2_out[4] = i4_y4; + pi2_out[5] = i4_y5; + pi2_out[6] = i4_y6; + pi2_out[7] = i4_y7; +} diff --git a/common/ih264_inter_pred_filters.c b/common/ih264_inter_pred_filters.c new file mode 100755 index 0000000..7d1e407 --- /dev/null +++ b/common/ih264_inter_pred_filters.c @@ -0,0 +1,1042 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_inter_pred_filters.c + * + * @brief + * Contains function definitions for inter prediction interpolation filters + * + * @author + * Ittiam + * + * @par List of Functions: + * - ih264_inter_pred_luma_copy + * - ih264_interleave_copy + * - ih264_inter_pred_luma_horz + * - ih264_inter_pred_luma_vert + * - ih264_inter_pred_luma_horz_hpel_vert_hpel + * - ih264_inter_pred_luma_horz_qpel + * - ih264_inter_pred_luma_vert_qpel + * - ih264_inter_pred_luma_horz_qpel_vert_qpel + * - ih264_inter_pred_luma_horz_hpel_vert_qpel + * - ih264_inter_pred_luma_horz_qpel_vert_hpel + * - ih264_inter_pred_luma_bilinear + * - ih264_inter_pred_chroma + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_inter_pred_filters.h" + + +/*****************************************************************************/ +/* 
Constant Data variables */ +/*****************************************************************************/ + +/* coefficients for 6 tap filtering*/ +const WORD32 ih264_g_six_tap[3] ={1,-5,20}; + + +/*****************************************************************************/ +/* Function definitions . */ +/*****************************************************************************/ +/** + ******************************************************************************* + * + * @brief + * Interprediction luma function for copy + * + * @par Description: + * Copies the array of width 'wd' and height 'ht' from the location pointed + * by 'src' to the location pointed by 'dst' + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * + * @param[in] ht + * integer height of the array + * + * @param[in] wd + * integer width of the array + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + +void ih264_inter_pred_luma_copy(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + WORD32 row, col; + UNUSED(pu1_tmp); + UNUSED(dydx); + for(row = 0; row < ht; row++) + { + for(col = 0; col < wd; col++) + { + pu1_dst[col] = pu1_src[col]; + } + + pu1_src += src_strd; + pu1_dst += dst_strd; + } +} + +/** + ******************************************************************************* + * + * @brief + * Fucntion for copying to an interleaved destination + * + * @par Description: + * Copies the array of width 'wd' and height 'ht' from the location pointed + * by 'src' to the location pointed by 'dst' + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * 
@param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ht + * integer height of the array + * + * @param[in] wd + * integer width of the array + * + * @returns + * + * @remarks + * The alternate elements of src will be copied to alternate locations in dsr + * Other locations are not touched + * + ******************************************************************************* + */ +void ih264_interleave_copy(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd) +{ + WORD32 row, col; + wd *= 2; + + for(row = 0; row < ht; row++) + { + for(col = 0; col < wd; col+=2) + { + pu1_dst[col] = pu1_src[col]; + } + + pu1_src += src_strd; + pu1_dst += dst_strd; + } +} + +/** + ******************************************************************************* + * + * @brief + * Interprediction luma filter for horizontal input + * + * @par Description: + * Applies a 6 tap horizontal filter .The output is clipped to 8 bits + * sec 8.4.2.2.1 titled "Luma sample interpolation process" + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ht + * integer height of the array + * + * @param[in] wd + * integer width of the array + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264_inter_pred_luma_horz(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + WORD32 row, col; + WORD16 i2_tmp; + UNUSED(pu1_tmp); + UNUSED(dydx); + + for(row = 0; row < ht; row++) + { + for(col = 0; col < wd; col++) + { + i2_tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/ + i2_tmp = ih264_g_six_tap[0] * + 
(pu1_src[col - 2] + pu1_src[col + 3]) + + ih264_g_six_tap[1] * + (pu1_src[col - 1] + pu1_src[col + 2]) + + ih264_g_six_tap[2] * + (pu1_src[col] + pu1_src[col + 1]); + i2_tmp = (i2_tmp + 16) >> 5; + pu1_dst[col] = CLIP_U8(i2_tmp); + } + + pu1_src += src_strd; + pu1_dst += dst_strd; + } + +} + +/** + ******************************************************************************* + * + * @brief + * Interprediction luma filter for vertical input + * + * @par Description: + * Applies a 6 tap vertical filter.The output is clipped to 8 bits + * sec 8.4.2.2.1 titled "Luma sample interpolation process" + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ht + * integer height of the array + * + * @param[in] wd + * integer width of the array + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264_inter_pred_luma_vert(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + WORD32 row, col; + WORD16 i2_tmp; + UNUSED(pu1_tmp); + UNUSED(dydx); + + for(row = 0; row < ht; row++) + { + for(col = 0; col < wd; col++) + { + i2_tmp = 0; /*ih264_g_six_tap[] is the array containing the filter coeffs*/ + i2_tmp = ih264_g_six_tap[0] * + (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd]) + + ih264_g_six_tap[1] * + (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd]) + + ih264_g_six_tap[2] * + (pu1_src[col] + pu1_src[col + 1 * src_strd]); + i2_tmp = (i2_tmp + 16) >> 5; + pu1_dst[col] = CLIP_U8(i2_tmp); + } + pu1_src += src_strd; + pu1_dst += dst_strd; + } +} + +/*! 
+ ************************************************************************** + * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_hpel \endif + * + * \brief + * This function implements a two stage cascaded six tap filter. It + * applies the six tap filter in the horizontal direction on the + * predictor values, followed by applying the same filter in the + * vertical direction on the output of the first stage. The six tap + * filtering operation is described in sec 8.4.2.2.1 titled "Luma sample + * interpolation process" + * + * \param pu1_src: Pointer to the buffer containing the predictor values. + * pu1_src could point to the frame buffer or the predictor buffer. + * \param pu1_dst: Pointer to the destination buffer where the output of + * the six tap filter is stored. + * \param ht: Height of the rectangular pixel grid to be interpolated + * \param wd: Width of the rectangular pixel grid to be interpolated + * \param src_strd: Width of the buffer pointed to by pu1_src. + * \param dst_strd: Width of the destination buffer + * \param pu1_tmp: temporary buffer. + * \param dydx: x and y reference offset for qpel calculations: UNUSED in this function. + * + * \return + * None. + * + * \note + * This function takes the 8 bit predictor values, applies the six tap + * filter in the horizontal direction and outputs the result clipped to + * 8 bit precision. The input is stored in the buffer pointed to by + * pu1_src while the output is stored in the buffer pointed by pu1_dst. + * Both pu1_src and pu1_dst could point to the same buffer i.e. the + * six tap filter could be done in place. 
+ * + ************************************************************************** + */ +void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + WORD32 row, col; + WORD32 tmp; + WORD16* pi2_pred1_temp; + WORD16* pi2_pred1; + UNUSED(dydx); + pi2_pred1_temp = (WORD16*)pu1_tmp; + pi2_pred1_temp += 2; + pi2_pred1 = pi2_pred1_temp; + for(row = 0; row < ht; row++) + { + for(col = -2; col < wd + 3; col++) + { + tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/ + tmp = ih264_g_six_tap[0] * + (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd]) + + ih264_g_six_tap[1] * + (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd]) + + ih264_g_six_tap[2] * + (pu1_src[col] + pu1_src[col + 1 * src_strd]); + pi2_pred1_temp[col] = tmp; + } + pu1_src += src_strd; + pi2_pred1_temp = pi2_pred1_temp + wd + 5; + } + + for(row = 0; row < ht; row++) + { + for(col = 0; col < wd; col++) + { + tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/ + tmp = ih264_g_six_tap[0] * + (pi2_pred1[col - 2] + pi2_pred1[col + 3]) + + ih264_g_six_tap[1] * + (pi2_pred1[col - 1] + pi2_pred1[col + 2]) + + ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1]); + tmp = (tmp + 512) >> 10; + pu1_dst[col] = CLIP_U8(tmp); + } + pi2_pred1 += (wd + 5); + pu1_dst += dst_strd; + } +} + +/*! + ************************************************************************** + * \if Function name : ih264_inter_pred_luma_horz_qpel \endif + * + * \brief + * This routine applies the six tap filter to the predictors in the + * horizontal direction. The six tap filtering operation is described in + * sec 8.4.2.2.1 titled "Luma sample interpolation process" + * + * \param pu1_src: Pointer to the buffer containing the predictor values. + * pu1_src could point to the frame buffer or the predictor buffer. 
+ * \param pu1_dst: Pointer to the destination buffer where the output of + * the six tap filter is stored. + * \param ht: Height of the rectangular pixel grid to be interpolated + * \param wd: Width of the rectangular pixel grid to be interpolated + * \param src_strd: Width of the buffer pointed to by pu1_src. + * \param dst_strd: Width of the destination buffer + * \param pu1_tmp: temporary buffer: UNUSED in this function + * \param dydx: x and y reference offset for qpel calculations. + * + * \return + * None. + * + * \note + * This function takes the 8 bit predictor values, applies the six tap + * filter in the horizontal direction and outputs the result clipped to + * 8 bit precision. The input is stored in the buffer pointed to by + * pu1_src while the output is stored in the buffer pointed by pu1_dst. + * Both pu1_src and pu1_dst could point to the same buffer i.e. the + * six tap filter could be done in place. + * + ************************************************************************** + */ +void ih264_inter_pred_luma_horz_qpel(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + WORD32 row, col; + UWORD8 *pu1_pred1; + WORD32 x_offset = dydx & 0x3; + UNUSED(pu1_tmp); + pu1_pred1 = pu1_src + (x_offset >> 1); + + for(row = 0; row < ht; row++) + { + for(col = 0; col < wd; col++, pu1_src++, pu1_dst++) + { + WORD16 i2_temp; + /* The logic below implements the following equation + i2_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) + + 20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */ + i2_temp = pu1_src[-2] + pu1_src[3] + - (pu1_src[-1] + pu1_src[2]) + + ((pu1_src[0] + pu1_src[1] - pu1_src[-1] - pu1_src[2]) << 2) + + ((pu1_src[0] + pu1_src[1]) << 4); + i2_temp = (i2_temp + 16) >> 5; + i2_temp = CLIP_U8(i2_temp); + *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1; + + pu1_pred1++; + } + pu1_dst += dst_strd - wd; + pu1_src += src_strd - wd; + pu1_pred1 += src_strd - wd; + } 
+} + +/*! + ************************************************************************** + * \if Function name : ih264_inter_pred_luma_vert_qpel \endif + * + * \brief + * This routine applies the six tap filter to the predictors in the + * vertical direction and interpolates them to obtain pixels at quarter vertical + * positions (0, 1/4) and (0, 3/4). The six tap filtering operation is + * described in sec 8.4.2.2.1 titled "Luma sample interpolation process" + * + * \param pu1_src: Pointer to the buffer containing the predictor values. + * pu1_src could point to the frame buffer or the predictor buffer. + * \param pu1_dst: Pointer to the destination buffer where the output of + * the six tap filter is stored. + * \param ht: Height of the rectangular pixel grid to be interpolated + * \param wd: Width of the rectangular pixel grid to be interpolated + * \param src_strd: Width of the buffer pointed to by puc_pred. + * \param dst_strd: Width of the destination buffer + * \param pu1_tmp: temporary buffer: UNUSED in this function + * \param dydx: x and y reference offset for qpel calculations. + * + * \return + * void + * + * \note + * This function takes the 8 bit predictor values, applies the six tap + * filter in the vertical direction and outputs the result clipped to + * 8 bit precision. The input is stored in the buffer pointed to by + * puc_pred while the output is stored in the buffer pointed by puc_dest. + * Both puc_pred and puc_dest could point to the same buffer i.e. the + * six tap filter could be done in place. + * + * \para + * <paragraph> + * ... 
+ ************************************************************************** + */ +void ih264_inter_pred_luma_vert_qpel(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + WORD32 row, col; + WORD32 y_offset = dydx >> 2; + WORD32 off1, off2, off3; + UWORD8 *pu1_pred1; + UNUSED(pu1_tmp); + y_offset = y_offset & 0x3; + + off1 = src_strd; + off2 = src_strd << 1; + off3 = off1 + off2; + + pu1_pred1 = pu1_src + (y_offset >> 1) * src_strd; + + for(row = 0; row < ht; row++) + { + for(col = 0; col < wd; col++, pu1_dst++, pu1_src++, pu1_pred1++) + { + WORD16 i2_temp; + /* The logic below implements the following equation + i16_temp = puc_pred[-2*src_strd] + puc_pred[3*src_strd] - + 5 * (puc_pred[-1*src_strd] + puc_pred[2*src_strd]) + + 20 * (puc_pred[0] + puc_pred[src_strd]); */ + i2_temp = pu1_src[-off2] + pu1_src[off3] + - (pu1_src[-off1] + pu1_src[off2]) + + ((pu1_src[0] + pu1_src[off1] - pu1_src[-off1] - pu1_src[off2]) << 2) + + ((pu1_src[0] + pu1_src[off1]) << 4); + i2_temp = (i2_temp + 16) >> 5; + i2_temp = CLIP_U8(i2_temp); + + *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1; + } + pu1_src += src_strd - wd; + pu1_pred1 += src_strd - wd; + pu1_dst += dst_strd - wd; + } +} + +/*! + ************************************************************************** + * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_qpel \endif + * + * \brief + * This routine applies the six tap filter to the predictors in the + * vertical and horizontal direction and averages them to get pixels at locations + * (1/4,1/4), (1/4, 3/4), (3/4, 1/4) & (3/4, 3/4). The six tap filtering operation + * is described in sec 8.4.2.2.1 titled "Luma sample interpolation process" + * + * \param pu1_src: Pointer to the buffer containing the predictor values. + * pu1_src could point to the frame buffer or the predictor buffer. 
+ * \param pu1_dst: Pointer to the destination buffer where the output of + * the six tap filter is stored. + * \param wd: Width of the rectangular pixel grid to be interpolated + * \param ht: Height of the rectangular pixel grid to be interpolated + * \param src_strd: Width of the buffer pointed to by puc_pred. + * \param dst_strd: Width of the destination buffer + * \param pu1_tmp: temporary buffer, UNUSED in this function + * \param dydx: x and y reference offset for qpel calculations. + * + * \return + * void + * + * \note + * This function takes the 8 bit predictor values, applies the six tap + * filter in the vertical direction and outputs the result clipped to + * 8 bit precision. The input is stored in the buffer pointed to by + * puc_pred while the output is stored in the buffer pointed by puc_dest. + * Both puc_pred and puc_dest could point to the same buffer i.e. the + * six tap filter could be done in place. + * + * \para <title> + * <paragraph> + * ... + ************************************************************************** + */ +void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + WORD32 row, col; + WORD32 x_offset = dydx & 0x3; + WORD32 y_offset = dydx >> 2; + + WORD32 off1, off2, off3; + UWORD8* pu1_pred_vert, *pu1_pred_horz; + UNUSED(pu1_tmp); + y_offset = y_offset & 0x3; + + off1 = src_strd; + off2 = src_strd << 1; + off3 = off1 + off2; + + pu1_pred_horz = pu1_src + (y_offset >> 1) * src_strd; + pu1_pred_vert = pu1_src + (x_offset >> 1); + + for(row = 0; row < ht; row++) + { + for(col = 0; col < wd; + col++, pu1_dst++, pu1_pred_vert++, pu1_pred_horz++) + { + WORD16 i2_temp_vert, i2_temp_horz; + /* The logic below implements the following equation + i2_temp = puc_pred[-2*src_strd] + puc_pred[3*src_strd] - + 5 * (puc_pred[-1*src_strd] + puc_pred[2*src_strd]) + + 20 * (puc_pred[0] + puc_pred[src_strd]); */ + 
i2_temp_vert = pu1_pred_vert[-off2] + pu1_pred_vert[off3] + - (pu1_pred_vert[-off1] + pu1_pred_vert[off2]) + + ((pu1_pred_vert[0] + pu1_pred_vert[off1] + - pu1_pred_vert[-off1] + - pu1_pred_vert[off2]) << 2) + + ((pu1_pred_vert[0] + pu1_pred_vert[off1]) << 4); + i2_temp_vert = (i2_temp_vert + 16) >> 5; + i2_temp_vert = CLIP_U8(i2_temp_vert); + + /* The logic below implements the following equation + i16_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) + + 20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */ + i2_temp_horz = pu1_pred_horz[-2] + pu1_pred_horz[3] + - (pu1_pred_horz[-1] + pu1_pred_horz[2]) + + ((pu1_pred_horz[0] + pu1_pred_horz[1] + - pu1_pred_horz[-1] + - pu1_pred_horz[2]) << 2) + + ((pu1_pred_horz[0] + pu1_pred_horz[1]) << 4); + i2_temp_horz = (i2_temp_horz + 16) >> 5; + i2_temp_horz = CLIP_U8(i2_temp_horz); + *pu1_dst = (i2_temp_vert + i2_temp_horz + 1) >> 1; + } + pu1_pred_vert += (src_strd - wd); + pu1_pred_horz += (src_strd - wd); + pu1_dst += (dst_strd - wd); + } +} + +/*! + ************************************************************************** + * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_hpel \endif + * + * \brief + * This routine applies the six tap filter to the predictors in the vertical + * and horizontal direction to obtain the pixel at (1/2,1/2). It then interpolates + * pixel at (0,1/2) and (1/2,1/2) to obtain pixel at (1/4,1/2). Similarly for (3/4,1/2). + * The six tap filtering operation is described in sec 8.4.2.2.1 titled + * "Luma sample interpolation process" + * + * \param pu1_src: Pointer to the buffer containing the predictor values. + * pu1_src could point to the frame buffer or the predictor buffer. + * \param pu1_dst: Pointer to the destination buffer where the output of + * the six tap filter followed by interpolation is stored. 
+ * \param wd: Width of the rectangular pixel grid to be interpolated + * \param ht: Height of the rectangular pixel grid to be interpolated + * \param src_strd: Width of the buffer pointed to by puc_pred. + * \param dst_strd: Width of the destination buffer + * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter. + * \param dydx: x and y reference offset for qpel calculations. + * + * \return + * void + * + * \note + * This function takes the 8 bit predictor values, applies the six tap + * filter in the vertical direction and outputs the result clipped to + * 8 bit precision. The input is stored in the buffer pointed to by + * puc_pred while the output is stored in the buffer pointed by puc_dest. + * Both puc_pred and puc_dest could point to the same buffer i.e. the + * six tap filter could be done in place. + * + * \para <title> + * <paragraph> + * ... + ************************************************************************** + */ +void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + WORD32 row, col; + WORD32 tmp; + WORD16* pi2_pred1_temp, *pi2_pred1; + UWORD8* pu1_dst_tmp; + WORD32 x_offset = dydx & 0x3; + WORD16 i2_macro; + + pi2_pred1_temp = (WORD16*)pu1_tmp; + pi2_pred1_temp += 2; + pi2_pred1 = pi2_pred1_temp; + pu1_dst_tmp = pu1_dst; + + for(row = 0; row < ht; row++) + { + for(col = -2; col < wd + 3; col++) + { + tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/ + tmp = ih264_g_six_tap[0] * + (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd]) + + ih264_g_six_tap[1] * + (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd]) + + ih264_g_six_tap[2] * + (pu1_src[col] + pu1_src[col + 1 * src_strd]); + pi2_pred1_temp[col] = tmp; + } + + pu1_src += src_strd; + pi2_pred1_temp = pi2_pred1_temp + wd + 5; + } + + pi2_pred1_temp = pi2_pred1; + for(row = 0; row < ht; row++) + { + 
for(col = 0; col < wd; col++) + { + tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/ + tmp = ih264_g_six_tap[0] * + (pi2_pred1[col - 2] + pi2_pred1[col + 3]) + + ih264_g_six_tap[1] * + (pi2_pred1[col - 1] + pi2_pred1[col + 2]) + + ih264_g_six_tap[2] * + (pi2_pred1[col] + pi2_pred1[col + 1]); + tmp = (tmp + 512) >> 10; + pu1_dst[col] = CLIP_U8(tmp); + } + pi2_pred1 += (wd + 5); + pu1_dst += dst_strd; + } + + pu1_dst = pu1_dst_tmp; + pi2_pred1_temp += (x_offset >> 1); + for(row = ht; row != 0; row--) + { + for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++) + { + UWORD8 uc_temp; + /* Clipping the output of the six tap filter obtained from the + first stage of the 2d filter stage */ + *pi2_pred1_temp = (*pi2_pred1_temp + 16) >> 5; + i2_macro = (*pi2_pred1_temp); + uc_temp = CLIP_U8(i2_macro); + *pu1_dst = (*pu1_dst + uc_temp + 1) >> 1; + } + pi2_pred1_temp += 5; + pu1_dst += dst_strd - wd; + } +} + +/*! + ************************************************************************** + * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_qpel \endif + * + * \brief + * This routine applies the six tap filter to the predictors in the horizontal + * and vertical direction to obtain the pixel at (1/2,1/2). It then interpolates + * pixel at (1/2,0) and (1/2,1/2) to obtain pixel at (1/2,1/4). For (1/2,3/4) the pixel at (1/2,1) replaces (1/2,0). + * The six tap filtering operation is described in sec 8.4.2.2.1 titled + * "Luma sample interpolation process" + * + * \param pu1_src: Pointer to the buffer containing the predictor values. + * pu1_src could point to the frame buffer or the predictor buffer. + * \param pu1_dst: Pointer to the destination buffer where the output of + * the six tap filter followed by interpolation is stored. + * \param wd: Width of the rectangular pixel grid to be interpolated + * \param ht: Height of the rectangular pixel grid to be interpolated + * \param src_strd: Stride of the buffer pointed to by pu1_src.
+ * \param dst_strd: Stride of the destination buffer + * \param pu1_tmp: scratch buffer, used as WORD16, for the (ht + 5) x wd output of the 1st 6-tap filter. + * \param dydx: quarter-pel offsets; only the vertical one, (dydx >> 2) & 0x3, is used here. + * + * \return + * void + * + * \note + * This function takes the 8 bit predictor values, applies the six tap + * filter horizontally into pu1_tmp (16 bit, not yet rounded) and then + * vertically on those values, writing the result clipped to 8 bit + * precision to pu1_dst. pu1_dst is then averaged with the rounded and + * clipped horizontal half-pel values, which are written back to pu1_tmp + * in the process. + * + * \par Reads outside the block + * pu1_src is read from 2 rows above to 3 rows below the block and from + * 2 columns left to 3 columns right of it. + ************************************************************************** + */ +void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + + WORD32 row, col; + WORD32 tmp; + WORD32 y_offset = dydx >> 2; + WORD16* pi2_pred1_temp, *pi2_pred1; + UWORD8* pu1_dst_tmp; + //WORD32 x_offset = dydx & 0x3; + WORD16 i2_macro; + + y_offset = y_offset & 0x3; + + pi2_pred1_temp = (WORD16*)pu1_tmp; + pi2_pred1_temp += 2 * wd; + pi2_pred1 = pi2_pred1_temp; + pu1_dst_tmp = pu1_dst; + pu1_src -= 2 * src_strd; + for(row = -2; row < ht + 3; row++) + { + for(col = 0; col < wd; col++) + { + tmp = 0;/* horizontal pass: ih264_g_six_tap[] holds the filter coeffs, result is stored unrounded */ + tmp = ih264_g_six_tap[0] * (pu1_src[col - 2] + pu1_src[col + 3]) + + ih264_g_six_tap[1] * (pu1_src[col - 1] + pu1_src[col + 2]) + + ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1]); + pi2_pred1_temp[col - 2 * wd] = tmp; + } + + pu1_src += src_strd; + pi2_pred1_temp += wd; + } + pi2_pred1_temp = pi2_pred1; + for(row = 0; row < ht; row++) + { + for(col = 0; col < wd; col++) + { + tmp = 0;/* vertical pass over the unrounded horizontal results, rows are wd entries apart */ + tmp = ih264_g_six_tap[0] * (pi2_pred1[col - 2 * wd] + pi2_pred1[col + 3 *
wd]) + + ih264_g_six_tap[1] * (pi2_pred1[col - 1 * wd] + pi2_pred1[col + 2 * wd]) + + ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1 * wd]); + tmp = (tmp + 512) >> 10; + pu1_dst[col] = CLIP_U8(tmp); + } + pi2_pred1 += wd; + pu1_dst += dst_strd; + } + pu1_dst = pu1_dst_tmp; + pi2_pred1_temp += (y_offset >> 1) * wd; /* start at row 0 or row 1 of the horizontal half-pel rows */ + for(row = ht; row != 0; row--) + + { + for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++) + { + UWORD8 u1_temp; + /* Round and clip the horizontal half-pel value kept from the + first stage of the 2d filter, then average it into pu1_dst */ + *pi2_pred1_temp = (*pi2_pred1_temp + 16) >> 5; + i2_macro = (*pi2_pred1_temp); + u1_temp = CLIP_U8(i2_macro); + *pu1_dst = (*pu1_dst + u1_temp + 1) >> 1; + } + //pi16_pred1_temp += wd; + pu1_dst += dst_strd - wd; + } +} + +/** + ******************************************************************************* + * function:ih264_inter_pred_luma_bilinear + * + * @brief + * This routine applies the bilinear filter to the predictors. + * The filtering operation is described in + * sec 8.4.2.2.1 titled "Luma sample interpolation process" + * + * @par Description: +\note + * This function is called to obtain pixels lying at the following + * locations (1/4,1), (3/4,1),(1,1/4), (1,3/4) ,(1/4,1/2), (3/4,1/2),(1/2,1/4), (1/2,3/4),(3/4,1/4),(1/4,3/4),(3/4,3/4) and (1/4,1/4). + * The function averages, with rounding, the co-located values of the two input arrays. + * + * + * @param[in] pu1_src1: + * UWORD8 Pointer to the buffer containing the first input array. + * + * @param[in] pu1_src2: + * UWORD8 Pointer to the buffer containing the second input array. + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination where the output of bilinear filter is stored.
+ * + * @param[in] src_strd1 + * Stride of the first input buffer + * + * @param[in] src_strd2 + * Stride of the second input buffer + * + * @param[in] dst_strd + * integer destination stride of pu1_dst + * + * @param[in] ht + * integer height of the array + * + * @param[in] wd + * integer width of the array + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1, + UWORD8 *pu1_src2, + UWORD8 *pu1_dst, + WORD32 src_strd1, + WORD32 src_strd2, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd) +{ + WORD32 row, col; + WORD16 i2_tmp; + + for(row = 0; row < ht; row++) + { + for(col = 0; col < wd; col++) + { + i2_tmp = pu1_src1[col] + pu1_src2[col]; /* sum of the co-located samples of both inputs */ + i2_tmp = (i2_tmp + 1) >> 1; /* average, halves round up */ + pu1_dst[col] = CLIP_U8(i2_tmp); + } + pu1_src1 += src_strd1; /* every buffer advances by its own stride */ + pu1_src2 += src_strd2; + pu1_dst += dst_strd; + } + +} + +/** + ******************************************************************************* + * + * @brief + * Inter prediction chroma filter (bilinear, weights dx and dy out of 8) + * + * @par Description: + * Applies filtering to chroma samples as mentioned in + * sec 8.4.2.2.2 titled "chroma sample interpolation process" + * + * @param[in] pu1_src + * UWORD8 pointer to the source containing alternate U and V samples + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] dx + * horizontal weight, applied as dx and (8 - dx) (refer sec 8.4.2.2.2 ) + * + * @param[in] dy + * vertical weight, applied as dy and (8 - dy) (refer sec 8.4.2.2.2 ) + * + * @param[in] ht + * integer height of the array + * + * @param[in] wd + * integer width in U/V sample pairs; 2 * wd bytes are written per row + * + * @returns + * + * @remarks + * Reads one row below and one sample pair to the right of the block + * + ******************************************************************************* + */ +void ih264_inter_pred_chroma(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32
src_strd, + WORD32 dst_strd, + WORD32 dx, + WORD32 dy, + WORD32 ht, + WORD32 wd) +{ + WORD32 row, col; + WORD16 i2_tmp; + + for(row = 0; row < ht; row++) + { + for(col = 0; col < 2 * wd; col++) + { + i2_tmp = 0; /* applies equation (8-266) in section 8.4.2.2.2; col + 2 is the next sample of the same component in the interleaved row */ + i2_tmp = (8 - dx) * (8 - dy) * pu1_src[col] + + (dx) * (8 - dy) * pu1_src[col + 2] + + (8 - dx) * (dy) * (pu1_src + src_strd)[col] + + (dx) * (dy) * (pu1_src + src_strd)[col + 2]; + i2_tmp = (i2_tmp + 32) >> 6; /* the four weights sum to 64 */ + pu1_dst[col] = CLIP_U8(i2_tmp); + } + pu1_src += src_strd; + pu1_dst += dst_strd; + } +} diff --git a/common/ih264_inter_pred_filters.h b/common/ih264_inter_pred_filters.h new file mode 100755 index 0000000..c439ab8 --- /dev/null +++ b/common/ih264_inter_pred_filters.h @@ -0,0 +1,241 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ + +/** + ******************************************************************************* + * @file + * ih264_inter_pred_filters.h + * + * @brief + * Declarations of functions used for inter prediction + * + * @author + * Ittiam + * + * @par List of Functions: + * -ih264_inter_pred_luma_copy + * -ih264_interleave_copy + * -ih264_inter_pred_luma_horz + * -ih264_inter_pred_luma_vert + * -ih264_inter_pred_luma_horz_hpel_vert_hpel + * -ih264_inter_pred_luma_vert_qpel + * -ih264_inter_pred_luma_horz_qpel + * -ih264_inter_pred_luma_horz_qpel_vert_qpel + * -ih264_inter_pred_luma_horz_qpel_vert_hpel + * -ih264_inter_pred_luma_horz_hpel_vert_qpel + * -ih264_inter_pred_luma_bilinear + * -ih264_inter_pred_chroma + * -ih264_inter_pred_luma_copy_a9q + * -ih264_interleave_copy_a9 + * -ih264_inter_pred_luma_horz_a9q + * -ih264_inter_pred_luma_vert_a9q + * -ih264_inter_pred_luma_bilinear_a9q + * -ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q + * -ih264_inter_pred_luma_horz_qpel_a9q + * -ih264_inter_pred_luma_vert_qpel_a9q + * -ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q + * -ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q + * -ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q + * -ih264_inter_pred_chroma_a9q + * -ih264_inter_pred_luma_copy_av8 + * -ih264_interleave_copy_av8 + * -ih264_inter_pred_luma_horz_av8 + * -ih264_inter_pred_luma_vert_av8 + * -ih264_inter_pred_luma_bilinear_av8 + * -ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 + * -ih264_inter_pred_luma_horz_qpel_av8 + * -ih264_inter_pred_luma_vert_qpel_av8 + * -ih264_inter_pred_luma_horz_qpel_vert_qpel_av8 + * -ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 + * -ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 + * -ih264_inter_pred_chroma_av8 + * -ih264_inter_pred_chroma_dx_zero_av8 + * -ih264_inter_pred_chroma_dy_zero_av8 + * -ih264_inter_pred_luma_copy_ssse3 + * -ih264_inter_pred_luma_copy_ssse3 + * -ih264_inter_pred_luma_horz_ssse3 + * -ih264_inter_pred_luma_vert_ssse3 + * -ih264_inter_pred_luma_bilinear_ssse3 
+ * -ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3 + * -ih264_inter_pred_luma_horz_qpel_ssse3 + * -ih264_inter_pred_luma_vert_qpel_ssse3 + * -ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3 + * -ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3 + * -ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3 + * -ih264_inter_pred_chroma_ssse3 + * + * @remarks + * None + * + ******************************************************************************* + */ + +#ifndef _IH264_INTER_PRED_H_ +#define _IH264_INTER_PRED_H_ + +/*****************************************************************************/ +/* Constant Data variables */ +/*****************************************************************************/ + +extern const WORD32 ih264_g_six_tap[3];/* coefficients for 6 tap filtering*/ + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +typedef void ih264_inter_pred_luma_ft(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx); + +typedef void ih264_interleave_copy_ft(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd); + +typedef void ih264_inter_pred_luma_bilinear_ft(UWORD8 *pu1_src1, + UWORD8 *pu1_src2, + UWORD8 *pu1_dst, + WORD32 src_strd1, + WORD32 src_strd2, + WORD32 dst_strd, + WORD32 height, + WORD32 width); + +typedef void ih264_inter_pred_chroma_ft(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 dx, + WORD32 dy, + WORD32 ht, + WORD32 wd); + +/* No NEON Declarations */ + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy; + +ih264_interleave_copy_ft ih264_interleave_copy; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel; + 
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel; + +ih264_inter_pred_luma_bilinear_ft ih264_inter_pred_luma_bilinear; + +ih264_inter_pred_chroma_ft ih264_inter_pred_chroma; + +/* A9 NEON Declarations */ +ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_a9q; + +ih264_interleave_copy_ft ih264_interleave_copy_a9; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_a9q; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_a9q; + +ih264_inter_pred_luma_bilinear_ft ih264_inter_pred_luma_bilinear_a9q; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_a9q; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_a9q; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q; + +ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_a9q; + +/* AV8 NEON Declarations */ +ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_av8; + +ih264_interleave_copy_ft ih264_interleave_copy_av8; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_av8; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_av8; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_av8; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_av8; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_av8; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_av8; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_av8; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_av8; + 
+ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_av8; + +ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_dx_zero_av8; + +ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_dy_zero_av8; + + +/* SSSE3 Intrinsic Declarations */ +ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_ssse3; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_ssse3; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_ssse3; + +ih264_inter_pred_luma_bilinear_ft ih264_inter_pred_luma_bilinear_ssse3; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_ssse3; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_ssse3; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3; + +ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3; + +ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_ssse3; + +#endif + +/** Nothing past this point */ diff --git a/common/ih264_intra_pred_filters.h b/common/ih264_intra_pred_filters.h new file mode 100755 index 0000000..caf6b33 --- /dev/null +++ b/common/ih264_intra_pred_filters.h @@ -0,0 +1,331 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_intra_pred_filters.h + * + * @brief + * Declarations of functions used for intra prediction + * + * @author + * Ittiam + * + * @remarks + * None + * + ******************************************************************************* + */ + +#ifndef IH264_INTRA_PRED_FILTERS_H_ + +#define IH264_INTRA_PRED_FILTERS_H_ + +/*****************************************************************************/ +/* Macro Expansion */ +/*****************************************************************************/ + +/*! Filter (1,2,1) i.e rounded (a + 2b + c) / 4; arguments are parenthesised so that expressions can be passed */ +#define FILT121(a,b,c) (((a) + ((b)<<1) + (c) + 2)>>2) +/*! Filter (1,1) i.e rounded (a + b) / 2; arguments are parenthesised so that expressions can be passed */ +#define FILT11(a,b) (((a) + (b) + 1)>>1) +/*****************************************************************************/ +/* Global Variables */ +/*****************************************************************************/ + +/* Global variables used only in assembly files*/ +extern const WORD8 ih264_gai1_intrapred_luma_plane_coeffs[]; +extern const WORD8 ih264_gai1_intrapred_chroma_plane_coeffs1[]; +extern const WORD8 ih264_gai1_intrapred_chroma_plane_coeffs2[]; +extern const WORD8 ih264_gai1_intrapred_luma_8x8_horz_u[]; + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + + +typedef void ih264_intra_pred_ref_filtering_ft(UWORD8 *pu1_left, + UWORD8 *pu1_topleft, + UWORD8 *pu1_top, + UWORD8 *pu1_dst, + WORD32 left_strd, + WORD32 ngbr_avail); + +typedef void ih264_intra_pred_luma_ft(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail); + +/* No Neon Definitions */ + +/* Luma 4x4 Intra
pred filters */ + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u; + +/* Luma 8x8 Intra pred filters */ + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u; + +/* Luma 16x16 Intra pred filters */ + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane; + +/* Chroma 8x8 Intra pred filters */ + +typedef ih264_intra_pred_luma_ft ih264_intra_pred_chroma_ft; + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc; + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz; + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert; + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane; + + +ih264_intra_pred_ref_filtering_ft 
ih264_intra_pred_luma_8x8_mode_ref_filtering; + +/* A9 Definition */ + +/* Luma 4x4 Intra pred filters */ + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_a9q; + +/* Luma 8x8 Intra pred filters */ + +ih264_intra_pred_ref_filtering_ft ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_a9q; + +/* Luma 16x16 Intra pred filters */ + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_a9q; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_a9q; + +/* Chroma 8x8 Intra pred filters */ + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc_a9q; + +ih264_intra_pred_chroma_ft 
ih264_intra_pred_chroma_8x8_mode_horz_a9q; + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_a9q; + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_a9q; + +/* X86 Intrinsic Definitions */ + +/* Luma 4x4 Intra pred filters */ + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_ssse3; + +/* Luma 8x8 Intra pred filters */ + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_ssse3; + +/* Luma 16x16 Intra pred filters */ + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_ssse3; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_ssse3; + +/* Chroma 8x8 Intra pred filters */ + +ih264_intra_pred_chroma_ft 
ih264_intra_pred_chroma_8x8_mode_dc_ssse3; + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz_ssse3; + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_ssse3; + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_ssse3; + +/* AV8 Definition */ + +/* Luma 4x4 Intra pred filters */ +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_av8; + +/* Luma 8x8 Intra pred filters */ + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_av8; + +/* Luma 16x16 Intra pred filters */ + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_av8; + +ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_av8; + +/* Chroma 8x8 Intra pred filters */ + 
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc_av8; + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz_av8; + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_av8; + +ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_av8; + +#endif /* IH264_INTRA_PRED_FILTERS_H_ */ diff --git a/common/ih264_iquant_itrans_recon.c b/common/ih264_iquant_itrans_recon.c new file mode 100755 index 0000000..3c14046 --- /dev/null +++ b/common/ih264_iquant_itrans_recon.c @@ -0,0 +1,873 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_iquant_itrans_recon.c + * + * @brief + * Contains definitions of functions for h264 inverse quantization, inverse transformation and reconstruction + * + * @author + * Ittiam + * + * @par List of Functions: + * - ih264_iquant_itrans_recon_4x4() + * - ih264_iquant_itrans_recon_8x8() + * - ih264_iquant_itrans_recon_4x4_dc() + * - ih264_iquant_itrans_recon_8x8_dc() + * - ih264_iquant_itrans_recon_chroma_4x4() + * - ih264_iquant_itrans_recon_chroma_4x4_dc() + * + * @remarks + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264_trans_macros.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" + +/** + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized residue and + * prediction buffer + * + * @par Description: + * The quantized residue is first inverse quantized, then inverse transformed.
+ * This inverse transformed content is added to the prediction buffer to recon- + * struct the end output + * + * @param[in] pi2_src + * quantized 4x4 block + * + * @param[in] pu1_pred + * prediction 4x4 block + * + * @param[out] pu1_out + * reconstructed 4x4 block + * + * @param[in] pred_strd + * Prediction buffer stride + * + * @param[in] out_strd + * recon buffer Stride + * + * @param[in] pu2_iscal_mat + * pointer to inverse scale matrix, 16 entries (passed to INV_QUANT) + * + * @param[in] pu2_weigh_mat + * pointer to weighting matrix, 16 entries (passed to INV_QUANT) + * + * @param[in] u4_qp_div_6 + * Floor (qp/6) + * + * @param[in] pi2_tmp + * temporary buffer for the 16 values produced by the horizontal pass + * + * @param[in] iq_start_idx + * when 1, the DC term is taken from pi2_dc_ld_addr[0] instead of pi2_src[0] + * + * @returns none + * + * @remarks pi2_dc_ld_addr is dereferenced only when iq_start_idx is 1 + * + ******************************************************************************* + */ +void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscal_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, + WORD16 *pi2_tmp, + WORD32 iq_start_idx, + WORD16 *pi2_dc_ld_addr +) +{ + WORD16 *pi2_src_ptr = pi2_src; + WORD16 *pi2_tmp_ptr = pi2_tmp; + UWORD8 *pu1_pred_ptr = pu1_pred; + UWORD8 *pu1_out_ptr = pu1_out; + WORD16 x0, x1, x2, x3, i; + WORD32 q0, q1, q2, q3; + WORD16 i_macro; + WORD16 rnd_fact = (u4_qp_div_6 < 4) ?
1 << (3 - u4_qp_div_6) : 0; + + /* inverse quant */ + /* horizontal inverse transform, one row of 4 coefficients per iteration */ + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + q0 = pi2_src_ptr[0]; + INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, + 4); + if (i==0 && iq_start_idx == 1) + q0 = pi2_dc_ld_addr[0]; // Restoring dc value for intra case + + q2 = pi2_src_ptr[2]; + INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, + 4); + + x0 = q0 + q2; + x1 = q0 - q2; + + q1 = pi2_src_ptr[1]; + INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, + 4); + + q3 = pi2_src_ptr[3]; + INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, + 4); + + x2 = (q1 >> 1) - q3; + x3 = q1 + (q3 >> 1); + + pi2_tmp_ptr[0] = x0 + x3; + pi2_tmp_ptr[1] = x1 + x2; + pi2_tmp_ptr[2] = x1 - x2; + pi2_tmp_ptr[3] = x0 - x3; + + pi2_src_ptr += SUB_BLK_WIDTH_4x4; + pi2_tmp_ptr += SUB_BLK_WIDTH_4x4; + pu2_iscal_mat += SUB_BLK_WIDTH_4x4; + pu2_weigh_mat += SUB_BLK_WIDTH_4x4; + } + + /* vertical inverse transform, one column per iteration (tmp rows are 4 entries apart) */ + pi2_tmp_ptr = pi2_tmp; + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + pu1_pred_ptr = pu1_pred; + pu1_out = pu1_out_ptr; + + x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]); + x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]); + x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12]; + x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1); + + /* round the residue, add the prediction and clip to 8 bit, for each of the 4 rows */ + i_macro = x0 + x3; + i_macro = ((i_macro + 32) >> 6); + i_macro += *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = x1 + x2; + i_macro = ((i_macro + 32) >> 6); + i_macro += *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = x1 - x2; + i_macro = ((i_macro + 32) >> 6); + i_macro += *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = x0 - x3; + i_macro = ((i_macro + 32) >> 6); + i_macro += *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + +
pi2_tmp_ptr++; + pu1_out_ptr++; + pu1_pred++; + } + +} +/* DC-only variant with the same interface as ih264_iquant_itrans_recon_4x4: every pixel of the 4x4 block gets pred + ((q0 + 32) >> 6), clipped to 8 bit; pi2_tmp is unused */ +void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscal_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, + WORD16 *pi2_tmp, + WORD32 iq_start_idx, + WORD16 *pi2_dc_ld_addr) +{ + UWORD8 *pu1_pred_ptr = pu1_pred; + UWORD8 *pu1_out_ptr = pu1_out; + WORD32 q0; + WORD16 x, i_macro, i; + WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0; + UNUSED(pi2_tmp); + + if (iq_start_idx == 0) + { + q0 = pi2_src[0]; + INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4); + } + else + { + q0 = pi2_dc_ld_addr[0]; // Restoring dc value for intra case3 + } + i_macro = ((q0 + 32) >> 6); + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + pu1_pred_ptr = pu1_pred; + pu1_out = pu1_out_ptr; + + /* add the rounded DC residue to the 4 predictions of this column and clip */ + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + + pu1_out_ptr++; + pu1_pred++; + } +} + +/** + ******************************************************************************* + * + * @brief + * This function performs inverse quant and Inverse transform type Ci4 for 8x8 block + * + * @par Description: + * Performs inverse transform Ci8 and adds the residue to get the + * reconstructed block + * + * @param[in] pi2_src + * Input 8x8coefficients + * + * @param[in] pu1_pred + * Prediction 8x8 block + * + * @param[out] pu1_recon + * Output 8x8 block + * + * @param[in] q_div + * QP/6 + * + * @param[in] q_rem + * QP%6 + * + * @param[in] q_lev + * Quantizer level + * + * @param[in] src_strd + * Input stride + * + * @param[in] pred_strd, + *
Prediction stride + * + * @param[in] out_strd + * Output Stride + * + * @param[in] pi4_tmp + * temporary buffer of size 1*16 we dont need a bigger blcok since we reuse + * the tmp for each block + * + * @param[in] pu4_iquant_mat + * Pointer to the inverse quantization matrix + * + * @returns Void + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscale_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 qp_div, + WORD16 *pi2_tmp, + WORD32 iq_start_idx, + WORD16 *pi2_dc_ld_addr +) +{ + WORD32 i; + WORD16 *pi2_tmp_ptr = pi2_tmp; + UWORD8 *pu1_pred_ptr = pu1_pred; + UWORD8 *pu1_out_ptr = pu1_out; + WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7; + WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7; + WORD16 i_macro; + WORD32 q; + WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0; + UNUSED(iq_start_idx); + UNUSED(pi2_dc_ld_addr); + /*************************************************************/ + /* De quantization of coefficients. Will be replaced by SIMD */ + /* operations on platform. 
Note : DC coeff is not scaled */ + /*************************************************************/ + for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++) + { + q = pi2_src[i]; + INV_QUANT(q, pu2_iscale_mat[i], pu2_weigh_mat[i], qp_div, rnd_fact, 6); + pi2_tmp_ptr[i] = q; + } + /* Perform Inverse transform */ + /*--------------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*--------------------------------------------------------------------*/ + for(i = 0; i < SUB_BLK_WIDTH_8x8; i++) + { + /*------------------------------------------------------------------*/ + /* y0 = w0 + w4 */ + /* y1 = -w3 + w5 - w7 - (w7 >> 1) */ + /* y2 = w0 - w4 */ + /* y3 = w1 + w7 - w3 - (w3 >> 1) */ + /* y4 = (w2 >> 1) - w6 */ + /* y5 = -w1 + w7 + w5 + (w5 >> 1) */ + /* y6 = w2 + (w6 >> 1) */ + /* y7 = w3 + w5 + w1 + (w1 >> 1) */ + /*------------------------------------------------------------------*/ + i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4] ); + + i_y1 = ((WORD32)(-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7] + - (pi2_tmp_ptr[7] >> 1)); + + i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4] ); + + i_y3 = ((WORD32)pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3] + - (pi2_tmp_ptr[3] >> 1)); + + i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6] ); + + i_y5 = ((WORD32)(-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5] + + (pi2_tmp_ptr[5] >> 1)); + + i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1)); + + i_y7 = ((WORD32)pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1] + + (pi2_tmp_ptr[1] >> 1)); + + /*------------------------------------------------------------------*/ + /* z0 = y0 + y6 */ + /* z1 = y1 + (y7 >> 2) */ + /* z2 = y2 + y4 */ + /* z3 = y3 + (y5 >> 2) */ + /* z4 = y2 - y4 */ + /* z5 = (y3 >> 2) - y5 */ + /* z6 = y0 - y6 */ + /* z7 = y7 - (y1 >> 2) */ + /*------------------------------------------------------------------*/ + i_z0 = i_y0 + i_y6; + i_z1 = i_y1 + (i_y7 >> 2); + i_z2 = i_y2 + i_y4; + i_z3 = i_y3 + (i_y5 >> 2); + i_z4 
= i_y2 - i_y4; + i_z5 = (i_y3 >> 2) - i_y5; + i_z6 = i_y0 - i_y6; + i_z7 = i_y7 - (i_y1 >> 2); + + /*------------------------------------------------------------------*/ + /* x0 = z0 + z7 */ + /* x1 = z2 + z5 */ + /* x2 = z4 + z3 */ + /* x3 = z6 + z1 */ + /* x4 = z6 - z1 */ + /* x5 = z4 - z3 */ + /* x6 = z2 - z5 */ + /* x7 = z0 - z7 */ + /*------------------------------------------------------------------*/ + pi2_tmp_ptr[0] = i_z0 + i_z7; + pi2_tmp_ptr[1] = i_z2 + i_z5; + pi2_tmp_ptr[2] = i_z4 + i_z3; + pi2_tmp_ptr[3] = i_z6 + i_z1; + pi2_tmp_ptr[4] = i_z6 - i_z1; + pi2_tmp_ptr[5] = i_z4 - i_z3; + pi2_tmp_ptr[6] = i_z2 - i_z5; + pi2_tmp_ptr[7] = i_z0 - i_z7; + + /* move to the next row */ + //pi2_src_ptr += SUB_BLK_WIDTH_8x8; + pi2_tmp_ptr += SUB_BLK_WIDTH_8x8; + } + /*--------------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to reconstructed frame buffer */ + /* [Prediction buffer itself in this case] */ + /*--------------------------------------------------------------------*/ + + pi2_tmp_ptr = pi2_tmp; + for(i = 0; i < SUB_BLK_WIDTH_8x8; i++) + { + pu1_pred_ptr = pu1_pred; + pu1_out = pu1_out_ptr; + /*------------------------------------------------------------------*/ + /* y0j = w0j + w4j */ + /* y1j = -w3j + w5j -w7j -(w7j >> 1) */ + /* y2j = w0j -w4j */ + /* y3j = w1j + w7j -w3j -(w3j >> 1) */ + /* y4j = ( w2j >> 1 ) -w6j */ + /* y5j = -w1j + w7j + w5j + (w5j >> 1) */ + /* y6j = w2j + ( w6j >> 1 ) */ + /* y7j = w3j + w5j + w1j + (w1j >> 1) */ + /*------------------------------------------------------------------*/ + i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32]; + + i_y1 = (WORD32)(-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56] + - (pi2_tmp_ptr[56] >> 1); + + i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32]; + + i_y3 = (WORD32)pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24] + - (pi2_tmp_ptr[24] >> 1); + + i_y4 = (pi2_tmp_ptr[16] >> 1) - 
pi2_tmp_ptr[48]; + + i_y5 = (WORD32)(-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40] + + (pi2_tmp_ptr[40] >> 1); + + i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1); + + i_y7 = (WORD32)pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8] + + (pi2_tmp_ptr[8] >> 1); + + /*------------------------------------------------------------------*/ + /* z0j = y0j + y6j */ + /* z1j = y1j + (y7j >> 2) */ + /* z2j = y2j + y4j */ + /* z3j = y3j + (y5j >> 2) */ + /* z4j = y2j -y4j */ + /* z5j = (y3j >> 2) -y5j */ + /* z6j = y0j -y6j */ + /* z7j = y7j -(y1j >> 2) */ + /*------------------------------------------------------------------*/ + i_z0 = i_y0 + i_y6; + i_z1 = i_y1 + (i_y7 >> 2); + i_z2 = i_y2 + i_y4; + i_z3 = i_y3 + (i_y5 >> 2); + i_z4 = i_y2 - i_y4; + i_z5 = (i_y3 >> 2) - i_y5; + i_z6 = i_y0 - i_y6; + i_z7 = i_y7 - (i_y1 >> 2); + + /*------------------------------------------------------------------*/ + /* x0j = z0j + z7j */ + /* x1j = z2j + z5j */ + /* x2j = z4j + z3j */ + /* x3j = z6j + z1j */ + /* x4j = z6j -z1j */ + /* x5j = z4j -z3j */ + /* x6j = z2j -z5j */ + /* x7j = z0j -z7j */ + /*------------------------------------------------------------------*/ + i_macro = ((i_z0 + i_z7 + 32) >> 6) + *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + /* Change uc_recBuffer to Point to next element in the same column*/ + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = ((i_z2 + i_z5 + 32) >> 6) + *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = ((i_z4 + i_z3 + 32) >> 6) + *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = ((i_z6 + i_z1 + 32) >> 6) + *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = ((i_z6 - i_z1 + 32) >> 6) + *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = ((i_z4 - i_z3 + 32) >> 6) + *pu1_pred_ptr; + 
*pu1_out = CLIP_U8(i_macro); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = ((i_z2 - i_z5 + 32) >> 6) + *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = ((i_z0 - i_z7 + 32) >> 6) + *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + + pi2_tmp_ptr++; + pu1_out_ptr++; + pu1_pred++; + } +} + +void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscale_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 qp_div, + WORD16 *pi2_tmp, + WORD32 iq_start_idx, + WORD16 *pi2_dc_ld_addr) +{ + UWORD8 *pu1_pred_ptr = pu1_pred; + UWORD8 *pu1_out_ptr = pu1_out; + WORD16 x, i, i_macro; + WORD32 q; + WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0; + UNUSED(pi2_tmp); + UNUSED(iq_start_idx); + UNUSED(pi2_dc_ld_addr); + /*************************************************************/ + /* Dequantization of coefficients. Will be replaced by SIMD */ + /* operations on platform. 
Note : DC coeff is not scaled */ + /*************************************************************/ + q = pi2_src[0]; + INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6); + i_macro = (q + 32) >> 6; + /* Perform Inverse transform */ + /*--------------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*--------------------------------------------------------------------*/ + /*--------------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to reconstructed frame buffer */ + /* [Prediction buffer itself in this case] */ + /*--------------------------------------------------------------------*/ + for(i = 0; i < SUB_BLK_WIDTH_8x8; i++) + { + pu1_pred_ptr = pu1_pred; + pu1_out = pu1_out_ptr; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + /* Change uc_recBuffer to Point to next element in the same column*/ + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + + pu1_out_ptr++; + pu1_pred++; + } +} + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized resiude and 
+ * prediction buffer + * + * @par Description: + * The quantized residue is first inverse quantized, then inverse transformed. + * This inverse transformed content is added to the prediction buffer to recon- + * struct the end output + * + * @param[in] pi2_src + * quantized 4x4 block + * + * @param[in] pu1_pred + * prediction 4x4 block + * + * @param[out] pu1_out + * reconstructed 4x4 block + * + * @param[in] src_strd + * quantization buffer stride + * + * @param[in] pred_strd, + * Prediction buffer stride + * + * @param[in] out_strd + * recon buffer Stride + * + * @param[in] pu2_scaling_list + * pointer to scaling list + * + * @param[in] pu2_norm_adjust + * pointer to inverse scale matrix + * + * @param[in] u4_qp_div_6 + * Floor (qp/6) + * + * @param[in] pi4_tmp + * temporary buffer of size 1*16 + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscal_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, + WORD16 *pi2_tmp, + WORD16 *pi2_dc_src) +{ + WORD16 *pi2_src_ptr = pi2_src; + WORD16 *pi2_tmp_ptr = pi2_tmp; + UWORD8 *pu1_pred_ptr = pu1_pred; + UWORD8 *pu1_out_ptr = pu1_out; + WORD16 x0, x1, x2, x3, i; + WORD32 q0, q1, q2, q3; + WORD16 i_macro; + WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 
1 << (3 - u4_qp_div_6) : 0; + + /* inverse quant */ + /*horizontal inverse transform */ + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + if(i==0) + { + q0 = pi2_dc_src[0]; + } + else + { + q0 = pi2_src_ptr[0]; + INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4); + } + + q2 = pi2_src_ptr[2]; + INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, + 4); + + x0 = q0 + q2; + x1 = q0 - q2; + + q1 = pi2_src_ptr[1]; + INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, + 4); + + q3 = pi2_src_ptr[3]; + INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, + 4); + + x2 = (q1 >> 1) - q3; + x3 = q1 + (q3 >> 1); + + pi2_tmp_ptr[0] = x0 + x3; + pi2_tmp_ptr[1] = x1 + x2; + pi2_tmp_ptr[2] = x1 - x2; + pi2_tmp_ptr[3] = x0 - x3; + + pi2_src_ptr += SUB_BLK_WIDTH_4x4; + pi2_tmp_ptr += SUB_BLK_WIDTH_4x4; + pu2_iscal_mat += SUB_BLK_WIDTH_4x4; + pu2_weigh_mat += SUB_BLK_WIDTH_4x4; + } + + /* vertical inverse transform */ + pi2_tmp_ptr = pi2_tmp; + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + pu1_pred_ptr = pu1_pred; + pu1_out = pu1_out_ptr; + + x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]); + x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]); + x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12]; + x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1); + + /* inverse prediction */ + i_macro = x0 + x3; + i_macro = ((i_macro + 32) >> 6); + i_macro += *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = x1 + x2; + i_macro = ((i_macro + 32) >> 6); + i_macro += *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = x1 - x2; + i_macro = ((i_macro + 32) >> 6); + i_macro += *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + i_macro = x0 - x3; + i_macro = ((i_macro + 32) >> 6); + i_macro += *pu1_pred_ptr; + *pu1_out = CLIP_U8(i_macro); + + pi2_tmp_ptr++; + pu1_out_ptr+= 2; 
//Interleaved store for output + pu1_pred+= 2; //Interleaved load for pred buffer + } +} + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized resiude and + * prediction buffer if only dc value is present for residue + * + * @par Description: + * The quantized residue is first inverse quantized, + * This inverse quantized content is added to the prediction buffer to recon- + * struct the end output + * + * @param[in] pi2_src + * quantized dc coefficient + * + * @param[in] pu1_pred + * prediction 4x4 block in interleaved format + * + * @param[in] pred_strd, + * Prediction buffer stride in interleaved format + * + * @param[in] out_strd + * recon buffer Stride + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ + +void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscal_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, + WORD16 *pi2_tmp, + WORD16 *pi2_dc_src) +{ + UWORD8 *pu1_pred_ptr = pu1_pred; + UWORD8 *pu1_out_ptr = pu1_out; + WORD32 q0; + WORD16 x, i_macro, i; + UNUSED(pi2_src); + UNUSED(pu2_iscal_mat); + UNUSED(pu2_weigh_mat); + UNUSED(u4_qp_div_6); + UNUSED(pi2_tmp); + + q0 = pi2_dc_src[0]; // Restoring dc value for intra case3 + i_macro = ((q0 + 32) >> 6); + + for(i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + pu1_pred_ptr = pu1_pred; + pu1_out = pu1_out_ptr; + + /* inverse prediction */ + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = CLIP_U8(x); + pu1_pred_ptr += pred_strd; + pu1_out += out_strd; + + x = i_macro + *pu1_pred_ptr; + *pu1_out = 
CLIP_U8(x); + + pu1_out_ptr+=2; + pu1_pred+=2; + } +} diff --git a/common/ih264_itrans_recon.h b/common/ih264_itrans_recon.h new file mode 100755 index 0000000..fd1f239 --- /dev/null +++ b/common/ih264_itrans_recon.h @@ -0,0 +1,71 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
#ifndef IH264_ITRANS_RECON_H_
#define IH264_ITRANS_RECON_H_

/*****************************************************************************/
/* Extern Function Declarations                                              */
/*****************************************************************************/

/**
 * Common function type shared by the inverse transform + reconstruction
 * routines declared below. Declaring each routine through this type keeps
 * the C and platform specific variants signature compatible.
 *
 * NOTE(review): parameter meanings below are inferred from the names only,
 * the implementations are not part of this header - confirm against them.
 *   pi2_src    presumably the input coefficients
 *   pu1_pred   presumably the prediction block
 *   pu1_recon  presumably the reconstructed output block
 *   src_strd   presumably the stride of pi2_src
 *   pred_strd  presumably the stride of pu1_pred
 *   dst_strd   presumably the stride of pu1_recon
 *   q_lev      presumably the quantizer level
 *   pi4_tmp    presumably a scratch buffer
 */
typedef void ih264_itrans_recon_ft(WORD16 *pi2_src,
                                   UWORD8 *pu1_pred,
                                   UWORD8 *pu1_recon,
                                   WORD32 src_strd,
                                   WORD32 pred_strd,
                                   WORD32 dst_strd,
                                   UWORD32 q_lev,
                                   WORD32 *pi4_tmp);

/* C declarations */

ih264_itrans_recon_ft ih264_itrans_recon_4x4;

ih264_itrans_recon_ft ih264_itrans_recon_8x8;

/* A9 declarations */

ih264_itrans_recon_ft ih264_itrans_recon_4x4_a9;

#endif /* IH264_ITRANS_RECON_H_ */
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_list.c +* +* @brief +* Contains functions for buf queue +* +* @author +* Harish +* +* @par List of Functions: +* ih264_list_size() +* ih264_list_lock() +* ih264_list_unlock() +* ih264_list_yield() +* ih264_list_free() +* ih264_list_init() +* ih264_list_reset() +* ih264_list_deinit() +* ih264_list_terminate() +* ih264_list_queue() +* ih264_list_dequeue() +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include "ih264_typedefs.h" +#include "ithread.h" +#include "ih264_platform_macros.h" +#include "ih264_macros.h" +#include "ih264_debug.h" +#include "ih264_error.h" +#include "ih264_list.h" + +/** +******************************************************************************* +* +* @brief Returns size for buf queue context. Does not include buf queue buffer +* requirements +* +* @par Description +* Returns size for buf queue context. Does not include buf queue buffer +* requirements. Buffer size required to store the bufs should be allocated in +* addition to the value returned here. 
+* +* @returns Size of the buf queue context +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264_list_size(WORD32 num_entries, WORD32 entry_size) +{ + WORD32 size; + WORD32 clz; + size = sizeof(list_t); + size += ithread_get_mutex_lock_size(); + + /* Use next power of two number of entries*/ + clz = CLZ(num_entries); + num_entries = 1 << (32 - clz); + + size += num_entries * entry_size; + return size; +} + +/** +******************************************************************************* +* +* @brief +* Locks the list context +* +* @par Description +* Locks the list context by calling ithread_mutex_lock() +* +* @param[in] ps_list +* Job Queue context +* +* @returns IH264_FAIL if mutex lock fails else IH264_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_list_lock(list_t *ps_list) +{ + WORD32 retval; + retval = ithread_mutex_lock(ps_list->pv_mutex); + if(retval) + { + return IH264_FAIL; + } + return IH264_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Unlocks the list context +* +* @par Description +* Unlocks the list context by calling ithread_mutex_unlock() +* +* @param[in] ps_list +* Job Queue context +* +* @returns IH264_FAIL if mutex unlock fails else IH264_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ + +IH264_ERROR_T ih264_list_unlock(list_t *ps_list) +{ + WORD32 retval; + retval = ithread_mutex_unlock(ps_list->pv_mutex); + if(retval) + { + return IH264_FAIL; + } + return IH264_SUCCESS; + +} +/** +******************************************************************************* +* +* @brief +* Yields the thread +* +* @par Description +* Unlocks the list context by calling +* ih264_list_unlock(), ithread_yield() and then ih264_list_lock() +* list is unlocked before to ensure the 
list can be accessed by other threads +* If unlock is not done before calling yield then no other thread can access +* the list functions and update list. +* +* @param[in] ps_list +* Job Queue context +* +* @returns IH264_FAIL if mutex lock unlock or yield fails else IH264_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_list_yield(list_t *ps_list) +{ + + IH264_ERROR_T ret = IH264_SUCCESS; + + IH264_ERROR_T rettmp; + rettmp = ih264_list_unlock(ps_list); + RETURN_IF((rettmp != IH264_SUCCESS), rettmp); + + ithread_yield(); + + if(ps_list->i4_yeild_interval_us > 0) + ithread_usleep(ps_list->i4_yeild_interval_us); + + rettmp = ih264_list_lock(ps_list); + RETURN_IF((rettmp != IH264_SUCCESS), rettmp); + return ret; +} + + +/** +******************************************************************************* +* +* @brief free the buf queue pointers +* +* @par Description +* Frees the list context +* +* @param[in] pv_buf +* Memory for buf queue buffer and buf queue context +* +* @returns Pointer to buf queue context +* +* @remarks +* Since it will be called only once by master thread this is not thread safe. +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_list_free(list_t *ps_list) +{ + WORD32 ret; + ret = ithread_mutex_destroy(ps_list->pv_mutex); + + if(0 == ret) + return IH264_SUCCESS; + else + return IH264_FAIL; +} + +/** +******************************************************************************* +* +* @brief Initialize the buf queue +* +* @par Description +* Initializes the list context and sets write and read pointers to start of +* buf queue buffer +* +* @param[in] pv_buf +* Memoy for buf queue buffer and buf queue context +* +* @param[in] buf_size +* Size of the total memory allocated +* +* @returns Pointer to buf queue context +* +* @remarks +* Since it will be called only once by master thread this is not thread safe. 
+* +******************************************************************************* +*/ +void* ih264_list_init(void *pv_buf, + WORD32 buf_size, + WORD32 num_entries, + WORD32 entry_size, + WORD32 yeild_interval_us) +{ + list_t *ps_list; + UWORD8 *pu1_buf; + + pu1_buf = (UWORD8 *)pv_buf; + + ps_list = (list_t *)pu1_buf; + pu1_buf += sizeof(list_t); + buf_size -= sizeof(list_t); + + ps_list->pv_mutex = pu1_buf; + pu1_buf += ithread_get_mutex_lock_size(); + buf_size -= ithread_get_mutex_lock_size(); + + if (buf_size <= 0) + return NULL; + + ithread_mutex_init(ps_list->pv_mutex); + + /* Ensure num_entries is power of two */ + ASSERT(0 == (num_entries & (num_entries - 1))); + + /* Ensure remaining buffer is large enough to hold given number of entries */ + ASSERT((num_entries * entry_size) <= buf_size); + + ps_list->pv_buf_base = pu1_buf; + ps_list->i4_terminate = 0; + ps_list->i4_entry_size = entry_size; + ps_list->i4_buf_rd_idx = 0; + ps_list->i4_buf_wr_idx = 0; + ps_list->i4_log2_buf_max_idx = 32 - CLZ(num_entries); + ps_list->i4_buf_max_idx = num_entries; + ps_list->i4_yeild_interval_us = yeild_interval_us; + + return ps_list; +} +/** +******************************************************************************* +* +* @brief +* Resets the list context +* +* @par Description +* Resets the list context by initializing buf queue context elements +* +* @param[in] ps_list +* Job Queue context +* +* @returns IH264_FAIL if lock unlock fails else IH264_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_list_reset(list_t *ps_list) +{ + IH264_ERROR_T ret = IH264_SUCCESS; + ret = ih264_list_lock(ps_list); + RETURN_IF((ret != IH264_SUCCESS), ret); + + ps_list->i4_terminate = 0; + ps_list->i4_buf_rd_idx = 0; + ps_list->i4_buf_wr_idx = 0; + + ret = ih264_list_unlock(ps_list); + RETURN_IF((ret != IH264_SUCCESS), ret); + + return ret; +} + +/** 
+******************************************************************************* +* +* @brief +* Deinitializes the list context +* +* @par Description +* Deinitializes the list context by calling ih264_list_reset() +* and then destrying the mutex created +* +* @param[in] ps_list +* Job Queue context +* +* @returns IH264_FAIL if lock unlock fails else IH264_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_list_deinit(list_t *ps_list) +{ + WORD32 retval; + IH264_ERROR_T ret = IH264_SUCCESS; + + ret = ih264_list_reset(ps_list); + RETURN_IF((ret != IH264_SUCCESS), ret); + + retval = ithread_mutex_destroy(ps_list->pv_mutex); + if(retval) + { + return IH264_FAIL; + } + + return IH264_SUCCESS; +} + + +/** +******************************************************************************* +* +* @brief +* Terminates the list +* +* @par Description +* Terminates the list by setting a flag in context. +* +* @param[in] ps_list +* Job Queue context +* +* @returns IH264_FAIL if lock unlock fails else IH264_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ + +IH264_ERROR_T ih264_list_terminate(list_t *ps_list) +{ + IH264_ERROR_T ret = IH264_SUCCESS; + ret = ih264_list_lock(ps_list); + RETURN_IF((ret != IH264_SUCCESS), ret); + + ps_list->i4_terminate = 1; + + ret = ih264_list_unlock(ps_list); + RETURN_IF((ret != IH264_SUCCESS), ret); + return ret; +} + + +/** +******************************************************************************* +* +* @brief Adds a buf to the queue +* +* @par Description +* Adds a buf to the queue and updates wr address to next location. +* Format/content of the buf structure is abstracted and hence size of the buf +* buffer is being passed. 
+* +* @param[in] ps_list +* Job Queue context +* +* @param[in] pv_buf +* Pointer to the location that contains details of the buf to be added +* +* @param[in] buf_size +* Size of the buf buffer +* +* @param[in] blocking +* To signal if the write is blocking or non-blocking. +* +* @returns +* +* @remarks +* Job Queue buffer is assumed to be allocated to handle worst case number of bufs +* Wrap around is not supported +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_list_queue(list_t *ps_list, void *pv_buf, WORD32 blocking) +{ + IH264_ERROR_T ret = IH264_SUCCESS; + IH264_ERROR_T rettmp; + + WORD32 diff; + void *pv_buf_wr; + + volatile WORD32 *pi4_wr_idx, *pi4_rd_idx; + WORD32 buf_size = ps_list->i4_entry_size; + + + rettmp = ih264_list_lock(ps_list); + RETURN_IF((rettmp != IH264_SUCCESS), rettmp); + + + + while(1) + { + /* Ensure wr idx does not go beyond rd idx by more than number of entries + */ + pi4_wr_idx = &ps_list->i4_buf_wr_idx; + pi4_rd_idx = &ps_list->i4_buf_rd_idx; + diff = *pi4_wr_idx - *pi4_rd_idx; + + if(diff < ps_list->i4_buf_max_idx) + { + WORD32 wr_idx; + wr_idx = ps_list->i4_buf_wr_idx & (ps_list->i4_buf_max_idx - 1); + pv_buf_wr = (UWORD8 *)ps_list->pv_buf_base + wr_idx * buf_size; + + memcpy(pv_buf_wr, pv_buf, buf_size); + ps_list->i4_buf_wr_idx++; + break; + } + else + { + /* wr is ahead, so wait for rd to consume */ + if(blocking) + { + ih264_list_yield(ps_list); + } + else + { + ret = IH264_FAIL; + break; + } + } + + } + ps_list->i4_terminate = 0; + + rettmp = ih264_list_unlock(ps_list); + RETURN_IF((rettmp != IH264_SUCCESS), rettmp); + + return ret; +} +/** +******************************************************************************* +* +* @brief Gets next from the Job queue +* +* @par Description +* Gets next buf from the buf queue and updates rd address to next location. +* Format/content of the buf structure is abstracted and hence size of the buf +* buffer is being passed. 
If it is a blocking call and if there is no new buf +* then this functions unlocks the mutex and calls yield and then locks it back. +* and continues till a buf is available or terminate is set +* +* @param[in] ps_list +* Job Queue context +* +* @param[out] pv_buf +* Pointer to the location that contains details of the buf to be written +* +* @param[in] buf_size +* Size of the buf buffer +* +* @param[in] blocking +* To signal if the read is blocking or non-blocking. +* +* @returns +* +* @remarks +* Job Queue buffer is assumed to be allocated to handle worst case number of bufs +* Wrap around is not supported +* +******************************************************************************* +*/ +IH264_ERROR_T ih264_list_dequeue(list_t *ps_list, void *pv_buf, WORD32 blocking) +{ + IH264_ERROR_T ret = IH264_SUCCESS; + IH264_ERROR_T rettmp; + WORD32 buf_size = ps_list->i4_entry_size; + WORD32 diff; + + void *pv_buf_rd; + volatile WORD32 *pi4_wr_idx, *pi4_rd_idx; + + rettmp = ih264_list_lock(ps_list); + RETURN_IF((rettmp != IH264_SUCCESS), rettmp); + + while(1) + { + /* Ensure wr idx is ahead of rd idx and + * wr idx does not go beyond rd idx by more than number of entries + */ + pi4_wr_idx = &ps_list->i4_buf_wr_idx; + pi4_rd_idx = &ps_list->i4_buf_rd_idx; + diff = *pi4_wr_idx - *pi4_rd_idx; + + + if(diff > 0) + { + WORD32 rd_idx; + rd_idx = ps_list->i4_buf_rd_idx & (ps_list->i4_buf_max_idx - 1); + pv_buf_rd = (UWORD8 *)ps_list->pv_buf_base + rd_idx * buf_size; + + memcpy(pv_buf, pv_buf_rd, buf_size); + ps_list->i4_buf_rd_idx++; + break; + } + else + { + /* If terminate is signaled then break */ + if(ps_list->i4_terminate) + { + ret = IH264_FAIL; + break; + } + /* wr is ahead, so wait for rd to consume */ + if(blocking) + { + ih264_list_yield(ps_list); + } + else + { + ret = IH264_FAIL; + break; + } + } + + } + + + rettmp = ih264_list_unlock(ps_list); + RETURN_IF((rettmp != IH264_SUCCESS), rettmp); + + return ret; +} diff --git a/common/ih264_list.h 
#ifndef _IH264_LIST_H_
#define _IH264_LIST_H_

/**
 * Context of a fixed capacity queue of equally sized entries.
 * ih264_list_init() places it at the start of the caller supplied memory,
 * followed by the mutex storage and then the entry storage.
 */
typedef struct
{
    /** Pointer to buffer base which contains the entries */
    void *pv_buf_base;

    /** Mutex used to keep the functions thread-safe */
    void *pv_mutex;

    /** Current write index; incremented by ih264_list_queue() for every
     *  entry added and wrapped with (i4_buf_max_idx - 1) only when a slot is
     *  addressed */
    volatile WORD32 i4_buf_wr_idx;

    /** Current read index; incremented by ih264_list_dequeue() for every
     *  entry removed and wrapped with (i4_buf_max_idx - 1) only when a slot
     *  is addressed */
    volatile WORD32 i4_buf_rd_idx;

    /** Capacity of the list in entries, set to num_entries by
     *  ih264_list_init(). Has to be a power of two since
     *  (i4_buf_max_idx - 1) is used as the wrap around mask */
    WORD32 i4_buf_max_idx;

    /** Set to 32 - CLZ(num_entries) by ih264_list_init().
     *  NOTE(review): assuming CLZ() counts the leading zero bits of a 32 bit
     *  word, this is log2(i4_buf_max_idx) + 1 rather than
     *  log2(i4_buf_max_idx) - confirm before relying on it
     * */
    WORD32 i4_log2_buf_max_idx;

    /** Flag to indicate list has to be terminated. Set by
     *  ih264_list_terminate(), cleared by ih264_list_reset() and by
     *  ih264_list_queue(). When set, ih264_list_dequeue() fails instead of
     *  waiting on an empty list */
    WORD32 i4_terminate;

    /** Size of each entry in bytes */
    WORD32 i4_entry_size;

    /** Sleep, in micro seconds, added to every ih264_list_yield() call;
     *  no sleep when less than or equal to zero.
     *  If the list is to be used frequently send this as zero, else send a
     *  large value to ensure cores are not loaded unnecessarily.
     *  For eg: For picture level queues this can be a large value like 100us
     *  but for jobq this will be zero.
     */
    WORD32 i4_yeild_interval_us;

}list_t;

/* Number of bytes the caller has to allocate for a list */
WORD32 ih264_list_size(WORD32 num_entries, WORD32 entry_size);
/* Sets up a list inside pv_buf; can return NULL */
void* ih264_list_init(void *pv_buf,
                      WORD32 buf_size,
                      WORD32 num_entries,
                      WORD32 entry_size,
                      WORD32 yeild_interval_us);
/* Destroys the list mutex */
IH264_ERROR_T ih264_list_free(list_t *ps_list);
/* Clears the terminate flag and sets both indices to zero */
IH264_ERROR_T ih264_list_reset(list_t *ps_list);
/* ih264_list_reset() followed by destruction of the list mutex */
IH264_ERROR_T ih264_list_deinit(list_t *ps_list);
/* Sets the terminate flag */
IH264_ERROR_T ih264_list_terminate(list_t *ps_list);
/* Copies one entry from pv_buf into the list */
IH264_ERROR_T ih264_list_queue(list_t *ps_list, void *pv_buf, WORD32 blocking);
/* Copies the oldest entry of the list into pv_buf */
IH264_ERROR_T ih264_list_dequeue(list_t *ps_list, void *pv_buf, WORD32 blocking);

#endif /* _IH264_LIST_H_ */
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_luma_intra_pred_filters.c + * + * @brief + * Contains function definitions for intra prediction filters + * + * @author + * Ittiam + * + * @par List of Functions: + * - ih264_intra_pred_luma_4x4_mode_vert + * - ih264_intra_pred_luma_4x4_mode_horz + * - ih264_intra_pred_luma_4x4_mode_dc + * - ih264_intra_pred_luma_4x4_mode_diag_dl + * - ih264_intra_pred_luma_4x4_mode_diag_dr + * - ih264_intra_pred_luma_4x4_mode_vert_r + * - ih264_intra_pred_luma_4x4_mode_horz_d + * - ih264_intra_pred_luma_4x4_mode_vert_l + * - ih264_intra_pred_luma_4x4_mode_horz_u + * - ih264_intra_pred_luma_8x8_mode_ref_filtering + * - ih264_intra_pred_luma_8x8_mode_vert + * - ih264_intra_pred_luma_8x8_mode_horz + * - ih264_intra_pred_luma_8x8_mode_dc + * - ih264_intra_pred_luma_8x8_mode_diag_dl + * - ih264_intra_pred_luma_8x8_mode_diag_dr + * - ih264_intra_pred_luma_8x8_mode_vert_r + * - ih264_intra_pred_luma_8x8_mode_horz_d + * - ih264_intra_pred_luma_8x8_mode_vert_l + * - ih264_intra_pred_luma_8x8_mode_horz_u + * - ih264_intra_pred_luma_16x16_mode_vert + * - ih264_intra_pred_luma_16x16_mode_horz + * - ih264_intra_pred_luma_16x16_mode_dc + * - ih264_intra_pred_luma_16x16_mode_plane + * + * + * @remarks + * None + * + ****************************************************************************** + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stddef.h> +#include <string.h> + +/* User include files */ +#include "ih264_defs.h" +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include 
"ih264_intra_pred_filters.h" + +/* Global variables used only in assembly files*/ +const WORD8 ih264_gai1_intrapred_luma_plane_coeffs[] = +{ 0x01, 0x02, 0x03, 0x04, + 0x05, 0x06, 0x07, 0x08, + 0x09, 0x0A, 0x0B, 0x0C, + 0x0D, 0x0E, 0x0F, 0x10, }; + +const WORD8 ih264_gai1_intrapred_luma_8x8_horz_u[] = +{ 0x06,0x15,0x05,0x14, + 0x04,0x13,0x03,0x12, + 0x02,0x11,0x01,0x10, + 0x00,0x1F,0x0F,0x0F +}; + +/******************* LUMA INTRAPREDICTION *******************/ + +/******************* 4x4 Modes *******************/ + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_4x4_mode_vert + * + * @brief + * Perform Intra prediction for luma_4x4 mode:vertical + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:vertical ,described in sec 8.3.1.2.1 + * + * @param[in] pu1_src + * UWORD8 pointer to the source; the top predictors are read from pu1_src[BLK_SIZE + 1] onwards + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride(Not used in this function) + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + BLK_SIZE + 1; + /* Every row of the 4x4 block is a copy of the 4 top predictors */ + memcpy(pu1_dst, pu1_top, 4); + memcpy(pu1_dst + dst_strd, pu1_top, 4); + memcpy(pu1_dst + 2 * dst_strd, pu1_top, 4); + memcpy(pu1_dst + 3 * dst_strd, pu1_top, 4); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_4x4_mode_horz + * + * @brief + * Perform Intra prediction for luma_4x4 mode:horizontal + * + * @par Description: + * Perform
Intra prediction for luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left = NULL; /* Pointer to the left predictor of row 0; row r uses *(pu1_left - r) */ + + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_left = pu1_src + BLK_SIZE - 1; + /* Each row is filled with its own left predictor */ + memset(pu1_dst, *pu1_left, 4); + memset(pu1_dst + dst_strd, *(pu1_left - 1), 4); + memset(pu1_dst + 2 * dst_strd, *(pu1_left - 2), 4); + memset(pu1_dst + 3 * dst_strd, *(pu1_left - 3), 4); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_4x4_mode_dc + * + * @brief + * Perform Intra prediction for luma_4x4 mode:DC + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:DC ,described in sec 8.3.1.2.3 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride(Not used in this function) + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels (the left and top bits are tested) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 u1_useleft; /* availability of left predictors (only for DC) */ + UWORD8 u1_usetop;
/* availability of top predictors (only for DC) */ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + WORD32 val = 0; + UNUSED(src_strd); + UNUSED(ngbr_avail); /* NOTE(review): ngbr_avail is read just below, so this marker is redundant */ + u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK); + u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK); + pu1_top = pu1_src + BLK_SIZE + 1; + pu1_left = pu1_src + BLK_SIZE - 1; + + if(u1_useleft) + { + val += *pu1_left--; + val += *pu1_left--; + val += *pu1_left--; + val += *pu1_left + 2; + } + if(u1_usetop) + { + val += *pu1_top + *(pu1_top + 1) + *(pu1_top + 2) + *(pu1_top + 3) + + 2; + } + /* Each available side adds 4 samples plus a rounding term of 2, so the shift is 2 for one side and 3 for both; + val still being zero implies both preds are not there, in which case 128 is used */ + val = (val) ? (val >> (1 + u1_useleft + u1_usetop)) : 128; + + /* Every row of the 4x4 block is filled with the DC value */ + memset(pu1_dst, val, 4); + memset(pu1_dst + dst_strd, val, 4); + memset(pu1_dst + 2 * dst_strd, val, 4); + memset(pu1_dst + 3 * dst_strd, val, 4); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_4x4_mode_diag_dl + * + * @brief + * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride(Not used in this function) + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top =
NULL; /* Pointer to start of top predictors */ + UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h; + UWORD8 predicted_pixels[7]; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src +BLK_SIZE + 1; + /* a..h are the 8 consecutive samples starting at the top predictors */ + ui4_a = *pu1_top++; + ui4_b = *pu1_top++; + ui4_c = *pu1_top++; + ui4_d = *pu1_top++; + ui4_e = *pu1_top++; + ui4_f = *pu1_top++; + ui4_g = *pu1_top++; + ui4_h = *pu1_top; + + predicted_pixels[0] = FILT121(ui4_a, ui4_b, ui4_c); + predicted_pixels[1] = FILT121(ui4_b, ui4_c, ui4_d); + predicted_pixels[2] = FILT121(ui4_c, ui4_d, ui4_e); + predicted_pixels[3] = FILT121(ui4_d, ui4_e, ui4_f); + predicted_pixels[4] = FILT121(ui4_e, ui4_f, ui4_g); + predicted_pixels[5] = FILT121(ui4_f, ui4_g, ui4_h); + predicted_pixels[6] = FILT121(ui4_g, ui4_h, ui4_h); + /* Row r of the block is predicted_pixels[r .. r + 3] */ + memcpy(pu1_dst, predicted_pixels, 4); + memcpy(pu1_dst + dst_strd, predicted_pixels + 1, 4); + memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 2, 4); + memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 3, 4); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_4x4_mode_diag_dr + * + * @brief + * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride(Not used in this function) + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top = NULL; /* Pointer to start of top
predictors */ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_topleft = NULL;/* Pointer to top left predictor */ + UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_i, ui4_j, ui4_k, ui4_l, ui4_m; + UWORD8 predicted_pixels[7]; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + BLK_SIZE + 1; + pu1_left = pu1_src + BLK_SIZE - 1; + pu1_topleft = pu1_src +BLK_SIZE; + /* a..d: top predictors, i..l: left predictors (row 0 first), m: top left */ + ui4_a = *pu1_top++; + ui4_b = *pu1_top++; + ui4_c = *pu1_top++; + ui4_d = *pu1_top++; + ui4_i = *pu1_left--; + ui4_j = *pu1_left--; + ui4_k = *pu1_left--; + ui4_l = *pu1_left; + ui4_m = *pu1_topleft; + + predicted_pixels[2] = FILT121(ui4_j, ui4_i, ui4_m); + predicted_pixels[1] = FILT121(ui4_k, ui4_j, ui4_i); + predicted_pixels[0] = FILT121(ui4_l, ui4_k, ui4_j); + predicted_pixels[3] = FILT121(ui4_i, ui4_m, ui4_a); + predicted_pixels[4] = FILT121(ui4_m, ui4_a, ui4_b); + predicted_pixels[5] = FILT121(ui4_a, ui4_b, ui4_c); + predicted_pixels[6] = FILT121(ui4_b, ui4_c, ui4_d); + /* Each row is the row above shifted right by one sample */ + memcpy(pu1_dst, predicted_pixels + 3, 4); + memcpy(pu1_dst + dst_strd, predicted_pixels + 2, 4); + memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 1, 4); + memcpy(pu1_dst + 3 * dst_strd, predicted_pixels, 4); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_4x4_mode_vert_r + * + * @brief + * Perform Intra prediction for luma_4x4 mode:Vertical_Right + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + + UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_i, ui4_j, ui4_k, ui4_m; + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_topleft = NULL;/* Pointer to top left predictor */ + UWORD8 predicted_pixels[10]; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src +BLK_SIZE + 1; + pu1_left = pu1_src + BLK_SIZE - 1; + pu1_topleft = pu1_src + BLK_SIZE; + /* a..d: top predictors, i..k: first three left predictors, m: top left */ + ui4_a = *pu1_top++; + ui4_b = *pu1_top++; + ui4_c = *pu1_top++; + ui4_d = *pu1_top++; + ui4_i = *pu1_left--; + ui4_j = *pu1_left--; + ui4_k = *pu1_left; + ui4_m = *pu1_topleft; + + predicted_pixels[6] = FILT11(ui4_m, ui4_a); + predicted_pixels[7] = FILT11(ui4_a, ui4_b); + predicted_pixels[8] = FILT11(ui4_b, ui4_c); + predicted_pixels[9] = FILT11(ui4_c, ui4_d); + predicted_pixels[1] = FILT121(ui4_i, ui4_m, ui4_a); + predicted_pixels[2] = FILT121(ui4_m, ui4_a, ui4_b); + predicted_pixels[3] = FILT121(ui4_a, ui4_b, ui4_c); + predicted_pixels[4] = FILT121(ui4_b, ui4_c, ui4_d); + predicted_pixels[5] = FILT121(ui4_j, ui4_i, ui4_m); + predicted_pixels[0] = FILT121(ui4_k, ui4_j, ui4_i); + /* Rows 2 and 3 are rows 0 and 1 shifted right by one sample, with a left-derived sample shifted in */ + memcpy(pu1_dst, predicted_pixels + 6, 4); + memcpy(pu1_dst + dst_strd, predicted_pixels + 1, 4); + memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 5, 4); + memcpy(pu1_dst + 3 * dst_strd, predicted_pixels, 4); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_4x4_mode_horz_d + * + * @brief + * Perform Intra prediction for luma_4x4 mode:Horizontal_Down + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer
source stride(Not used in this function) + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_topleft = NULL;/* Pointer to top left predictor */ + UWORD32 ui4_a, ui4_b, ui4_c, ui4_i, ui4_j, ui4_k, ui4_l, ui4_m; + UWORD8 predicted_pixels[10]; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + BLK_SIZE + 1; + pu1_left = pu1_src + BLK_SIZE - 1; + pu1_topleft = pu1_src + BLK_SIZE; + /* a..c: first three top predictors, i..l: left predictors, m: top left */ + ui4_a = *pu1_top++; + ui4_b = *pu1_top++; + ui4_c = *pu1_top++; + ui4_i = *pu1_left--; + ui4_j = *pu1_left--; + ui4_k = *pu1_left--; + ui4_l = *pu1_left--; + ui4_m = *pu1_topleft; + + predicted_pixels[6] = FILT11(ui4_i, ui4_m); + predicted_pixels[7] = FILT121(ui4_i, ui4_m, ui4_a); + predicted_pixels[8] = FILT121(ui4_m, ui4_a, ui4_b); + predicted_pixels[9] = FILT121(ui4_a, ui4_b, ui4_c); + predicted_pixels[1] = FILT121(ui4_l, ui4_k, ui4_j); + predicted_pixels[2] = FILT11(ui4_k, ui4_j); + predicted_pixels[3] = FILT121(ui4_k, ui4_j, ui4_i); + predicted_pixels[4] = FILT11(ui4_j, ui4_i); + predicted_pixels[5] = FILT121(ui4_j, ui4_i, ui4_m); + predicted_pixels[0] = FILT11(ui4_l, ui4_k); + /* Each row starts two entries earlier in predicted_pixels than the row above */ + memcpy(pu1_dst, predicted_pixels + 6, 4); + memcpy(pu1_dst + dst_strd, predicted_pixels + 4, 4); + memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 2, 4); + memcpy(pu1_dst + 3 * dst_strd, predicted_pixels, 4); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_4x4_mode_vert_l + * + * @brief + * Perform Intra prediction for luma_4x4
mode:Vertical_Left + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g; + UWORD8 predicted_pixels[10]; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + BLK_SIZE + 1; + /* a..g are the 7 consecutive samples starting at the top predictors */ + ui4_a = *pu1_top++; + ui4_b = *pu1_top++; + ui4_c = *pu1_top++; + ui4_d = *pu1_top++; + ui4_e = *pu1_top++; + ui4_f = *pu1_top++; + ui4_g = *pu1_top; + + predicted_pixels[5] = FILT11(ui4_a, ui4_b); + predicted_pixels[6] = FILT11(ui4_b, ui4_c); + predicted_pixels[7] = FILT11(ui4_c, ui4_d); + predicted_pixels[8] = FILT11(ui4_d, ui4_e); + predicted_pixels[0] = FILT121(ui4_a, ui4_b, ui4_c); + predicted_pixels[1] = FILT121(ui4_b, ui4_c, ui4_d); + predicted_pixels[2] = FILT121(ui4_c, ui4_d, ui4_e); + predicted_pixels[3] = FILT121(ui4_d, ui4_e, ui4_f); + predicted_pixels[9] = FILT11(ui4_e, ui4_f); + predicted_pixels[4] = FILT121(ui4_e, ui4_f, ui4_g); + /* Rows 0 and 2 use the FILT11 values, rows 1 and 3 the FILT121 values; rows 2 and 3 are rows 0 and 1 shifted left by one sample */ + memcpy(pu1_dst, predicted_pixels + 5, 4); + memcpy(pu1_dst + dst_strd, predicted_pixels, 4); + memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 6, 4); + memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 1, 4); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_4x4_mode_horz_u + * + * @brief
+ * Perform Intra prediction for luma_4x4 mode:Horizontal_Up + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD32 ui4_i, ui4_j, ui4_k, ui4_l; + UWORD8 predicted_pixels[10]; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_left = pu1_src + BLK_SIZE - 1; + /* i..l are the 4 left predictors, read at decreasing addresses */ + ui4_i = *pu1_left--; + ui4_j = *pu1_left--; + ui4_k = *pu1_left--; + ui4_l = *pu1_left--; + + predicted_pixels[0] = FILT11(ui4_j, ui4_i); + predicted_pixels[1] = FILT121(ui4_k, ui4_j, ui4_i); + predicted_pixels[2] = FILT11(ui4_k, ui4_j); + predicted_pixels[3] = FILT121(ui4_l, ui4_k, ui4_j); + predicted_pixels[4] = FILT11(ui4_l, ui4_k); + predicted_pixels[5] = FILT121(ui4_l, ui4_l, ui4_k); + predicted_pixels[6] = ui4_l; + predicted_pixels[7] = ui4_l; + predicted_pixels[8] = ui4_l; + predicted_pixels[9] = ui4_l; + /* Each row starts two entries later in predicted_pixels than the row above */ + memcpy(pu1_dst, predicted_pixels, 4); + memcpy(pu1_dst + dst_strd, predicted_pixels + 2, 4); + memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 4, 4); + memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 6, 4); +} + +/******************* 8x8 Modes *******************/ + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_8x8_mode_ref_filtering + * + * @brief + * Reference sample filtering process for
Intra_8x8 sample prediction + * + * @par Description: + * Perform Reference sample filtering process for Intra_8x8 sample prediction ,described in sec 8.3.2.2.1 + * + * @param[in] pu1_left + * UWORD8 pointer to the left neighbours; row r is read at pu1_left[r * left_strd] + * + * @param[in] pu1_topleft + * UWORD8 pointer to the top left neighbour + * + * @param[in] pu1_top + * UWORD8 pointer to the 8 top neighbours, followed by the 8 top right ones when available + * @param[out] pu1_dst + * UWORD8 pointer to the filtered output: left in [0..7] (row 7 first), top left in [8], top in [9..16], top right in [17..24] + * @param[in] left_strd + * integer stride between consecutive left neighbours + * @param[in] ngbr_avail + * availability of neighbouring pixels + * + * @returns + * + * @remarks + * None + * + * + *******************************************************************************/ +void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_left, + UWORD8 *pu1_topleft, + UWORD8 *pu1_top, + UWORD8 *pu1_dst, + WORD32 left_strd, + WORD32 ngbr_avail) +{ + WORD32 top_avail, left_avail, top_left_avail, top_right_avail; + + left_avail = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK); + top_avail = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK); + top_left_avail = BOOLEAN(ngbr_avail & TOP_LEFT_MB_AVAILABLE_MASK); + top_right_avail = BOOLEAN(ngbr_avail & TOP_RIGHT_MB_AVAILABLE_MASK); + + if(top_avail) + { + WORD32 i; + UWORD32 u4_xm1; + + if(!top_right_avail) + { + memset(pu1_dst + 8 + 1 + 8, pu1_top[7], 8); /* substitute the last top sample for the missing top right */ + top_right_avail = 1; /* not read again in this function */ + } + else + { + memcpy(pu1_dst + 8 + 1 + 8, pu1_top + 8, 8); + } + + if(top_left_avail) + { + pu1_dst[8 + 1 + 0] = FILT121((*pu1_topleft), pu1_top[0], + pu1_top[1]); + + } + else + { + pu1_dst[8 + 1] = ((3 * pu1_top[0]) + pu1_top[1] + 2) >> 2; + } + + for(i = 1; i <= 6; i++) + { + pu1_dst[8 + 1 + i] = FILT121(pu1_top[i - 1], pu1_top[i], + pu1_top[i + 1]); + + } + /* First byte of Top Right input is in pu1_dst[8 + 1 + 8]*/ + pu1_dst[8 + 1 + 7] = FILT121(pu1_top[6], pu1_top[7], + pu1_dst[8 + 1 + 8]); + + /* top right is filtered in place in pu1_dst, so the unfiltered + left neighbour of each sample is carried in u4_xm1 */ + u4_xm1 = pu1_top[7]; + + for(i = 8; i <= 14; i++) + { + UWORD32 u4_x; + u4_x =
(u4_xm1 + (pu1_dst[8 + 1 + i] << 1) + pu1_dst[8 + 1 + i + 1] + + 2) >> 2; + /* assigning u4_xm1 from the un-filtered values for the next iteration */ + u4_xm1 = pu1_dst[8 + 1 + i]; + pu1_dst[8 + 1 + i] = u4_x; + } + + pu1_dst[8 + 1 + 15] = (u4_xm1 + (3 * pu1_dst[8 + 1 + 15]) + 2) >> 2; + + } + + /* NOTE(review): this block tests top_left_avail, while the left */ + /* path below tests the pu1_topleft pointer against 0; callers */ + /* presumably pass a null pointer exactly when the top left */ + /* neighbour is unavailable - confirm against the callers */ + if(top_left_avail) + { + if((!top_avail) || (!left_avail)) + { /* when neither top nor left is available pu1_dst[8] is left unwritten */ + if(top_avail) + pu1_dst[8] = (3 * pu1_topleft[0] + pu1_top[0] + 2) >> 2; + else if(left_avail) + pu1_dst[8] = (3 * pu1_topleft[0] + pu1_left[0] + 2) >> 2; + } + else + { + pu1_dst[8] = FILT121(pu1_top[0], (*pu1_topleft), pu1_left[0]); + } + } + + if(left_avail) + { + UWORD32 idx; + if(0 != pu1_topleft) + { + pu1_dst[7] = FILT121((*pu1_topleft), pu1_left[0], + pu1_left[left_strd]); + } + else + { + pu1_dst[7] = ((3 * pu1_left[0]) + pu1_left[left_strd] + 2) >> 2; + } + + for(idx = 1; idx <= 6; idx++) + { + pu1_dst[7 - idx] = FILT121(pu1_left[(idx - 1) * left_strd], + pu1_left[idx * left_strd], + pu1_left[(idx + 1) * left_strd]); + + } + pu1_dst[0] = (pu1_left[6 * left_strd] + 3 * pu1_left[7 * left_strd] + 2) + >> 2; + + } +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_8x8_mode_vert + * + * @brief + * Perform Intra prediction for luma_8x8 mode:vertical + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * +
******************************************************************************* + */ +void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + BLK8x8SIZE + 1; + /* Every row of the 8x8 block is a copy of the 8 top predictors */ + memcpy(pu1_dst, pu1_top, 8); + memcpy(pu1_dst + dst_strd, pu1_top, 8); + memcpy(pu1_dst + 2 * dst_strd, pu1_top, 8); + memcpy(pu1_dst + 3 * dst_strd, pu1_top, 8); + memcpy(pu1_dst + 4 * dst_strd, pu1_top, 8); + memcpy(pu1_dst + 5 * dst_strd, pu1_top, 8); + memcpy(pu1_dst + 6 * dst_strd, pu1_top, 8); + memcpy(pu1_dst + 7 * dst_strd, pu1_top, 8); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_8x8_mode_horz + * + * @brief + * Perform Intra prediction for luma_8x8 mode:horizontal + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:horizontal ,described in sec 8.3.2.2.3 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride(Not used in this function) + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + +void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left = pu1_src + BLK8x8SIZE - 1; /* left predictor of row 0; row r uses *(pu1_left - r) */ + UNUSED(src_strd); + UNUSED(ngbr_avail); + memset(pu1_dst, *pu1_left, 8); + memset(pu1_dst + dst_strd, *(pu1_left - 1), 8); + memset(pu1_dst + 2 * dst_strd, *(pu1_left - 2), 8); + memset(pu1_dst + 3 * dst_strd, *(pu1_left - 3), 8); + memset(pu1_dst + 4 * dst_strd, *(pu1_left - 4), 8); + memset(pu1_dst + 5 * dst_strd, *(pu1_left - 5),
8); + memset(pu1_dst + 6 * dst_strd, *(pu1_left - 6), 8); + memset(pu1_dst + 7 * dst_strd, *(pu1_left - 7), 8); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_8x8_mode_dc + * + * @brief + * Perform Intra prediction for luma_8x8 mode:DC + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.4 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride(Not used in this function) + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels (the left and top bits are tested) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 u1_useleft; /* availability of left predictors (only for DC) */ + UWORD8 u1_usetop; /* availability of top predictors (only for DC) */ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + WORD32 row; + WORD32 val = 0; + UNUSED(src_strd); + + u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK); + u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK); + pu1_top = pu1_src + BLK8x8SIZE + 1; + pu1_left = pu1_src + BLK8x8SIZE - 1; + + if(u1_useleft) + { + for(row = 0; row < BLK8x8SIZE; row++) + val += *(pu1_left - row); + val += 4; + } + if(u1_usetop) + { + for(row = 0; row < BLK8x8SIZE; row++) + val += *(pu1_top + row); + val += 4; + } + + /* Each available side adds 8 samples plus a rounding term of 4, so the shift is 3 for one side and 4 for both; + val still being zero implies both preds are not there, in which case 128 is used */ + val = (val) ?
(val >> (2 + u1_useleft + u1_usetop)) : 128; + /* Every row of the 8x8 block is filled with the DC value */ + memset(pu1_dst, val, 8); + memset(pu1_dst + dst_strd, val, 8); + memset(pu1_dst + 2 * dst_strd, val, 8); + memset(pu1_dst + 3 * dst_strd, val, 8); + memset(pu1_dst + 4 * dst_strd, val, 8); + memset(pu1_dst + 5 * dst_strd, val, 8); + memset(pu1_dst + 6 * dst_strd, val, 8); + memset(pu1_dst + 7 * dst_strd, val, 8); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_8x8_mode_diag_dl + * + * @brief + * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.5 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride(Not used in this function) + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h; + UWORD32 ui4_i, ui4_j, ui4_k, ui4_l, ui4_m, ui4_n, ui4_o, ui4_p; + UWORD8 predicted_pixels[15]; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + BLK8x8SIZE + 1; + /* a..p are the 16 consecutive samples starting at the top predictors */ + ui4_a = *pu1_top++; + ui4_b = *pu1_top++; + ui4_c = *pu1_top++; + ui4_d = *pu1_top++; + ui4_e = *pu1_top++; + ui4_f = *pu1_top++; + ui4_g = *pu1_top++; + ui4_h = *pu1_top++; + ui4_i = *pu1_top++; + ui4_j = *pu1_top++; + ui4_k = *pu1_top++; + ui4_l = *pu1_top++; + ui4_m = *pu1_top++; + ui4_n = *pu1_top++; + ui4_o = *pu1_top++; + ui4_p = *pu1_top; + + predicted_pixels[0] =
FILT121(ui4_a, ui4_b, ui4_c); + predicted_pixels[1] = FILT121(ui4_b, ui4_c, ui4_d); + predicted_pixels[2] = FILT121(ui4_c, ui4_d, ui4_e); + predicted_pixels[3] = FILT121(ui4_d, ui4_e, ui4_f); + predicted_pixels[4] = FILT121(ui4_e, ui4_f, ui4_g); + predicted_pixels[5] = FILT121(ui4_f, ui4_g, ui4_h); + predicted_pixels[6] = FILT121(ui4_g, ui4_h, ui4_i); + predicted_pixels[7] = FILT121(ui4_h, ui4_i, ui4_j); + predicted_pixels[8] = FILT121(ui4_i, ui4_j, ui4_k); + predicted_pixels[9] = FILT121(ui4_j, ui4_k, ui4_l); + predicted_pixels[10] = FILT121(ui4_k, ui4_l, ui4_m); + predicted_pixels[11] = FILT121(ui4_l, ui4_m, ui4_n); + predicted_pixels[12] = FILT121(ui4_m, ui4_n, ui4_o); + predicted_pixels[13] = FILT121(ui4_n, ui4_o, ui4_p); + predicted_pixels[14] = FILT121(ui4_o, ui4_p, ui4_p); + /* Row r of the block is predicted_pixels[r .. r + 7] */ + memcpy(pu1_dst, predicted_pixels, 8); + memcpy(pu1_dst + dst_strd, predicted_pixels + 1, 8); + memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 2, 8); + memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 3, 8); + memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 4, 8); + memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 5, 8); + memcpy(pu1_dst + 6 * dst_strd, predicted_pixels + 6, 8); + memcpy(pu1_dst + 7 * dst_strd, predicted_pixels + 7, 8); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_8x8_mode_diag_dr + * + * @brief + * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.6 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * +
*******************************************************************************/ +void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + UWORD8 *pu1_topleft = NULL; /* Pointer to start of top left predictors */ + UWORD32 ui4_a; + UWORD32 ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h, ui4_i; + UWORD32 ui4_j, ui4_k, ui4_l, ui4_m, ui4_n, ui4_o, ui4_p, ui4_q; + UWORD8 predicted_pixels[15]; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + BLK8x8SIZE + 1; + pu1_left = pu1_src + BLK8x8SIZE - 1; + pu1_topleft = pu1_src + BLK8x8SIZE; + + ui4_a = *pu1_topleft; + ui4_b = *pu1_top++; + ui4_c = *pu1_top++; + ui4_d = *pu1_top++; + ui4_e = *pu1_top++; + ui4_f = *pu1_top++; + ui4_g = *pu1_top++; + ui4_h = *pu1_top++; + ui4_i = *pu1_top; + ui4_j = *pu1_left--; + ui4_k = *pu1_left--; + ui4_l = *pu1_left--; + ui4_m = *pu1_left--; + ui4_n = *pu1_left--; + ui4_o = *pu1_left--; + ui4_p = *pu1_left--; + ui4_q = *pu1_left; + + predicted_pixels[6] = FILT121(ui4_a, ui4_j, ui4_k); + predicted_pixels[5] = FILT121(ui4_j, ui4_k, ui4_l); + predicted_pixels[4] = FILT121(ui4_k, ui4_l, ui4_m); + predicted_pixels[3] = FILT121(ui4_l, ui4_m, ui4_n); + predicted_pixels[2] = FILT121(ui4_m, ui4_n, ui4_o); + predicted_pixels[1] = FILT121(ui4_n, ui4_o, ui4_p); + predicted_pixels[0] = FILT121(ui4_o, ui4_p, ui4_q); + predicted_pixels[7] = FILT121(ui4_b, ui4_a, ui4_j); + predicted_pixels[8] = FILT121(ui4_a, ui4_b, ui4_c); + predicted_pixels[9] = FILT121(ui4_b, ui4_c, ui4_d); + predicted_pixels[10] = FILT121(ui4_c, ui4_d, ui4_e); + predicted_pixels[11] = FILT121(ui4_d, ui4_e, ui4_f); + predicted_pixels[12] = FILT121(ui4_e, ui4_f, ui4_g); + predicted_pixels[13] = FILT121(ui4_f, ui4_g, ui4_h); + predicted_pixels[14] = FILT121(ui4_g, ui4_h, ui4_i); + + memcpy(pu1_dst, 
predicted_pixels + 7, 8); + memcpy(pu1_dst + dst_strd, predicted_pixels + 6, 8); + memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 5, 8); + memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 4, 8); + memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 3, 8); + memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 2, 8); + memcpy(pu1_dst + 6 * dst_strd, predicted_pixels + 1, 8); + memcpy(pu1_dst + 7 * dst_strd, predicted_pixels, 8); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_8x8_mode_vert_r + * + * @brief + * Perform Intra prediction for luma_8x8 mode:Vertical_Right + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.7 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + UWORD8 *pu1_topleft = NULL; /* Pointer to start of top left predictors */ + UWORD32 ui4_a; + UWORD32 ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h, ui4_i; + UWORD32 ui4_j, ui4_k, ui4_l, ui4_m, ui4_n, ui4_o, ui4_p; + UWORD8 predicted_pixels[22]; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + BLK8x8SIZE + 1; + pu1_left = pu1_src + BLK8x8SIZE - 1; + pu1_topleft = pu1_src + BLK8x8SIZE; + + ui4_a = *pu1_topleft; + + ui4_b = *pu1_top++; + ui4_c = *pu1_top++; + ui4_d = 
*pu1_top++; + ui4_e = *pu1_top++; + ui4_f = *pu1_top++; + ui4_g = *pu1_top++; + ui4_h = *pu1_top++; + ui4_i = *pu1_top; + ui4_j = *pu1_left--; + ui4_k = *pu1_left--; + ui4_l = *pu1_left--; + ui4_m = *pu1_left--; + ui4_n = *pu1_left--; + ui4_o = *pu1_left--; + ui4_p = *pu1_left--; + + predicted_pixels[0] = FILT121(ui4_o, ui4_n, ui4_m); + predicted_pixels[1] = FILT121(ui4_m, ui4_l, ui4_k); + predicted_pixels[2] = FILT121(ui4_k, ui4_j, ui4_a); + predicted_pixels[3] = FILT11(ui4_a, ui4_b); + predicted_pixels[4] = FILT11(ui4_b, ui4_c); + predicted_pixels[5] = FILT11(ui4_c, ui4_d); + predicted_pixels[6] = FILT11(ui4_d, ui4_e); + predicted_pixels[7] = FILT11(ui4_e, ui4_f); + predicted_pixels[8] = FILT11(ui4_f, ui4_g); + predicted_pixels[9] = FILT11(ui4_g, ui4_h); + predicted_pixels[10] = FILT11(ui4_h, ui4_i); + predicted_pixels[11] = FILT121(ui4_p, ui4_o, ui4_n); + predicted_pixels[12] = FILT121(ui4_n, ui4_m, ui4_l); + predicted_pixels[13] = FILT121(ui4_l, ui4_k, ui4_j); + predicted_pixels[14] = FILT121(ui4_b, ui4_a, ui4_j); + predicted_pixels[15] = FILT121(ui4_a, ui4_b, ui4_c); + predicted_pixels[16] = FILT121(ui4_b, ui4_c, ui4_d); + predicted_pixels[17] = FILT121(ui4_c, ui4_d, ui4_e); + predicted_pixels[18] = FILT121(ui4_d, ui4_e, ui4_f); + predicted_pixels[19] = FILT121(ui4_e, ui4_f, ui4_g); + predicted_pixels[20] = FILT121(ui4_f, ui4_g, ui4_h); + predicted_pixels[21] = FILT121(ui4_g, ui4_h, ui4_i); + + memcpy(pu1_dst, predicted_pixels + 3, 8); + memcpy(pu1_dst + 1 * dst_strd, predicted_pixels + 14, 8); + memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 2, 8); + memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 13, 8); + memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 1, 8); + memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 12, 8); + memcpy(pu1_dst + 6 * dst_strd, predicted_pixels, 8); + memcpy(pu1_dst + 7 * dst_strd, predicted_pixels + 11, 8); + +} + +/* + ******************************************************************************* + * + 
*ih264_intra_pred_luma_8x8_mode_horz_d + * + * @brief + * Perform Intra prediction for luma_8x8 mode:Horizontal_Down + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.8 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ + +void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + UWORD8 *pu1_topleft = NULL; /* Pointer to start of top left predictors */ + UWORD32 ui4_a; + UWORD32 ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h, ui4_i; + UWORD32 ui4_j, ui4_k, ui4_l, ui4_m, ui4_n, ui4_o, ui4_p; + UWORD8 predicted_pixels[22]; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + BLK8x8SIZE + 1; + pu1_left = pu1_src + BLK8x8SIZE - 1; + pu1_topleft = pu1_src + BLK8x8SIZE; + + ui4_a = *pu1_topleft; + ui4_j = *pu1_top++; + ui4_k = *pu1_top++; + ui4_l = *pu1_top++; + ui4_m = *pu1_top++; + ui4_n = *pu1_top++; + ui4_o = *pu1_top++; + ui4_p = *pu1_top++; + ui4_b = *pu1_left--; + ui4_c = *pu1_left--; + ui4_d = *pu1_left--; + ui4_e = *pu1_left--; + ui4_f = *pu1_left--; + ui4_g = *pu1_left--; + ui4_h = *pu1_left--; + ui4_i = *pu1_left; + + predicted_pixels[0] = FILT11(ui4_h, ui4_i); + predicted_pixels[1] = FILT121(ui4_g, ui4_h, ui4_i); + predicted_pixels[2] = FILT11(ui4_g, ui4_h); + predicted_pixels[3] = FILT121(ui4_f, ui4_g, ui4_h); + predicted_pixels[4] = FILT11(ui4_f, ui4_g); + 
predicted_pixels[5] = FILT121(ui4_e, ui4_f, ui4_g); + predicted_pixels[6] = FILT11(ui4_e, ui4_f); + predicted_pixels[7] = FILT121(ui4_d, ui4_e, ui4_f); + predicted_pixels[8] = FILT11(ui4_d, ui4_e); + predicted_pixels[9] = FILT121(ui4_c, ui4_d, ui4_e); + predicted_pixels[10] = FILT11(ui4_c, ui4_d); + predicted_pixels[11] = FILT121(ui4_b, ui4_c, ui4_d); + predicted_pixels[12] = FILT11(ui4_b, ui4_c); + predicted_pixels[13] = FILT121(ui4_a, ui4_b, ui4_c); + predicted_pixels[14] = FILT11(ui4_a, ui4_b); + predicted_pixels[15] = FILT121(ui4_j, ui4_a, ui4_b); + predicted_pixels[16] = FILT121(ui4_k, ui4_j, ui4_a); + predicted_pixels[17] = FILT121(ui4_l, ui4_k, ui4_j); + predicted_pixels[18] = FILT121(ui4_m, ui4_l, ui4_k); + predicted_pixels[19] = FILT121(ui4_n, ui4_m, ui4_l); + predicted_pixels[20] = FILT121(ui4_o, ui4_n, ui4_m); + predicted_pixels[21] = FILT121(ui4_p, ui4_o, ui4_n); + + memcpy(pu1_dst, predicted_pixels + 14, 8); + memcpy(pu1_dst + dst_strd, predicted_pixels + 12, 8); + memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 10, 8); + memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 8, 8); + memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 6, 8); + memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 4, 8); + memcpy(pu1_dst + 6 * dst_strd, predicted_pixels + 2, 8); + memcpy(pu1_dst + 7 * dst_strd, predicted_pixels, 8); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_8x8_mode_vert_l + * + * @brief + * Perform Intra prediction for luma_8x8 mode:Vertical_Left + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.9 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this 
function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ + +void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h; + UWORD32 ui4_i, ui4_j, ui4_k, ui4_l, ui4_m; + UWORD8 predicted_pixels[22]; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + BLK8x8SIZE + 1; + + ui4_a = *pu1_top++; + ui4_b = *pu1_top++; + ui4_c = *pu1_top++; + ui4_d = *pu1_top++; + ui4_e = *pu1_top++; + ui4_f = *pu1_top++; + ui4_g = *pu1_top++; + ui4_h = *pu1_top++; + ui4_i = *pu1_top++; + ui4_j = *pu1_top++; + ui4_k = *pu1_top++; + ui4_l = *pu1_top++; + ui4_m = *pu1_top++; + + predicted_pixels[0] = FILT11(ui4_a, ui4_b); + predicted_pixels[1] = FILT11(ui4_b, ui4_c); + predicted_pixels[2] = FILT11(ui4_c, ui4_d); + predicted_pixels[3] = FILT11(ui4_d, ui4_e); + predicted_pixels[4] = FILT11(ui4_e, ui4_f); + predicted_pixels[5] = FILT11(ui4_f, ui4_g); + predicted_pixels[6] = FILT11(ui4_g, ui4_h); + predicted_pixels[7] = FILT11(ui4_h, ui4_i); + predicted_pixels[8] = FILT11(ui4_i, ui4_j); + predicted_pixels[9] = FILT11(ui4_j, ui4_k); + predicted_pixels[10] = FILT11(ui4_k, ui4_l); + predicted_pixels[11] = FILT121(ui4_a, ui4_b, ui4_c); + predicted_pixels[12] = FILT121(ui4_b, ui4_c, ui4_d); + predicted_pixels[13] = FILT121(ui4_c, ui4_d, ui4_e); + predicted_pixels[14] = FILT121(ui4_d, ui4_e, ui4_f); + predicted_pixels[15] = FILT121(ui4_e, ui4_f, ui4_g); + predicted_pixels[16] = FILT121(ui4_f, ui4_g, ui4_h); + predicted_pixels[17] = FILT121(ui4_g, ui4_h, ui4_i); + predicted_pixels[18] = FILT121(ui4_h, ui4_i, ui4_j); + predicted_pixels[19] = FILT121(ui4_i, ui4_j, ui4_k); + predicted_pixels[20] = FILT121(ui4_j, ui4_k, ui4_l); + predicted_pixels[21] = FILT121(ui4_k, ui4_l, ui4_m); + + memcpy(pu1_dst, 
predicted_pixels, 8); + memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 1, 8); + memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 2, 8); + memcpy(pu1_dst + 6 * dst_strd, predicted_pixels + 3, 8); + memcpy(pu1_dst + 1 * dst_strd, predicted_pixels + 11, 8); + memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 12, 8); + memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 13, 8); + memcpy(pu1_dst + 7 * dst_strd, predicted_pixels + 14, 8); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_8x8_mode_horz_u + * + * @brief + * Perform Intra prediction for luma_8x8 mode:Horizontal_Up + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.10 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ + +void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) + +{ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD32 ui4_j, ui4_k, ui4_l, ui4_m, ui4_n, ui4_o, ui4_p, ui4_q; + UWORD8 predicted_pixels[22]; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_left = pu1_src + BLK8x8SIZE - 1; + + ui4_j = *pu1_left--; + ui4_k = *pu1_left--; + ui4_l = *pu1_left--; + ui4_m = *pu1_left--; + ui4_n = *pu1_left--; + ui4_o = *pu1_left--; + ui4_p = *pu1_left--; + ui4_q = *pu1_left; + + pu1_left = pu1_src + BLK8x8SIZE - 1; + + predicted_pixels[0] = FILT11(ui4_j, ui4_k); + predicted_pixels[1] = FILT121(ui4_j, ui4_k, ui4_l); + predicted_pixels[2] = FILT11(ui4_k, ui4_l); 
+ predicted_pixels[3] = FILT121(ui4_k, ui4_l, ui4_m); + predicted_pixels[4] = FILT11(ui4_l, ui4_m); + predicted_pixels[5] = FILT121(ui4_l, ui4_m, ui4_n); + predicted_pixels[6] = FILT11(ui4_m, ui4_n); + predicted_pixels[7] = FILT121(ui4_m, ui4_n, ui4_o); + predicted_pixels[8] = FILT11(ui4_n, ui4_o); + predicted_pixels[9] = FILT121(ui4_n, ui4_o, ui4_p); + predicted_pixels[10] = FILT11(ui4_o, ui4_p); + predicted_pixels[11] = FILT121(ui4_o, ui4_p, ui4_q); + predicted_pixels[12] = FILT11(ui4_p, ui4_q); + predicted_pixels[13] = FILT121(ui4_p, ui4_q, ui4_q); + memset(predicted_pixels+14,ui4_q,8); + + memcpy(pu1_dst, predicted_pixels, 8); + memcpy(pu1_dst + 1 * dst_strd, predicted_pixels + 2, 8); + memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 4, 8); + memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 6, 8); + memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 8, 8); + memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 10, 8); + memcpy(pu1_dst + 6 * dst_strd, predicted_pixels + 12, 8); + memcpy(pu1_dst + 7 * dst_strd, predicted_pixels + 14, 8); +} + + +/******************* 16x16 Modes *******************/ + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_16x16_mode_vert + * + * @brief + * Perform Intra prediction for luma_16x16 mode:Vertical + * + * @par Description: + * Perform Intra prediction for luma_16x16 mode:Vertical, described in sec 8.3.3.1 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels (Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ + +void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 
src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + WORD32 rows; /* loop variables*/ + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + MB_SIZE + 1; + + for(rows = 0; rows < 16; rows += 4, pu1_dst += dst_strd) + { + memcpy(pu1_dst, pu1_top, 16); + pu1_dst += dst_strd; + memcpy(pu1_dst, pu1_top, 16); + pu1_dst += dst_strd; + memcpy(pu1_dst, pu1_top, 16); + pu1_dst += dst_strd; + memcpy(pu1_dst, pu1_top, 16); + } +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_16x16_mode_horz + * + * @brief + * Perform Intra prediction for luma_16x16 mode:Horizontal + * + * @par Description: + * Perform Intra prediction for luma_16x16 mode:Horizontal, described in sec 8.3.3.2 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ + +void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left = NULL; /* Pointer to start of top predictors */ + WORD32 rows; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_left = pu1_src + MB_SIZE - 1; + + for(rows = 0; rows < 16; rows += 4, pu1_dst += dst_strd, pu1_left --) + { + memset(pu1_dst, *pu1_left, 16); /* copy the left value to the entire row*/ + pu1_left --; + pu1_dst += dst_strd; + memset(pu1_dst, *pu1_left, 16); + pu1_left --; + pu1_dst += dst_strd; + memset(pu1_dst, *pu1_left, 16); + pu1_left --; + pu1_dst += dst_strd; + memset(pu1_dst, *pu1_left, 16); + } +} + +/** + 
******************************************************************************* + * + *ih264_intra_pred_luma_16x16_mode_dc + * + * @brief + * Perform Intra prediction for luma_16x16 mode:DC + * + * @par Description: + * Perform Intra prediction for luma_16x16 mode:DC, described in sec 8.3.3.3 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + ** @param[in] ngbr_avail + * availability of neighbouring pixels + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ + +void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + WORD8 u1_useleft; /* availability of left predictors (only for DC) */ + UWORD8 u1_usetop; /* availability of top predictors (only for DC) */ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + WORD32 rows; /* loop variables*/ + WORD32 val = 0; + UNUSED(src_strd); + + u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK); + u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK); + pu1_top = pu1_src + MB_SIZE + 1; + pu1_left = pu1_src + MB_SIZE - 1; + if(u1_useleft) + { + for(rows = 0; rows < 16; rows++) + val += *(pu1_left - rows); + val += 8; + } + if(u1_usetop) + { + for(rows = 0; rows < 16; rows++) + val += *(pu1_top + rows); + val += 8; + } + /* Since 8 is added if either left/top pred is there, + val still being zero implies both preds are not there */ + val = (val) ? 
(val >> (3 + u1_useleft + u1_usetop)) : 128; + + for(rows = 0; rows < 16; rows += 4, pu1_dst += dst_strd) + { + memset(pu1_dst, val, 16); + pu1_dst += dst_strd; + memset(pu1_dst, val, 16); + pu1_dst += dst_strd; + memset(pu1_dst, val, 16); + pu1_dst += dst_strd; + memset(pu1_dst, val, 16); + } +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_16x16_mode_plane + * + * @brief + * Perform Intra prediction for luma_16x16 mode:PLANE + * + * @par Description: + * Perform Intra prediction for luma_16x16 mode:PLANE, described in sec 8.3.3.4 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ + +void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + /*! Written with no multiplications */ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + UWORD8 *pu1_topleft = NULL; + WORD32 a, b, c, tmp; + UWORD8 *pu1_tmp1, *pu1_tmp2; + WORD32 shift; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + MB_SIZE + 1; + pu1_left = pu1_src + MB_SIZE - 1; + pu1_topleft = pu1_src + MB_SIZE; + + { + a = (*(pu1_top + 15) + *(pu1_left - 15)) << 4; + + /*! 
Implement Sum(x*(P((x+7),-1) - P((x-7),-1))) x=1...8 */ + pu1_tmp1 = pu1_top + 8; + pu1_tmp2 = pu1_tmp1 - 2; + + /* Pixel diffs are only 9 bits; + so sign extension allows shifts to be used even for signed */ + b = ((*pu1_tmp1++) - (*pu1_tmp2--)); /* x=1 */ + b += ((*pu1_tmp1++) - (*pu1_tmp2--)) << 1; /* x=2 */ + tmp = ((*pu1_tmp1++) - (*pu1_tmp2--)); + b += (tmp << 1) + tmp; /* x=3 */ + b += ((*pu1_tmp1++) - (*pu1_tmp2--)) << 2; /* x=4 */ + + tmp = ((*pu1_tmp1++) - (*pu1_tmp2--)); + b += (tmp << 2) + tmp; /* x=5 */ + tmp = ((*pu1_tmp1++) - (*pu1_tmp2--)); + b += (tmp << 2) + (tmp << 1); /* x=6 */ + tmp = ((*pu1_tmp1++) - (*pu1_tmp2--)); + b += (tmp << 3) - tmp; /* x=7 */ + b += ((*pu1_tmp1) - (*pu1_topleft)) << 3; /* x=8 */ + + b = ((b << 2) + b + 32) >> 6; /*! (5*H + 32)>>6 */ + + /*! Implement Sum(y*(P(-1,(y+7)) - P(-1,(y-7)))) y=1...8 */ + pu1_tmp1 = pu1_left - 8; + pu1_tmp2 = pu1_tmp1 + 2; + + c = ((*pu1_tmp1) - (*pu1_tmp2)); /* y=1 */ + pu1_tmp1--; + pu1_tmp2++; + c += ((*pu1_tmp1) - (*pu1_tmp2)) << 1; /* y=2 */ + pu1_tmp1--; + pu1_tmp2++; + tmp = ((*pu1_tmp1) - (*pu1_tmp2)); + c += (tmp << 1) + tmp; /* y=3 */ + pu1_tmp1--; + pu1_tmp2++; + c += ((*pu1_tmp1) - (*pu1_tmp2)) << 2; /* y=4 */ + pu1_tmp1--; + pu1_tmp2++; + + tmp = ((*pu1_tmp1) - (*pu1_tmp2)); + c += (tmp << 2) + tmp; /* y=5 */ + pu1_tmp1--; + pu1_tmp2++; + tmp = ((*pu1_tmp1) - (*pu1_tmp2)); + c += (tmp << 2) + (tmp << 1); /* y=6 */ + pu1_tmp1--; + pu1_tmp2++; + tmp = ((*pu1_tmp1) - (*pu1_tmp2)); + c += (tmp << 3) - tmp; /* y=7 */ + pu1_tmp1--; //pu1_tmp2 ++; + /* Modified to get (-1,-1) location as *(pu1_top - 1) instead of (pu1_left - ui4_stride) */ + //c += ((*pu1_tmp1) - (*(pu1_top - 1)))<<3; /* y=8 */ + c += ((*pu1_tmp1) - (*pu1_topleft)) << 3; /* y=8 */ + + c = ((c << 2) + c + 32) >> 6; /*! (5*V + 32)>>32 */ + shift = 3; + } + + /*! 
Now from the plane parameters a, b, and c, + compute the fitted plane values over the block */ + { + WORD32 tmp1, tmpx, tmpx_init, j, i; + + tmpx_init = -(b << shift); /* -8b */ + tmp = a - (c << shift) + 16; /* a-((4or8)*c)+16 */ + for(i = 0; i < 16; i++) + { + tmp += c; /*increment every time by c to get c*(y-7or3)*/ + tmpx = tmpx_init; /* Init to -8b */ + for(j = 0; j < 16; j++) + { + tmpx += b; /* increment every time by b to get b*(x-7or3) */ + tmp1 = (tmp + tmpx) >> 5; + *pu1_dst++ = CLIP_U8(tmp1); + } + pu1_dst += (dst_strd - 16); + } + } +} diff --git a/common/ih264_macros.h b/common/ih264_macros.h new file mode 100755 index 0000000..6e4cb16 --- /dev/null +++ b/common/ih264_macros.h @@ -0,0 +1,110 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
/* Ltd, Bangalore  (end of the license header that begins on the preceding line) */

/**
 *******************************************************************************
 * @file
 *  ih264_macros.h
 *
 * @brief
 *  Macro definitions used in the codec
 *
 * @author
 *  Ittiam
 *
 * @remarks
 *  These are function-like macros: an argument may be evaluated more than
 *  once, so do not pass expressions with side effects.
 *
 *******************************************************************************
 */
#ifndef _IH264_MACROS_H_
#define _IH264_MACROS_H_

/*****************************************************************************/
/* Function Macros                                                           */
/*****************************************************************************/
/* Expands to a complete if-statement; kept in its original form so existing
   call sites (with or without a trailing semicolon) keep compiling */
#define RETURN_IF(cond, retval) if(cond) {return (retval);}
#define UNUSED(x) ((void)(x))

/* Round x up to the next multiple of 128 / 64 / 32 / 16 / 8 / 4 */
#define ALIGN128(x) ((((x) + 127) >> 7) << 7)
#define ALIGN64(x)  ((((x) + 63) >> 6) << 6)
#define ALIGN32(x)  ((((x) + 31) >> 5) << 5)
#define ALIGN16(x)  ((((x) + 15) >> 4) << 4)
#define ALIGN8(x)   ((((x) + 7) >> 3) << 3)
#define ALIGN4(x)   ((((x) + 3) >> 2) << 2)


/**
******************************************************************************
 *  @brief      Min, Max
 *  Every argument and every expansion is fully parenthesised so the macros
 *  can be used with compound arguments and inside larger expressions.
******************************************************************************
 */
#define MAX(a,b)    (((a) > (b)) ? (a) : (b))
#define MIN(a,b)    (((a) < (b)) ? (a) : (b))
#define MIN3(a,b,c) (((a) < (b)) ? (((a) < (c)) ? (a) : (c)) : (((b) < (c)) ? (b) : (c)))
#define MAX3(a,b,c) (((a) > (b)) ? (((a) > (c)) ? (a) : (c)) : (((b) > (c)) ? (b) : (c)))

/**
******************************************************************************
 *  @brief      Div, Mod
******************************************************************************
 */
#define MOD(x,y) ((x)%(y))
#define DIV(x,y) ((x)/(y))

/**
******************************************************************************
 *  @brief      Clip y to the range [miny, maxy]
******************************************************************************
 */
#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))

/**
******************************************************************************
 *  @brief      True, False: maps any non-zero value to 1
******************************************************************************
 */
#define BOOLEAN(x) (!!(x))

/**
******************************************************************************
 *  @brief      Frequently used multiplications x2, x3, and x4
******************************************************************************
 */
#define X2(a) ((a) << 1)
#define X3(a) (((a) << 1) + (a))
#define X4(a) ((a) << 2)

/**
******************************************************************************
 *  @brief      Misc
******************************************************************************
 */
#define ABS(x)      ((x) < 0 ? (-(x)) : (x))
/* x with its sign flipped when y is negative */
#define SIGNXY(x,y) (((y) < 0) ? (-1 * (x)) : (x))

/* -1, 0 or 1 according to the sign of x */
#define SIGN(x)     (((x) >= 0) ? (((x) > 0) ? 1 : 0) : -1)

/* Statement macros: the trailing semicolon is part of the expansion, as in
   the original, so call sites written without one still compile */
#define RESET_BIT(x, pos) (x) = (x) & ~(1 << (pos));
#define SET_BIT(x, pos)   (x) = (x) | (1 << (pos));
/* Value (0 or 1) of bit pos of x */
#define GET_BIT(x, pos)   (((x) >> (pos)) & 0x1)

/* Clears bit pos of x, then ORs in bit << pos */
#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | ((bit) << (pos)); }
#endif /*_IH264_MACROS_H_*/


/*
 * Patch metadata that follows ih264_macros.h in this change (kept verbatim):
 *
 *   diff --git a/common/ih264_mem_fns.c b/common/ih264_mem_fns.c
 *   new file mode 100755
 *   index 0000000..1c1f328
 *   --- /dev/null
 *   +++ b/common/ih264_mem_fns.c
 *   @@ -0,0 +1,176 @@
 */
/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt.
 */
Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_mem_fns.c + * + * @brief + * Functions used for memory operations + * + * @author + * Ittiam + * + * @par List of Functions: + * ih264_memcpy() + * ih264_memcpy_mul_8() + * ih264_memset() + * ih264_memset_mul_8() + * ih264_memset_16bit() + * ih264_memset_16bit_mul_8() + * + * @remarks + * None + * + ****************************************************************************** + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_mem_fns.h" + +/** + ******************************************************************************* + * + * @brief + * memcpy of a 8,16 or 32 bytes + * + * @par Description: + * Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes + * + * @param[in] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[in] num_bytes + * number of bytes to copy + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + +void ih264_memcpy(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes) +{ + memcpy(pu1_dst, pu1_src, num_bytes); +} + + +void ih264_memcpy_mul_8(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes) +{ + memcpy(pu1_dst, pu1_src, num_bytes); +} + +/** + ******************************************************************************* + * + * @brief + * memset of a 8,16 or 32 bytes + * + * @par Description: + * Does memset of 8bit data for 8,16 or 32 number of bytes + * + * @param[in] pu1_dst + * UWORD8 pointer to the 
destination + * + * @param[in] value + * UWORD8 value used for memset + * + * @param[in] num_bytes + * number of bytes to set + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + +void ih264_memset(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes) +{ + memset(pu1_dst, value, num_bytes); +} + + +void ih264_memset_mul_8(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes) +{ + memset(pu1_dst, value, num_bytes); +} + +/** + ******************************************************************************* + * + * @brief + * memset of 16bit data of a 8,16 or 32 bytes + * + * @par Description: + * Does memset of 16bit data for 8,16 or 32 number of bytes + * + * @param[in] pu2_dst + * UWORD8 pointer to the destination + * + * @param[in] value + * UWORD16 value used for memset + * + * @param[in] num_words + * number of words to set + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + +void ih264_memset_16bit(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words) +{ + UWORD32 i; + for(i = 0; i < num_words; i++) + { + *pu2_dst++ = value; + } +} + +void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst, + UWORD16 value, + UWORD32 num_words) +{ + UWORD32 i; + for(i = 0; i < num_words; i++) + { + *pu2_dst++ = value; + } +} + diff --git a/common/ih264_mem_fns.h b/common/ih264_mem_fns.h new file mode 100755 index 0000000..e0167f4 --- /dev/null +++ b/common/ih264_mem_fns.h @@ -0,0 +1,126 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_mem_fns.h +* +* @brief +* Function types and declarations for the memcpy and memset routines +* +* @author +* Ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _IH264_MEM_FNS_H_ +#define _IH264_MEM_FNS_H_ +/* Function types for the memcpy variants: destination pointer, source pointer, number of bytes */ +typedef void ih264_memcpy_ft(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes); + +typedef void ih264_memcpy_mul_8_ft(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes); +/** + ******************************************************************************* + * + * @brief + * Function types for memset of 8-bit data + * + * @par Description: + * Signature shared by the routines that set num_bytes bytes at pu1_dst to value + * + * @param[in] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] value + * UWORD8 value used for memset + * + * @param[in] num_bytes + * number of bytes to set + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +typedef void ih264_memset_ft(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes); + +typedef void ih264_memset_mul_8_ft(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes); + +/** + ******************************************************************************* + * + * @brief + * Function types for memset of 16-bit data + * + *
@par Description: + * Signature shared by the routines that set num_words 16-bit words at pu2_dst to value + * + * @param[in] pu2_dst + * UWORD16 pointer to the destination + * + * @param[in] value + * UWORD16 value used for memset + * + * @param[in] num_words + * number of 16-bit words to set + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +typedef void ih264_memset_16bit_ft(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words); + +typedef void ih264_memset_16bit_mul_8_ft(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words); + +/* C function declarations */ +ih264_memcpy_ft ih264_memcpy; +ih264_memcpy_mul_8_ft ih264_memcpy_mul_8; +ih264_memset_ft ih264_memset; +ih264_memset_mul_8_ft ih264_memset_mul_8; +ih264_memset_16bit_ft ih264_memset_16bit; +ih264_memset_16bit_mul_8_ft ih264_memset_16bit_mul_8; + +/* A9 Q function declarations */ +ih264_memcpy_ft ih264_memcpy_a9q; +ih264_memcpy_mul_8_ft ih264_memcpy_mul_8_a9q; +ih264_memset_ft ih264_memset_a9q; +ih264_memset_mul_8_ft ih264_memset_mul_8_a9q; +ih264_memset_16bit_ft ih264_memset_16bit_a9q; +ih264_memset_16bit_mul_8_ft ih264_memset_16bit_mul_8_a9q; + +/* AV8 function declarations */ +ih264_memcpy_ft ih264_memcpy_av8; +ih264_memcpy_mul_8_ft ih264_memcpy_mul_8_av8; +ih264_memset_ft ih264_memset_av8; +ih264_memset_mul_8_ft ih264_memset_mul_8_av8; +ih264_memset_16bit_ft ih264_memset_16bit_av8; +ih264_memset_16bit_mul_8_ft ih264_memset_16bit_mul_8_av8; + +/* SSSE3 function declarations (only the mul_8 variants are declared) */ +ih264_memcpy_mul_8_ft ih264_memcpy_mul_8_ssse3; +ih264_memset_mul_8_ft ih264_memset_mul_8_ssse3; +ih264_memset_16bit_mul_8_ft ih264_memset_16bit_mul_8_ssse3; +#endif /* _IH264_MEM_FNS_H_ */ diff --git a/common/ih264_padding.c b/common/ih264_padding.c new file mode 100755 index 0000000..8e8f3e2 --- /dev/null +++ b/common/ih264_padding.c @@ -0,0 +1,331 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License,
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264_padding.c +* +* @brief +* Contains function definitions for Padding +* +* @author +* Ittiam +* +* @par List of Functions: +* - ih264_pad_top() +* - ih264_pad_bottom() +* - ih264_pad_left_luma() +* - ih264_pad_left_chroma() +* - ih264_pad_right_luma() +* - ih264_pad_right_chroma() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stddef.h> +#include <string.h> + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_padding.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief pad at the top of a 2d array +* +* @par Description: +* The wd bytes at pu1_src (the top row) are copied into each of the pad_size rows directly above it +* +* @param[in]
pu1_src +* UWORD8 pointer to the top row of the array (the row that is replicated) +* +* @param[in] src_strd +* integer source stride, in bytes +* +* @param[in] wd +* integer number of bytes copied per row +* +* @param[in] pad_size +* integer number of rows written above pu1_src +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264_pad_top(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 wd, + WORD32 pad_size) +{ + WORD32 row; + + for(row = 1; row <= pad_size; row++) + { + memcpy(pu1_src - row * src_strd, pu1_src, wd); + } +} + + + +/** +******************************************************************************* +* +* @brief pad at the bottom of a 2d array +* +* @par Description: +* The row directly above pu1_src is copied into pad_size rows starting at pu1_src +* +* @param[in] pu1_src +* UWORD8 pointer to the first row to be written; the row that is replicated is at pu1_src - src_strd +* +* @param[in] src_strd +* integer source stride, in bytes +* +* @param[in] wd +* integer number of bytes copied per row +* +* @param[in] pad_size +* integer number of rows written starting at pu1_src +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264_pad_bottom(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 wd, + WORD32 pad_size) +{ + WORD32 row; + + for(row = 1; row <= pad_size; row++) + { + memcpy(pu1_src + (row - 1) * src_strd, pu1_src - 1 * src_strd, wd); + } +} + +/** +******************************************************************************* +* +* @brief pad (luma block) at the left of a 2d array +* +* @par Description: +* For each of ht rows, the byte at pu1_src is written to the pad_size bytes immediately to its left +* +* @param[in] pu1_src +* UWORD8 pointer to the leftmost pixel of the first row +* +* @param[in] src_strd +* integer source stride, in bytes +* +* @param[in] ht +* integer number of rows to pad +* +* @param[in] pad_size +* integer number of bytes written to the left of each row +* +* @returns none +* +* @remarks none +* +******************************************************************************* + */ +void ih264_pad_left_luma(UWORD8 *pu1_src,
+ WORD32 src_strd, + WORD32 ht, + WORD32 pad_size) +{ + WORD32 row; + + for(row = 0; row < ht; row++) + { + + memset(pu1_src - pad_size, *pu1_src, pad_size); + + pu1_src += src_strd; + } +} + +/** +******************************************************************************* +* +* @brief pad (chroma block) at the left of a 2d array +* +* @par Description: +* For each of ht rows, the 16-bit value at pu1_src is written to the pad_size / 2 16-bit positions immediately to its left +* +* @param[in] pu1_src +* UWORD8 pointer to the leftmost 16-bit value of the first row; it is accessed through a UWORD16 pointer +* +* @param[in] src_strd +* integer source stride, in bytes (halved internally) +* +* @param[in] ht +* integer number of rows to pad +* +* @param[in] pad_size +* integer padding size in bytes (halved internally to a count of 16-bit values) +* +* @returns none +* +* @remarks NOTE(review): each 16-bit value is presumably one interleaved U/V pair (cf. u2_uv_val) - confirm against the caller +* +******************************************************************************* +*/ +void ih264_pad_left_chroma(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 ht, + WORD32 pad_size) +{ + /* temp var */ + WORD32 row, col; + UWORD16 u2_uv_val; + + /* view the buffer as 16-bit units */ + UWORD16 *pu2_src = (UWORD16 *)pu1_src; + + src_strd >>= 1; + pad_size >>= 1; + + for(row = 0; row < ht; row++) + { + u2_uv_val = pu2_src[0]; + + for (col = -pad_size; col < 0; col++) + { + pu2_src[col] = u2_uv_val; + } + + pu2_src += src_strd; + } +} + +/** +******************************************************************************* +* +* @brief pad (luma block) at the right of a 2d array +* +* @par Description: +* For each of ht rows, the byte just before pu1_src is written to the pad_size bytes starting at pu1_src +* +* @param[in] pu1_src +* UWORD8 pointer to the first byte to be written in the first row; the pixel that is replicated is at pu1_src - 1 +* +* @param[in] src_strd +* integer source stride, in bytes +* +* @param[in] ht +* integer number of rows to pad +* +* @param[in] pad_size +* integer number of bytes written in each row +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264_pad_right_luma(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 ht, + WORD32 pad_size) +{ + WORD32 row; + + for(row = 0; row < ht; row++) + { + memset(pu1_src,
*(pu1_src -1), pad_size); + + pu1_src += src_strd; + } +} + +/** +******************************************************************************* +* +* @brief pad (chroma block) at the right of a 2d array +* +* @par Description: +* For each of ht rows, the 16-bit value just before pu1_src is written to the pad_size / 2 16-bit positions starting at pu1_src +* +* @param[in] pu1_src +* UWORD8 pointer to the first position to be written in the first row; it is accessed through a UWORD16 pointer +* +* @param[in] src_strd +* integer source stride, in bytes (halved internally) +* +* @param[in] ht +* integer number of rows to pad +* +* @param[in] pad_size +* integer padding size in bytes (halved internally to a count of 16-bit values) +* +* @returns none +* +* @remarks NOTE(review): each 16-bit value is presumably one interleaved U/V pair (cf. u2_uv_val) - confirm against the caller +* +******************************************************************************* +*/ +void ih264_pad_right_chroma(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 ht, + WORD32 pad_size) +{ + WORD32 row, col; + UWORD16 u2_uv_val; + UWORD16 *pu2_src = (UWORD16 *)pu1_src; + + src_strd >>= 1; + pad_size >>= 1; + + for(row = 0; row < ht; row++) + { + u2_uv_val = pu2_src[-1]; + + for (col = 0; col < pad_size; col++) + { + pu2_src[col] = u2_uv_val; + } + + pu2_src += src_strd; + } +} + diff --git a/common/ih264_padding.h b/common/ih264_padding.h new file mode 100755 index 0000000..e4e18fb --- /dev/null +++ b/common/ih264_padding.h @@ -0,0 +1,74 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264_padding.h +* +* @brief +* Declarations for padding functions +* +* @author +* Ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _IH264_PADDING_H_ +#define _IH264_PADDING_H_ + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ +/* Common function type of the padding routines declared below */ +typedef void ih264_pad(UWORD8 *, WORD32, WORD32, WORD32); + +/* C function declarations */ +ih264_pad ih264_pad_top; +ih264_pad ih264_pad_bottom; +ih264_pad ih264_pad_left_luma; +ih264_pad ih264_pad_left_chroma; +ih264_pad ih264_pad_right_luma; +ih264_pad ih264_pad_right_chroma; + +/* A9 Q function declarations */ +ih264_pad ih264_pad_top_a9q; +ih264_pad ih264_pad_left_luma_a9q; +ih264_pad ih264_pad_left_chroma_a9q; +ih264_pad ih264_pad_right_luma_a9q; +ih264_pad ih264_pad_right_chroma_a9q; + +/* AV8 function declarations */ +ih264_pad ih264_pad_top_av8; +ih264_pad ih264_pad_left_luma_av8; +ih264_pad ih264_pad_left_chroma_av8; +ih264_pad ih264_pad_right_luma_av8; +ih264_pad ih264_pad_right_chroma_av8; + +/* SSSE3 function declarations (left/right padding only) */ +ih264_pad ih264_pad_left_luma_ssse3; +ih264_pad ih264_pad_left_chroma_ssse3; +ih264_pad ih264_pad_right_luma_ssse3; +ih264_pad ih264_pad_right_chroma_ssse3; + +#endif /*_IH264_PADDING_H_*/ diff --git a/common/ih264_resi_trans.h b/common/ih264_resi_trans.h new file mode 100755 index 0000000..ee0add3 --- /dev/null +++ b/common/ih264_resi_trans.h @@ -0,0 +1,70 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_resi_trans.h +* +* @brief +* Function declarations for residue and forward transform +* +* @par List of Functions: +* - ih264_resi_trans_ft +* - ih264_resi_trans_4x4 +* - ih264_resi_trans_8x8 +* - ih264_resi_trans_4x4_a9 +* - ih264_resi_trans_8x8_a9 +* +* @author +* Ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264_RESI_TRANS_H_ +#define IH264_RESI_TRANS_H_ + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +typedef void ih264_resi_trans_ft(UWORD8 *pu1_src, + UWORD8 *pu1_pred, + WORD32 *pi4_out, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 out_strd); + +/*C functions*/ + +ih264_resi_trans_ft ih264_resi_trans_4x4; + +ih264_resi_trans_ft ih264_resi_trans_8x8; + +/*A9 functions*/ + +ih264_resi_trans_ft ih264_resi_trans_4x4_a9; + +ih264_resi_trans_ft ih264_resi_trans_8x8_a9; + +#endif /* IH264_RESI_TRANS_H_ */ diff --git a/common/ih264_resi_trans_quant.c b/common/ih264_resi_trans_quant.c new file mode 100755 index 0000000..cf1d43c --- /dev/null +++
b/common/ih264_resi_trans_quant.c @@ -0,0 +1,814 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_resi_trans_quant.c + * + * @brief + * Contains function definitions for single stage forward transform for H.264 + * It will calculate the residue, do the cf and then do quantization + * + * @author + * Ittiam + * + * @par List of Functions: + * - ih264_resi_trans_quant_4x4() + * - ih264_resi_trans_quant_chroma_4x4 + * - ih264_hadamard_quant_4x4 + * - ih264_hadamard_quant_2x2_uv + * - ih264_resi_trans_quant_8x8 + * + * @remarks + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stddef.h> + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264_macros.h" +#include "ih264_trans_macros.h" +#include "ih264_trans_data.h" +#include
"ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" + +/** + ******************************************************************************* + * + * @brief + * This function performs forward transform and quantization on a 4*4 block + * + * @par Description: + * The function accepts source buffer and estimation buffer. From these, it + * computes the residue. This residue is then transformed and quantized. + * The transform and quantization are computed in place, using the output + * buffer pi2_out (16 contiguous values, 4 per row) for this. + * + * @param[in] pu1_src + * Pointer to source sub-block + * + * @param[in] pu1_pred + * Pointer to prediction sub-block + * + * @param[out] pi2_out + * Pointer to the output sub-block: holds the transformed residue, then the quantized values + * + * @param[in] src_strd + * Source stride + * + * @param[in] pred_strd + * Prediction stride + * + * @param[out] pi2_alt_dc_addr + * Receives the transformed DC term as it is before FWD_QUANT is applied to it + * + * @param[in] u4_qbits + * QP_BITS_h264_4x4 + floor(QP/6) + * + * @param[in] pu2_threshold_matrix + * Pointer to Forward Quant Threshold Matrix + * + * @param[in] pu2_scale_matrix + * Pointer to Forward Quant Scale Matrix + * + * @param[in] u4_round_factor + * Quantization Round factor + * + * @param[out] pu1_nnz + * Total non-zero coefficients in the current sub-block + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264_resi_trans_quant_4x4(UWORD8 *pu1_src, + UWORD8 *pu1_pred, + WORD16 *pi2_out, + WORD32 src_strd, + WORD32 pred_strd, + const UWORD16 *pu2_scale_matrix, + const UWORD16 *pu2_threshold_matrix, + UWORD32 u4_qbits, + UWORD32 u4_round_factor, + UWORD8 *pu1_nnz, + WORD16 *pi2_alt_dc_addr) +{ + UWORD32 i; + WORD32 x0, x1, x2, x3, x4, x5, x6, x7; + WORD32 i4_value, i4_sign; + UWORD32 u4_abs_value; + WORD16 *pi2_out_tmp = pi2_out; + UWORD32 u4_nonzero_coeff = 0; + + for (i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + /* computing prediction error (residue) */ + x4 = pu1_src[0] - pu1_pred[0]; + x5 = pu1_src[1] - pu1_pred[1]; +
x6 = pu1_src[2] - pu1_pred[2]; + x7 = pu1_src[3] - pu1_pred[3]; + + /* Horizontal transform */ + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + pi2_out_tmp[0] = x0 + x1; + pi2_out_tmp[1] = (x3 <<1) + x2; + pi2_out_tmp[2] = x0 - x1; + pi2_out_tmp[3] = x3 - (x2<<1); + + /* pointing to next row; */ + pu1_src += src_strd; + pu1_pred += pred_strd; + pi2_out_tmp += 4; + + } + pi2_out_tmp = pi2_out; + for (i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + + /* Vertical transform and quantization */ + x4 = pi2_out_tmp[0]; + x5 = pi2_out_tmp[4]; + x6 = pi2_out_tmp[8]; + x7 = pi2_out_tmp[12]; + + + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + /* quantization is done in place */ + + i4_value = x0 + x1; + /* first column: export the DC term before it is quantized */ + if(i==0) + { + (*pi2_alt_dc_addr) = i4_value; + } + + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, u4_nonzero_coeff); + pi2_out_tmp[0] = i4_value; + + + i4_value = (x3 << 1) + x2; + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits, u4_nonzero_coeff); + pi2_out_tmp[4] = i4_value; + + + i4_value = x0 - x1; + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits, u4_nonzero_coeff); + pi2_out_tmp[8] = i4_value; + + + i4_value = x3 - (x2 << 1); + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor, u4_qbits, u4_nonzero_coeff); + pi2_out_tmp[12] = i4_value; + + pi2_out_tmp ++; + pu2_scale_matrix++; + pu2_threshold_matrix++; + } + + /* Return total nonzero coefficients in the current sub block */ + *pu1_nnz = u4_nonzero_coeff; +} +/** + ******************************************************************************* + * + * @brief + * This function performs forward transform and quantization on a 4*4 chroma block + * with interleaved values + * + * @par Description: + * The function accepts source
buffer and estimation buffer. From these, it + * computes the residue. This residue is then transformed and quantized. + * The transform and quantization are computed in place, using the output + * buffer pi2_out for this. Only every second byte of each source and + * prediction row (offsets 0, 2, 4 and 6) is read. + * + * @param[in] pu1_src + * Pointer to source sub-block + * + * @param[in] pu1_pred + * Pointer to prediction sub-block + * + * @param[out] pi2_out + * Pointer to the output sub-block: holds the transformed residue, then the quantized values + * + * @param[in] src_strd + * Source stride + * + * @param[in] pred_strd + * Prediction stride + * + * @param[out] pu1_dc_alt_addr + * Receives the transformed DC term as it is before FWD_QUANT is applied to it (WORD16 pointer despite the pu1 prefix) + * + * @param[in] u4_qbits + * QP_BITS_h264_4x4 + floor(QP/6) + * + * @param[in] pu2_threshold_matrix + * Pointer to Forward Quant Threshold Matrix + * + * @param[in] pu2_scale_matrix + * Pointer to Forward Quant Scale Matrix + * + * @param[in] u4_round_factor + * Quantization Round factor + * + * @param[out] pu1_nnz + * Total non-zero coefficients in the current sub-block + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264_resi_trans_quant_chroma_4x4(UWORD8 *pu1_src, + UWORD8 *pu1_pred, + WORD16 *pi2_out, + WORD32 src_strd, + WORD32 pred_strd, + const UWORD16 *pu2_scale_matrix, + const UWORD16 *pu2_threshold_matrix, + UWORD32 u4_qbits, + UWORD32 u4_round_factor, + UWORD8 *pu1_nnz, + WORD16 *pu1_dc_alt_addr) +{ + UWORD32 i; + WORD32 x0, x1, x2, x3, x4, x5, x6, x7; + WORD32 i4_value, i4_sign; + UWORD32 u4_abs_value; + WORD16 *pi2_out_tmp = pi2_out; + UWORD32 u4_nonzero_coeff = 0; + + for (i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + /* computing prediction error (residue) from every second byte */ + x4 = pu1_src[0] - pu1_pred[0]; + x5 = pu1_src[2] - pu1_pred[2]; + x6 = pu1_src[4] - pu1_pred[4]; + x7 = pu1_src[6] - pu1_pred[6]; + + /* Horizontal transform */ + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + pi2_out_tmp[0] = x0 + x1; + pi2_out_tmp[1] = (x3 <<1) + x2; + pi2_out_tmp[2] = x0 - x1; + pi2_out_tmp[3] = x3 - (x2<<1); + + /*
pointing to next row; */ + pu1_src += src_strd; + pu1_pred += pred_strd; + pi2_out_tmp += 4; + + } + pi2_out_tmp = pi2_out; + for (i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + + /* Vertical transform and quantization */ + x4 = pi2_out_tmp[0]; + x5 = pi2_out_tmp[4]; + x6 = pi2_out_tmp[8]; + x7 = pi2_out_tmp[12]; + + + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + /* quantization is done in place */ + + i4_value = x0 + x1; + /* first column: export the DC term before it is quantized */ + if(i==0) + { + *pu1_dc_alt_addr = i4_value; + } + + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], + pu2_scale_matrix[0], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[0] = i4_value; + + i4_value = (x3 << 1) + x2; + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[4], + pu2_scale_matrix[4], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[4] = i4_value; + + i4_value = x0 - x1; + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[8], + pu2_scale_matrix[8], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[8] = i4_value; + + i4_value = x3 - (x2 << 1); + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[12], + pu2_scale_matrix[12], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[12] = i4_value; + + pi2_out_tmp ++; + pu2_scale_matrix++; + pu2_threshold_matrix++; + } + + /* Return total nonzero coefficients in the current sub block */ + *pu1_nnz = u4_nonzero_coeff; +} + +/** + ******************************************************************************* + * + * @brief + * This function performs forward hadamard transform and quantization on a 4*4 block + * + * @par Description: + * The function takes a 4x4 block of 16-bit values. It applies a horizontal + * and then a vertical add/subtract pass to it. Each result of the vertical + * pass is halved and then quantized. Intermediate and final values are + * written to the destination buffer.
+ * + * @param[in] pi2_src + * Pointer to the 16 input values, read 4 per row + * + * @param[out] pi2_dst + * Pointer to the 16 output values; also holds the result of the horizontal pass + * + * @param[in] u4_qbits + * QP_BITS_h264_4x4 + floor(QP/6) + * + * @param[in] pu2_threshold_matrix + * Pointer to Forward Quant Threshold Matrix + * + * @param[in] pu2_scale_matrix + * Pointer to Forward Quant Scale Matrix + * + * @param[in] u4_round_factor + * Quantization Round factor + * + * @param[out] pu1_nnz + * pu1_nnz[0] is cleared on entry and then passed to FWD_QUANT as the non-zero coefficient counter + * + * @returns + * + * @remarks + * Every coefficient is quantized with pu2_threshold_matrix[0] and pu2_scale_matrix[0] + * + */ + +void ih264_hadamard_quant_4x4(WORD16 *pi2_src, + WORD16 *pi2_dst, + const UWORD16 *pu2_scale_matrix, + const UWORD16 *pu2_threshold_matrix, + UWORD32 u4_qbits, + UWORD32 u4_round_factor, + UWORD8 *pu1_nnz) +{ + WORD32 i; + WORD32 x0,x1,x2,x3,x4,x5,x6,x7,i4_value; + UWORD32 u4_abs_value; + WORD32 i4_sign; + + *pu1_nnz = 0; + + for (i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + x4 = pi2_src[0]; + x5 = pi2_src[1]; + x6 = pi2_src[2]; + x7 = pi2_src[3]; + + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + pi2_dst[0] = x0 + x1; + pi2_dst[1] = x3 + x2; + pi2_dst[2] = x0 - x1; + pi2_dst[3] = x3 - x2; + + pi2_src += 4; + pi2_dst += 4; + } + + /* Vertical transform and quantization; rewind pi2_dst to the start of the block first */ + pi2_dst -= SUB_BLK_WIDTH_4x4<<2; + + for (i = 0; i < SUB_BLK_WIDTH_4x4; i++) + { + x4 = pi2_dst[0]; + x5 = pi2_dst[4]; + x6 = pi2_dst[8]; + x7 = pi2_dst[12] ; + + x0 = x4 + x7; + x1 = x5 + x6; + x2 = x5 - x6; + x3 = x4 - x7; + + + i4_value = (x0 + x1) >> 1; + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], +
pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]); + pi2_dst[4] = i4_value; + + i4_value = (x0 - x1) >> 1; + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], + pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]); + pi2_dst[8] = i4_value; + + i4_value = (x3 - x2) >> 1; + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], + pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]); + pi2_dst[12] = i4_value; + + pi2_dst ++; + } +} + +/** + ******************************************************************************* + * + * @brief + * This function performs forward hadamard transform and quantization on a 2*2 block + * for both U and V planes + * + * @par Description: + * The function processes two planes in turn. For each plane it reads 4 + * 16-bit values from pi2_src, combines them with a 2x2 add/subtract pass, + * quantizes the 4 results and writes them to pi2_dst. Both pointers advance + * by 4 values per plane. + * + * @param[in] pi2_src + * Pointer to the 8 input values (4 per plane) + * + * @param[out] pi2_dst + * Pointer to the 8 output values (4 per plane) + * + * @param[in] u4_qbits + * QP_BITS_h264_4x4 + floor(QP/6) + * + * @param[in] pu2_threshold_matrix + * Pointer to Forward Quant Threshold Matrix + * + * @param[in] pu2_scale_matrix + * Pointer to Forward Quant Scale Matrix + * + * @param[in] u4_round_factor + * Quantization Round factor + * + * @param[out] pu1_nnz + * Per-plane non-zero coefficient counters + * + * @returns + * + * @remarks + * pu1_nnz[0] and pu1_nnz[1] are cleared and then passed to FWD_QUANT as the counters of the first and second plane. + * Every coefficient is quantized with pu2_threshold_matrix[0] and pu2_scale_matrix[0] + * + */ + +void ih264_hadamard_quant_2x2_uv(WORD16 *pi2_src, + WORD16 *pi2_dst, + const UWORD16 *pu2_scale_matrix, + const UWORD16 *pu2_threshold_matrix, + UWORD32 u4_qbits, + UWORD32 u4_round_factor, +
UWORD8 *pu1_nnz) +{ + WORD32 x0, x1, x2, x3, x4, x5, x6, x7; + WORD32 i4_value, i4_sign, plane; + UWORD32 u4_abs_value; + + for(plane = 0; plane < 2; plane++) + { + pu1_nnz[plane] = 0; + + /* Horizontal transform */ + x4 = pi2_src[0]; + x5 = pi2_src[1]; + x6 = pi2_src[2]; + x7 = pi2_src[3]; + + x0 = x4 + x5; + x1 = x4 - x5; + x2 = x6 + x7; + x3 = x6 - x7; + + /* Vertical transform and quantization */ + i4_value = (x0 + x2); + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], + pu2_scale_matrix[0], u4_round_factor, u4_qbits, + pu1_nnz[plane]); + pi2_dst[0] = i4_value; + + i4_value = (x0 - x2); + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], + pu2_scale_matrix[0], u4_round_factor, u4_qbits, + pu1_nnz[plane]); + pi2_dst[2] = i4_value; + + i4_value = (x1 - x3); + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], + pu2_scale_matrix[0], u4_round_factor, u4_qbits, + pu1_nnz[plane]); + pi2_dst[3] = i4_value; + + i4_value = (x1 + x3); + FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], + pu2_scale_matrix[0], u4_round_factor, u4_qbits, + pu1_nnz[plane]); + pi2_dst[1] = i4_value; + + pi2_dst += 4; + pi2_src += 4; + + } +} + +/** + ******************************************************************************* + * + * @brief + * This function performs Single stage forward transform CF8 and quantization on 8*8 blocks + * for h.264 + * + * @par Description: + * Performs single stage 8x8 forward transform CF8 after calculating the residue + * The result is then quantized + * + * @param[in] pu1_src + * Input 8x8 pixels + * + * @param[in] pu1_pred + * Prediction 8x8 pixels + * + * @param[out] pi2_out + * Output 8x8 values, stored contiguously (8 per row) + * + * @param[in] src_strd + * Source stride + * + * @param[in] pred_strd + * stride for prediction buffer + * + * @param[in] pu2_scale_matrix + * Pointer to Forward Quant Scale Matrix + * + * @param[in] pu2_threshold_matrix + * Pointer to Forward Quant Threshold Matrix + * + * @param[in] u4_qbits + * Quantization shift passed to FWD_QUANT + * + * @param[in]
u4_round_factor + * Quantization Round factor + * + * @param[out] pu1_nnz + * Total non-zero coefficients in the current block + * + * @param[in] pu1_dc_alt_addr + * Not used by this function + * + * @returns Void + * + * + ******************************************************************************* + */ +void ih264_resi_trans_quant_8x8(UWORD8 *pu1_src, + UWORD8 *pu1_pred, + WORD16 *pi2_out, + WORD32 src_strd, + WORD32 pred_strd, + const UWORD16 *pu2_scale_matrix, + const UWORD16 *pu2_threshold_matrix, + UWORD32 u4_qbits, + UWORD32 u4_round_factor, + UWORD8 *pu1_nnz, + WORD16 *pu1_dc_alt_addr) + +{ + WORD16 *pi2_out_tmp = pi2_out; + UWORD32 i; + WORD32 a0, a1, a2, a3, a4, a5, a6, a7; + WORD32 r0, r1, r2, r3, r4, r5, r6, r7; + WORD32 i4_sign; + UWORD32 u4_abs_value; + UWORD32 u4_nonzero_coeff = 0; + + UNUSED(pu1_dc_alt_addr); + + /*Horizontal transform */ + /* the a's and r's are reused for several intermediate values */ + /* to avoid declaring more variables */ + for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i) + { + r0 = pu1_src[0]; + r0 -= pu1_pred[0]; + r1 = pu1_src[1]; + r1 -= pu1_pred[1]; + r2 = pu1_src[2];r2 -= pu1_pred[2]; + r3 = pu1_src[3];r3 -= pu1_pred[3]; + r4 = pu1_src[4];r4 -= pu1_pred[4]; + r5 = pu1_src[5];r5 -= pu1_pred[5]; + r6 = pu1_src[6];r6 -= pu1_pred[6]; + r7 = pu1_src[7];r7 -= pu1_pred[7]; + + + a0 = r0 + r7; + a1 = r1 + r6; + a2 = r2 + r5; + a3 = r3 + r4; + + a4 = a0 + a3; + a5 = a1 + a2; + a6 = a0 - a3; + a7 = a1 - a2; + + pi2_out_tmp[0] = a4 + a5; + + pi2_out_tmp[2] = a6 + (a7>>1); + pi2_out_tmp[4] = a4 - a5; + pi2_out_tmp[6] = (a6>>1) - a7; + + a0 = r0 - r7; + a1 = r1 - r6; + a2 = r2 - r5; + a3 = r3 - r4; + + a4 = a1 + a2 + ((a0>>1) + a0); + a5 = a0 - a3 - ((a2>>1) + a2); + a6 = a0 + a3 - ((a1>>1) + a1); + a7 = a1 - a2 + ((a3>>1) + a3); + + pi2_out_tmp[1] = a4 + (a7>>2); + pi2_out_tmp[3] = a5 + (a6>>2); + pi2_out_tmp[5] = a6 - (a5>>2); + pi2_out_tmp[7] = (a4>>2) - a7; + + pu1_src += src_strd; + pu1_pred += pred_strd; + pi2_out_tmp += 8; + } + + /*vertical transform and quant */ + + pi2_out_tmp = pi2_out; + + for (i
= 0; i < SUB_BLK_WIDTH_8x8; ++i) + { + + r0 = pi2_out_tmp[0]; + r1 = pi2_out_tmp[8]; + r2 = pi2_out_tmp[16]; + r3 = pi2_out_tmp[24]; + r4 = pi2_out_tmp[32]; + r5 = pi2_out_tmp[40]; + r6 = pi2_out_tmp[48]; + r7 = pi2_out_tmp[56]; + + a0 = r0 + r7; + a1 = r1 + r6; + a2 = r2 + r5; + a3 = r3 + r4; + + a4 = a0 + a3; + a5 = a1 + a2; + a6 = a0 - a3; + a7 = a1 - a2; + + a0 = r0 - r7; + a1 = r1 - r6; + a2 = r2 - r5; + a3 = r3 - r4; + + r0 = a4 + a5; + r2 = a6 + (a7>>1); + r4 = a4 - a5; + r6 = (a6>>1) - a7; + + a4 = a1 + a2 + ((a0>>1) + a0); + a5 = a0 - a3 - ((a2>>1) + a2); + a6 = a0 + a3 - ((a1>>1) + a1); + a7 = a1 - a2 + ((a3>>1) + a3); + + r1 = a4 + (a7>>2); + r3 = a5 + (a6>>2); + r5 = a6 - (a5>>2); + r7 = (a4>>2) - a7; + + FWD_QUANT(r0, u4_abs_value, i4_sign, pu2_threshold_matrix[0], + pu2_scale_matrix[0], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[0] = r0; + + FWD_QUANT(r1, u4_abs_value, i4_sign, pu2_threshold_matrix[8], + pu2_scale_matrix[8], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[8] = r1; + + FWD_QUANT(r2, u4_abs_value, i4_sign, pu2_threshold_matrix[16], + pu2_scale_matrix[16], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[16] = r2; + + FWD_QUANT(r3, u4_abs_value, i4_sign, pu2_threshold_matrix[24], + pu2_scale_matrix[24], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[24] = r3; + + FWD_QUANT(r4, u4_abs_value, i4_sign, pu2_threshold_matrix[32], + pu2_scale_matrix[32], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[32] = r4; + + FWD_QUANT(r5, u4_abs_value, i4_sign, pu2_threshold_matrix[40], + pu2_scale_matrix[40], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[40] = r5; + + FWD_QUANT(r6, u4_abs_value, i4_sign, pu2_threshold_matrix[48], + pu2_scale_matrix[48], u4_round_factor, u4_qbits, + u4_nonzero_coeff); + pi2_out_tmp[48] = r6; + + FWD_QUANT(r7, u4_abs_value, i4_sign, pu2_threshold_matrix[56], + pu2_scale_matrix[56], u4_round_factor, u4_qbits, + u4_nonzero_coeff); +
pi2_out_tmp[56] = r7; + + pi2_out_tmp++; + pu2_scale_matrix++; + pu2_threshold_matrix++; + } + /* Return total nonzero coefficients in the current block */ + *pu1_nnz = u4_nonzero_coeff; +} diff --git a/common/ih264_size_defs.h b/common/ih264_size_defs.h new file mode 100755 index 0000000..e2a8b76 --- /dev/null +++ b/common/ih264_size_defs.h @@ -0,0 +1,85 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_size_defs.h + * + * @brief + * Contains size-related constant macros (block widths, block counts, constant bit shifts and scratch buffer sizes) used by H264 transform, quant and inverse quant + * + * @author + * Ittiam + * + * @remarks + * + ********************************************************************************/ + +#ifndef IH264_SIZE_DEFS_H_ +#define IH264_SIZE_DEFS_H_ + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/*-----------------------Primary defs--------------------------*/ + +/*Width of a 4x4 block*/ +#define SUB_BLK_WIDTH_4x4 4 + +/*Width of an 8x8 block*/ +#define SUB_BLK_WIDTH_8x8 8 + +/*Number of chroma blocks in a row of coeffs*/ +#define SUB_BLK_COUNT_CHROMA_4x4_420 2 + +/*Number of luma blocks in a row of coeffs*/ +#define SUB_BLK_COUNT_LUMA_4x4 4 + +/*Number of chroma planes*/ +#define NUM_CHROMA_PLANES 2 + +/*Constant bit shifts*/ +#define QP_BITS_h264_4x4 15 +#define QP_BITS_h264_8x8 16 + + +/*---------------------------Derived defs------------------------*/ + +/*Number of coefficients in a 4x4 block. NOTE(review): the replacement text is unparenthesized and ends with a semicolon, so the macro is only safe as the last token of a statement*/ +#define COFF_CNT_SUB_BLK_4x4 SUB_BLK_WIDTH_4x4*SUB_BLK_WIDTH_4x4; + +/*Number of 4x4 luma blocks in an MB. NOTE(review): replacement text is unparenthesized*/ +#define SUB_BLK_LUMA_4X4_CNT_MB SUB_BLK_COUNT_LUMA_4x4 * SUB_BLK_COUNT_LUMA_4x4 + +/*Number of 4x4 chroma blocks in an MB: per plane, then for both chroma planes. NOTE(review): replacement texts are unparenthesized*/ +#define SUB_BLK_CHROMA_4X4_CNT_MB SUB_BLK_COUNT_CHROMA_4x4_420 * SUB_BLK_COUNT_CHROMA_4x4_420 +#define SUB_BLK_CHROMA_4X4_CNT_MB_BIPLANE SUB_BLK_CHROMA_4X4_CNT_MB*NUM_CHROMA_PLANES + +/*Size of trans buff = 4x4 for DC block + 4x4 * coffs for 4x4 ac blocks. NOTE(review): the first line of the expression below ends in a multiply followed by a unary plus, so it evaluates to the product of all six factors (4096) rather than the sum described here (16 plus 256) - confirm intent and callers before changing*/ +#define SIZE_TRANS_BUFF (SUB_BLK_WIDTH_4x4*SUB_BLK_WIDTH_4x4*+ \ + SUB_BLK_WIDTH_4x4*SUB_BLK_WIDTH_4x4* \ + SUB_BLK_COUNT_LUMA_4x4*SUB_BLK_COUNT_LUMA_4x4) + +/*memory size = memory size of 4x4 block of resi coff + 4x4 for DC coff block */ +#define SIZE_TMP_BUFF_ITRANS
((SUB_BLK_WIDTH_4x4*SUB_BLK_WIDTH_4x4) +\ + (SUB_BLK_WIDTH_4x4*SUB_BLK_WIDTH_4x4)) + +#endif /* IH264_SIZE_DEFS_H_ */ diff --git a/common/ih264_structs.h b/common/ih264_structs.h new file mode 100755 index 0000000..fa4e142 --- /dev/null +++ b/common/ih264_structs.h @@ -0,0 +1,1722 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ + +/** + ******************************************************************************* + * @file + * ih264_structs.h + * + * @brief + * Structure definitions used in the code (MB and PU info, quant params, HRD/VUI, SPS, PPS, slice header) + * + * @author + * Ittiam + * + * @par List of Functions: + * + * @remarks + * None + * + ******************************************************************************* + */ + +#ifndef _IH264_STRUCTS_H_ +#define _IH264_STRUCTS_H_ + +/** MB Type info for Intra MBs */ +typedef struct +{ + UWORD32 u4_num_mbpart; + MBPART_PREDMODE_T e_mbpart_predmode; + MBMODES_I16x16 e_intra_predmode; + UWORD32 u4_cpb_chroma; + UWORD32 u4_cpb_luma; +}intra_mbtype_info_t; + +/** MB Type info for Inter MBs */ +typedef struct +{ + UWORD32 u4_num_mbpart; + MBPART_PREDMODE_T e_mbpart_predmode_0; + MBPART_PREDMODE_T e_mbpart_predmode_1; + UWORD32 u4_mbpart_wd; + UWORD32 u4_mbpart_ht; +}inter_mbtype_info_t; + + +/** Sub MB Type info for Inter MBs */ +typedef struct +{ + UWORD32 u4_num_mbpart; + MBPART_PREDMODE_T e_mbpart_predmode; + UWORD32 u4_mbpart_wd; + UWORD32 u4_mbpart_ht; +}submbtype_info_t; + +/** + * Picture buffer: luma and chroma plane pointers plus POC, timestamp and reference bookkeeping for one picture + */ +typedef struct +{ + UWORD8* pu1_luma; + UWORD8* pu1_chroma; + + WORD32 i4_abs_poc; + WORD32 i4_poc_lsb; + + + /** Lower 32 bit of time stamp */ + UWORD32 u4_timestamp_low; + + /** Upper 32 bit of time stamp */ + UWORD32 u4_timestamp_high; + + WORD32 i4_used_as_ref; + + /** + * frame_num in the slice header + */ + WORD32 i4_frame_num; + + /** + * Long-term frame idx + * TODO: store in frame_num + */ + WORD32 i4_long_term_frame_idx; + + /* + * Field type - 0: Top Field + * 1: Bottom Field + */ + WORD8 i1_field_type; + + /** + * buffer ID from frame buffer manager + */ + WORD32 i4_buf_id; + +} pic_buf_t; + + +/** + * Reference list entry: untyped pointers to a picture buffer and an MV buffer + */ +typedef struct +{ + void *pv_pic_buf; + + void *pv_mv_buf; + +} ref_list_t; + + +/** + * Motion vector + */ +typedef struct +{ + /** + * Horizontal Motion Vector + */ + WORD16 i2_mvx; + + /** + * Vertical Motion Vector + */ + WORD16 i2_mvy; +} mv_t; +
+/*****************************************************************************/ +/* Following results in packed 48 bit structure. If mv_t included */ +/* ref_pic_buf_id, then 8 bits will be wasted for each mv for aligning. */ +/* Also using mv_t as elements directly instead of a pointer to l0 and l1 */ +/* mvs. Since pointer takes 4 bytes and MV itself is 4 bytes. It does not */ +/* really help using pointers. */ +/*****************************************************************************/ + +/** + * PU Motion Vector info: L0 and L1 motion vectors with their ref indices and ref pic buffer ids. NOTE(review): the banner above says 48 bits, but two mv_t and four WORD8 members come to 96 bits if WORD16 and WORD8 are 16 and 8 bits wide - confirm + */ +typedef struct +{ + /** + * L0 Motion Vector + */ + mv_t s_l0_mv; + + /** + * L1 Motion Vector + */ + mv_t s_l1_mv; + + /** + * L0 Ref index + */ + WORD8 i1_l0_ref_idx; + + /** + * L1 Ref index + */ + WORD8 i1_l1_ref_idx; + + /** + * L0 Ref Pic Buf ID + */ + WORD8 i1_l0_ref_pic_buf_id; + + /** + * L1 Ref Pic Buf ID + */ + WORD8 i1_l1_ref_pic_buf_id; + +} pu_mv_t; + +/** + * PU information: motion vector info followed by bit-fields for position, size, intra flag and prediction mode + */ +typedef struct +{ + + /** + * Motion Vectors + */ + pu_mv_t s_mv; + + /** + * PU X position in terms of min PU (4x4) units + */ + UWORD32 b2_pos_x : 2; + + /** + * PU Y position in terms of min PU (4x4) units + */ + UWORD32 b2_pos_y : 2; + + /** + * PU width in pixels = (b2_wd + 1) << 2 + */ + UWORD32 b2_wd : 2; + + /** + * PU height in pixels = (b2_ht + 1) << 2 + */ + UWORD32 b2_ht : 2; + + /** + * Intra or Inter flag for each partition - 0 or 1 + */ + UWORD32 b1_intra_flag : 1; + + /** + * Prediction mode: PRED_L0, PRED_L1 or PRED_BI + */ + UWORD32 b2_pred_mode : 2; + +} pu_t; + + +/** + * MB information to be stored for entire frame + */ +typedef struct +{ + /** + * Transform size - 0: 4x4, 1: 8x8 + */ + UWORD32 b1_trans_size : 1; + + /** + * CBP - 4 bits for Y, 1 for U and 1 for V + */ + UWORD32 b6_cbp: 6; + + /** + * Intra pred sizes 0: 4x4, 1: 8x8, 2: 16x16 + */ + UWORD32 b2_intra_pred_size : 2; + + /** + * Flag to signal if the current MB is IPCM + */ + UWORD32 b1_ipcm : 1; + +}mb_t; +
+/*****************************************************************************/ +/* Info from last TU row of MB is stored in a row level neighbour buffer */ +/* , which will be used for Boundary Strength computation */ +/*****************************************************************************/ +/** + * MB neighbor info (slice index, packed CBF and QP of the bottom TU row) + */ +typedef struct +{ + /** + * Slice index of the mb + */ + UWORD16 u2_slice_idx; + + /*************************************************************************/ + /* CBF of bottom TU row (replicated in 4 pixel boundary) */ + /* MSB contains CBF of first TU in the last row and LSB contains CBF */ + /* of last TU in the last row */ + /*************************************************************************/ + /** + * CBF of bottom TU row + */ + UWORD16 u2_packed_cbf; + + /*************************************************************************/ + /* QP of bottom TU row (replicated at 8 pixel boundary (Since QP can */ + /* not change at less than min CU granularity) */ + /*************************************************************************/ + /** + * QP of bottom TU row + */ + UWORD8 u1_qp; + +} mb_top_ny_info_t; + +/** + * MB level context: neighbour pointers, PU and coefficient data pointers, MB position and boundary strength / QP pointers + */ +typedef struct _mb_ctxt_t +{ + /*************************************************************************/ + /* Tile boundary can be detected by looking at tile start x and tile */ + /* start y. And based on the tile, slice and frame boundary the */ + /* following will be initialized.
*/ + /*************************************************************************/ + /** + * Pointer to left MB context + */ + /* If not available, this will be set to NULL */ + struct _mb_ctxt_t *ps_mb_left; + + /** + * Pointer to top-left MB neighbour info (mb_top_ny_info_t) + */ + /* If not available, this will be set to NULL */ + mb_top_ny_info_t *ps_mb_ny_topleft; + + /** + * Pointer to top MB neighbour info (mb_top_ny_info_t) + */ + /* If not available, this will be set to NULL */ + mb_top_ny_info_t *ps_mb_ny_top; + + /** + * Pointer to top-right MB neighbour info (mb_top_ny_info_t) + */ + /* If not available, this will be set to NULL */ + mb_top_ny_info_t *ps_mb_ny_topright; + + /*************************************************************************/ + /* Pointer to PU data. */ + /* This points to a MV Bank stored at frame level. Though this */ + /* pointer can be derived by reading offset at frame level, it is */ + /* stored here for faster access. Can be removed if storage of MB */ + /* structure is critical */ + /*************************************************************************/ + /** + * Pointer to PU data + */ + pu_t *ps_pu; + + /*************************************************************************/ + /* Pointer to a PU map stored at frame level, */ + /* Though this pointer can be derived by multiplying MB address with */ + /* number of minTUs in a MB, it is stored here for faster access.
*/ + /* Can be removed if storage of MB structure is critical */ + /*************************************************************************/ + /** + * Pointer to a PU map stored at frame level + */ + UWORD8 *pu1_pu_map; + + /** + * Number of TUs filled in as_tu. NOTE(review): no as_tu member is declared in this struct - confirm what this count refers to + */ + /*************************************************************************/ + /* Having the first entry as 32 bit data, helps in keeping each of */ + /* the structures aligned to 32 bits at MB level */ + /*************************************************************************/ + WORD32 i4_tu_cnt; + + /** + * Pointer to transform coeff data + */ + /*************************************************************************/ + /* Following format is repeated for every coded TU */ + /* Luma Block */ + /* num_coeffs : 16 bits */ + /* zero_cols : 8 bits ( 1 bit per 4 columns) */ + /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */ + /* coeff_data : Non zero coefficients */ + /* Cb Block (only for last TU in 4x4 case else for every luma TU) */ + /* num_coeffs : 16 bits */ + /* zero_cols : 8 bits ( 1 bit per 4 columns) */ + /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */ + /* coeff_data : Non zero coefficients */ + /* Cr Block (only for last TU in 4x4 case else for every luma TU) */ + /* num_coeffs : 16 bits */ + /* zero_cols : 8 bits ( 1 bit per 4 columns) */ + /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */ + /* coeff_data : Non zero coefficients */ + /*************************************************************************/ + void *pv_coeff_data; + + /** + * Index of the slice to which the MB belongs + */ + WORD32 i4_slice_idx; + + /** + * MB column position + */ + WORD32 i4_pos_x; + + /** + * MB row position + */ + WORD32 i4_pos_y; + + /** + * Number of PUs filled in ps_pu + */ + WORD32 i4_pu_cnt; + + /** + * Index of current PU being processed in ps_pu + */ + /* Scratch variable set to 0 at the start of any PU processing function */ + WORD32
i4_pu_idx; + + /** + * Pointer to vertical boundary strength data + */ + /* Two bits per edge. + Stored in format. BS[15] | BS[14] | .. |BS[0]*/ + UWORD32 *pu4_vert_bs; + + /** + * Pointer to horizontal boundary strength data + */ + + /* Two bits per edge. + Stored in format. BS[15] | BS[14] | .. |BS[0]*/ + UWORD32 *pu4_horz_bs; + + /** + * Pointer to QP array, stored for each 8x8 pixels + */ + UWORD8 *pu1_qp; + + /** + * Pointer to current frame's pu_t array + */ + pu_t *ps_frm_pu; + + /** + * Pointer to current frame's pu_t index array, which stores starting index + * of pu_t for every MB + */ + UWORD32 *pu4_frm_pu_idx; + + /** + * Pointer to current frame's pu map array + */ + UWORD8 *pu1_frm_pu_map; + + /*************************************************************************/ + /* Need to add encoder specific elements for identifying the order of */ + /* coding for CU, TU and PU if any */ + /*************************************************************************/ +} mb_ctxt_t; + +/*************************************************************************/ +/* The following describes how each of the CU cases are handled */ +/*************************************************************************/ + +/*************************************************************************/ +/* For SKIP MB */ +/* One Inter PU with appropriate MV */ +/* One TU which says CBP is zero and size is 16x16 */ +/*************************************************************************/ + +/*************************************************************************/ +/* For Inter MB */ +/* M Inter PU with appropriate MVs (M between 1 to 4) */ +/* Number of TUs derived based on transform size */ +/*************************************************************************/ + +/*************************************************************************/ +/* For Intra MB */ +/* Number of TUs derived based on transform size */ +/* N Intra Modes are signaled along with coeff data at the start */
+/*************************************************************************/ + +/*************************************************************************/ +/* For Intra PCM MB */ +/* One TU which says ipcm is 1 */ +/*************************************************************************/ + + + +/** + * Structure to hold quantization parameters of an mb + */ +typedef struct +{ + + /* + * mb qp + */ + UWORD8 u1_mb_qp; + + /* + * mb qp / 6 + */ + UWORD8 u1_qp_div; + + /* + * mb qp mod 6 + */ + UWORD8 u1_qp_rem; + + /* + * QP bits + */ + UWORD8 u1_qbits; + + /* + * forward scale matrix + */ + const UWORD16 *pu2_scale_mat; + + /* + * threshold matrix for quantization + */ + UWORD16 *pu2_thres_mat; + + /* + * Threshold to compare the sad with + */ + UWORD16 *pu2_sad_thrsh; + + /* + * qp dependent rounding constant + */ + UWORD32 u4_dead_zone; + + /* + * inverse scale matrix + */ + const UWORD16 *pu2_iscale_mat; + + /* + * Weight matrix in iquant + */ + UWORD16 *pu2_weigh_mat; + +}quant_params_t; + +/** + * Structure to hold NAL unit header info (NAL unit type and NAL ref idc) + */ + +typedef struct +{ + /** + * NAL unit type + */ + WORD8 i1_nal_unit_type; + + /** + * NAL ref idc + */ + WORD8 i1_nal_ref_idc; + + +} nal_header_t; + +/** + * HRD parameters Info + */ +typedef struct +{ + /** + * cpb_cnt_minus1 plus 1 specifies the number of alternative CPB specifications in the + * bitstream + */ + UWORD8 u1_cpb_cnt_minus1; + + /** + * (together with bit_rate_value_minus1) specifies the + * maximum input bit rate of the i-th CPB + */ + UWORD32 u4_bit_rate_scale; + + /** + * (together with cpb_size_value_minus1) specifies + * CPB size of the i-th CPB when the CPB operates + * at the access unit level + */ + UWORD32 u4_cpb_size_scale; + + /** + * (together with bit_rate_scale) specifies the + * maximum input bit rate for the i-th CPB + */ + UWORD32 au4_bit_rate_value_minus1[32]; + /** + * together with cpb_size_scale to specify the + * CPB size when the CPB operates at the access unit level.
+ */ + UWORD32 au4_cpb_size_value_minus1[32]; + + /** + * if 1, specifies that the HSS operates in a constant bit rate (CBR) mode + * if 0, specifies that the HSS operates in an intermittent bit rate mode + */ + UWORD8 au1_cbr_flag[32]; + + + /** + * specifies the length, in bits for initial cpb delay (nal/vcl)syntax in bp sei + */ + UWORD8 u1_initial_cpb_removal_delay_length_minus1; + + /** + * specifies the length, in bits for the cpb delay syntax in pt_sei + */ + UWORD8 u1_cpb_removal_delay_length_minus1; + + /** + * specifies the length, in bits, of the pic_dpb_output_delay syntax element in the pt SEI message + */ + UWORD8 u1_dpb_output_delay_length_minus1; + + /** + * Specifies length of the time offset parameter + */ + UWORD8 u1_time_offset_length; + +}hrd_params_t; + + +/** + * Structure to hold VUI parameters Info + */ +typedef struct +{ + /** + * indicates the presence of aspect_ratio + */ + UWORD8 u1_aspect_ratio_info_present_flag; + + /** + * specifies the aspect ratio of the luma samples + */ + UWORD8 u1_aspect_ratio_idc; + + /** + * width of the luma samples. user dependent + */ + UWORD16 u2_sar_width; + + /** + * Height of the luma samples. user dependent + */ + UWORD16 u2_sar_height; + + /** + * if 1, specifies that the overscan_appropriate_flag is present + * if 0, the preferred display method for the video signal is unspecified + */ + UWORD8 u1_overscan_info_present_flag; + + /** + * if 1, indicates that the cropped decoded pictures output + * are suitable for display using overscan + */ + UWORD8 u1_overscan_appropriate_flag; + + /** + * if 1 specifies that video_format, video_full_range_flag and + * colour_description_present_flag are present + */ + UWORD8 u1_video_signal_type_present_flag; + + /** + * pal, secam, ntsc, ...
+ */ + UWORD8 u1_video_format; + + /** + * indicates the black level and range of the luma and chroma signals + */ + UWORD8 u1_video_full_range_flag; + + /** + * if 1, specifies that colour_primaries, transfer_characteristics + * and matrix_coefficients are present + */ + UWORD8 u1_colour_description_present_flag; + + /** + * indicates the chromaticity coordinates of the source primaries + */ + UWORD8 u1_colour_primaries; + + /** + * indicates the opto-electronic transfer characteristic of the source picture + */ + UWORD8 u1_transfer_characteristics; + + /** + * the matrix coefficients used in deriving luma and chroma signals + * from the green, blue, and red primaries + */ + UWORD8 u1_matrix_coefficients; + + /** + * if 1, specifies that chroma_sample_loc_type_top_field and + * chroma_sample_loc_type_bottom_field are present + */ + UWORD8 u1_chroma_loc_info_present_flag; + + /** + * location of chroma samples (top field member, followed by bottom field member) + */ + UWORD8 u1_chroma_sample_loc_type_top_field; + + UWORD8 u1_chroma_sample_loc_type_bottom_field; + + /** + * Indicates the presence of the + * num_units_in_tick and time_scale fields + */ + UWORD8 u1_vui_timing_info_present_flag; + + /** + * Number of units that + * correspond to one increment of the + * clock.
Indicates the resolution + */ + UWORD32 u4_vui_num_units_in_tick; + + /** + * The number of time units that pass in one second + */ + UWORD32 u4_vui_time_scale; + + /** + * Flag indicating that time difference between two frames is a constant + */ + UWORD8 u1_fixed_frame_rate_flag; + + /** + * Indicates the presence of NAL HRD parameters + */ + UWORD8 u1_nal_hrd_parameters_present_flag; + + /** + * NAL level HRD parameters + */ + hrd_params_t s_nal_hrd_parameters; + + /** + * Indicates the presence of VCL HRD parameters + */ + UWORD8 u1_vcl_hrd_parameters_present_flag; + + /** + * VCL level HRD parameters + */ + hrd_params_t s_vcl_hrd_parameters; + + /** + * Specifies the HRD operational mode + */ + UWORD8 u1_low_delay_hrd_flag; + + /** + * Indicates presence of SEI messages which include pic_struct syntax element + */ + UWORD8 u1_pic_struct_present_flag; + + /** + * if 1, specifies that the following coded video sequence bitstream restriction parameters are present + */ + UWORD8 u1_bitstream_restriction_flag; + + /** + * if 0, indicates that no pel outside the pic boundaries and + * no sub-pels derived using pels outside the pic boundaries is used for inter prediction + */ + UWORD8 u1_motion_vectors_over_pic_boundaries_flag; + + /** + * Indicates a number of bytes not exceeded by the sum of the sizes of the VCL NAL units + * associated with any coded picture + */ + UWORD8 u1_max_bytes_per_pic_denom; + + /** + * Indicates an upper bound for the number of bits of macroblock_layer() data of any macroblock + */ + UWORD8 u1_max_bits_per_mb_denom; + + /** + * Indicate the maximum absolute value of a decoded horizontal MV component + * in quarter-pel luma units + */ + UWORD8 u1_log2_max_mv_length_horizontal; + + /** + * Indicate the maximum absolute value of a decoded vertical MV component + * in quarter-pel luma units + */ + UWORD8 u1_log2_max_mv_length_vertical; + + /** + * Max number of frames that are not synchronized in display and decode order + */ + UWORD8 u1_num_reorder_frames; + + /** + * specifies
required size of the HRD DPB in units of frame buffers. + */ + UWORD8 u1_max_dec_frame_buffering; + +} vui_t; + + +/** + * Structure to hold SPS (sequence parameter set) info + */ +typedef struct +{ + /** + * profile_idc + */ + UWORD8 u1_profile_idc; + + /** constraint_set0_flag */ + UWORD8 u1_constraint_set0_flag; + + /** constraint_set1_flag */ + UWORD8 u1_constraint_set1_flag; + + /** constraint_set2_flag */ + UWORD8 u1_constraint_set2_flag; + + /** constraint_set3_flag */ + UWORD8 u1_constraint_set3_flag; + + /** + * level_idc + */ + UWORD8 u1_level_idc; + + /** + * seq_parameter_set_id + */ + UWORD8 u1_sps_id; + + + /** + * chroma_format_idc + */ + UWORD8 u1_chroma_format_idc; + + /** + * residual_colour_transform_flag + */ + WORD8 i1_residual_colour_transform_flag; + + /** + * bit_depth_luma_minus8 + */ + WORD8 i1_bit_depth_luma; + + /** + * bit_depth_chroma_minus8 + */ + WORD8 i1_bit_depth_chroma; + + /** + * qpprime_y_zero_transform_bypass_flag + */ + WORD8 i1_qpprime_y_zero_transform_bypass_flag; + + /** + * seq_scaling_matrix_present_flag + */ + WORD8 i1_seq_scaling_matrix_present_flag; + + /** + * seq_scaling_list_present_flag, one entry per scaling list + */ + WORD8 ai1_seq_scaling_list_present_flag[8]; + + /** + * log2_max_frame_num_minus4 + */ + WORD8 i1_log2_max_frame_num; + + /** + * MaxFrameNum in the standard + * 1 << i1_log2_max_frame_num + */ + WORD32 i4_max_frame_num; + + /** + * pic_order_cnt_type + */ + WORD8 i1_pic_order_cnt_type; + + /** + * log2_max_pic_order_cnt_lsb_minus4 + */ + WORD8 i1_log2_max_pic_order_cnt_lsb; + + /** + * MaxPicOrderCntLsb in the standard.
+ * 1 << log2_max_pic_order_cnt_lsb_minus4 + */ + WORD32 i4_max_pic_order_cnt_lsb; + + /** + * delta_pic_order_always_zero_flag + */ + WORD8 i1_delta_pic_order_always_zero_flag; + + /** + * offset_for_non_ref_pic + */ + WORD32 i4_offset_for_non_ref_pic; + + /** + * offset_for_top_to_bottom_field + */ + WORD32 i4_offset_for_top_to_bottom_field; + + /** + * num_ref_frames_in_pic_order_cnt_cycle + */ + UWORD8 u1_num_ref_frames_in_pic_order_cnt_cycle; + + /** + * offset_for_ref_frame + */ + WORD32 ai4_offset_for_ref_frame[256]; + + /** + * max_num_ref_frames + */ + UWORD8 u1_max_num_ref_frames; + + /** + * gaps_in_frame_num_value_allowed_flag + */ + WORD8 i1_gaps_in_frame_num_value_allowed_flag; + + /** + * pic_width_in_mbs_minus1 + */ + WORD16 i2_pic_width_in_mbs_minus1; + + /** + * pic_height_in_map_units_minus1 + */ + WORD16 i2_pic_height_in_map_units_minus1; + + /** + * frame_mbs_only_flag + */ + WORD8 i1_frame_mbs_only_flag; + + /** + * mb_adaptive_frame_field_flag + */ + WORD8 i1_mb_adaptive_frame_field_flag; + + /** + * direct_8x8_inference_flag + */ + WORD8 i1_direct_8x8_inference_flag; + + /** + * frame_cropping_flag + */ + WORD8 i1_frame_cropping_flag; + + /** + * frame_crop_left_offset + */ + WORD16 i2_frame_crop_left_offset; + + /** + * frame_crop_right_offset + */ + WORD16 i2_frame_crop_right_offset; + + /** + * frame_crop_top_offset + */ + WORD16 i2_frame_crop_top_offset; + + /** + * frame_crop_bottom_offset + */ + WORD16 i2_frame_crop_bottom_offset; + + /** + * vui_parameters_present_flag + */ + WORD8 i1_vui_parameters_present_flag; + + /** + * VUI parameters structure + */ + vui_t s_vui_parameters; + + /** + * Flag to give status of SPS structure + */ + WORD8 i1_sps_valid; + + /** + * Coded Picture width (declared WORD32 despite the i2_ prefix) + */ + WORD32 i2_pic_wd; + + /** + * Coded Picture height (declared WORD32 despite the i2_ prefix) + */ + WORD32 i2_pic_ht; + + /** + * Picture width in MB units + */ + + WORD16 i2_pic_wd_in_mb; + + /** + * Picture height in MB units + */ + + WORD16 i2_pic_ht_in_mb; + + /** + *
useDefaultScalingMatrixFlag + */ + WORD8 ai1_use_default_scaling_matrix_flag[8]; + + /** + * 4x4 Scaling lists after inverse zig zag scan + */ + UWORD16 au2_4x4_weight_scale[6][16]; + + /** + * 8x8 Scaling lists after inverse zig zag scan + */ + UWORD16 au2_8x8_weight_scale[2][64]; + +} sps_t; + + +/** + * Structure to hold PPS (picture parameter set) info + */ +typedef struct +{ + /** + * pic_parameter_set_id + */ + UWORD8 u1_pps_id; + + /** + * seq_parameter_set_id + */ + UWORD8 u1_sps_id; + + /** + * Entropy coding : 0-VLC; 1 - CABAC + */ + UWORD8 u1_entropy_coding_mode_flag; + + /* + * Pic order present flag + */ + UWORD8 u1_pic_order_present_flag; + + /* + * Number of slice groups + */ + UWORD8 u1_num_slice_groups; + + /* + * Slice group map type + */ + UWORD8 u1_slice_group_map_type; + + /* + * Maximum reference picture index in the reference list 0 : range [0 - 31] + */ + WORD8 i1_num_ref_idx_l0_default_active; + + /* + * Maximum reference picture index in the reference list 1 : range [0 - 31] + */ + WORD8 i1_num_ref_idx_l1_default_active; + + /** + * weighted_pred_flag + */ + WORD8 i1_weighted_pred_flag; + + /** + * weighted_bipred_idc + */ + WORD8 i1_weighted_bipred_idc; + + /** + * pic_init_qp_minus26 + */ + WORD8 i1_pic_init_qp; + + /** + * pic_init_qs_minus26 + */ + WORD8 i1_pic_init_qs; + + /* + * Chroma QP offset w.r.t QPY {-12,12} + */ + WORD8 i1_chroma_qp_index_offset; + + /** + * deblocking_filter_control_present_flag + */ + WORD8 i1_deblocking_filter_control_present_flag; + + /** + * constrained_intra_pred_flag + */ + WORD8 i1_constrained_intra_pred_flag; + + /** + * redundant_pic_cnt_present_flag + */ + WORD8 i1_redundant_pic_cnt_present_flag; + + /** + * transform_8x8_mode_flag + */ + WORD8 i1_transform_8x8_mode_flag; + + /** + * pic_scaling_matrix_present_flag + */ + WORD8 i1_pic_scaling_matrix_present_flag; + + /* + * Second chroma QP offset + */ + WORD8 i1_second_chroma_qp_index_offset; + + + /** + * useDefaultScalingMatrixFlag + */ + WORD8
ai1_use_default_scaling_matrix_flag[8]; + + /** + * 4x4 Scaling lists after inverse zig zag scan + */ + UWORD16 au2_4x4_weight_scale[6][16]; + + /** + * 8x8 Scaling lists after inverse zig zag scan + */ + UWORD16 au2_8x8_weight_scale[2][64]; + + + /** + * pic_scaling_list_present_flag + */ + WORD8 ai1_pic_scaling_list_present_flag[8]; + + /** + * Flag to give status of PPS structure + */ + WORD8 i1_pps_valid; + + +} pps_t; + +/** + * MMCO commands and params. + */ +typedef struct +{ + /* memory management control operation command */ + UWORD8 u1_memory_management_control_operation; + + /* + * Contains difference of pic nums of short-term pic/frame + * 1. To signal it as "unused for reference" if mmco = 1 + * 2. To signal it as "used for long-term reference" if mmco = 3 + */ + UWORD32 u4_difference_of_pic_nums_minus1; + + /* Long-term pic num to be set as "unused for reference" */ + UWORD8 u1_long_term_pic_num; + + /* + * Assign a long-term idx to a picture as follows + * 1. Assign to a short-term pic if mmco = 3 + * 2. Assign to the current pic if mmco = 6 + */ + UWORD8 u1_long_term_frame_idx; + + /* + * The max long-term idx.
The long-term pics having idx above + * are set as "unused for reference" + */ + UWORD8 u1_max_long_term_frame_idx_plus1; + +}mmco_prms_t; + +/** + * Structure to hold Reference picture list modification info + */ +typedef struct +{ + /* ref_pic_list_modification_flag_l0 */ + WORD8 i1_ref_pic_list_modification_flag_l0; + + /* Modification required in list0 */ + WORD8 i1_modification_of_pic_nums_idc_l0[MAX_MODICATION_IDC]; + + /* + * The absolute difference between the picture number of + * the picture being moved to the current index in + * list0 and the picture number prediction value + */ + UWORD32 u4_abs_diff_pic_num_minus1_l0[MAX_MODICATION_IDC]; + + /* + * The long-term picture number of the picture being moved + * to the current index in list0 + */ + UWORD8 u1_long_term_pic_num_l0[MAX_MODICATION_IDC]; + + /* ref_pic_list_modification_flag_l1 */ + WORD8 i1_ref_pic_list_modification_flag_l1; + + /* Modification required in list1 */ + WORD8 i1_modification_of_pic_nums_idc_l1[MAX_MODICATION_IDC]; + + /* + * The absolute difference between the picture number of + * the picture being moved to the current index in + * list1 and the picture number prediction value + */ + UWORD32 u4_abs_diff_pic_num_minus1_l1[MAX_MODICATION_IDC]; + + /* + * The long-term picture number of the picture being moved + * to the current index in list1 + */ + UWORD8 u1_long_term_pic_num_l1[MAX_MODICATION_IDC]; +}rplm_t; + +/** + * Structure to hold Slice Header info + */ +typedef struct +{ + + /* + * nal_unit_type + */ + WORD8 i1_nal_unit_type; + + /* + * nal_unit_idc + */ + WORD8 i1_nal_unit_idc; + + /* + * first_mb_in_slice + */ + UWORD16 u2_first_mb_in_slice; + + /* + * slice_type + */ + UWORD8 u1_slice_type; + + /* + * pic_parameter_set_id + */ + UWORD8 u1_pps_id; + + /* + * frame_num + */ + WORD32 i4_frame_num; + + /* + * field_pic_flag + */ + WORD8 i1_field_pic_flag; + + /* + * bottom_field_flag + */ + WORD8 i1_bottom_field_flag; + + /* + * second_field + */ + WORD8
i1_second_field_flag; + + /* + * idr_pic_id + */ + UWORD16 u2_idr_pic_id ; + + /* + * pic_order_cnt_lsb + */ + UWORD16 i4_pic_order_cnt_lsb; + + /* + * delta_pic_order_cnt_bottom + */ + WORD32 i4_delta_pic_order_cnt_bottom; + + /* + * delta_pic_order_cnt + */ + WORD32 ai4_delta_pic_order_cnt[2]; + + /* + * redundant_pic_cnt + */ + UWORD8 u1_redundant_pic_cnt; + + /* + * direct_spatial_mv_pred_flag + */ + UWORD8 u1_direct_spatial_mv_pred_flag; + + /* + * num_ref_idx_active_override_flag + */ + UWORD8 u1_num_ref_idx_active_override_flag; + + /* + * num_ref_idx_l0_active + */ + WORD8 i1_num_ref_idx_l0_active; + + /* + * num_ref_idx_l1_active_minus1 + */ + WORD8 i1_num_ref_idx_l1_active; + + /* + * ref_pic_list_reordering_flag_l0 + */ + UWORD8 u1_ref_idx_reordering_flag_l0; + + /** + * Reference prediction list modification + */ + rplm_t s_rplm; + + /** + * L0 Reference pic lists + */ + ref_list_t as_ref_pic_list0[MAX_DPB_SIZE]; + + /** + * L1 Reference pic lists + */ + ref_list_t as_ref_pic_list1[MAX_DPB_SIZE]; + + /* + * weighted_bipred_idc + */ + WORD8 u1_weighted_bipred_idc; + + /* + * no_output_of_prior_pics_flag + */ + UWORD8 u1_no_output_of_prior_pics_flag; + + /* + * long_term_reference_flag + */ + UWORD8 u1_long_term_reference_flag; + + /* + * adaptive_ref_pic_marking_mode_flag + */ + UWORD8 u1_adaptive_ref_pic_marking_mode_flag; + + /* + * Array to structures to store mmco commands + * and parameters. 
+ */ + mmco_prms_t as_mmco_prms[MAX_MMCO_COMMANDS]; + + /* + * entropy_coding_mode_flag + */ + WORD8 u1_entropy_coding_mode_flag; + + /* + * cabac_init_idc + */ + WORD8 i1_cabac_init_idc; + + /* + * i1_slice_qp + */ + WORD8 i1_slice_qp; + + /* + * sp_for_switch_flag + */ + UWORD8 u1_sp_for_switch_flag; + + /* + * slice_qs_delta + */ + UWORD8 u1_slice_qs; + + /* + * disable_deblocking_filter_idc + */ + WORD8 u1_disable_deblocking_filter_idc; + + /* + * slice_alpha_c0_offset_div2 + */ + WORD8 i1_slice_alpha_c0_offset_div2; + + /* + * slice_beta_offset_div2 + */ + WORD8 i1_slice_beta_offset_div2; + + /* + * num_slice_groups_minus1 + */ + WORD8 u1_num_slice_groups_minus1; + + /* + * slice_group_change_cycle + */ + WORD8 u1_slice_group_change_cycle; + + /** + * Start MB X + */ + UWORD16 i2_mb_x; + + /** + * Start MB Y + */ + UWORD16 i2_mb_y; + + /** + * Absolute POC. Contains minimum of top and bottom POC. + */ + WORD32 i4_abs_pic_order_cnt; + + /** + * Absolute top POC. Contains top poc for frame or top + * field. Invalid for bottom field. + */ + WORD32 i4_abs_top_pic_order_cnt; + + /** + * Absolute top POC. Contains bottom poc for frame or bottom + * field. Invalid for top field. 
+ */ + WORD32 i4_abs_bottom_pic_order_cnt; + + /** Flag signaling if the current slice is ref slice */ + UWORD8 i1_nal_ref_idc; + + /** Flag to indicate if the current slice is MBAFF Frame */ + UWORD8 u1_mbaff_frame_flag; + + /** luma_log2_weight_denom */ + UWORD8 u1_luma_log2_weight_denom; + + /** chroma_log2_weight_denom */ + UWORD8 u1_chroma_log2_weight_denom; + + /** luma_weight_l0_flag */ + UWORD8 au1_luma_weight_l0_flag[MAX_DPB_SIZE]; + + /** luma_weight_l0 : (-128, 127 )is the range of weights + * when weighted pred is enabled, 128 is default value */ + WORD16 ai2_luma_weight_l0[MAX_DPB_SIZE]; + + /** luma_offset_l0 : (-128, 127 )is the range of offset + * when weighted pred is enabled, 0 is default value */ + WORD8 ai1_luma_offset_l0[MAX_DPB_SIZE]; + + /** chroma_weight_l0_flag */ + UWORD8 au1_chroma_weight_l0_flag[MAX_DPB_SIZE]; + + /** chroma_weight_l0 : (-128, 127 )is the range of weights + * when weighted pred is enabled, 128 is default value*/ + WORD16 ai2_chroma_weight_l0[MAX_DPB_SIZE][2]; + + /** chroma_offset_l0 : (-128, 127 )is the range of offset + * when weighted pred is enabled, 0 is default value*/ + WORD8 ai1_chroma_offset_l0[MAX_DPB_SIZE][2]; + + /** luma_weight_l0_flag */ + UWORD8 au1_luma_weight_l1_flag[MAX_DPB_SIZE]; + + /** luma_weight_l1 : (-128, 127 )is the range of weights + * when weighted pred is enabled, 128 is default value */ + WORD16 ai2_luma_weight_l1[MAX_DPB_SIZE]; + + /** luma_offset_l1 : (-128, 127 )is the range of offset + * when weighted pred is enabled, 0 is default value */ + WORD8 ai1_luma_offset_l1[MAX_DPB_SIZE]; + + /** chroma_weight_l1_flag */ + UWORD8 au1_chroma_weight_l1_flag[MAX_DPB_SIZE]; + + /** chroma_weight_l1 : (-128, 127 )is the range of weights + * when weighted pred is enabled, 128 is default value */ + WORD16 ai2_chroma_weight_l1[MAX_DPB_SIZE][2]; + + /** chroma_offset_l1 :(-128, 127 )is the range of offset + * when weighted pred is enabled, 0 is default value */ + WORD8 
ai1_chroma_offset_l1[MAX_DPB_SIZE][2]; +}slice_header_t; + + +/*****************************************************************************/ +/* The following can be used to type cast coefficient data that is stored */ +/* per subblock. Note that though i2_level is shown as an array that */ +/* holds 16 coefficients, only the first few entries will be valid. Next */ +/* subblocks data starts after the valid number of coefficients. Number */ +/* of non-zero coefficients will be derived using number of non-zero bits */ +/* in sig coeff map */ +/*****************************************************************************/ + +/** + * Structure to hold coefficient info for a 2x2 chroma DC transform + */ +typedef struct +{ + /** + * significant coefficient map + */ + UWORD8 u1_sig_coeff_map; + + /** + * sub block position + */ + UWORD8 u1_subblk_pos; + + /** + * holds coefficients + */ + WORD16 ai2_level[2 * 2]; +}tu_sblk2x2_coeff_data_t; + +/** + * Structure to hold coefficient info for a 4x4 transform + */ +typedef struct +{ + /** + * significant coefficient map + */ + UWORD16 u2_sig_coeff_map; + + /** + * sub block position + */ + UWORD16 u2_subblk_pos; + + /** + * holds coefficients + */ + WORD16 ai2_level[SUBBLK_COEFF_CNT]; +}tu_sblk4x4_coeff_data_t; + +/** + * Structure to hold coefficient info for a 8x8 transform + */ +typedef struct +{ + + /** + * significant coefficient map + */ + UWORD32 au4_sig_coeff_map[2]; + + /** + * sub block position + */ + UWORD16 u2_subblk_pos; + + /** + * holds coefficients + */ + WORD16 ai2_level[TRANS_SIZE_8 * TRANS_SIZE_8]; +}tu_blk8x8_coeff_data_t; + + +/** + * Structure to hold coefficient info for a 16x16 IPCM MB + */ +typedef struct +{ + /** + * holds coefficients + */ + UWORD8 au1_level[MB_SIZE * MB_SIZE * 3 / 2]; +}tu_ipcm_coeff_data_t; + + +typedef struct +{ + /** + * Transform sizes 0: 4x4, 1: 8x8, + */ + UWORD32 b1_trans_size : 1; + + /** + * Flag to signal if the current MB is IPCM + */ + UWORD32 b1_ipcm : 1; + + /** + * 
Intra pred sizes 0: 4x4, 1: 8x8, 2: 16x16 + */ + UWORD32 b2_intra_pred_size : 2; + + /** + * Chroma intra mode + */ + UWORD32 b2_intra_chroma_pred_mode: 2; + + /** + * Number of coded subblocks in the current MB, for which + * tu data is sent. Maximum of 27 subblocks in the following + * order. + * 1 4x4 luma DC(for intra16x16), + * 16 4x4 luma, + * 2 2x2 chroma DC, + * 8 4x4 chroma, + */ + WORD32 b5_num_coded_sblks: 5; + + /** + * Flag to signal if 4x4 subblock for DC values (in INTRA 16x16 MB) + * is coded + */ + UWORD32 b1_luma_dc_coded: 1; + + /** + * Flag to signal if 4x4 subblock for DC values (in INTRA 16x16 MB) + * is coded + */ + UWORD32 b1_chroma_dc_coded: 1; + + /** + * CSBP - 16 bits, 1 bit for each 4x4 + * for intra16x16 mb_type only ac coefficients are + */ + UWORD32 b16_luma_csbp: 16; + + /** + * CSBP - 16 bits, 1 bit for each 4x4 + * for intra16x16 mb_type only ac coefficients are + */ + UWORD32 b8_chroma_csbp: 8; + + /** + * Luma Intra pred modes, + * Based on intra pred size either 16, 4 or 1 entry will be + * populated below. + */ + UWORD8 au1_luma_intra_modes[16]; + +}intra_mb_t; + + +typedef struct +{ + /** + * Transform sizes 0: 4x4, 1: 8x8, + */ + UWORD8 b1_trans_size : 1; + + + /** + * Skip flag + */ + UWORD8 b1_skip : 1; + + + /** + * Number of coded subblocks in the current MB, for which + * tu data is sent. Maximum of 26 subblocks in the following + * order. 
+ * 16 4x4 luma, + * 2 2x2 chroma DC, + * 8 4x4 chroma, + */ + WORD32 b5_num_coded_sblks: 5; + + /** + * CSBP - 16 bits, 1 bit for each 4x4 + * for intra16x16 mb_type only ac coefficients are + */ + UWORD32 b16_luma_csbp: 16; + + /** + * CSBP - 16 bits, 1 bit for each 4x4 + * for intra16x16 mb_type only ac coefficients are + */ + UWORD32 b16_chroma_csbp: 8; +}inter_mb_t; + +#endif /* _IH264_STRUCTS_H_ */ diff --git a/common/ih264_trans_data.c b/common/ih264_trans_data.c new file mode 100755 index 0000000..a1231e6 --- /dev/null +++ b/common/ih264_trans_data.c @@ -0,0 +1,312 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ *  ih264_trans_data.c
+ *
+ * @brief
+ *  Contains definition of global variables for H264 encoder
+ *
+ * @author
+ *  Ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#include "ih264_typedefs.h"
+#include "ih264_trans_data.h"
+
+/*****************************************************************************/
+/* Extern global definitions */
+/*****************************************************************************/
+
+/*
+ * Since we don't have a division operation in neon
+ * we will multiply by LCM of 16,6,10 and scale accordingly
+ * so take care: to get the actual transform you need to divide by LCM
+ * LCM = 240. NOTE(review): rows 0 and 2 of the 4x4 table do not repeat the way rows 1 and 3 do - confirm the values.
+ */
+
+const UWORD16 g_scal_coff_h264_4x4[16] ={
+        15,40,40,40,
+        40,24,40,24,
+        15,40,40,15,
+        40,24,40,24};
+
+
+
+const UWORD16 g_scal_coff_h264_8x8[16]=
+        {
+                16, 15, 20, 15,
+                15, 14, 19, 14,
+                20, 19, 25, 19,
+                15, 14, 19, 14
+        };
+/*
+ * The scaling is by an 8x8 matrix, but due to its 4x4 symmetry we can use
+ * a 4x4 matrix for scaling
+ * now since divide is to be avoided, we will compute 1/ values and scale it up
+ * to preserve information since our data is max 10 bit +1 sign bit we can shift a maximum of 21 bits up
+ * hence multiply the matrix as such
+{16.000 15.059 20.227 15.059
+15.059 14.173 19.051 14.173
+20.227 19.051 25.600 19.051
+15.059 14.173 19.051 14.173};
+{512, 544, 405, 544,
+544, 578, 430, 578,
+405, 430, 320, 430,
+544, 578, 430, 578};*/
+
+
+/**
+ ******************************************************************************
+ * @brief Scale Table for quantizing 4x4 subblock. To quantize a given 4x4 DCT
+ * transformed block, the coefficient at index location (i,j) is scaled by one of
+ * the constants in this table and right shift the result by (QP_BITS_h264_4x4 +
+ * floor(qp/6)), here qp is the quantization parameter used to quantize the mb.
+ *
+ * input : qp%6, index location (i,j)
+ * output : scale constant.
+ *
+ * @remarks 16 constants for each index position of the subblock and 6 for each
+ * qp%6 in the range 0-5 inclusive.
+ ******************************************************************************
+ */
+const UWORD16 gu2_quant_scale_matrix_4x4[96] =
+{
+    13107, 8066, 13107, 8066,
+    8066, 5243, 8066, 5243,
+    13107, 8066, 13107, 8066,
+    8066, 5243, 8066, 5243,
+
+    11916, 7490, 11916, 7490,
+    7490, 4660, 7490, 4660,
+    11916, 7490, 11916, 7490,
+    7490, 4660, 7490, 4660,
+
+    10082, 6554, 10082, 6554,
+    6554, 4194, 6554, 4194,
+    10082, 6554, 10082, 6554,
+    6554, 4194, 6554, 4194,
+
+    9362, 5825, 9362, 5825,
+    5825, 3647, 5825, 3647,
+    9362, 5825, 9362, 5825,
+    5825, 3647, 5825, 3647,
+
+    8192, 5243, 8192, 5243,
+    5243, 3355, 5243, 3355,
+    8192, 5243, 8192, 5243,
+    5243, 3355, 5243, 3355,
+
+    7282, 4559, 7282, 4559,
+    4559, 2893, 4559, 2893,
+    7282, 4559, 7282, 4559,
+    4559, 2893, 4559, 2893,
+
+};
+
+/**
+ ******************************************************************************
+ * @brief Round Factor for quantizing subblock. While quantizing a given 4x4 DCT
+ * transformed block, the coefficient at index location (i,j) is scaled by one of
+ * the constants in the table gu2_quant_scale_matrix_4x4 and then right shifted
+ * by (QP_BITS_h264_4x4 + floor(qp/6)).
+ * Before right shifting a round factor is added.
+ * The round factor can be any value [a * (1 << (QP_BITS_h264_4x4 + floor(qp/6)))]
+ * where 'a' lies in the range 0-0.5.
+ * Here qp is the quantization parameter used to quantize the mb.
+ *
+ * input : qp/6
+ * output : round factor.
+ *
+ * @remarks The round factor is constructed by setting a = 1/3
+ *
+ * round factor constructed by setting a = 1/3
+ {
+      10922, 21845, 43690, 87381,
+      174762, 349525, 699050, 1398101,
+      2796202,
+ }
+ *
+ * round factor constructed by setting a = 0.49
+ *{
+      16056, 32112, 64225,
+      128450, 256901, 513802,
+      1027604, 2055208, 4110417,
+ };
+
+  * round factor constructed by setting a = 0.5
+      16384, 32768, 65536,
+      131072, 262144, 524288,
+      1048576, 2097152, 4194304,
+
+ ******************************************************************************
+ */
+const UWORD32 gu4_forward_quant_round_factor_4x4[9] =
+{
+    10922, 21845, 43690, 87381,
+    174762, 349525, 699050, 1398101,
+    2796202,
+};
+
+
+
+/**
+ ******************************************************************************
+ * @brief Threshold Table. Quantizing the given DCT coefficient is done only if
+ * it exceeds the threshold value presented in this table.
+ *
+ * input : qp%6, index location (i,j)
+ * output : Threshold constant.
+ *
+ * @remarks 96 entries: 16 constants (one per index position of the subblock)
+ * for each qp%6 in the range 0-5 inclusive. The table has no qp/6 dimension.
+ ******************************************************************************
+ */
+const UWORD16 gu2_forward_quant_threshold_4x4[96] =
+{
+    426, 693, 426, 693,
+    693, 1066, 693, 1066,
+    426, 693, 426, 693,
+    693, 1066, 693, 1066,
+
+    469, 746, 469, 746,
+    746, 1200, 746, 1200,
+    469, 746, 469, 746,
+    746, 1200, 746, 1200,
+
+    554, 853, 554, 853,
+    853, 1333, 853, 1333,
+    554, 853, 554, 853,
+    853, 1333, 853, 1333,
+
+    597, 960, 597, 960,
+    960, 1533, 960, 1533,
+    597, 960, 597, 960,
+    960, 1533, 960, 1533,
+
+    682, 1066, 682, 1066,
+    1066, 1666, 1066, 1666,
+    682, 1066, 682, 1066,
+    1066, 1666, 1066, 1666,
+
+    767, 1226, 767, 1226,
+    1226, 1933, 1226, 1933,
+    767, 1226, 767, 1226,
+    1226, 1933, 1226, 1933,
+};
+
+/**
+ ******************************************************************************
+ * @brief Scale Table for quantizing 8x8 subblock. To quantize a given 8x8 DCT
+ * transformed block, the coefficient at index location (i,j) is scaled by one of
+ * the constants in this table and right shift the result by (QP_BITS_h264_8x8 +
+ * floor(qp/6)), here qp is the quantization parameter used to quantize the mb.
+ *
+ * input : qp%6, index location (i,j)
+ * output : scale constant.
+ *
+ * @remarks 64 constants for each index position of the subblock and 6 for each
+ * qp%6 in the range 0-5 inclusive.
+ ****************************************************************************** + */ +const UWORD16 gu2_quant_scale_matrix_8x8 [384] = +{ + 13107, 12222, 16777, 12222, 13107, 12222, 16777, 12222, + 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428, + 16777, 15481, 20972, 15481, 16777, 15481, 20972, 15481, + 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428, + 13107, 12222, 16777, 12222, 13107, 12222, 16777, 12222, + 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428, + 16777, 15481, 20972, 15481, 16777, 15481, 20972, 15481, + 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428, + + 11916, 11058, 14980, 11058, 11916, 11058, 14980, 11058, + 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826, + 14980, 14290, 19174, 14290, 14980, 14290, 19174, 14290, + 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826, + 11916, 11058, 14980, 11058, 11916, 11058, 14980, 11058, + 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826, + 14980, 14290, 19174, 14290, 14980, 14290, 19174, 14290, + 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826, + + 10082, 9675, 12710, 9675, 10082, 9675, 12710, 9675, + 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943, + 12710, 11985, 15978, 11985, 12710, 11985, 15978, 11985, + 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943, + 10082, 9675, 12710, 9675, 10082, 9675, 12710, 9675, + 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943, + 12710, 11985, 15978, 11985, 12710, 11985, 15978, 11985, + 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943, + + 9362, 8931, 11984, 8931, 9362, 8931, 11984, 8931, + 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228, + 11984, 11259, 14913, 11259, 11984, 11259, 14913, 11259, + 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228, + 9362, 8931, 11984, 8931, 9362, 8931, 11984, 8931, + 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228, + 11984, 11259, 14913, 11259, 11984, 11259, 14913, 11259, + 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228, + + 8192, 7740, 10486, 7740, 8192, 7740, 10486, 7740, + 7740, 
7346, 9777, 7346, 7740, 7346, 9777, 7346, + 10486, 9777, 13159, 9777, 10486, 9777, 13159, 9777, + 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346, + 8192, 7740, 10486, 7740, 8192, 7740, 10486, 7740, + 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346, + 10486, 9777, 13159, 9777, 10486, 9777, 13159, 9777, + 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346, + + 7282, 6830, 9118, 6830, 7282, 6830, 9118, 6830, + 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428, + 9118, 8640, 11570, 8640, 9118, 8640, 11570, 8640, + 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428, + 7282, 6830, 9118, 6830, 7282, 6830, 9118, 6830, + 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428, + 9118, 8640, 11570, 8640, 9118, 8640, 11570, 8640, + 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428, + +}; + + +/** + ****************************************************************************** + * @brief Specification of QPc as a function of qPi + * + * input : qp luma + * output : qp chroma. + * + * @remarks Refer Table 8-15 of h264 specification. + ****************************************************************************** + */ +const UWORD8 gu1_qpc_fqpi[52] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 29, 30, + 31, 32, 32, 33, 34, 34, 35, 35, + 36, 36, 37, 37, 37, 38, 38, 38, + 39, 39, 39, 39, +}; diff --git a/common/ih264_trans_data.h b/common/ih264_trans_data.h new file mode 100755 index 0000000..dc77ae7 --- /dev/null +++ b/common/ih264_trans_data.h @@ -0,0 +1,125 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ *  ih264_trans_data.h
+ *
+ * @brief
+ *  Contains declaration of global variables for H264 transform, quant and inverse quant
+ *
+ * @author
+ *  Ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+#ifndef IH264_GLOBAL_DATA_H_
+#define IH264_GLOBAL_DATA_H_
+
+/*****************************************************************************/
+/* Extern global declarations */
+/*****************************************************************************/
+
+/* Scaling matrices for h264 quantization */
+extern const UWORD16 g_scal_coff_h264_4x4[16];
+extern const UWORD16 g_scal_coff_h264_8x8[16];
+
+
+/**
+ ******************************************************************************
+ * @brief Scale Table for quantizing 4x4 subblock. To quantize a given 4x4 DCT
+ * transformed block, the coefficient at index location (i,j) is scaled by one of
+ * the constants in this table and right shift the result by (QP_BITS_h264_4x4 +
+ * floor(qp/6)), here qp is the quantization parameter used to quantize the mb.
+ *
+ * input : qp%6, index location (i,j)
+ * output : scale constant.
+ *
+ * @remarks 16 constants for each index position of the subblock and 6 for each
+ * qp%6 in the range 0-5 inclusive.
+ ******************************************************************************
+ */
+extern const UWORD16 gu2_quant_scale_matrix_4x4[96];
+
+/**
+ ******************************************************************************
+ * @brief Round Factor for quantizing subblock. While quantizing a given 4x4 DCT
+ * transformed block, the coefficient at index location (i,j) is scaled by one of
+ * the constants in the table gu2_quant_scale_matrix_4x4 and then right shifted
+ * by (QP_BITS_h264_4x4 + floor(qp/6)).
+ * Before right shifting a round factor is added.
+ * The round factor can be any value [a * (1 << (QP_BITS_h264_4x4 + floor(qp/6)))]
+ * where 'a' lies in the range 0-0.5.
+ * Here qp is the quantization parameter used to quantize the mb.
+ *
+ * input : qp/6
+ * output : round factor.
+ *
+ * @remarks The round factor is constructed by setting a = 1/3
+ ******************************************************************************
+ */
+extern const UWORD32 gu4_forward_quant_round_factor_4x4[9];
+
+/**
+ ******************************************************************************
+ * @brief Threshold Table. Quantizing the given DCT coefficient is done only if
+ * it exceeds the threshold value presented in this table.
+ *
+ * input : qp%6, index location (i,j)
+ * output : Threshold constant.
+ *
+ * @remarks 96 entries: 16 constants (one per index position of the subblock)
+ * for each qp%6 in the range 0-5 inclusive. The table has no qp/6 dimension.
+ ******************************************************************************
+ */
+extern const UWORD16 gu2_forward_quant_threshold_4x4[96];
+
+/**
+ ******************************************************************************
+ * @brief Scale Table for quantizing 8x8 subblock. To quantize a given 8x8 DCT
+ * transformed block, the coefficient at index location (i,j) is scaled by one of
+ * the constants in this table and right shift the result by (QP_BITS_h264_8x8 +
+ * floor(qp/6)), here qp is the quantization parameter used to quantize the mb.
+ *
+ * input : qp%6, index location (i,j)
+ * output : scale constant.
+ *
+ * @remarks 64 constants for each index position of the subblock and 6 for each
+ * qp%6 in the range 0-5 inclusive.
+ ******************************************************************************
+ */
+extern const UWORD16 gu2_quant_scale_matrix_8x8 [384];
+
+/**
+ ******************************************************************************
+ * @brief Specification of QPc as a function of qPi
+ *
+ * input : qp luma
+ * output : qp chroma.
+ *
+ * @remarks Refer Table 8-15 of h264 specification.
+ ******************************************************************************
+ */
+extern const UWORD8 gu1_qpc_fqpi[52];
+
+
+#endif /* IH264_GLOBAL_DATA_H_ */
diff --git a/common/ih264_trans_macros.h b/common/ih264_trans_macros.h
new file mode 100755
index 0000000..f114d0e
--- /dev/null
+++ b/common/ih264_trans_macros.h
@@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_trans_macros.h +* +* @brief +* The file contains definitions of macros that perform forward and inverse +* quantization +* +* @author +* Ittiam +* +* @remark +* None +* +******************************************************************************* +*/ + +#ifndef IH264_TRANS_MACROS_H_ +#define IH264_TRANS_MACROS_H_ + +/*****************************************************************************/ +/* Function Macros */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief Macro to perform forward quantization. + * @description The value to be quantized is first compared with a threshold. + * If the value is less than the threshold, the quantization value is returned + * as zero else the value is quantized traditionally as per the rules of + * h264 specification +****************************************************************************** + */ +#define FWD_QUANT(i4_value, u4_abs_value, i4_sign, threshold, scale, rndfactor, qbits, u4_nnz) \ + {\ + if (i4_value < 0)\ + {\ + u4_abs_value = -i4_value;\ + i4_sign = -1;\ + }\ + else\ + {\ + u4_abs_value = i4_value;\ + i4_sign = 1;\ + }\ + if (u4_abs_value < threshold)\ + {\ + i4_value = 0;\ + }\ + else\ + {\ + u4_abs_value *= scale;\ + u4_abs_value += rndfactor;\ + u4_abs_value >>= qbits;\ + i4_value = u4_abs_value * i4_sign;\ + if (i4_value)\ + {\ + u4_nnz++;\ + }\ + }\ + } + +/** +****************************************************************************** + * @brief Macro to perform inverse quantization. 
+ * @remarks The value can also be de-quantized as + * if (u4_qp_div_6 < 4) + * { + * i4_value = (quant_scale * weight_scale * i4_value + (1 << (3-u4_qp_div_6))) + * i4_value >>= (4 - u4_qp_div_6) + * } + * else + * { + * i4_value = (quant_scale * weight_scale * i4_value) << (u4_qp_div_6 -4) + * } +****************************************************************************** + */ +#define INV_QUANT(i4_value, quant_scale, weight_scale, u4_qp_div_6, rndfactor, qbits)\ + {\ + i4_value *= quant_scale;\ + i4_value *= weight_scale;\ + i4_value += rndfactor;\ + i4_value <<= u4_qp_div_6;\ + i4_value >>= qbits;\ + } + +#define QUANT_H264(x,y,w,z,shft) (shft = ABS(x),\ + shft *= y,\ + shft += z,\ + shft = shft>>w,\ + shft = SIGNXY(shft,x)) + +#define IQUANT_H264(x,y,wscal,w,shft) (shft = x, \ + shft *=y, \ + shft *=wscal, \ + shft = shft<<w) + +#define IQUANT_lev_H264(x,y,wscal,add_f,w,shft) (shft = x, \ + shft *=y, \ + shft *=wscal, \ + shft+= add_f, \ + shft = shft>>w) + +#endif /* IH264_TRANS_MACROS_H_ */ diff --git a/common/ih264_trans_quant_itrans_iquant.h b/common/ih264_trans_quant_itrans_iquant.h new file mode 100755 index 0000000..83551aa --- /dev/null +++ b/common/ih264_trans_quant_itrans_iquant.h @@ -0,0 +1,232 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ *  ih264_trans_quant_itrans_iquant.h
+ *
+ * @brief
+ *  Contains declarations for forward and inverse transform paths for H264
+ *
+ * @author
+ *  Ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264_TRANS_QUANT_H_
+#define IH264_TRANS_QUANT_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+
+typedef void ih264_resi_trans_dctrans_quant_ft(UWORD8*pu1_src,
+                                               UWORD8 *pu1_pred,
+                                               WORD16 *pi2_out,
+                                               WORD32 src_strd,
+                                               WORD32 pred_strd,
+                                               WORD32 dst_strd,
+                                               const UWORD16 *pu2_scale_mat,
+                                               const UWORD16 *pu2_thresh_mat,
+                                               UWORD32 u4_qbit,
+                                               UWORD32 u4_round_fact,
+                                               UWORD8 *pu1_nnz);
+
+typedef void ih264_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
+                                                   UWORD8 *pu1_pred,
+                                                   UWORD8 *pu1_out,
+                                                   WORD32 src_strd,
+                                                   WORD32 pred_strd,
+                                                   WORD32 out_strd,
+                                                   const UWORD16 *pu2_iscale_mat,
+                                                   const UWORD16 *pu2_weigh_mat,
+                                                   UWORD32 qp_div,
+                                                   UWORD32 pi4_cntrl,
+                                                   WORD32 *pi4_tmp);
+
+
+/* Function type declarations (the named functions are declared further below) */
+typedef void ih264_resi_trans_quant_ft(UWORD8*pu1_src,
+                                       UWORD8 *pu1_pred,
+                                       WORD16 *pi2_out,
+                                       WORD32 src_strd,
+                                       WORD32 pred_strd,
+                                       const UWORD16 *pu2_scale_mat,
+                                       const UWORD16 *pu2_thresh_mat,
+                                       UWORD32 u4_qbit,
+                                       UWORD32 u4_round_fact,
+                                       UWORD8 *pu1_nnz,
+                                       WORD16 *pi2_alt_dc_addr);
+
+typedef void ih264_luma_16x16_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src,
+                                                          UWORD8 *pu1_pred,
+                                                          WORD16 *pi2_out,
+                                                          WORD32 src_strd,
+                                                          WORD32 pred_strd,
+                                                          WORD32 dst_strd,
+                                                          const UWORD16 *pu2_scale_matrix,
+                                                          const UWORD16 *pu2_threshold_matrix,
+                                                          UWORD32 u4_qbits,
+                                                          UWORD32 u4_round_factor,
+                                                          UWORD8 *pu1_nnz,
+                                                          UWORD32 u4_dc_flag);
+
+typedef void ih264_chroma_8x8_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src,
+                                                          UWORD8 *pu1_pred,
+                                                          WORD16 *pi2_out,
+                                                          WORD32 src_strd,
+                                                          WORD32 pred_strd,
+                                                          WORD32 dst_strd,
+                                                          const UWORD16 *pu2_scale_matrix,
+                                                          const UWORD16 *pu2_threshold_matrix,
+                                                          UWORD32 u4_qbits,
+                                                          UWORD32 u4_round_factor,
+                                                          UWORD8 *pu1_nnz);
+
+typedef void ih264_iquant_itrans_recon_ft(WORD16 *pi2_src,
+                                          UWORD8 *pu1_pred,
+                                          UWORD8 *pu1_out,
+                                          WORD32 pred_strd,
+                                          WORD32 out_strd,
+                                          const UWORD16 *pu2_iscale_mat,
+                                          const UWORD16 *pu2_weigh_mat,
+                                          UWORD32 qp_div,
+                                          WORD16 *pi2_tmp,
+                                          WORD32 iq_start_idx,
+                                          WORD16 *pi2_dc_ld_addr);
+
+
+typedef void ih264_iquant_itrans_recon_chroma_ft(WORD16 *pi2_src,
+                                                 UWORD8 *pu1_pred,
+                                                 UWORD8 *pu1_out,
+                                                 WORD32 pred_strd,
+                                                 WORD32 out_strd,
+                                                 const UWORD16 *pu2_iscal_mat,
+                                                 const UWORD16 *pu2_weigh_mat,
+                                                 UWORD32 u4_qp_div_6,
+                                                 WORD16 *pi2_tmp,
+                                                 WORD16 *pi2_dc_src);
+
+
+typedef void ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
+                                                              UWORD8 *pu1_pred,
+                                                              UWORD8 *pu1_out,
+                                                              WORD32 src_strd,
+                                                              WORD32 pred_strd,
+                                                              WORD32 out_strd,
+                                                              const UWORD16 *pu2_iscale_mat,
+                                                              const UWORD16 *pu2_weigh_mat,
+                                                              UWORD32 qp_div,
+                                                              UWORD32 pi4_cntrl,
+                                                              UWORD32 u4_dc_trans_flag,
+                                                              WORD32 *pi4_tmp);
+
+typedef void ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
+                                                              UWORD8 *pu1_pred,
+                                                              UWORD8 *pu1_out,
+                                                              WORD32 src_strd,
+                                                              WORD32 pred_strd,
+                                                              WORD32 out_strd,
+                                                              const UWORD16 *pu2_iscale_mat,
+                                                              const UWORD16 *pu2_weigh_mat,
+                                                              UWORD32 qp_div,
+                                                              UWORD32 pi4_cntrl,
+                                                              WORD32 *pi4_tmp);
+
+typedef void ih264_ihadamard_scaling_ft(WORD16* pi2_src,
+                                        WORD16* pi2_out,
+                                        const UWORD16 *pu2_iscal_mat,
+                                        const UWORD16 *pu2_weigh_mat,
+                                        UWORD32 u4_qp_div_6,
+                                        WORD32* pi4_tmp);
+
+typedef void ih264_hadamard_quant_ft(WORD16 *pi2_src, WORD16 *pi2_dst,
+                                     const UWORD16 *pu2_scale_matrix,
+                                     const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+                                     UWORD32 u4_round_factor,UWORD8 *pu1_nnz);
+
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4;
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4;
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_8x8;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv;
+ih264_hadamard_quant_ft ih264_hadamard_quant_4x4;
+ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv;
+
+/* A9 declarations (functions with the _a9 suffix) */
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_a9;
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_a9;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_a9;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_a9;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_a9;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_a9;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_a9;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_a9;
+ih264_luma_16x16_resi_trans_dctrans_quant_ft ih264_luma_16x16_resi_trans_dctrans_quant_a9;
+ih264_chroma_8x8_resi_trans_dctrans_quant_ft ih264_chroma_8x8_resi_trans_dctrans_quant_a9;
+ih264_luma_16x16_idctrans_iquant_itrans_recon_ft ih264_luma_16x16_idctrans_iquant_itrans_recon_a9;
+ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft ih264_chroma_8x8_idctrans_iquant_itrans_recon_a9;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_a9;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_a9;
+ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_a9;
+ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_a9;
+
+/* Av8 declarations (functions with the _av8 suffix) */
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_av8;
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_av8;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_av8;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_av8;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_av8;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_av8;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_av8;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_av8;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_av8;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_av8;
+ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_av8;
+ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_av8;
+
+/* SSSE3 declarations (functions with the _ssse3 suffix) */
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_ssse3;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_ssse3;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_ssse3;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_ssse3;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_ssse3;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_ssse3;
+/* SSE4.2 declarations (functions with the _sse42 suffix) */
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_sse42;
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_sse42;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_sse42;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_sse42;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_sse42;
+ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_sse42;
+ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_sse42;
+
+#endif /* IH264_TRANS_QUANT_H_ */
diff --git a/common/ih264_typedefs.h b/common/ih264_typedefs.h
new file mode 100755
index 0000000..8e4685a
--- /dev/null
+++ b/common/ih264_typedefs.h
@@ -0,0 +1,64 @@
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_typedefs.h +* +* @brief +* Type definitions used in the code +* +* @author +* Ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _IH264_TYPEDEFS_H_ +#define _IH264_TYPEDEFS_H_ + + +/*****************************************************************************/ +/* Unsigned data types */ +/*****************************************************************************/ +typedef unsigned char UWORD8; +typedef unsigned short UWORD16; +typedef unsigned int UWORD32; +typedef unsigned long long UWORD64; + + +/*****************************************************************************/ +/* Signed data types */ +/*****************************************************************************/ +typedef signed char WORD8; +typedef short WORD16; +typedef int WORD32; + + +/*****************************************************************************/ +/* Miscellaneous data types */ 
+/*****************************************************************************/ +typedef char CHAR; +typedef double DOUBLE; + +#endif /* _IH264_TYPEDEFS_H_ */ diff --git a/common/ih264_weighted_pred.c b/common/ih264_weighted_pred.c new file mode 100755 index 0000000..d5d73f2 --- /dev/null +++ b/common/ih264_weighted_pred.c @@ -0,0 +1,495 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ih264_weighted_pred.c */ +/* */ +/* Description : Contains function definitions for weighted */ +/* prediction functions */ +/* */ +/* List of Functions : ih264_default_weighted_pred_luma() */ +/* ih264_default_weighted_pred_chroma() */ +/* ih264_weighted_pred_luma() */ +/* ih264_weighted_pred_chroma() */ +/* ih264_weighted_bipred_luma() */ +/* ih264_weighted_bipred_chroma() */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 01 2015 Kaushik Initial version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_weighted_pred.h" + +/*****************************************************************************/ +/* Function definitions . */ +/*****************************************************************************/ +/*****************************************************************************/ +/* */ +/* Function Name : ih264_default_weighted_pred_luma */ +/* */ +/* Description : This function performs the default weighted prediction */ +/* as described in sec 8.4.2.3.1 titled "Default weighted */ +/* sample prediction process" for luma. The function gets */ +/* two ht x wd blocks, calculates their rounded-average and */ +/* stores it in the destination block. (ht,wd) can be */ +/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). 
*/ +/* */ +/* Inputs : puc_src1 - Pointer to source 1 */ +/* puc_src2 - Pointer to source 2 */ +/* puc_dst - Pointer to destination */ +/* src_strd1 - stride for source 1 */ +/* src_strd1 - stride for source 2 */ +/* dst_strd - stride for destination */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 01 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_default_weighted_pred_luma(UWORD8 *pu1_src1, + UWORD8 *pu1_src2, + UWORD8 *pu1_dst, + WORD32 src_strd1, + WORD32 src_strd2, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd) +{ + WORD32 i, j; + + src_strd1 -= wd; + src_strd2 -= wd; + dst_strd -= wd; + + for(i = 0; i < ht; i++) + { + for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++) + *pu1_dst = (*pu1_src1 + *pu1_src2 + 1) >> 1; + + pu1_src1 += src_strd1; + pu1_src2 += src_strd2; + pu1_dst += dst_strd; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_default_weighted_pred_chroma */ +/* */ +/* Description : This function performs the default weighted prediction */ +/* as described in sec 8.4.2.3.1 titled "Default weighted */ +/* sample prediction process" for chroma. The function gets */ +/* two ht x wd blocks, calculates their rounded-average and */ +/* stores it in the destination block. (ht,wd) can be */ +/* (2,2), (4,2) , (2,4), (4,4), (8,4), (4,8) or (8,8). 
*/ +/* */ +/* Inputs : puc_src1 - Pointer to source 1 */ +/* puc_src2 - Pointer to source 2 */ +/* puc_dst - Pointer to destination */ +/* src_strd1 - stride for source 1 */ +/* src_strd1 - stride for source 2 */ +/* dst_strd - stride for destination */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 01 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_default_weighted_pred_chroma(UWORD8 *pu1_src1, + UWORD8 *pu1_src2, + UWORD8 *pu1_dst, + WORD32 src_strd1, + WORD32 src_strd2, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd) +{ + WORD32 i, j; + + wd = wd << 1; + + src_strd1 -= wd; + src_strd2 -= wd; + dst_strd -= wd; + + for(i = 0; i < ht; i++) + { + for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++) + *pu1_dst = (*pu1_src1 + *pu1_src2 + 1) >> 1; + + pu1_src1 += src_strd1; + pu1_src2 += src_strd2; + pu1_dst += dst_strd; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_weighted_pred_luma */ +/* */ +/* Description : This function performs the weighted prediction as */ +/* described in sec 8.4.2.3.2 titled "Weighted sample */ +/* prediction process" for luma. The function gets one */ +/* ht x wd block, weights it, rounds it off, offsets it, */ +/* saturates it to unsigned 8-bit and stores it in the */ +/* destination block. (ht,wd) can be (4,4), (8,4), (4,8), */ +/* (8,8), (16,8), (8,16) or (16,16). 
*/ +/* */ +/* Inputs : puc_src - Pointer to source */ +/* puc_dst - Pointer to destination */ +/* src_strd - stride for source */ +/* dst_strd - stride for destination */ +/* log_wd - number of bits to be rounded off */ +/* wt - weight value */ +/* ofst - offset value */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 01 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_weighted_pred_luma(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 log_wd, + WORD32 wt, + WORD32 ofst, + WORD32 ht, + WORD32 wd) +{ + WORD32 i, j; + + wt = (WORD16)(wt & 0xffff); + ofst = (WORD8)(ofst & 0xff); + + src_strd -= wd; + dst_strd -= wd; + + if(log_wd >= 1) + { + WORD32 i_ofst = (1 << (log_wd - 1)) + (ofst << log_wd); + for(i = 0; i < ht; i++) + { + for(j = 0; j < wd; j++, pu1_src++, pu1_dst++) + *pu1_dst = CLIP_U8((wt * (*pu1_src) + i_ofst) >> log_wd); + + pu1_src += src_strd; + pu1_dst += dst_strd; + } + } + else + { + for(i = 0; i < ht; i++) + { + for(j = 0; j < wd; j++, pu1_src++, pu1_dst++) + *pu1_dst = CLIP_U8(wt * (*pu1_src) + ofst); + + pu1_src += src_strd; + pu1_dst += dst_strd; + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_weighted_pred_chroma */ +/* */ +/* Description : This function performs the weighted prediction as */ +/* described in sec 8.4.2.3.2 titled "Weighted sample */ +/* prediction process" for chroma. The function gets one */ +/* ht x wd block, weights it, rounds it off, offsets it, */ +/* saturates it to unsigned 8-bit and stores it in the */ +/* destination block. (ht,wd) can be (2,2), (4,2), (2,4), */ +/* (4,4), (8,4), (4,8) or (8,8). 
*/ +/* */ +/* Inputs : puc_src - Pointer to source */ +/* puc_dst - Pointer to destination */ +/* src_strd - stride for source */ +/* dst_strd - stride for destination */ +/* log_wd - number of bits to be rounded off */ +/* wt - weight values for u and v */ +/* ofst - offset values for u and v */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 01 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_weighted_pred_chroma(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 log_wd, + WORD32 wt, + WORD32 ofst, + WORD32 ht, + WORD32 wd) +{ + WORD32 i, j; + WORD32 wt_u, wt_v; + WORD32 ofst_u, ofst_v; + + wt_u = (WORD16)(wt & 0xffff); + wt_v = (WORD16)(wt >> 16); + + ofst_u = (WORD8)(ofst & 0xff); + ofst_v = (WORD8)(ofst >> 8); + + src_strd -= wd << 1; + dst_strd -= wd << 1; + + if(log_wd >= 1) + { + ofst_u = (1 << (log_wd - 1)) + (ofst_u << log_wd); + ofst_v = (1 << (log_wd - 1)) + (ofst_v << log_wd); + + for(i = 0; i < ht; i++) + { + for(j = 0; j < wd; j++, pu1_src++, pu1_dst++) + { + *pu1_dst = CLIP_U8((wt_u * (*pu1_src) + ofst_u) >> log_wd); + pu1_src++; + pu1_dst++; + *pu1_dst = CLIP_U8((wt_v * (*pu1_src) + ofst_v) >> log_wd); + } + pu1_src += src_strd; + pu1_dst += dst_strd; + } + } + else + { + for(i = 0; i < ht; i++) + { + for(j = 0; j < wd; j++, pu1_src++, pu1_dst++) + { + *pu1_dst = CLIP_U8(wt_u * (*pu1_src) + ofst_u); + pu1_src++; + pu1_dst++; + *pu1_dst = CLIP_U8(wt_v * (*pu1_src) + ofst_v); + } + pu1_src += src_strd; + pu1_dst += dst_strd; + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_weighted_bi_pred_luma */ +/* */ +/* Description : This function performs the weighted biprediction as */ +/* described in sec 8.4.2.3.2 titled "Weighted 
sample */ +/* prediction process" for luma. The function gets two */ +/* ht x wd blocks, weights them, adds them, rounds off the */ +/* sum, offsets it, saturates it to unsigned 8-bit and */ +/* stores it in the destination block. (ht,wd) can be */ +/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). */ +/* */ +/* Inputs : puc_src1 - Pointer to source 1 */ +/* puc_src2 - Pointer to source 2 */ +/* puc_dst - Pointer to destination */ +/* src_strd1 - stride for source 1 */ +/* src_strd2 - stride for source 2 */ +/* dst_strd2 - stride for destination */ +/* log_wd - number of bits to be rounded off */ +/* wt1 - weight value for source 1 */ +/* wt2 - weight value for source 2 */ +/* ofst1 - offset value for source 1 */ +/* ofst2 - offset value for source 2 */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 01 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_weighted_bi_pred_luma(UWORD8 *pu1_src1, + UWORD8 *pu1_src2, + UWORD8 *pu1_dst, + WORD32 src_strd1, + WORD32 src_strd2, + WORD32 dst_strd, + WORD32 log_wd, + WORD32 wt1, + WORD32 wt2, + WORD32 ofst1, + WORD32 ofst2, + WORD32 ht, + WORD32 wd) +{ + WORD32 i, j; + WORD32 shft, ofst; + + ofst1 = (WORD8)(ofst1 & 0xff); + ofst2 = (WORD8)(ofst2 & 0xff); + wt1 = (WORD16)(wt1 & 0xffff); + wt2 = (WORD16)(wt2 & 0xffff); + ofst = (ofst1 + ofst2 + 1) >> 1; + + shft = log_wd + 1; + ofst = (1 << log_wd) + (ofst << shft); + + src_strd1 -= wd; + src_strd2 -= wd; + dst_strd -= wd; + + for(i = 0; i < ht; i++) + { + for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++) + *pu1_dst = CLIP_U8((wt1 * (*pu1_src1) + wt2 * (*pu1_src2) + ofst) >> shft); + + pu1_src1 += src_strd1; + pu1_src2 += src_strd2; + pu1_dst += dst_strd; + } +} + 
+/*****************************************************************************/ +/* */ +/* Function Name : ih264_weighted_bi_pred_chroma */ +/* */ +/* Description : This function performs the weighted biprediction as */ +/* described in sec 8.4.2.3.2 titled "Weighted sample */ +/* prediction process" for chroma. The function gets two */ +/* ht x wd blocks, weights them, adds them, rounds off the */ +/* sum, offsets it, saturates it to unsigned 8-bit and */ +/* stores it in the destination block. (ht,wd) can be */ +/* (2,2), (4,2), (2,4), (4,4), (8,4), (4,8) or (8,8). */ +/* */ +/* Inputs : puc_src1 - Pointer to source 1 */ +/* puc_src2 - Pointer to source 2 */ +/* puc_dst - Pointer to destination */ +/* src_strd1 - stride for source 1 */ +/* src_strd2 - stride for source 2 */ +/* dst_strd2 - stride for destination */ +/* log_wd - number of bits to be rounded off */ +/* wt1 - weight values for u and v in source 1 */ +/* wt2 - weight values for u and v in source 2 */ +/* ofst1 - offset value for u and v in source 1 */ +/* ofst2 - offset value for u and v in source 2 */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 01 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_weighted_bi_pred_chroma(UWORD8 *pu1_src1, + UWORD8 *pu1_src2, + UWORD8 *pu1_dst, + WORD32 src_strd1, + WORD32 src_strd2, + WORD32 dst_strd, + WORD32 log_wd, + WORD32 wt1, + WORD32 wt2, + WORD32 ofst1, + WORD32 ofst2, + WORD32 ht, + WORD32 wd) +{ + WORD32 i, j; + WORD32 wt1_u, wt1_v, wt2_u, wt2_v; + WORD32 ofst1_u, ofst1_v, ofst2_u, ofst2_v; + WORD32 ofst_u, ofst_v; + WORD32 shft; + + ofst1_u = (WORD8)(ofst1 & 0xff); + ofst1_v = (WORD8)(ofst1 >> 8); + ofst2_u = (WORD8)(ofst2 & 0xff); + ofst2_v = (WORD8)(ofst2 >> 8); + wt1_u = (WORD16)(wt1 & 0xffff); + wt1_v = (WORD16)(wt1 >> 16); + 
wt2_u = (WORD16)(wt2 & 0xffff); + wt2_v = (WORD16)(wt2 >> 16); + ofst_u = (ofst1_u + ofst2_u + 1) >> 1; + ofst_v = (ofst1_v + ofst2_v + 1) >> 1; + + src_strd1 -= wd << 1; + src_strd2 -= wd << 1; + dst_strd -= wd << 1; + + shft = log_wd + 1; + ofst_u = (1 << log_wd) + (ofst_u << shft); + ofst_v = (1 << log_wd) + (ofst_v << shft); + + for(i = 0; i < ht; i++) + { + for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++) + { + *pu1_dst = CLIP_U8((wt1_u * (*pu1_src1) + wt2_u * (*pu1_src2) + ofst_u) >> shft); + pu1_src1++; + pu1_src2++; + pu1_dst++; + *pu1_dst = CLIP_U8((wt1_v * (*pu1_src1) + wt2_v * (*pu1_src2) + ofst_v) >> shft); + } + pu1_src1 += src_strd1; + pu1_src2 += src_strd2; + pu1_dst += dst_strd; + } +} diff --git a/common/ih264_weighted_pred.h b/common/ih264_weighted_pred.h new file mode 100755 index 0000000..f9b93b0 --- /dev/null +++ b/common/ih264_weighted_pred.h @@ -0,0 +1,164 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264_weighted_pred.h +* +* @brief +* Declarations of functions used for weighted prediction +* +* @author +* Ittiam +* +* @par List of Functions: +* -ih264_default_weighted_pred_luma +* -ih264_default_weighted_pred_chroma +* -ih264_weighted_pred_luma +* -ih264_weighted_pred_chroma +* -ih264_weighted_bi_pred_luma +* -ih264_weighted_bi_pred_chroma +* -ih264_default_weighted_pred_luma_a9q +* -ih264_default_weighted_pred_chroma_a9q +* -ih264_weighted_pred_luma_a9q +* -ih264_weighted_pred_luma_a9q +* -ih264_weighted_bi_pred_luma_a9q +* -ih264_weighted_bi_pred_chroma_a9q +* -ih264_default_weighted_pred_luma_av8 +* -ih264_default_weighted_pred_chroma_av8 +* -ih264_weighted_pred_luma_av8 +* -ih264_weighted_pred_chroma_av8 +* -ih264_weighted_bi_pred_luma_av8 +* -ih264_weighted_bi_pred_chroma_av8 +* -ih264_default_weighted_pred_luma_sse42 +* -ih264_default_weighted_pred_chroma_sse42 +* -ih264_weighted_pred_luma_sse42 +* -ih264_weighted_pred_chroma_sse42 +* -ih264_weighted_bi_pred_luma_sse42 +* -ih264_weighted_bi_pred_chroma_sse42 +* +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264_WEIGHTED_PRED_H_ +#define IH264_WEIGHTED_PRED_H_ + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ +typedef void ih264_default_weighted_pred_ft(UWORD8 *puc_src1, + UWORD8 *puc_src2, + UWORD8 *puc_dst, + WORD32 src_strd1, + WORD32 src_strd2, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd); + +typedef void ih264_weighted_pred_ft(UWORD8 *puc_src, + UWORD8 *puc_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 log_wd, + WORD32 wt, + WORD32 ofst, + WORD32 ht, + WORD32 wd); + +typedef void ih264_weighted_bi_pred_ft(UWORD8 *puc_src1, + UWORD8 *puc_src2, + UWORD8 
*puc_dst, + WORD32 src_strd1, + WORD32 src_strd2, + WORD32 dst_strd, + WORD32 log_wd, + WORD32 wt1, + WORD32 wt2, + WORD32 ofst1, + WORD32 ofst2, + WORD32 ht, + WORD32 wd); + +/* No NEON Declarations */ + +ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma; + +ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma; + +ih264_weighted_pred_ft ih264_weighted_pred_luma; + +ih264_weighted_pred_ft ih264_weighted_pred_chroma; + +ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma; + +ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma; + +/* A9 NEON Declarations */ + +ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_a9q; + +ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_a9q; + +ih264_weighted_pred_ft ih264_weighted_pred_luma_a9q; + +ih264_weighted_pred_ft ih264_weighted_pred_chroma_a9q; + +ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_a9q; + +ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_a9q; + + +/* AV8 NEON Declarations */ + +ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_av8; + +ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_av8; + +ih264_weighted_pred_ft ih264_weighted_pred_luma_av8; + +ih264_weighted_pred_ft ih264_weighted_pred_chroma_av8; + +ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_av8; + +ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_av8; + + +/* SSE42 Intrinsic Declarations */ + +ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_sse42; + +ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_sse42; + +ih264_weighted_pred_ft ih264_weighted_pred_luma_sse42; + +ih264_weighted_pred_ft ih264_weighted_pred_chroma_sse42; + +ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_sse42; + +ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_sse42; + +#endif /* IH264_WEIGHTED_PRED_H_ */ + +/** Nothing past this point */ diff --git a/common/ithread.c b/common/ithread.c new file mode 100755 index 
0000000..4ffb98a --- /dev/null +++ b/common/ithread.c @@ -0,0 +1,604 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ithread.c */ +/* */ +/* Description : Contains abstraction for threads, mutex and semaphores*/ +/* */ +/* List of Functions : */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 Harish Initial Version */ +/*****************************************************************************/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <string.h> +#include "ih264_typedefs.h" + +/* + * If the end target is bare metal, then there shall be no OS. + * In this case, the functions ithread_* used inside the h264 encoder library to assist multicore + * will not longer be functional. To resolve link issues, the functions are re-defined with no body. 
+ */ +#ifndef BAREMETAL + + +#include "ithread.h" +#include <sys/types.h> + + +#define UNUSED(x) ((void)(x)) + +#ifndef X86_MSVC +//#define PTHREAD_AFFINITY +//#define SYSCALL_AFFINITY + +#ifdef PTHREAD_AFFINITY +#define _GNU_SOURCE +#define __USE_GNU +#endif + +#include <pthread.h> +#include <sched.h> +#include <semaphore.h> +#include <unistd.h> +#ifdef PTHREAD_AFFINITY +#include <sys/prctl.h> +#endif + +#endif + +#ifdef X86_MSVC + +#include <windows.h> +#define SEM_MAX_COUNT 100 +#define SEM_INCREMENT_COUNT 1 + +UWORD32 ithread_get_handle_size(void) +{ + return (sizeof(HANDLE)); +} + +UWORD32 ithread_get_mutex_lock_size(void) +{ + return (sizeof(HANDLE)); +} + +WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument) +{ + HANDLE *ppv_thread_handle; + HANDLE thread_handle_value; + + UNUSED(attribute); + + if(0 == thread_handle) + return -1; + + ppv_thread_handle = (HANDLE *)thread_handle; + thread_handle_value = (void *)CreateThread + (NULL, /* Attributes */ + 1024*128, /* Stack i4_size */ + (LPTHREAD_START_ROUTINE)strt, /* Thread function */ + argument, /* Parameters */ + 0, /* Creation flags */ + NULL); /* Thread ID */ + *ppv_thread_handle = (HANDLE)thread_handle_value; + + return 0; +} + +WORD32 ithread_join(void *thread_handle, void ** val_ptr) +{ + HANDLE *ppv_thread_handle; + HANDLE thread_handle_value; + + UNUSED(val_ptr); + + if(0 == thread_handle) + return -1; + + ppv_thread_handle = (HANDLE *)thread_handle; + thread_handle_value = *ppv_thread_handle; + + if(WAIT_OBJECT_0 == WaitForSingleObject(thread_handle_value, INFINITE)) + { + CloseHandle(thread_handle_value); + } + + return 0; +} + +void ithread_exit(void *thread_handle) +{ + HANDLE *ppv_thread_handle; + HANDLE thread_handle_value; + DWORD thread_exit_code; + + if(0 == thread_handle) + return; + + ppv_thread_handle = (HANDLE *)thread_handle; + thread_handle_value = *ppv_thread_handle; + /* Get exit code for thread. 
If the return value is 0, means thread is busy */ + if( 0 != GetExitCodeThread(thread_handle_value, &thread_exit_code)) + { + TerminateThread(thread_handle_value, thread_exit_code); + } + + return; +} + +WORD32 ithread_get_mutex_struct_size(void) +{ + return (sizeof(HANDLE)); +} + +WORD32 ithread_mutex_init(void *mutex) +{ + HANDLE *ppv_mutex_handle; + HANDLE mutex_handle_value; + + if(0 == mutex) + return -1; + + ppv_mutex_handle = (HANDLE *)mutex; + mutex_handle_value = CreateSemaphore(NULL, 1, 1, NULL); + *ppv_mutex_handle = mutex_handle_value; + return 0; +} + +WORD32 ithread_mutex_destroy(void *mutex) +{ + HANDLE *ppv_mutex_handle; + HANDLE mutex_handle_value; + + if(0 == mutex) + return -1; + + ppv_mutex_handle = (HANDLE *)mutex; + mutex_handle_value = *ppv_mutex_handle; + CloseHandle(mutex_handle_value); + return 0; +} + +WORD32 ithread_mutex_lock(void *mutex) +{ + HANDLE *ppv_mutex_handle; + HANDLE mutex_handle_value; + DWORD result = 0; + + if(0 == mutex) + return -1; + + ppv_mutex_handle = (HANDLE *)mutex; + mutex_handle_value = *ppv_mutex_handle; + result = WaitForSingleObject(mutex_handle_value, INFINITE); + + if(WAIT_OBJECT_0 == result) + return 0; + + return 1; + +} + +WORD32 ithread_mutex_unlock(void *mutex) +{ + HANDLE *ppv_mutex_handle; + HANDLE mutex_handle_value; + DWORD result = 0; + + if(0 == mutex) + return -1; + + ppv_mutex_handle = (HANDLE *)mutex; + mutex_handle_value = *ppv_mutex_handle; + result = ReleaseSemaphore(mutex_handle_value, 1, NULL); + + if(0 == result) + return -1; + + return 0; +} + +void ithread_yield(void) { } + +void ithread_usleep(UWORD32 u4_time_us) +{ + UWORD32 u4_time_ms = u4_time_us / 1000; + Sleep(u4_time_ms); +} + +void ithread_msleep(UWORD32 u4_time_ms) +{ + Sleep(u4_time_ms); +} + +void ithread_sleep(UWORD32 u4_time) +{ + UWORD32 u4_time_ms = u4_time * 1000; + Sleep(u4_time_ms); +} + +UWORD32 ithread_get_sem_struct_size(void) +{ + return (sizeof(HANDLE)); +} + +WORD32 ithread_sem_init(void *sem,WORD32 
pshared,UWORD32 value) +{ + HANDLE *sem_handle = (HANDLE *)sem; + HANDLE sem_handle_value; + + if(0 == sem) + return -1; + + sem_handle_value = CreateSemaphore(NULL, /* Security Attribute*/ + value, /* Initial count */ + SEM_MAX_COUNT,/* Max value */ + NULL); /* Name, not used */ + *sem_handle = sem_handle_value; + return 0; +} + +WORD32 ithread_sem_post(void *sem) +{ + HANDLE *sem_handle = (HANDLE *)sem; + HANDLE sem_handle_value; + + if(0 == sem) + return -1; + + sem_handle_value = *sem_handle; + + /* Post on Semaphore by releasing the lock on mutex */ + if(ReleaseSemaphore(sem_handle_value, SEM_INCREMENT_COUNT, NULL)) + return 0; + + return -1; +} + +WORD32 ithread_sem_wait(void *sem) +{ + DWORD result = 0; + HANDLE *sem_handle = (HANDLE *)sem; + HANDLE sem_handle_value; + + if(0 == sem) + return -1; + + sem_handle_value = *sem_handle; + + /* Wait on Semaphore object infinitly */ + result = WaitForSingleObject(sem_handle_value, INFINITE); + + /* If lock on semaphore is acquired, return SUCCESS */ + if(WAIT_OBJECT_0 == result) + return 0; + + /* If call timeouts, return FAILURE */ + if(WAIT_TIMEOUT == result) + return -1; + + return 0; +} + +WORD32 ithread_sem_destroy(void *sem) +{ + HANDLE *sem_handle = (HANDLE *)sem; + HANDLE sem_handle_value; + + if(0 == sem) + return -1; + + sem_handle_value = *sem_handle; + + if(FALSE == CloseHandle(sem_handle_value) ) + { + return -1; + } + return 0; +} + +WORD32 ithread_set_affinity(WORD32 core_id) +{ + return 1; +} + +#else + +UWORD32 ithread_get_handle_size(void) +{ + return sizeof(pthread_t); +} + +UWORD32 ithread_get_mutex_lock_size(void) +{ + return sizeof(pthread_mutex_t); +} + + +WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument) +{ + UNUSED(attribute); + return pthread_create((pthread_t *)thread_handle, NULL,(void *(*)(void *)) strt, argument); +} + +WORD32 ithread_join(void *thread_handle, void ** val_ptr) +{ + UNUSED(val_ptr); + pthread_t *pthread_handle = (pthread_t 
*)thread_handle; + return pthread_join(*pthread_handle, NULL); +} + +void ithread_exit(void *val_ptr) +{ + return pthread_exit(val_ptr); +} + +WORD32 ithread_get_mutex_struct_size(void) +{ + return(sizeof(pthread_mutex_t)); +} +WORD32 ithread_mutex_init(void *mutex) +{ + return pthread_mutex_init((pthread_mutex_t *) mutex, NULL); +} + +WORD32 ithread_mutex_destroy(void *mutex) +{ + return pthread_mutex_destroy((pthread_mutex_t *) mutex); +} + +WORD32 ithread_mutex_lock(void *mutex) +{ + return pthread_mutex_lock((pthread_mutex_t *)mutex); +} + +WORD32 ithread_mutex_unlock(void *mutex) +{ + return pthread_mutex_unlock((pthread_mutex_t *)mutex); +} + +void ithread_yield(void) +{ + sched_yield(); +} + +void ithread_sleep(UWORD32 u4_time) +{ + usleep(u4_time * 1000 * 1000); +} + +void ithread_msleep(UWORD32 u4_time_ms) +{ + usleep(u4_time_ms * 1000); +} + +void ithread_usleep(UWORD32 u4_time_us) +{ + usleep(u4_time_us); +} + +UWORD32 ithread_get_sem_struct_size(void) +{ + return(sizeof(sem_t)); +} + + +WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value) +{ + return sem_init((sem_t *)sem,pshared,value); +} + +WORD32 ithread_sem_post(void *sem) +{ + return sem_post((sem_t *)sem); +} + + +WORD32 ithread_sem_wait(void *sem) +{ + return sem_wait((sem_t *)sem); +} + + +WORD32 ithread_sem_destroy(void *sem) +{ + return sem_destroy((sem_t *)sem); +} + +void ithread_set_name(CHAR *pc_thread_name) +{ + +#ifndef WIN32 +#ifndef QNX +#ifndef IOS + UNUSED(pc_thread_name); +//prctl(PR_SET_NAME, (unsigned long)pu1_thread_name, 0, 0, 0); +#endif +#endif +#endif + +} +WORD32 ithread_set_affinity(WORD32 core_id) +{ +#ifdef PTHREAD_AFFINITY + cpu_set_t cpuset; + int num_cores = sysconf(_SC_NPROCESSORS_ONLN); + pthread_t cur_thread = pthread_self(); + + if (core_id >= num_cores) + return -1; + + CPU_ZERO(&cpuset); + CPU_SET(core_id, &cpuset); + + return pthread_setaffinity_np(cur_thread, sizeof(cpu_set_t), &cpuset); + +#elif SYSCALL_AFFINITY + WORD32 i4_sys_res; + 
UNUSED(core_id); + + pid_t pid = gettid(); + + + i4_sys_res = syscall(__NR_sched_setaffinity, pid, sizeof(i4_mask), &i4_mask); + if (i4_sys_res) + { + //WORD32 err; + //err = errno; + //perror("Error in setaffinity syscall PERROR : "); + //LOG_ERROR("Error in the syscall setaffinity: mask=0x%x err=0x%x", i4_mask, i4_sys_res); + return -1; + } +#else + UNUSED(core_id); +#endif + return 1; + +} +#endif + +#else + +UWORD32 ithread_get_handle_size(void) +{ + return sizeof(int); +} + +UWORD32 ithread_get_mutex_lock_size(void) +{ + return sizeof(int); +} + +UWORD32 ithread_get_cond_size(void) +{ + return(sizeof(int)); +} +WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument) +{ + return 0; +} + +WORD32 ithread_join(void *thread_handle, void ** val_ptr) +{ + return 0; +} + +void ithread_exit(void *val_ptr) +{ + return; +} + +WORD32 ithread_mutex_init(void *mutex) +{ + return 0; +} + +WORD32 ithread_mutex_destroy(void *mutex) +{ + return 0; +} + +WORD32 ithread_mutex_lock(void *mutex) +{ + return 0; +} + +WORD32 ithread_mutex_unlock(void *mutex) +{ + return 0; +} + +void ithread_yield(void) +{ + return; +} + +void ithread_sleep(UWORD32 u4_time) +{ + return; +} + +void ithread_usleep(UWORD32 u4_time_us) +{ + return; +} + +UWORD32 ithread_get_sem_struct_size(void) /* name matches the prototype in ithread.h */ +{ + return(sizeof(int)); +} + + +WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value) +{ + return 0; +} + +WORD32 ithread_sem_post(void *sem) +{ + return 0; +} + + +WORD32 ithread_sem_wait(void *sem) +{ + return 0; +} + +WORD32 ithread_sem_destroy(void *sem) +{ + return 0; +} + +void ithread_set_name(CHAR *pc_thread_name) /* signature matches ithread.h */ +{ + return; +} + +void ithread_condition_init(void *condition) +{ + return; +} + +void ithread_condition_signal(void * condition) +{ + return; +} + + + +void ithread_condition_wait(void *condition,void *mutex) +{ + return; +} + +WORD32 ithread_set_affinity(WORD32 core_id) +{ + return 1; +} +#endif diff --git a/common/ithread.h
b/common/ithread.h new file mode 100755 index 0000000..f926f83 --- /dev/null +++ b/common/ithread.h @@ -0,0 +1,104 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ithread.h */ +/* */ +/* Description : This file contains all the necessary structure and */ +/* enumeration definitions needed for the Application */ +/* Program Interface(API) of the */ +/* Thread Abstraction Layer */ +/* */ +/* List of Functions : ithread_get_handle_size */ +/* ithread_get_mutex_lock_size */ +/* ithread_create */ +/* ithread_exit */ +/* ithread_join */ +/* ithread_get_mutex_struct_size */ +/* ithread_mutex_init */ +/* ithread_mutex_destroy */ +/* ithread_mutex_lock */ +/* ithread_mutex_unlock */ +/* ithread_yield */ +/* ithread_sleep */ +/* ithread_msleep */ +/* ithread_usleep */ +/* ithread_get_sem_struct_size */ +/* ithread_sem_init */ +/* ithread_sem_post */ +/* ithread_sem_wait */ +/* ithread_sem_destroy */ +/* ithread_set_affinity */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 06 09 2012 
Harish Initial Version */ +/* */ +/*****************************************************************************/ + +#ifndef _ITHREAD_H_ +#define _ITHREAD_H_ +/* Size of a thread handle object; sizeof(pthread_t) in the pthread build. */ +UWORD32 ithread_get_handle_size(void); +/* Size of a mutex object; sizeof(pthread_mutex_t) in the pthread build. */ +UWORD32 ithread_get_mutex_lock_size(void); +/* Starts a thread running strt(argument); the pthread build ignores attribute. */ +WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument); + +void ithread_exit(void *val_ptr); +/* The pthread build ignores val_ptr and discards the joined thread's exit value. */ +WORD32 ithread_join(void *thread_id, void ** val_ptr); + +WORD32 ithread_get_mutex_struct_size(void); + +WORD32 ithread_mutex_init(void *mutex); + +WORD32 ithread_mutex_destroy(void *mutex); + +WORD32 ithread_mutex_lock(void *mutex); + +WORD32 ithread_mutex_unlock(void *mutex); + +void ithread_yield(void); +/* u4_time is in seconds. */ +void ithread_sleep(UWORD32 u4_time); +/* u4_time_ms is in milliseconds. */ +void ithread_msleep(UWORD32 u4_time_ms); +/* u4_time_us is in microseconds. */ +void ithread_usleep(UWORD32 u4_time_us); + +UWORD32 ithread_get_sem_struct_size(void); + +WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value); + +WORD32 ithread_sem_post(void *sem); + +WORD32 ithread_sem_wait(void *sem); + +WORD32 ithread_sem_destroy(void *sem); +/* With PTHREAD_AFFINITY: pins the calling thread to core_id and returns the pthread_setaffinity_np() result, or -1 if core_id is out of range. Builds without affinity support just return 1. */ +WORD32 ithread_set_affinity(WORD32 core_id); +/* Currently a no-op in the pthread build (the prctl call is commented out). */ +void ithread_set_name(CHAR *pc_thread_name); + +#endif /* _ITHREAD_H_ */ diff --git a/common/mips/ih264_platform_macros.h b/common/mips/ih264_platform_macros.h new file mode 100755 index 0000000..d098372 --- /dev/null +++ b/common/mips/ih264_platform_macros.h @@ -0,0 +1,102 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_platform_macros.h +* +* @brief +* Platform specific Macro definitions used in the codec +* +* @author +* Ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + + +#ifndef _IH264_PLATFORM_MACROS_H_ +#define _IH264_PLATFORM_MACROS_H_ + +#define CLIP_U8(x) CLIP3(0, 255, (x)) +#define CLIP_S8(x) CLIP3(-128, 127, (x)) + +#define CLIP_U10(x) CLIP3(0, 1023, (x)) +#define CLIP_S10(x) CLIP3(-512, 511, (x)) + +#define CLIP_U12(x) CLIP3(0, 4095, (x)) +#define CLIP_S12(x) CLIP3(-2048, 2047, (x)) + +#define CLIP_U16(x) CLIP3(0, 65535, (x)) +#define CLIP_S16(x) CLIP3(-32768, 32767, (x)) + +#define MEM_ALIGN16 __attribute__ ((aligned (16))) + +#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0) +#define SHR(x,y) (((y) < 32) ? 
((x) >> (y)) : 0) + +#define SHR_NEG(val,shift) (((shift) > 0) ? ((val) >> (shift)) : ((val) << (-(shift)))) +#define SHL_NEG(val,shift) (((shift) < 0) ? ((val) >> (-(shift))) : ((val) << (shift))) + +/* Byte-swap of a 32-bit word; arguments are parenthesised and widened to UWORD32 before shifting. The trailing ';' is kept for existing callers. */ +#define ITT_BIG_ENDIAN(x) ((((UWORD32)(x)) << 24) | \ + ((((UWORD32)(x)) & 0x0000ff00) << 8) | \ + ((((UWORD32)(x)) & 0x00ff0000) >> 8) | \ + (((UWORD32)(x)) >> 24)); + + +#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < (nop_cnt); nop_i++);} + +#define PLD(a) + +static __inline UWORD32 CLZ(UWORD32 u4_word) +{ + if(u4_word) + return(__builtin_clz(u4_word)); + else + return 32; +} + +static __inline UWORD32 CTZ(UWORD32 u4_word) +{ + if(0 == u4_word) + return 31; + else + { + unsigned int index; + index = __builtin_ctz(u4_word); + return (UWORD32)index; + } +} + +#define DATA_SYNC() + +#define INLINE + +#define PREFETCH(ptr, type) + +#define MEM_ALIGN8 __attribute__ ((aligned (8))) +#define MEM_ALIGN16 __attribute__ ((aligned (16))) +#define MEM_ALIGN32 __attribute__ ((aligned (32))) + +#endif /* _IH264_PLATFORM_MACROS_H_ */ diff --git a/common/x86/ih264_chroma_intra_pred_filters_ssse3.c b/common/x86/ih264_chroma_intra_pred_filters_ssse3.c new file mode 100755 index 0000000..45101a4 --- /dev/null +++ b/common/x86/ih264_chroma_intra_pred_filters_ssse3.c @@ -0,0 +1,433 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_chroma_intra_pred_filters_ssse3.c +* +* @brief +* Contains function definitions for chroma intra prediction filters in x86 +* intrinsics +* +* @author +* Ittiam +* +* @par List of Functions: +* -ih264_intra_pred_chroma_8x8_mode_horz_ssse3 +* -ih264_intra_pred_chroma_8x8_mode_vert_ssse3 +* -ih264_intra_pred_chroma_8x8_mode_plane_ssse3 +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <stddef.h> +#include <string.h> + +/* User include files */ +#include "ih264_defs.h" +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_intra_pred_filters.h" + + +/*****************************************************************************/ +/* Chroma Intra prediction 8x8 filters */ +/*****************************************************************************/ +/** +******************************************************************************* +* +* ih264_intra_pred_chroma_8x8_mode_horz_ssse3 +* +* @brief +* Perform Intra prediction for chroma_8x8 mode:Horizontal +* +* @par Description: +* Perform Intra prediction for chroma_8x8 mode:Horizontal ,described in sec 8.3.4.2 +* +* @param[in] pu1_src +* UWORD8 pointer to the source containing alternate U and V samples +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination with alternate U and V samples +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride 
+* +* @param[in] ngbr_avail +* availability of neighbouring pixels(Not used in this function) +* +* @returns +* +* @remarks +* None +* +****************************************************************************** +*/ +void ih264_intra_pred_chroma_8x8_mode_horz_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + + UWORD8 *pu1_left; /* Pointer to the left predictors */ + WORD32 dst_strd2; + + __m128i left_16x8b, left_sh_16x8b; + __m128i row1_16x8b, row2_16x8b; + __m128i const_14_15_16x8b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_left = pu1_src + 2 * BLK8x8SIZE - 2; + + left_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 14)); /* 16 bytes ending at pu1_left + 1 */ + + const_14_15_16x8b = _mm_set1_epi16(0x0f0e); /* shuffle mask: copy bytes 14,15 into every 16-bit lane */ + + dst_strd2 = dst_strd << 1; + left_sh_16x8b = _mm_slli_si128(left_16x8b, 2); /* moves the pair at bytes 12,13 up to bytes 14,15 */ + row1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); + row2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); + _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b); + + left_16x8b = _mm_slli_si128(left_16x8b, 4); /* 4-byte shift brings the pair two positions lower into bytes 14,15 */ + left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4); + pu1_dst += dst_strd2; + row1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); + row2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); + _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b); + + left_16x8b = _mm_slli_si128(left_16x8b, 4); + left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4); + pu1_dst += dst_strd2; + row1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); + row2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); + _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b); + + left_16x8b = _mm_slli_si128(left_16x8b, 4); + left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4); + pu1_dst += dst_strd2; + row1_16x8b = _mm_shuffle_epi8(left_16x8b,
const_14_15_16x8b); + row2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); + _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b); +} + +/** +******************************************************************************* +* +* ih264_intra_pred_chroma_8x8_mode_vert_ssse3 +* +* @brief +* Perform Intra prediction for chroma_8x8 mode:vertical +* +* @par Description: +* Perform Intra prediction for chroma_8x8 mode:vertical ,described in sec 8.3.4.3 +* +* @param[in] pu1_src +* UWORD8 pointer to the source containing alternate U and V samples +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination with alternate U and V samples +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] ngbr_avail +* availability of neighbouring pixels(Not used in this function) +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ +void ih264_intra_pred_chroma_8x8_mode_vert_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top; /* Pointer to start of top predictors */ + WORD32 dst_strd2; + + __m128i top_16x8b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_top = pu1_src + 2 * BLK8x8SIZE + 2; + + top_16x8b = _mm_loadu_si128((__m128i *)pu1_top); + + dst_strd2 = dst_strd << 1; + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b); + + pu1_dst += dst_strd2; + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b); + + pu1_dst += dst_strd2; + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b); + + pu1_dst += dst_strd2; + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b); 
+} + +/** +******************************************************************************* +* +* ih264_intra_pred_chroma_8x8_mode_plane_ssse3 +* +* @brief +* Perform Intra prediction for chroma_8x8 mode:PLANE +* +* @par Description: +* Perform Intra prediction for chroma_8x8 mode:PLANE ,described in sec 8.3.4.4 +* +* @param[in] pu1_src +* UWORD8 pointer to the source containing alternate U and V samples +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination with alternate U and V samples +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] ngbr_avail +* availability of neighbouring pixels(Not used in this function) +* +* @returns +* +* @remarks +* None +* +****************************************************************************** +*/ +void ih264_intra_pred_chroma_8x8_mode_plane_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left, *pu1_top; + WORD32 a_u, a_v, b_u, b_v, c_u, c_v; + + __m128i mul_8x16b, shuffle_8x16b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_top = pu1_src + MB_SIZE + 2; + pu1_left = pu1_src + MB_SIZE - 2; + + mul_8x16b = _mm_setr_epi16(1, 2, 3, 4, 1, 2, 3, 4); + shuffle_8x16b = _mm_setr_epi16(0xff00, 0xff02, 0xff04, 0xff06, + 0xff01, 0xff03, 0xff05, 0xff07); + + //calculating a, b and c + { + WORD32 h_u, h_v, v_u, v_v; + WORD32 temp1, temp2; + + __m128i h_val1_16x8b, h_val2_16x8b; + __m128i h_val1_8x16b, h_val2_8x16b, h_val_4x32b; + __m128i v_val1_16x8b, v_val2_16x8b; + __m128i v_val1_8x16b, v_val2_8x16b, v_val_4x32b; + __m128i hv_val_4x32b; + + h_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top + 8)); + h_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top - 2)); + v_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 14)); + v_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 4)); + + // reversing the order + h_val2_16x8b = _mm_shufflelo_epi16(h_val2_16x8b, 0x1b); + v_val1_16x8b = 
_mm_shufflelo_epi16(v_val1_16x8b, 0x1b); + + // separating u and v and 8-bit to 16-bit conversion + h_val1_8x16b = _mm_shuffle_epi8(h_val1_16x8b, shuffle_8x16b); + h_val2_8x16b = _mm_shuffle_epi8(h_val2_16x8b, shuffle_8x16b); + v_val1_8x16b = _mm_shuffle_epi8(v_val1_16x8b, shuffle_8x16b); + v_val2_8x16b = _mm_shuffle_epi8(v_val2_16x8b, shuffle_8x16b); + + h_val1_8x16b = _mm_sub_epi16(h_val1_8x16b, h_val2_8x16b); + v_val1_8x16b = _mm_sub_epi16(v_val1_8x16b, v_val2_8x16b); + + h_val_4x32b = _mm_madd_epi16(mul_8x16b, h_val1_8x16b); + v_val_4x32b = _mm_madd_epi16(mul_8x16b, v_val1_8x16b); + + temp1 = _mm_extract_epi16(h_val1_16x8b, 3); + temp2 = _mm_extract_epi16(v_val1_16x8b, 3); + + hv_val_4x32b = _mm_hadd_epi32(h_val_4x32b, v_val_4x32b); + + a_u = ((temp1 & 0xff) + (temp2 & 0xff)) << 4; + a_v = ((temp1 >> 8) + (temp2 >> 8)) << 4; + + h_u = _mm_extract_epi16(hv_val_4x32b, 0); + h_v = _mm_extract_epi16(hv_val_4x32b, 2); + v_u = _mm_extract_epi16(hv_val_4x32b, 4); + v_v = _mm_extract_epi16(hv_val_4x32b, 6); + + h_u = (WORD16)h_u * 2; // sign-extension of the low 16 bits and multiplication by 2, without shifting into the sign bit + h_v = (WORD16)h_v * 2; + v_u = (WORD16)v_u * 2; + v_v = (WORD16)v_v * 2; + + b_u = (h_u * 17 + 32) >> 6; // same value as ((h << 4) + h + 32) >> 6, no left shift of a negative value + b_v = (h_v * 17 + 32) >> 6; + c_u = (v_u * 17 + 32) >> 6; + c_v = (v_v * 17 + 32) >> 6; + } + //using a, b and c to compute the fitted plane values + { + __m128i const_8x16b, c2_8x16b; + __m128i res1_l_8x16b, res1_h_8x16b; + __m128i res2_l_8x16b, res2_h_8x16b; + __m128i res1_sh_l_8x16b, res1_sh_h_8x16b, res1_16x8b; + __m128i res2_sh_l_8x16b, res2_sh_h_8x16b, res2_16x8b; + + WORD32 b_u2, b_v2, b_u3, b_v3; + WORD32 const_u, const_v; + WORD32 dst_strd2; + + const_u = a_u - (c_u * 2) - c_u + 16; + const_v = a_v - (c_v * 2) - c_v + 16; + + b_u2 = b_u * 2; + b_v2 = b_v * 2; + b_u3 = b_u + b_u2; + b_v3 = b_v + b_v2; + + const_8x16b = _mm_setr_epi16(const_u, const_v, const_u, const_v, const_u, const_v, const_u, const_v); + res1_l_8x16b =
_mm_setr_epi16(-b_u3, -b_v3, -b_u2, -b_v2, -b_u, -b_v, 0, 0); + //contains {-b*3, -b*2, -b*1, b*0} + res1_h_8x16b = _mm_setr_epi16(b_u, b_v, b_u2, b_v2, b_u3, b_v3, b_u << 2, b_v << 2); + //contains {b*1, b*2, b*3, b*4} + c2_8x16b = _mm_setr_epi16(c_u, c_v, c_u, c_v, c_u, c_v, c_u, c_v); + + // rows 1, 2 + res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, const_8x16b); + res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, const_8x16b); + res2_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); + res2_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); + + res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); + res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); + res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); + res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); + + dst_strd2 = dst_strd << 1; + c2_8x16b = _mm_slli_epi16(c2_8x16b, 1); + + res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); + res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + + // rows 3, 4 + res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); + res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); + res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); + res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); + + res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); + res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); + res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); + res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); + + pu1_dst += dst_strd2; + + res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); + res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + + // rows 5, 6 + res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); + res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); + res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); 
+ res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); + + res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); + res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); + res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); + res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); + + pu1_dst += dst_strd2; + + res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); + res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + + // rows 7, 8 + res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); + res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); + res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); + res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); + + res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); + res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); + res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); + res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); + + pu1_dst += dst_strd2; + + res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); + res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + + } +} diff --git a/common/x86/ih264_deblk_chroma_ssse3.c b/common/x86/ih264_deblk_chroma_ssse3.c new file mode 100755 index 0000000..a36447a --- /dev/null +++ b/common/x86/ih264_deblk_chroma_ssse3.c @@ -0,0 +1,1087 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ih264_deblk_chroma_ssse3.c */ +/* */ +/* Description : Contains function definitions for deblocking */ +/* */ +/* List of Functions : ih264_deblk_chroma_vert_bs4_ssse3() */ +/* ih264_deblk_chroma_horz_bs4_ssse3() */ +/* ih264_deblk_chroma_vert_bslt4_ssse3() */ +/* ih264_deblk_chroma_horz_bslt4_ssse3() */ +/* ih264_deblk_chroma_vert_bs4_mbaff_ssse3() */ +/* ih264_deblk_chroma_vert_bslt4_mbaff_ssse3() */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 12 02 2015 Naveen Kumar P Added chrom deblocking ssse3 */ +/* intrinsics */ +/* */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_platform_macros.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_macros.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ 
+ +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_chroma_vert_bs4_ssse3() */ +/* */ +/* Description : This function performs filtering of a chroma block */ +/* vertical edge when the boundary strength is set to 4 in */ +/* high profile. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 of U */ +/* src_strd - source stride */ +/* alpha_cb - alpha value for the boundary in U */ +/* beta_cb - beta value for the boundary in U */ +/* alpha_cr - alpha value for the boundary in V */ +/* beta_cr - beta value for the boundary in V */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.4 under the */ +/* title "Filtering process for edges for bS equal to 4" in */ +/* ITU T Rec H.264 with alpha and beta values different in */ +/* U and V. */ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 12 02 2015 Naveen Kumar P Initial version */ +/* */ +/*****************************************************************************/ +void ih264_deblk_chroma_vert_bs4_ssse3(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha_cb, + WORD32 beta_cb, + WORD32 alpha_cr, + WORD32 beta_cr) +{ + UWORD8 *pu1_src_uv = pu1_src; /* Pointer to the src sample q0 of plane U*/ + WORD32 alpha_cbcr = (alpha_cr << 16) + alpha_cb; + WORD32 beta_cbcr = (beta_cr << 16) + beta_cb; + __m128i linea, lineb, linec, lined, linee, linef, lineg, lineh; + __m128i temp1, temp2, temp3, temp4; + + __m128i q0_uv_16x8, p0_uv_16x8, q1_uv_16x8, p1_uv_16x8; + __m128i q0_uv_8x16, p0_uv_8x16, q1_uv_8x16, p1_uv_8x16; + __m128i flag1, flag2; + __m128i diff, alpha_cbcr_16x8, beta_cbcr_16x8; + __m128i zero = _mm_setzero_si128(); + __m128i p0_uv_8x16_1, p0_uv_8x16_2, q0_uv_8x16_1, q0_uv_8x16_2; + + /* Load and transpose the pixel values */ + linea = 
_mm_loadl_epi64((__m128i *)(pu1_src_uv - 4)); + lineb = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + src_strd)); + linec = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd)); + lined = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd)); + linee = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 4 * src_strd)); + linef = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 5 * src_strd)); + lineg = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 6 * src_strd)); + lineh = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 7 * src_strd)); + + temp1 = _mm_unpacklo_epi16(linea, lineb); + temp2 = _mm_unpacklo_epi16(linec, lined); + temp3 = _mm_unpacklo_epi16(linee, linef); + temp4 = _mm_unpacklo_epi16(lineg, lineh); + + p1_uv_8x16 = _mm_unpacklo_epi32(temp1, temp2); + p0_uv_8x16 = _mm_unpacklo_epi32(temp3, temp4); + q0_uv_8x16 = _mm_unpackhi_epi32(temp1, temp2); + q1_uv_8x16 = _mm_unpackhi_epi32(temp3, temp4); + + p1_uv_16x8 = _mm_unpacklo_epi64(p1_uv_8x16, p0_uv_8x16); + p0_uv_16x8 = _mm_unpackhi_epi64(p1_uv_8x16, p0_uv_8x16); + q0_uv_16x8 = _mm_unpacklo_epi64(q0_uv_8x16, q1_uv_8x16); + q1_uv_16x8 = _mm_unpackhi_epi64(q0_uv_8x16, q1_uv_8x16); + /* End of transpose */ + + q0_uv_8x16 = _mm_unpacklo_epi8(q0_uv_16x8, zero); + q1_uv_8x16 = _mm_unpacklo_epi8(q1_uv_16x8, zero); + p1_uv_8x16 = _mm_unpacklo_epi8(p1_uv_16x8, zero); + p0_uv_8x16 = _mm_unpacklo_epi8(p0_uv_16x8, zero); + + diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condn 1 + diff = _mm_abs_epi16(diff); + alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr); + flag1 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff); + + diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condtn 2 + diff = _mm_abs_epi16(diff); + beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr); + flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff)); + + diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condtn 3 + diff = _mm_abs_epi16(diff); + flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff)); + + temp1 = _mm_slli_epi16(p1_uv_8x16, 1); + temp2 
= _mm_add_epi16(p0_uv_8x16, q1_uv_8x16); + temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2)); + temp1 = _mm_add_epi16(temp1, temp2); + p0_uv_8x16_1 = _mm_srai_epi16(temp1, 2); + + temp1 = _mm_slli_epi16(q1_uv_8x16, 1); + temp2 = _mm_add_epi16(p1_uv_8x16, q0_uv_8x16); + temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2)); + temp1 = _mm_add_epi16(temp1, temp2); + q0_uv_8x16_1 = _mm_srai_epi16(temp1, 2); + + q0_uv_8x16 = _mm_unpackhi_epi8(q0_uv_16x8, zero); + q1_uv_8x16 = _mm_unpackhi_epi8(q1_uv_16x8, zero); + p1_uv_8x16 = _mm_unpackhi_epi8(p1_uv_16x8, zero); + p0_uv_8x16 = _mm_unpackhi_epi8(p0_uv_16x8, zero); + + diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condn 1 + diff = _mm_abs_epi16(diff); + alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr); + flag2 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff); + + diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condtn 2 + diff = _mm_abs_epi16(diff); + beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr); + flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff)); + + diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condtn 3 + diff = _mm_abs_epi16(diff); + flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff)); + + temp1 = _mm_slli_epi16(p1_uv_8x16, 1); + temp2 = _mm_add_epi16(p0_uv_8x16, q1_uv_8x16); + temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2)); + temp1 = _mm_add_epi16(temp1, temp2); + p0_uv_8x16_2 = _mm_srai_epi16(temp1, 2); + + temp1 = _mm_slli_epi16(q1_uv_8x16, 1); + temp2 = _mm_add_epi16(p1_uv_8x16, q0_uv_8x16); + temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2)); + temp1 = _mm_add_epi16(temp1, temp2); + q0_uv_8x16_2 = _mm_srai_epi16(temp1, 2); + + p0_uv_8x16_2 = _mm_packus_epi16(p0_uv_8x16_1, p0_uv_8x16_2); + q0_uv_8x16_2 = _mm_packus_epi16(q0_uv_8x16_1, q0_uv_8x16_2); + + flag1 = _mm_packs_epi16(flag1, flag2); + + p0_uv_8x16_1 = _mm_and_si128(p0_uv_16x8, + _mm_xor_si128(flag1, _mm_set1_epi8(0xFF))); + p0_uv_8x16_2 = _mm_and_si128(p0_uv_8x16_2, flag1); + p0_uv_16x8 = _mm_add_epi8(p0_uv_8x16_1, p0_uv_8x16_2); + + 
q0_uv_8x16_1 = _mm_and_si128(q0_uv_16x8, + _mm_xor_si128(flag1, _mm_set1_epi8(0xFF))); + q0_uv_8x16_2 = _mm_and_si128(q0_uv_8x16_2, flag1); + q0_uv_16x8 = _mm_add_epi8(q0_uv_8x16_1, q0_uv_8x16_2); + + /* Inverse-transpose and store back */ + temp1 = _mm_unpacklo_epi16(p1_uv_16x8, p0_uv_16x8); + temp2 = _mm_unpackhi_epi16(p1_uv_16x8, p0_uv_16x8); + temp3 = _mm_unpacklo_epi16(q0_uv_16x8, q1_uv_16x8); + temp4 = _mm_unpackhi_epi16(q0_uv_16x8, q1_uv_16x8); + + linea = _mm_unpacklo_epi32(temp1, temp3); + lineb = _mm_srli_si128(linea, 8); + linec = _mm_unpackhi_epi32(temp1, temp3); + lined = _mm_srli_si128(linec, 8); + linee = _mm_unpacklo_epi32(temp2, temp4); + linef = _mm_srli_si128(linee, 8); + lineg = _mm_unpackhi_epi32(temp2, temp4); + lineh = _mm_srli_si128(lineg, 8); + + _mm_storel_epi64((__m128i *)(pu1_src_uv - 4), linea); + _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + src_strd), lineb); + _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd), linec); + _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd), lined); + _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 4 * src_strd), linee); + _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 5 * src_strd), linef); + _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 6 * src_strd), lineg); + _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 7 * src_strd), lineh); + +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_chroma_horz_bs4_ssse3() */ +/* */ +/* Description : This function performs filtering of a chroma block */ +/* horizontal edge when the boundary strength is set to 4 */ +/* in high profile. 
*/
+/*                                                                           */
+/*  Inputs        : pu1_src  - pointer to the src sample q0 of U             */
+/*                  src_strd - source stride                                 */
+/*                  alpha_cb - alpha value for the boundary in U             */
+/*                  beta_cb  - beta value for the boundary in U              */
+/*                  alpha_cr - alpha value for the boundary in V             */
+/*                  beta_cr  - beta value for the boundary in V              */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : This operation is described in Sec. 8.7.2.4 under the    */
+/*                  title "Filtering process for edges for bS equal to 4" in */
+/*                  ITU T Rec H.264 with alpha and beta values different in  */
+/*                  U and V.                                                 */
+/*                                                                           */
+/*  Outputs       : p0 and q0 rows are overwritten in place                  */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         12 02 2015   Naveen Kumar P  Initial version                      */
+/*                                                                           */
+/*****************************************************************************/
+void ih264_deblk_chroma_horz_bs4_ssse3(UWORD8 *pu1_src,
+                                       WORD32 src_strd,
+                                       WORD32 alpha_cb,
+                                       WORD32 beta_cb,
+                                       WORD32 alpha_cr,
+                                       WORD32 beta_cr)
+{
+    /* Rows on either side of the edge. The stride is applied as a WORD32 */
+    /* offset: the earlier WORD16 temporaries truncated strides > 32767.  */
+    UWORD8 *pu1_row_p1 = pu1_src - 2 * src_strd;
+    UWORD8 *pu1_row_p0 = pu1_row_p1 + src_strd;
+    UWORD8 *pu1_row_q0 = pu1_src;
+    UWORD8 *pu1_row_q1 = pu1_src + src_strd;
+
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i round_2 = _mm_set1_epi16(2);
+    /* Every 32-bit lane holds the Cb threshold in its low 16 bits and the */
+    /* Cr threshold in its high 16 bits, so 16-bit lanes alternate Cb, Cr. */
+    const __m128i alpha_uv = _mm_set1_epi32((alpha_cr << 16) + alpha_cb);
+    const __m128i beta_uv = _mm_set1_epi32((beta_cr << 16) + beta_cb);
+
+    __m128i p1_row, p0_row, q0_row, q1_row;
+    __m128i p1, p0, q0, q1;
+    __m128i absdiff, sum;
+    __m128i mask_lo, mask_hi, mask;
+    __m128i p0_flt_lo, p0_flt_hi, q0_flt_lo, q0_flt_hi;
+    __m128i p0_flt, q0_flt;
+
+    q0_row = _mm_loadu_si128((__m128i *)pu1_row_q0);
+    q1_row = _mm_loadu_si128((__m128i *)pu1_row_q1);
+    p1_row = _mm_loadu_si128((__m128i *)pu1_row_p1);
+    p0_row = _mm_loadu_si128((__m128i *)pu1_row_p0);
+
+    /* ---------- lower 8 bytes, widened to 16 bits ---------- */
+    q0 = _mm_unpacklo_epi8(q0_row, zero);
+    q1 = _mm_unpacklo_epi8(q1_row, zero);
+    p1 = _mm_unpacklo_epi8(p1_row, zero);
+    p0 = _mm_unpacklo_epi8(p0_row, zero);
+
+    /* |p0 - q0| < alpha */
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p0, q0));
+    mask_lo = _mm_cmpgt_epi16(alpha_uv, absdiff);
+    /* |q1 - q0| < beta */
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(q1, q0));
+    mask_lo = _mm_and_si128(mask_lo, _mm_cmpgt_epi16(beta_uv, absdiff));
+    /* |p1 - p0| < beta */
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p1, p0));
+    mask_lo = _mm_and_si128(mask_lo, _mm_cmpgt_epi16(beta_uv, absdiff));
+
+    /* p0' = (2 * p1 + p0 + q1 + 2) >> 2 */
+    sum = _mm_add_epi16(_mm_slli_epi16(p1, 1), round_2);
+    sum = _mm_add_epi16(sum, _mm_add_epi16(p0, q1));
+    p0_flt_lo = _mm_srai_epi16(sum, 2);
+    /* q0' = (2 * q1 + p1 + q0 + 2) >> 2 */
+    sum = _mm_add_epi16(_mm_slli_epi16(q1, 1), round_2);
+    sum = _mm_add_epi16(sum, _mm_add_epi16(p1, q0));
+    q0_flt_lo = _mm_srai_epi16(sum, 2);
+
+    /* ---------- upper 8 bytes, widened to 16 bits ---------- */
+    q0 = _mm_unpackhi_epi8(q0_row, zero);
+    q1 = _mm_unpackhi_epi8(q1_row, zero);
+    p1 = _mm_unpackhi_epi8(p1_row, zero);
+    p0 = _mm_unpackhi_epi8(p0_row, zero);
+
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p0, q0));
+    mask_hi = _mm_cmpgt_epi16(alpha_uv, absdiff);
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(q1, q0));
+    mask_hi = _mm_and_si128(mask_hi, _mm_cmpgt_epi16(beta_uv, absdiff));
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p1, p0));
+    mask_hi = _mm_and_si128(mask_hi, _mm_cmpgt_epi16(beta_uv, absdiff));
+
+    sum = _mm_add_epi16(_mm_slli_epi16(p1, 1), round_2);
+    sum = _mm_add_epi16(sum, _mm_add_epi16(p0, q1));
+    p0_flt_hi = _mm_srai_epi16(sum, 2);
+    sum = _mm_add_epi16(_mm_slli_epi16(q1, 1), round_2);
+    sum = _mm_add_epi16(sum, _mm_add_epi16(p1, q0));
+    q0_flt_hi = _mm_srai_epi16(sum, 2);
+
+    /* Back to 16 bytes: filtered samples and a per-byte 0x00/0xFF mask */
+    p0_flt = _mm_packus_epi16(p0_flt_lo, p0_flt_hi);
+    q0_flt = _mm_packus_epi16(q0_flt_lo, q0_flt_hi);
+    mask = _mm_packs_epi16(mask_lo, mask_hi);
+
+    /* Filtered byte where the mask is set, original byte elsewhere */
+    p0_row = _mm_or_si128(_mm_andnot_si128(mask, p0_row),
+                          _mm_and_si128(p0_flt, mask));
+    _mm_storeu_si128((__m128i *)pu1_row_p0, p0_row);
+
+    q0_row = _mm_or_si128(_mm_andnot_si128(mask, q0_row),
+                          _mm_and_si128(q0_flt, mask));
+    _mm_storeu_si128((__m128i *)pu1_row_q0, q0_row);
+
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264_deblk_chroma_vert_bslt4_ssse3()                    */
+/*                                                                           */
+/*  Description   : This function performs filtering of a chroma block       */
+/*                  vertical edge when the boundary strength is less than 4  */
+/*                  in high profile.                                         */
+/*                                                                           */
+/*  Inputs        : pu1_src        - pointer to the src sample q0 of U       */
+/*                  src_strd       - source stride                           */
+/*                  alpha_cb       - alpha value for the boundary in U       */
+/*                  beta_cb        - beta value for the boundary in U        */
+/*                  alpha_cr       - alpha value for the boundary in V       */
+/*                  beta_cr        - beta value for the boundary in V        */
+/*                  u4_bs          - packed Boundary strength array          */
+/*                  pu1_cliptab_cb - tc0_table for U                         */
+/*                  pu1_cliptab_cr - tc0_table for V                         */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : This operation is described in Sec. 8.7.2.3 under the    */
+/*                  title "Filtering process for edges for bS less than 4"   */
+/*                  in ITU T Rec H.264 with alpha and beta values different  */
+/*                  in U and V. 
*/
+/*                                                                           */
+/*  Outputs       : p0 and q0 samples are overwritten in place               */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         12 02 2015   Naveen Kumar P  Initial version                      */
+/*                                                                           */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bslt4_ssse3(UWORD8 *pu1_src,
+                                         WORD32 src_strd,
+                                         WORD32 alpha_cb,
+                                         WORD32 beta_cb,
+                                         WORD32 alpha_cr,
+                                         WORD32 beta_cr,
+                                         UWORD32 u4_bs,
+                                         const UWORD8 *pu1_cliptab_cb,
+                                         const UWORD8 *pu1_cliptab_cr)
+{
+    /* Each of the 8 rows is accessed as 8 bytes starting 4 bytes before */
+    /* pu1_src, i.e. p1, p0, q0, q1 as two-byte pairs.                   */
+    UWORD8 *pu1_left = pu1_src - 4;
+    /* Four strengths are packed in u4_bs, the first one in the top byte */
+    UWORD8 u1_bs_a = (u4_bs >> 24) & 0xff;
+    UWORD8 u1_bs_b = (u4_bs >> 16) & 0xff;
+    UWORD8 u1_bs_c = (u4_bs >> 8) & 0xff;
+    UWORD8 u1_bs_d = (u4_bs >> 0) & 0xff;
+    WORD32 i;
+
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i all_ones = _mm_set1_epi8(0xFF);
+    const __m128i const_1 = _mm_set1_epi16(1);
+    const __m128i const_4 = _mm_set1_epi16(4);
+    /* 16-bit lanes alternate Cb, Cr thresholds */
+    const __m128i alpha_uv = _mm_set1_epi32((alpha_cr << 16) + alpha_cb);
+    const __m128i beta_uv = _mm_set1_epi32((beta_cr << 16) + beta_cb);
+
+    __m128i rows[8];
+    __m128i t0, t1, t2, t3;
+    __m128i pp_top, pp_bot, qq_top, qq_bot;
+    __m128i p1_all, p0_all, q0_all, q1_all;
+    __m128i p1, p0, q0, q1;
+    __m128i bs_mask, mask_lo, mask_hi, mask;
+    __m128i absdiff, delta, tc;
+    __m128i p0_flt_lo, p0_flt_hi, q0_flt_lo, q0_flt_hi;
+    __m128i p0_flt, q0_flt;
+
+    /* Per-byte mask: 0xFF where the strength is non-zero. Each strength */
+    /* covers four consecutive bytes (two rows of one UV pair each).     */
+    bs_mask = _mm_set_epi8(u1_bs_d, u1_bs_d, u1_bs_d, u1_bs_d,
+                           u1_bs_c, u1_bs_c, u1_bs_c, u1_bs_c,
+                           u1_bs_b, u1_bs_b, u1_bs_b, u1_bs_b,
+                           u1_bs_a, u1_bs_a, u1_bs_a, u1_bs_a);
+    bs_mask = _mm_xor_si128(_mm_cmpeq_epi8(bs_mask, zero), all_ones);
+
+    /* Load the 8 rows and transpose them into p1 / p0 / q0 / q1 vectors */
+    for(i = 0; i < 8; i++)
+        rows[i] = _mm_loadl_epi64((__m128i *)(pu1_left + i * src_strd));
+
+    t0 = _mm_unpacklo_epi16(rows[0], rows[1]);
+    t1 = _mm_unpacklo_epi16(rows[2], rows[3]);
+    t2 = _mm_unpacklo_epi16(rows[4], rows[5]);
+    t3 = _mm_unpacklo_epi16(rows[6], rows[7]);
+
+    pp_top = _mm_unpacklo_epi32(t0, t1); /* rows 0-3: p1 then p0 */
+    pp_bot = _mm_unpacklo_epi32(t2, t3); /* rows 4-7: p1 then p0 */
+    qq_top = _mm_unpackhi_epi32(t0, t1); /* rows 0-3: q0 then q1 */
+    qq_bot = _mm_unpackhi_epi32(t2, t3); /* rows 4-7: q0 then q1 */
+
+    p1_all = _mm_unpacklo_epi64(pp_top, pp_bot);
+    p0_all = _mm_unpackhi_epi64(pp_top, pp_bot);
+    q0_all = _mm_unpacklo_epi64(qq_top, qq_bot);
+    q1_all = _mm_unpackhi_epi64(qq_top, qq_bot);
+
+    /* ---------- lower 8 bytes, widened to 16 bits ---------- */
+    q0 = _mm_unpacklo_epi8(q0_all, zero);
+    q1 = _mm_unpacklo_epi8(q1_all, zero);
+    p1 = _mm_unpacklo_epi8(p1_all, zero);
+    p0 = _mm_unpacklo_epi8(p0_all, zero);
+
+    /* |p0 - q0| < alpha, |q1 - q0| < beta, |p1 - p0| < beta */
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p0, q0));
+    mask_lo = _mm_cmpgt_epi16(alpha_uv, absdiff);
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(q1, q0));
+    mask_lo = _mm_and_si128(mask_lo, _mm_cmpgt_epi16(beta_uv, absdiff));
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p1, p0));
+    mask_lo = _mm_and_si128(mask_lo, _mm_cmpgt_epi16(beta_uv, absdiff));
+
+    /* delta = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3 */
+    delta = _mm_slli_epi16(_mm_subs_epi16(q0, p0), 2);
+    delta = _mm_add_epi16(delta, _mm_subs_epi16(p1, q1));
+    delta = _mm_add_epi16(delta, const_4);
+    delta = _mm_srai_epi16(delta, 3);
+
+    /* Clip bound = clip-table entry + 1, per plane and per strength */
+    tc = _mm_set_epi16(pu1_cliptab_cr[u1_bs_b], pu1_cliptab_cb[u1_bs_b],
+                       pu1_cliptab_cr[u1_bs_b], pu1_cliptab_cb[u1_bs_b],
+                       pu1_cliptab_cr[u1_bs_a], pu1_cliptab_cb[u1_bs_a],
+                       pu1_cliptab_cr[u1_bs_a], pu1_cliptab_cb[u1_bs_a]);
+    tc = _mm_add_epi16(tc, const_1);
+
+    /* delta = CLIP3(-tc, tc, delta) */
+    delta = _mm_min_epi16(tc, delta);
+    delta = _mm_max_epi16(_mm_subs_epi16(zero, tc), delta);
+
+    p0_flt_lo = _mm_add_epi16(p0, delta);
+    q0_flt_lo = _mm_sub_epi16(q0, delta);
+
+    /* ---------- upper 8 bytes, widened to 16 bits ---------- */
+    q0 = _mm_unpackhi_epi8(q0_all, zero);
+    q1 = _mm_unpackhi_epi8(q1_all, zero);
+    p1 = _mm_unpackhi_epi8(p1_all, zero);
+    p0 = _mm_unpackhi_epi8(p0_all, zero);
+
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p0, q0));
+    mask_hi = _mm_cmpgt_epi16(alpha_uv, absdiff);
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(q1, q0));
+    mask_hi = _mm_and_si128(mask_hi, _mm_cmpgt_epi16(beta_uv, absdiff));
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p1, p0));
+    mask_hi = _mm_and_si128(mask_hi, _mm_cmpgt_epi16(beta_uv, absdiff));
+
+    delta = _mm_slli_epi16(_mm_subs_epi16(q0, p0), 2);
+    delta = _mm_add_epi16(delta, _mm_subs_epi16(p1, q1));
+    delta = _mm_add_epi16(delta, const_4);
+    delta = _mm_srai_epi16(delta, 3);
+
+    tc = _mm_set_epi16(pu1_cliptab_cr[u1_bs_d], pu1_cliptab_cb[u1_bs_d],
+                       pu1_cliptab_cr[u1_bs_d], pu1_cliptab_cb[u1_bs_d],
+                       pu1_cliptab_cr[u1_bs_c], pu1_cliptab_cb[u1_bs_c],
+                       pu1_cliptab_cr[u1_bs_c], pu1_cliptab_cb[u1_bs_c]);
+    tc = _mm_add_epi16(tc, const_1);
+
+    delta = _mm_min_epi16(tc, delta);
+    delta = _mm_max_epi16(_mm_subs_epi16(zero, tc), delta);
+
+    p0_flt_hi = _mm_add_epi16(p0, delta);
+    q0_flt_hi = _mm_sub_epi16(q0, delta);
+
+    /* Back to 16 bytes; final mask = threshold tests AND non-zero strength */
+    p0_flt = _mm_packus_epi16(p0_flt_lo, p0_flt_hi);
+    q0_flt = _mm_packus_epi16(q0_flt_lo, q0_flt_hi);
+    mask = _mm_and_si128(_mm_packs_epi16(mask_lo, mask_hi), bs_mask);
+
+    /* Filtered byte where the mask is set, original byte elsewhere */
+    p0_all = _mm_or_si128(_mm_andnot_si128(mask, p0_all),
+                          _mm_and_si128(p0_flt, mask));
+    q0_all = _mm_or_si128(_mm_andnot_si128(mask, q0_all),
+                          _mm_and_si128(q0_flt, mask));
+
+    /* Inverse-transpose and store back */
+    t0 = _mm_unpacklo_epi16(p1_all, p0_all);
+    t1 = _mm_unpackhi_epi16(p1_all, p0_all);
+    t2 = _mm_unpacklo_epi16(q0_all, q1_all);
+    t3 = _mm_unpackhi_epi16(q0_all, q1_all);
+
+    rows[0] = _mm_unpacklo_epi32(t0, t2);
+    rows[1] = _mm_srli_si128(rows[0], 8);
+    rows[2] = _mm_unpackhi_epi32(t0, t2);
+    rows[3] = _mm_srli_si128(rows[2], 8);
+    rows[4] = _mm_unpacklo_epi32(t1, t3);
+    rows[5] = _mm_srli_si128(rows[4], 8);
+    rows[6] = _mm_unpackhi_epi32(t1, t3);
+    rows[7] = _mm_srli_si128(rows[6], 8);
+
+    for(i = 0; i < 8; i++)
+        _mm_storel_epi64((__m128i *)(pu1_left + i * src_strd), rows[i]);
+
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  
Function Name : ih264_deblk_chroma_horz_bslt4_ssse3()                    */
+/*                                                                           */
+/*  Description   : This function performs filtering of a chroma block       */
+/*                  horizontal edge when the boundary strength is less than  */
+/*                  4 in high profile.                                       */
+/*                                                                           */
+/*  Inputs        : pu1_src        - pointer to the src sample q0 of U       */
+/*                  src_strd       - source stride                           */
+/*                  alpha_cb       - alpha value for the boundary in U       */
+/*                  beta_cb        - beta value for the boundary in U        */
+/*                  alpha_cr       - alpha value for the boundary in V       */
+/*                  beta_cr        - beta value for the boundary in V        */
+/*                  u4_bs          - packed Boundary strength array          */
+/*                  pu1_cliptab_cb - tc0_table for U                         */
+/*                  pu1_cliptab_cr - tc0_table for V                         */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : This operation is described in Sec. 8.7.2.3 under the    */
+/*                  title "Filtering process for edges for bS less than 4"   */
+/*                  in ITU T Rec H.264 with alpha and beta values different  */
+/*                  in U and V.                                              */
+/*                                                                           */
+/*  Outputs       : p0 and q0 rows are overwritten in place                  */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         12 02 2015   Naveen Kumar P  Initial version                      */
+/*                                                                           */
+/*****************************************************************************/
+void ih264_deblk_chroma_horz_bslt4_ssse3(UWORD8 *pu1_src,
+                                         WORD32 src_strd,
+                                         WORD32 alpha_cb,
+                                         WORD32 beta_cb,
+                                         WORD32 alpha_cr,
+                                         WORD32 beta_cr,
+                                         UWORD32 u4_bs,
+                                         const UWORD8 *pu1_cliptab_cb,
+                                         const UWORD8 *pu1_cliptab_cr)
+{
+    /* Rows on either side of the edge. The stride is applied as a WORD32 */
+    /* offset: the earlier WORD16 temporaries truncated strides > 32767.  */
+    UWORD8 *pu1_row_p1 = pu1_src - 2 * src_strd;
+    UWORD8 *pu1_row_p0 = pu1_row_p1 + src_strd;
+    UWORD8 *pu1_row_q0 = pu1_src;
+    UWORD8 *pu1_row_q1 = pu1_src + src_strd;
+    /* Four strengths are packed in u4_bs, the first one in the top byte */
+    UWORD8 u1_bs_a = (u4_bs >> 24) & 0xff;
+    UWORD8 u1_bs_b = (u4_bs >> 16) & 0xff;
+    UWORD8 u1_bs_c = (u4_bs >> 8) & 0xff;
+    UWORD8 u1_bs_d = (u4_bs >> 0) & 0xff;
+
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i all_ones = _mm_set1_epi8(0xFF);
+    const __m128i const_1 = _mm_set1_epi16(1);
+    const __m128i const_4 = _mm_set1_epi16(4);
+    /* 16-bit lanes alternate Cb, Cr thresholds */
+    const __m128i alpha_uv = _mm_set1_epi32((alpha_cr << 16) + alpha_cb);
+    const __m128i beta_uv = _mm_set1_epi32((beta_cr << 16) + beta_cb);
+
+    __m128i p1_row, p0_row, q0_row, q1_row;
+    __m128i p1, p0, q0, q1;
+    __m128i bs_mask, mask_lo, mask_hi, mask;
+    __m128i absdiff, delta, tc;
+    __m128i p0_flt_lo, p0_flt_hi, q0_flt_lo, q0_flt_hi;
+    __m128i p0_flt, q0_flt;
+
+    /* Per-byte mask: 0xFF where the strength is non-zero. Each strength */
+    /* covers four consecutive bytes of the row.                         */
+    bs_mask = _mm_set_epi8(u1_bs_d, u1_bs_d, u1_bs_d, u1_bs_d,
+                           u1_bs_c, u1_bs_c, u1_bs_c, u1_bs_c,
+                           u1_bs_b, u1_bs_b, u1_bs_b, u1_bs_b,
+                           u1_bs_a, u1_bs_a, u1_bs_a, u1_bs_a);
+    bs_mask = _mm_xor_si128(_mm_cmpeq_epi8(bs_mask, zero), all_ones);
+
+    q0_row = _mm_loadu_si128((__m128i *)pu1_row_q0);
+    q1_row = _mm_loadu_si128((__m128i *)pu1_row_q1);
+    p1_row = _mm_loadu_si128((__m128i *)pu1_row_p1);
+    p0_row = _mm_loadu_si128((__m128i *)pu1_row_p0);
+
+    /* ---------- lower 8 bytes, widened to 16 bits ---------- */
+    q0 = _mm_unpacklo_epi8(q0_row, zero);
+    q1 = _mm_unpacklo_epi8(q1_row, zero);
+    p1 = _mm_unpacklo_epi8(p1_row, zero);
+    p0 = _mm_unpacklo_epi8(p0_row, zero);
+
+    /* |p0 - q0| < alpha, |q1 - q0| < beta, |p1 - p0| < beta */
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p0, q0));
+    mask_lo = _mm_cmpgt_epi16(alpha_uv, absdiff);
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(q1, q0));
+    mask_lo = _mm_and_si128(mask_lo, _mm_cmpgt_epi16(beta_uv, absdiff));
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p1, p0));
+    mask_lo = _mm_and_si128(mask_lo, _mm_cmpgt_epi16(beta_uv, absdiff));
+
+    /* delta = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3 */
+    delta = _mm_slli_epi16(_mm_subs_epi16(q0, p0), 2);
+    delta = _mm_add_epi16(delta, _mm_subs_epi16(p1, q1));
+    delta = _mm_add_epi16(delta, const_4);
+    delta = _mm_srai_epi16(delta, 3);
+
+    /* Clip bound = clip-table entry + 1, per plane and per strength */
+    tc = _mm_set_epi16(pu1_cliptab_cr[u1_bs_b], pu1_cliptab_cb[u1_bs_b],
+                       pu1_cliptab_cr[u1_bs_b], pu1_cliptab_cb[u1_bs_b],
+                       pu1_cliptab_cr[u1_bs_a], pu1_cliptab_cb[u1_bs_a],
+                       pu1_cliptab_cr[u1_bs_a], pu1_cliptab_cb[u1_bs_a]);
+    tc = _mm_add_epi16(tc, const_1);
+
+    /* delta = CLIP3(-tc, tc, delta) */
+    delta = _mm_min_epi16(tc, delta);
+    delta = _mm_max_epi16(_mm_subs_epi16(zero, tc), delta);
+
+    p0_flt_lo = _mm_add_epi16(p0, delta);
+    q0_flt_lo = _mm_sub_epi16(q0, delta);
+
+    /* ---------- upper 8 bytes, widened to 16 bits ---------- */
+    q0 = _mm_unpackhi_epi8(q0_row, zero);
+    q1 = _mm_unpackhi_epi8(q1_row, zero);
+    p1 = _mm_unpackhi_epi8(p1_row, zero);
+    p0 = _mm_unpackhi_epi8(p0_row, zero);
+
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p0, q0));
+    mask_hi = _mm_cmpgt_epi16(alpha_uv, absdiff);
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(q1, q0));
+    mask_hi = _mm_and_si128(mask_hi, _mm_cmpgt_epi16(beta_uv, absdiff));
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p1, p0));
+    mask_hi = _mm_and_si128(mask_hi, _mm_cmpgt_epi16(beta_uv, absdiff));
+
+    delta = _mm_slli_epi16(_mm_subs_epi16(q0, p0), 2);
+    delta = _mm_add_epi16(delta, _mm_subs_epi16(p1, q1));
+    delta = _mm_add_epi16(delta, const_4);
+    delta = _mm_srai_epi16(delta, 3);
+
+    tc = _mm_set_epi16(pu1_cliptab_cr[u1_bs_d], pu1_cliptab_cb[u1_bs_d],
+                       pu1_cliptab_cr[u1_bs_d], pu1_cliptab_cb[u1_bs_d],
+                       pu1_cliptab_cr[u1_bs_c], pu1_cliptab_cb[u1_bs_c],
+                       pu1_cliptab_cr[u1_bs_c], pu1_cliptab_cb[u1_bs_c]);
+    tc = _mm_add_epi16(tc, const_1);
+
+    delta = _mm_min_epi16(tc, delta);
+    delta = _mm_max_epi16(_mm_subs_epi16(zero, tc), delta);
+
+    p0_flt_hi = _mm_add_epi16(p0, delta);
+    q0_flt_hi = _mm_sub_epi16(q0, delta);
+
+    /* Back to 16 bytes; final mask = threshold tests AND non-zero strength */
+    p0_flt = _mm_packus_epi16(p0_flt_lo, p0_flt_hi);
+    q0_flt = _mm_packus_epi16(q0_flt_lo, q0_flt_hi);
+    mask = _mm_and_si128(_mm_packs_epi16(mask_lo, mask_hi), bs_mask);
+
+    /* Filtered byte where the mask is set, original byte elsewhere */
+    p0_row = _mm_or_si128(_mm_andnot_si128(mask, p0_row),
+                          _mm_and_si128(p0_flt, mask));
+    _mm_storeu_si128((__m128i *)pu1_row_p0, p0_row);
+
+    q0_row = _mm_or_si128(_mm_andnot_si128(mask, q0_row),
+                          _mm_and_si128(q0_flt, mask));
+    _mm_storeu_si128((__m128i *)pu1_row_q0, q0_row);
+
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264_deblk_chroma_vert_bs4_mbaff_ssse3()                */
+/*                                                                           */
+/*  Description   : This function performs filtering of a chroma block       */
+/*                  vertical edge when boundary strength is set to 4 in high */
+/*                  profile. 
*/
+/*                                                                           */
+/*  Inputs        : pu1_src  - pointer to the src sample q0 of U             */
+/*                  src_strd - source stride                                 */
+/*                  alpha_cb - alpha value for the boundary in U             */
+/*                  beta_cb  - beta value for the boundary in U              */
+/*                  alpha_cr - alpha value for the boundary in V             */
+/*                  beta_cr  - beta value for the boundary in V              */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : When the function is called twice, this operation is as  */
+/*                  described in Sec. 8.7.2.4 under the title "Filtering     */
+/*                  process for edges for bS equal to 4" in ITU T Rec H.264  */
+/*                  with alpha and beta values different in U and V.         */
+/*                                                                           */
+/*  Outputs       : p0 and q0 samples are overwritten in place               */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         12 02 2015   Naveen Kumar P  Initial version                      */
+/*                                                                           */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bs4_mbaff_ssse3(UWORD8 *pu1_src,
+                                             WORD32 src_strd,
+                                             WORD32 alpha_cb,
+                                             WORD32 beta_cb,
+                                             WORD32 alpha_cr,
+                                             WORD32 beta_cr)
+{
+    /* Each of the 4 rows is accessed as 8 bytes starting 4 bytes before */
+    /* pu1_src, i.e. p1, p0, q0, q1 as two-byte pairs.                   */
+    UWORD8 *pu1_left = pu1_src - 4;
+    WORD32 i;
+
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i round_2 = _mm_set1_epi16(2);
+    /* 16-bit lanes alternate Cb, Cr thresholds */
+    const __m128i alpha_uv = _mm_set1_epi32((alpha_cr << 16) + alpha_cb);
+    const __m128i beta_uv = _mm_set1_epi32((beta_cr << 16) + beta_cb);
+
+    __m128i rows[4];
+    __m128i t0, t1;
+    __m128i p1_all, p0_all, q0_all, q1_all;
+    __m128i p1, p0, q0, q1;
+    __m128i absdiff, sum, mask;
+    __m128i p0_flt, q0_flt;
+
+    /* Load the 4 rows and transpose them; only the low 8 bytes of each */
+    /* resulting vector are used below.                                 */
+    for(i = 0; i < 4; i++)
+        rows[i] = _mm_loadl_epi64((__m128i *)(pu1_left + i * src_strd));
+
+    t0 = _mm_unpacklo_epi16(rows[0], rows[1]);
+    t1 = _mm_unpacklo_epi16(rows[2], rows[3]);
+
+    p1_all = _mm_unpacklo_epi32(t0, t1);
+    p0_all = _mm_srli_si128(p1_all, 8);
+    q0_all = _mm_unpackhi_epi32(t0, t1);
+    q1_all = _mm_srli_si128(q0_all, 8);
+
+    /* Widen the low 8 bytes to 16 bits */
+    q0 = _mm_unpacklo_epi8(q0_all, zero);
+    q1 = _mm_unpacklo_epi8(q1_all, zero);
+    p1 = _mm_unpacklo_epi8(p1_all, zero);
+    p0 = _mm_unpacklo_epi8(p0_all, zero);
+
+    /* |p0 - q0| < alpha, |q1 - q0| < beta, |p1 - p0| < beta */
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p0, q0));
+    mask = _mm_cmpgt_epi16(alpha_uv, absdiff);
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(q1, q0));
+    mask = _mm_and_si128(mask, _mm_cmpgt_epi16(beta_uv, absdiff));
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p1, p0));
+    mask = _mm_and_si128(mask, _mm_cmpgt_epi16(beta_uv, absdiff));
+
+    /* p0' = (2 * p1 + p0 + q1 + 2) >> 2 */
+    sum = _mm_add_epi16(_mm_slli_epi16(p1, 1), round_2);
+    sum = _mm_add_epi16(sum, _mm_add_epi16(p0, q1));
+    p0_flt = _mm_srai_epi16(sum, 2);
+    /* q0' = (2 * q1 + p1 + q0 + 2) >> 2 */
+    sum = _mm_add_epi16(_mm_slli_epi16(q1, 1), round_2);
+    sum = _mm_add_epi16(sum, _mm_add_epi16(p1, q0));
+    q0_flt = _mm_srai_epi16(sum, 2);
+
+    /* Narrow back to bytes (both halves carry the same 8 results) */
+    p0_flt = _mm_packus_epi16(p0_flt, p0_flt);
+    q0_flt = _mm_packus_epi16(q0_flt, q0_flt);
+    mask = _mm_packs_epi16(mask, mask);
+
+    /* Filtered byte where the mask is set, original byte elsewhere */
+    p0_all = _mm_or_si128(_mm_andnot_si128(mask, p0_all),
+                          _mm_and_si128(p0_flt, mask));
+    q0_all = _mm_or_si128(_mm_andnot_si128(mask, q0_all),
+                          _mm_and_si128(q0_flt, mask));
+
+    /* Inverse-transpose and store back */
+    t0 = _mm_unpacklo_epi16(p1_all, p0_all);
+    t1 = _mm_unpacklo_epi16(q0_all, q1_all);
+
+    rows[0] = _mm_unpacklo_epi32(t0, t1);
+    rows[1] = _mm_srli_si128(rows[0], 8);
+    rows[2] = _mm_unpackhi_epi32(t0, t1);
+    rows[3] = _mm_srli_si128(rows[2], 8);
+
+    for(i = 0; i < 4; i++)
+        _mm_storel_epi64((__m128i *)(pu1_left + i * src_strd), rows[i]);
+
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264_deblk_chroma_vert_bslt4_mbaff_ssse3()              */
+/*                                                                           */
+/*  Description   : This function performs filtering of a chroma block       */
+/*                  vertical edge when boundary strength is less than 4 in   */
+/*                  high profile.                                            */
+/*                                                                           */
+/*  Inputs        : pu1_src        - pointer to the src sample q0 of U       */
+/*                  src_strd       - source stride                           */
+/*                  alpha_cb       - alpha value for the boundary in U       */
+/*                  beta_cb        - beta value for the boundary in U        */
+/*                  alpha_cr       - alpha value for the boundary in V       */
+/*                  beta_cr        - beta value for the boundary in V        */
+/*                  u4_bs          - packed Boundary strength array          */
+/*                  pu1_cliptab_cb - tc0_table for U                         */
+/*                  pu1_cliptab_cr - tc0_table for V                         */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : When the function is called twice, this operation is as  */
+/*                  described in Sec. 8.7.2.3 under the title "Filtering     */
+/*                  process for edges for bS less than 4" in ITU T Rec H.264 */
+/*                  with alpha and beta values different in U and V. 
*/
+/*                                                                           */
+/*  Outputs       : p0 and q0 samples are overwritten in place               */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         12 02 2015   Naveen Kumar P  Initial version                      */
+/*                                                                           */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bslt4_mbaff_ssse3(UWORD8 *pu1_src,
+                                               WORD32 src_strd,
+                                               WORD32 alpha_cb,
+                                               WORD32 beta_cb,
+                                               WORD32 alpha_cr,
+                                               WORD32 beta_cr,
+                                               UWORD32 u4_bs,
+                                               const UWORD8 *pu1_cliptab_cb,
+                                               const UWORD8 *pu1_cliptab_cr)
+{
+    /* Each of the 4 rows is accessed as 8 bytes starting 4 bytes before */
+    /* pu1_src, i.e. p1, p0, q0, q1 as two-byte pairs.                   */
+    UWORD8 *pu1_left = pu1_src - 4;
+    /* Four strengths are packed in u4_bs, the first one in the top byte; */
+    /* here each strength applies to a single row.                        */
+    UWORD8 u1_bs_a = (u4_bs >> 24) & 0xff;
+    UWORD8 u1_bs_b = (u4_bs >> 16) & 0xff;
+    UWORD8 u1_bs_c = (u4_bs >> 8) & 0xff;
+    UWORD8 u1_bs_d = (u4_bs >> 0) & 0xff;
+    WORD32 i;
+
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i all_ones = _mm_set1_epi8(0xFF);
+    const __m128i const_1 = _mm_set1_epi16(1);
+    const __m128i const_4 = _mm_set1_epi16(4);
+    /* 16-bit lanes alternate Cb, Cr thresholds */
+    const __m128i alpha_uv = _mm_set1_epi32((alpha_cr << 16) + alpha_cb);
+    const __m128i beta_uv = _mm_set1_epi32((beta_cr << 16) + beta_cb);
+
+    __m128i rows[4];
+    __m128i t0, t1;
+    __m128i p1_all, p0_all, q0_all, q1_all;
+    __m128i p1, p0, q0, q1;
+    __m128i bs_mask, mask;
+    __m128i absdiff, delta, tc;
+    __m128i p0_flt, q0_flt;
+
+    /* Per-byte mask: 0xFF where the strength is non-zero. Only the low 8 */
+    /* bytes carry samples; the high 8 bytes are forced to "no filter".   */
+    bs_mask = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+                           u1_bs_d, u1_bs_d, u1_bs_c, u1_bs_c,
+                           u1_bs_b, u1_bs_b, u1_bs_a, u1_bs_a);
+    bs_mask = _mm_xor_si128(_mm_cmpeq_epi8(bs_mask, zero), all_ones);
+
+    /* Load the 4 rows and transpose them; only the low 8 bytes of each */
+    /* resulting vector are used below.                                 */
+    for(i = 0; i < 4; i++)
+        rows[i] = _mm_loadl_epi64((__m128i *)(pu1_left + i * src_strd));
+
+    t0 = _mm_unpacklo_epi16(rows[0], rows[1]);
+    t1 = _mm_unpacklo_epi16(rows[2], rows[3]);
+
+    p1_all = _mm_unpacklo_epi32(t0, t1);
+    p0_all = _mm_srli_si128(p1_all, 8);
+    q0_all = _mm_unpackhi_epi32(t0, t1);
+    q1_all = _mm_srli_si128(q0_all, 8);
+
+    /* Widen the low 8 bytes to 16 bits */
+    q0 = _mm_unpacklo_epi8(q0_all, zero);
+    q1 = _mm_unpacklo_epi8(q1_all, zero);
+    p1 = _mm_unpacklo_epi8(p1_all, zero);
+    p0 = _mm_unpacklo_epi8(p0_all, zero);
+
+    /* |p0 - q0| < alpha, |q1 - q0| < beta, |p1 - p0| < beta */
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p0, q0));
+    mask = _mm_cmpgt_epi16(alpha_uv, absdiff);
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(q1, q0));
+    mask = _mm_and_si128(mask, _mm_cmpgt_epi16(beta_uv, absdiff));
+    absdiff = _mm_abs_epi16(_mm_subs_epi16(p1, p0));
+    mask = _mm_and_si128(mask, _mm_cmpgt_epi16(beta_uv, absdiff));
+
+    /* delta = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3 */
+    delta = _mm_slli_epi16(_mm_subs_epi16(q0, p0), 2);
+    delta = _mm_add_epi16(delta, _mm_subs_epi16(p1, q1));
+    delta = _mm_add_epi16(delta, const_4);
+    delta = _mm_srai_epi16(delta, 3);
+
+    /* Clip bound = clip-table entry + 1, per plane and per row */
+    tc = _mm_set_epi16(pu1_cliptab_cr[u1_bs_d], pu1_cliptab_cb[u1_bs_d],
+                       pu1_cliptab_cr[u1_bs_c], pu1_cliptab_cb[u1_bs_c],
+                       pu1_cliptab_cr[u1_bs_b], pu1_cliptab_cb[u1_bs_b],
+                       pu1_cliptab_cr[u1_bs_a], pu1_cliptab_cb[u1_bs_a]);
+    tc = _mm_add_epi16(tc, const_1);
+
+    /* delta = CLIP3(-tc, tc, delta) */
+    delta = _mm_min_epi16(tc, delta);
+    delta = _mm_max_epi16(_mm_subs_epi16(zero, tc), delta);
+
+    p0_flt = _mm_add_epi16(p0, delta);
+    q0_flt = _mm_sub_epi16(q0, delta);
+
+    /* Narrow back to bytes (both halves carry the same 8 results) */
+    p0_flt = _mm_packus_epi16(p0_flt, p0_flt);
+    q0_flt = _mm_packus_epi16(q0_flt, q0_flt);
+
+    /* Final mask = threshold tests AND non-zero strength */
+    mask = _mm_and_si128(_mm_packs_epi16(mask, mask), bs_mask);
+
+    /* Filtered byte where the mask is set, original byte elsewhere */
+    p0_all = _mm_or_si128(_mm_andnot_si128(mask, p0_all),
+                          _mm_and_si128(p0_flt, mask));
+    q0_all = _mm_or_si128(_mm_andnot_si128(mask, q0_all),
+                          _mm_and_si128(q0_flt, mask));
+
+    /* Inverse-transpose and store back */
+    t0 = _mm_unpacklo_epi16(p1_all, p0_all);
+    t1 = _mm_unpacklo_epi16(q0_all, q1_all);
+
+    rows[0] = _mm_unpacklo_epi32(t0, t1);
+    rows[1] = _mm_srli_si128(rows[0], 8);
+    rows[2] = _mm_unpackhi_epi32(t0, t1);
+    rows[3] = _mm_srli_si128(rows[2], 8);
+
+    for(i = 0; i < 4; i++)
+        _mm_storel_epi64((__m128i *)(pu1_left + i * src_strd), rows[i]);
+
+}
+
diff --git a/common/x86/ih264_deblk_luma_ssse3.c b/common/x86/ih264_deblk_luma_ssse3.c
new file mode 100755
index 0000000..440d5f0
--- /dev/null
+++ b/common/x86/ih264_deblk_luma_ssse3.c
@@ -0,0 +1,2012 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ih264_deblk_luma_ssse3.c */ +/* */ +/* Description : Contains function definitions for deblocking */ +/* */ +/* List of Functions : ih264_deblk_luma_vert_bs4_ssse3() */ +/* ih264_deblk_luma_horz_bs4_ssse3() */ +/* ih264_deblk_luma_vert_bslt4_ssse3() */ +/* ih264_deblk_luma_horz_bslt4_ssse3() */ +/* ih264_deblk_luma_vert_bs4_mbaff_ssse3() */ +/* ih264_deblk_luma_vert_bslt4_mbaff_ssse3() */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 12 02 2015 Naveen Kumar P Added luma deblocking ssse3 */ +/* intrinsics */ +/* */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_platform_macros.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_macros.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_luma_vert_bs4_ssse3() */ +/* */ +/* Description : This function performs filtering of a luma block */ +/* vertical edge when the boundary strength is set to 4. 
*/ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 of the first of 16 rows */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.4 under the */ +/* title "Filtering process for edges for bS equal to 4" in */ +/* ITU T Rec H.264. */ +/* 8 bytes (p3..q3) from each of 16 rows are transposed so that each register holds one pixel position for all rows. */ +/* Outputs : p2..q2 are filtered in place; p3 and q3 are stored back unchanged */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 12 02 2015 Naveen Kumar P Initial version */ +/* */ +/*****************************************************************************/ +void ih264_deblk_luma_vert_bs4_ssse3(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta) +{ + __m128i zero = _mm_setzero_si128(); + __m128i q0_16x8, q1_16x8, q2_16x8, q3_16x8; + __m128i p0_16x8, p1_16x8, p2_16x8, p3_16x8; + __m128i q0_8x16, q1_8x16, q2_8x16, q3_8x16; + __m128i p0_8x16, p1_8x16, p2_8x16, p3_8x16; + __m128i q0_16x8_1; + __m128i p0_16x8_1; + __m128i q0_16x8_2, q1_16x8_2, q2_16x8_2; + __m128i p0_16x8_2, p1_16x8_2, p2_16x8_2; + __m128i temp1, temp2, temp3, temp4, temp5, temp6; + __m128i Alpha_8x16, Beta_8x16; + __m128i flag1_16x8, flag2_16x8, flag3_16x8, flag4_16x8; + __m128i const_val2_16x8 = _mm_set1_epi16(2); + __m128i line1, line2, line3, line4, line5, line6, line7, line8; + + Alpha_8x16 = _mm_set1_epi16(alpha); + Beta_8x16 = _mm_set1_epi16(beta); + + line1 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd)); + line2 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd)); + line3 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd)); + line4 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd)); + line5 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd)); + line6 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd)); + line7 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd)); + line8 = 
_mm_loadl_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd)); + + temp1 = _mm_unpacklo_epi8(line1, line2); + temp2 = _mm_unpacklo_epi8(line3, line4); + temp3 = _mm_unpacklo_epi8(line5, line6); + temp4 = _mm_unpacklo_epi8(line7, line8); + + line1 = _mm_unpacklo_epi16(temp1, temp2); + line2 = _mm_unpackhi_epi16(temp1, temp2); + line3 = _mm_unpacklo_epi16(temp3, temp4); + line4 = _mm_unpackhi_epi16(temp3, temp4); + + p1_8x16 = _mm_unpacklo_epi32(line1, line3); + p0_8x16 = _mm_unpackhi_epi32(line1, line3); + q0_8x16 = _mm_unpacklo_epi32(line2, line4); + q1_8x16 = _mm_unpackhi_epi32(line2, line4); + + line1 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 8 * src_strd)); + line2 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 9 * src_strd)); + line3 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 10 * src_strd)); + line4 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 11 * src_strd)); + line5 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 12 * src_strd)); + line6 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 13 * src_strd)); + line7 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 14 * src_strd)); + line8 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 15 * src_strd)); + + temp1 = _mm_unpacklo_epi8(line1, line2); + temp2 = _mm_unpacklo_epi8(line3, line4); + temp3 = _mm_unpacklo_epi8(line5, line6); + temp4 = _mm_unpacklo_epi8(line7, line8); + + line1 = _mm_unpacklo_epi16(temp1, temp2); + line2 = _mm_unpackhi_epi16(temp1, temp2); + line3 = _mm_unpacklo_epi16(temp3, temp4); + line4 = _mm_unpackhi_epi16(temp3, temp4); + + temp1 = _mm_unpacklo_epi32(line1, line3); + temp2 = _mm_unpackhi_epi32(line1, line3); + temp3 = _mm_unpacklo_epi32(line2, line4); + temp4 = _mm_unpackhi_epi32(line2, line4); + + p3_16x8 = _mm_unpacklo_epi64(p1_8x16, temp1); + p2_16x8 = _mm_unpackhi_epi64(p1_8x16, temp1); + q2_16x8 = _mm_unpacklo_epi64(q1_8x16, temp4); + q3_16x8 = _mm_unpackhi_epi64(q1_8x16, temp4); + p1_16x8 = _mm_unpacklo_epi64(p0_8x16, temp2); + p0_16x8 = _mm_unpackhi_epi64(p0_8x16, temp2); + q0_16x8 = 
_mm_unpacklo_epi64(q0_8x16, temp3); + q1_16x8 = _mm_unpackhi_epi64(q0_8x16, temp3); + + //Cond1 (ABS(p0 - q0) < alpha) + temp1 = _mm_subs_epu8(q0_16x8, p0_16x8); + temp2 = _mm_subs_epu8(p0_16x8, q0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + + temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1); + + flag1_16x8 = _mm_packs_epi16(temp2, temp1); + + //Cond2 (ABS(q1 - q0) < beta) + temp1 = _mm_subs_epu8(q0_16x8, q1_16x8); + temp2 = _mm_subs_epu8(q1_16x8, q0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1); + + flag2_16x8 = _mm_packs_epi16(temp2, temp1); + + flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); + + //Cond3 (ABS(p1 - p0) < beta) + temp1 = _mm_subs_epu8(p0_16x8, p1_16x8); + temp2 = _mm_subs_epu8(p1_16x8, p0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1); + + flag2_16x8 = _mm_packs_epi16(temp2, temp1); + + // Filter enable mask: (ABS(p0 - q0) < alpha) && (ABS(q1 - q0) < beta) && (ABS(p1 - p0) < beta) + flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); + + // (ABS(p0 - q0) < ((alpha >> 2) + 2)) + temp1 = _mm_subs_epu8(p0_16x8, q0_16x8); + temp2 = _mm_subs_epu8(q0_16x8, p0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + Alpha_8x16 = _mm_srai_epi16(Alpha_8x16, 2); + Alpha_8x16 = _mm_add_epi16(Alpha_8x16, const_val2_16x8); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1); + + flag2_16x8 = _mm_packs_epi16(temp2, temp1); + flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); + + // (ABS(p2 
- p0) < beta) + temp1 = _mm_subs_epu8(p0_16x8, p2_16x8); + temp2 = _mm_subs_epu8(p2_16x8, p0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1); + + flag3_16x8 = _mm_packs_epi16(temp2, temp1); + flag3_16x8 = _mm_and_si128(flag3_16x8, flag2_16x8); + + // (ABS(q2 - q0) < beta) + temp1 = _mm_subs_epu8(q0_16x8, q2_16x8); + temp2 = _mm_subs_epu8(q2_16x8, q0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1); + + flag4_16x8 = _mm_packs_epi16(temp2, temp1); + flag4_16x8 = _mm_and_si128(flag4_16x8, flag2_16x8); + + // First 8 pixels: low 8 bytes of each register zero-extended to 16 bit + p3_8x16 = _mm_unpacklo_epi8(p3_16x8, zero); + p2_8x16 = _mm_unpacklo_epi8(p2_16x8, zero); + p1_8x16 = _mm_unpacklo_epi8(p1_16x8, zero); + p0_8x16 = _mm_unpacklo_epi8(p0_16x8, zero); + q0_8x16 = _mm_unpacklo_epi8(q0_16x8, zero); + q1_8x16 = _mm_unpacklo_epi8(q1_16x8, zero); + q2_8x16 = _mm_unpacklo_epi8(q2_16x8, zero); + q3_8x16 = _mm_unpacklo_epi8(q3_16x8, zero); + + // p0_1 = (2*p1 + p0 + q1 + 2) >> 2 and q0_1 = (2*q1 + q0 + p1 + 2) >> 2 + temp1 = _mm_add_epi16(p0_8x16, q1_8x16); + temp2 = _mm_add_epi16(p1_8x16, q0_8x16); + temp5 = _mm_add_epi16(temp1, const_val2_16x8); + temp6 = _mm_add_epi16(temp2, const_val2_16x8); + temp3 = _mm_slli_epi16(p1_8x16, 1); + temp4 = _mm_slli_epi16(q1_8x16, 1); + temp1 = _mm_add_epi16(temp5, temp3); + temp2 = _mm_add_epi16(temp6, temp4); + p0_16x8_1 = _mm_srai_epi16(temp1, 2); + q0_16x8_1 = _mm_srai_epi16(temp2, 2); + + // p1_2 = (p2 + p1 + p0 + q0 + 2) >> 2 and q1_2 = (q2 + q1 + q0 + p0 + 2) >> 2 + temp6 = _mm_add_epi16(temp6, p0_8x16); + temp5 = _mm_add_epi16(temp5, q0_8x16); + temp1 = _mm_add_epi16(temp6, p2_8x16); + temp2 = _mm_add_epi16(temp5, q2_8x16); + p1_16x8_2 = _mm_srai_epi16(temp1, 2); + q1_16x8_2 = _mm_srai_epi16(temp2, 2); + + // p0_2 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4) >> 3 and q0_2 = (q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4) >> 3 + temp1 = _mm_add_epi16(temp3, 
p2_8x16); + temp2 = _mm_add_epi16(temp4, q2_8x16); + temp1 = _mm_add_epi16(temp1, q1_8x16); + temp2 = _mm_add_epi16(temp2, p1_8x16); + temp3 = _mm_add_epi16(p0_8x16, q0_8x16); + temp3 = _mm_slli_epi16(temp3, 1); + temp1 = _mm_add_epi16(temp1, temp3); + temp2 = _mm_add_epi16(temp2, temp3); + temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4)); + temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4)); + p0_16x8_2 = _mm_srai_epi16(temp1, 3); + q0_16x8_2 = _mm_srai_epi16(temp2, 3); + + // p2_2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4) >> 3 and q2_2 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4) >> 3 + temp1 = _mm_add_epi16(temp6, const_val2_16x8); + temp2 = _mm_add_epi16(temp5, const_val2_16x8); + temp3 = _mm_slli_epi16(p2_8x16, 1); + temp4 = _mm_slli_epi16(q2_8x16, 1); + temp3 = _mm_add_epi16(p2_8x16, temp3); + temp4 = _mm_add_epi16(q2_8x16, temp4); + temp5 = _mm_slli_epi16(p3_8x16, 1); + temp6 = _mm_slli_epi16(q3_8x16, 1); + temp1 = _mm_add_epi16(temp1, temp3); + temp2 = _mm_add_epi16(temp2, temp4); + temp1 = _mm_add_epi16(temp1, temp5); + temp2 = _mm_add_epi16(temp2, temp6); + p2_16x8_2 = _mm_srai_epi16(temp1, 3); + q2_16x8_2 = _mm_srai_epi16(temp2, 3); + + // Second 8 pixels (high 8 bytes of each register) and packing with first 8 pixels + p3_8x16 = _mm_unpackhi_epi8(p3_16x8, zero); + p2_8x16 = _mm_unpackhi_epi8(p2_16x8, zero); + p1_8x16 = _mm_unpackhi_epi8(p1_16x8, zero); + p0_8x16 = _mm_unpackhi_epi8(p0_16x8, zero); + q0_8x16 = _mm_unpackhi_epi8(q0_16x8, zero); + q1_8x16 = _mm_unpackhi_epi8(q1_16x8, zero); + q2_8x16 = _mm_unpackhi_epi8(q2_16x8, zero); + q3_8x16 = _mm_unpackhi_epi8(q3_16x8, zero); + + // p0_1 and q0_1 + temp1 = _mm_add_epi16(p0_8x16, q1_8x16); + temp2 = _mm_add_epi16(p1_8x16, q0_8x16); + temp5 = _mm_add_epi16(temp1, const_val2_16x8); + temp6 = _mm_add_epi16(temp2, const_val2_16x8); + temp3 = _mm_slli_epi16(p1_8x16, 1); + temp4 = _mm_slli_epi16(q1_8x16, 1); + temp1 = _mm_add_epi16(temp5, temp3); + temp2 = _mm_add_epi16(temp6, temp4); + temp1 = _mm_srai_epi16(temp1, 2); + temp2 = _mm_srai_epi16(temp2, 2); + p0_16x8_1 = _mm_packus_epi16(p0_16x8_1, temp1); + q0_16x8_1 = 
_mm_packus_epi16(q0_16x8_1, temp2); + + // p1_2 and q1_2 + temp6 = _mm_add_epi16(temp6, p0_8x16); + temp5 = _mm_add_epi16(temp5, q0_8x16); + temp1 = _mm_add_epi16(temp6, p2_8x16); + temp2 = _mm_add_epi16(temp5, q2_8x16); + temp1 = _mm_srai_epi16(temp1, 2); + temp2 = _mm_srai_epi16(temp2, 2); + p1_16x8_2 = _mm_packus_epi16(p1_16x8_2, temp1); + q1_16x8_2 = _mm_packus_epi16(q1_16x8_2, temp2); + + // p0_2 and q0_2 + temp1 = _mm_add_epi16(temp3, p2_8x16); + temp2 = _mm_add_epi16(temp4, q2_8x16); + temp1 = _mm_add_epi16(temp1, q1_8x16); + temp2 = _mm_add_epi16(temp2, p1_8x16); + temp3 = _mm_add_epi16(p0_8x16, q0_8x16); + temp3 = _mm_slli_epi16(temp3, 1); + temp1 = _mm_add_epi16(temp1, temp3); + temp2 = _mm_add_epi16(temp2, temp3); + temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4)); + temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4)); + temp1 = _mm_srai_epi16(temp1, 3); + temp2 = _mm_srai_epi16(temp2, 3); + p0_16x8_2 = _mm_packus_epi16(p0_16x8_2, temp1); + q0_16x8_2 = _mm_packus_epi16(q0_16x8_2, temp2); + + // p2_2 and q2_2 + temp1 = _mm_add_epi16(temp6, const_val2_16x8); + temp2 = _mm_add_epi16(temp5, const_val2_16x8); + temp3 = _mm_slli_epi16(p2_8x16, 1); + temp4 = _mm_slli_epi16(q2_8x16, 1); + temp3 = _mm_add_epi16(p2_8x16, temp3); + temp4 = _mm_add_epi16(q2_8x16, temp4); + temp5 = _mm_slli_epi16(p3_8x16, 1); + temp6 = _mm_slli_epi16(q3_8x16, 1); + temp1 = _mm_add_epi16(temp1, temp3); + temp2 = _mm_add_epi16(temp2, temp4); + temp1 = _mm_add_epi16(temp1, temp5); + temp2 = _mm_add_epi16(temp2, temp6); + temp1 = _mm_srai_epi16(temp1, 3); + temp2 = _mm_srai_epi16(temp2, 3); + p2_16x8_2 = _mm_packus_epi16(p2_16x8_2, temp1); + q2_16x8_2 = _mm_packus_epi16(q2_16x8_2, temp2); + + // p0 and q0: select p0_1 / q0_1 where the filter enable mask flag1 is set + p0_16x8 = _mm_and_si128(p0_16x8, + _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF))); + p0_16x8_1 = _mm_and_si128(p0_16x8_1, flag1_16x8); + p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_1); + q0_16x8 = _mm_and_si128(q0_16x8, + _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF))); + q0_16x8_1 = 
_mm_and_si128(q0_16x8_1, flag1_16x8); + q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_1); + + // p0 and q0: select p0_2 where flag3 is set and q0_2 where flag4 is set + p0_16x8 = _mm_and_si128(p0_16x8, + _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF))); + p0_16x8_2 = _mm_and_si128(p0_16x8_2, flag3_16x8); + p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_2); + q0_16x8 = _mm_and_si128(q0_16x8, + _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF))); + q0_16x8_2 = _mm_and_si128(q0_16x8_2, flag4_16x8); + q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_2); + + // p1 and q1: select p1_2 where flag3 is set and q1_2 where flag4 is set + p1_16x8 = _mm_and_si128(p1_16x8, + _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF))); + p1_16x8_2 = _mm_and_si128(p1_16x8_2, flag3_16x8); + p1_16x8 = _mm_add_epi8(p1_16x8, p1_16x8_2); + q1_16x8 = _mm_and_si128(q1_16x8, + _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF))); + q1_16x8_2 = _mm_and_si128(q1_16x8_2, flag4_16x8); + q1_16x8 = _mm_add_epi8(q1_16x8, q1_16x8_2); + + // p2 and q2: select p2_2 where flag3 is set and q2_2 where flag4 is set + p2_16x8 = _mm_and_si128(p2_16x8, + _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF))); + p2_16x8_2 = _mm_and_si128(p2_16x8_2, flag3_16x8); + p2_16x8 = _mm_add_epi8(p2_16x8, p2_16x8_2); + q2_16x8 = _mm_and_si128(q2_16x8, + _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF))); + q2_16x8_2 = _mm_and_si128(q2_16x8_2, flag4_16x8); + q2_16x8 = _mm_add_epi8(q2_16x8, q2_16x8_2); + + temp1 = _mm_unpacklo_epi8(p3_16x8, p2_16x8); + temp2 = _mm_unpacklo_epi8(p1_16x8, p0_16x8); + temp3 = _mm_unpacklo_epi8(q0_16x8, q1_16x8); + temp4 = _mm_unpacklo_epi8(q2_16x8, q3_16x8); + + p3_8x16 = _mm_unpacklo_epi16(temp1, temp2); + p2_8x16 = _mm_unpackhi_epi16(temp1, temp2); + q2_8x16 = _mm_unpacklo_epi16(temp3, temp4); + q3_8x16 = _mm_unpackhi_epi16(temp3, temp4); + + line1 = _mm_unpacklo_epi32(p3_8x16, q2_8x16); + line2 = _mm_srli_si128(line1, 8); + line3 = _mm_unpackhi_epi32(p3_8x16, q2_8x16); + line4 = _mm_srli_si128(line3, 8); + line5 = _mm_unpacklo_epi32(p2_8x16, q3_8x16); + line6 = _mm_srli_si128(line5, 8); + line7 = _mm_unpackhi_epi32(p2_8x16, q3_8x16); + line8 = _mm_srli_si128(line7, 8); + + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 
0 * src_strd), line1); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd), line2); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd), line3); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd), line4); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd), line5); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd), line6); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd), line7); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd), line8); + + temp1 = _mm_unpackhi_epi8(p3_16x8, p2_16x8); + temp2 = _mm_unpackhi_epi8(p1_16x8, p0_16x8); + temp3 = _mm_unpackhi_epi8(q0_16x8, q1_16x8); + temp4 = _mm_unpackhi_epi8(q2_16x8, q3_16x8); + + p3_8x16 = _mm_unpacklo_epi16(temp1, temp2); + p2_8x16 = _mm_unpackhi_epi16(temp1, temp2); + q2_8x16 = _mm_unpacklo_epi16(temp3, temp4); + q3_8x16 = _mm_unpackhi_epi16(temp3, temp4); + + line1 = _mm_unpacklo_epi32(p3_8x16, q2_8x16); + line2 = _mm_srli_si128(line1, 8); + line3 = _mm_unpackhi_epi32(p3_8x16, q2_8x16); + line4 = _mm_srli_si128(line3, 8); + line5 = _mm_unpacklo_epi32(p2_8x16, q3_8x16); + line6 = _mm_srli_si128(line5, 8); + line7 = _mm_unpackhi_epi32(p2_8x16, q3_8x16); + line8 = _mm_srli_si128(line7, 8); + + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 8 * src_strd), line1); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 9 * src_strd), line2); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 10 * src_strd), line3); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 11 * src_strd), line4); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 12 * src_strd), line5); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 13 * src_strd), line6); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 14 * src_strd), line7); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 15 * src_strd), line8); + +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_luma_horz_bs4_ssse3() */ +/* */ +/* Description : This function performs filtering of a luma block */ +/* 
horizontal edge when the boundary strength is set to 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 (first of 16 pixels along the edge) */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.4 under the */ +/* title "Filtering process for edges for bS equal to 4" in */ +/* ITU T Rec H.264. */ +/* Rows p3..p0 are read starting at pu1_src - 4 * src_strd and rows q0..q3 starting at pu1_src, 16 bytes per row. */ +/* Outputs : Rows p2, p1, p0, q0, q1 and q2 are rewritten in place */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 12 02 2015 Naveen Kumar P Initial version */ +/* */ +/*****************************************************************************/ +void ih264_deblk_luma_horz_bs4_ssse3(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta) +{ + WORD32 i4_posP3, i4_posP2, i4_posP1, i4_posP0; /* row offsets: same width as src_strd so its multiples are not narrowed */ + WORD32 i4_posQ1, i4_posQ2, i4_posQ3; + UWORD8 *pu1_HorzPixel; + __m128i zero = _mm_setzero_si128(); + __m128i q0_16x8, q1_16x8, q2_16x8, q3_16x8; + __m128i p0_16x8, p1_16x8, p2_16x8, p3_16x8; + __m128i q0_8x16, q1_8x16, q2_8x16, q3_8x16; + __m128i p0_8x16, p1_8x16, p2_8x16, p3_8x16; + __m128i q0_16x8_1; + __m128i p0_16x8_1; + __m128i q0_16x8_2, q1_16x8_2, q2_16x8_2; + __m128i p0_16x8_2, p1_16x8_2, p2_16x8_2; + __m128i temp1, temp2, temp3, temp4, temp5, temp6; + __m128i Alpha_8x16, Beta_8x16; + __m128i flag1_16x8, flag2_16x8, flag3_16x8, flag4_16x8; + __m128i const_val2_16x8 = _mm_set1_epi16(2); + + pu1_HorzPixel = pu1_src - (src_strd << 2); + + i4_posQ1 = src_strd; + i4_posQ2 = X2(src_strd); + i4_posQ3 = X3(src_strd); + i4_posP0 = X3(src_strd); + i4_posP1 = X2(src_strd); + i4_posP2 = src_strd; + i4_posP3 = 0; + + Alpha_8x16 = _mm_set1_epi16(alpha); + Beta_8x16 = _mm_set1_epi16(beta); + + p3_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i4_posP3)); + p2_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i4_posP2)); + p1_16x8 = 
_mm_loadu_si128((__m128i *)(pu1_HorzPixel + i4_posP1)); + p0_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i4_posP0)); + q0_16x8 = _mm_loadu_si128((__m128i *)(pu1_src)); + q1_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i4_posQ1)); + q2_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i4_posQ2)); + q3_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i4_posQ3)); + + //Cond1 (ABS(p0 - q0) < alpha) + temp1 = _mm_subs_epu8(q0_16x8, p0_16x8); + temp2 = _mm_subs_epu8(p0_16x8, q0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + + temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1); + + flag1_16x8 = _mm_packs_epi16(temp2, temp1); + + //Cond2 (ABS(q1 - q0) < beta) + temp1 = _mm_subs_epu8(q0_16x8, q1_16x8); + temp2 = _mm_subs_epu8(q1_16x8, q0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1); + + flag2_16x8 = _mm_packs_epi16(temp2, temp1); + + flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); + + //Cond3 (ABS(p1 - p0) < beta) + temp1 = _mm_subs_epu8(p0_16x8, p1_16x8); + temp2 = _mm_subs_epu8(p1_16x8, p0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1); + + flag2_16x8 = _mm_packs_epi16(temp2, temp1); + + // Filter enable mask: (ABS(p0 - q0) < alpha) && (ABS(q1 - q0) < beta) && (ABS(p1 - p0) < beta) + flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); + + // (ABS(p0 - q0) < ((alpha >> 2) + 2)) + temp1 = _mm_subs_epu8(p0_16x8, q0_16x8); + temp2 = _mm_subs_epu8(q0_16x8, p0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + Alpha_8x16 = _mm_srai_epi16(Alpha_8x16, 2); + Alpha_8x16 = _mm_add_epi16(Alpha_8x16, const_val2_16x8); + + temp2 = 
_mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1); + + flag2_16x8 = _mm_packs_epi16(temp2, temp1); + flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); + + // (ABS(p2 - p0) < beta) + temp1 = _mm_subs_epu8(p0_16x8, p2_16x8); + temp2 = _mm_subs_epu8(p2_16x8, p0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1); + + flag3_16x8 = _mm_packs_epi16(temp2, temp1); + flag3_16x8 = _mm_and_si128(flag3_16x8, flag2_16x8); + + // (ABS(q2 - q0) < beta) + temp1 = _mm_subs_epu8(q0_16x8, q2_16x8); + temp2 = _mm_subs_epu8(q2_16x8, q0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1); + + flag4_16x8 = _mm_packs_epi16(temp2, temp1); + flag4_16x8 = _mm_and_si128(flag4_16x8, flag2_16x8); + + // First 8 pixels: low 8 bytes of each row zero-extended to 16 bit + p3_8x16 = _mm_unpacklo_epi8(p3_16x8, zero); + p2_8x16 = _mm_unpacklo_epi8(p2_16x8, zero); + p1_8x16 = _mm_unpacklo_epi8(p1_16x8, zero); + p0_8x16 = _mm_unpacklo_epi8(p0_16x8, zero); + q0_8x16 = _mm_unpacklo_epi8(q0_16x8, zero); + q1_8x16 = _mm_unpacklo_epi8(q1_16x8, zero); + q2_8x16 = _mm_unpacklo_epi8(q2_16x8, zero); + q3_8x16 = _mm_unpacklo_epi8(q3_16x8, zero); + + // p0_1 = (2*p1 + p0 + q1 + 2) >> 2 and q0_1 = (2*q1 + q0 + p1 + 2) >> 2 + temp1 = _mm_add_epi16(p0_8x16, q1_8x16); + temp2 = _mm_add_epi16(p1_8x16, q0_8x16); + temp5 = _mm_add_epi16(temp1, const_val2_16x8); + temp6 = _mm_add_epi16(temp2, const_val2_16x8); + temp3 = _mm_slli_epi16(p1_8x16, 1); + temp4 = _mm_slli_epi16(q1_8x16, 1); + temp1 = _mm_add_epi16(temp5, temp3); + temp2 = _mm_add_epi16(temp6, temp4); + p0_16x8_1 = _mm_srai_epi16(temp1, 2); + q0_16x8_1 = _mm_srai_epi16(temp2, 2); + + // p1_2 = (p2 + p1 + p0 + q0 + 2) >> 2 and q1_2 = (q2 + q1 + q0 + p0 + 2) >> 2 + temp6 = 
_mm_add_epi16(temp6, p0_8x16); + temp5 = _mm_add_epi16(temp5, q0_8x16); + temp1 = _mm_add_epi16(temp6, p2_8x16); + temp2 = _mm_add_epi16(temp5, q2_8x16); + p1_16x8_2 = _mm_srai_epi16(temp1, 2); + q1_16x8_2 = _mm_srai_epi16(temp2, 2); + + // p0_2 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4) >> 3 and q0_2 = (q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4) >> 3 + temp1 = _mm_add_epi16(temp3, p2_8x16); + temp2 = _mm_add_epi16(temp4, q2_8x16); + temp1 = _mm_add_epi16(temp1, q1_8x16); + temp2 = _mm_add_epi16(temp2, p1_8x16); + temp3 = _mm_add_epi16(p0_8x16, q0_8x16); + temp3 = _mm_slli_epi16(temp3, 1); + temp1 = _mm_add_epi16(temp1, temp3); + temp2 = _mm_add_epi16(temp2, temp3); + temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4)); + temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4)); + p0_16x8_2 = _mm_srai_epi16(temp1, 3); + q0_16x8_2 = _mm_srai_epi16(temp2, 3); + + // p2_2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4) >> 3 and q2_2 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4) >> 3 + temp1 = _mm_add_epi16(temp6, const_val2_16x8); + temp2 = _mm_add_epi16(temp5, const_val2_16x8); + temp3 = _mm_slli_epi16(p2_8x16, 1); + temp4 = _mm_slli_epi16(q2_8x16, 1); + temp3 = _mm_add_epi16(p2_8x16, temp3); + temp4 = _mm_add_epi16(q2_8x16, temp4); + temp5 = _mm_slli_epi16(p3_8x16, 1); + temp6 = _mm_slli_epi16(q3_8x16, 1); + temp1 = _mm_add_epi16(temp1, temp3); + temp2 = _mm_add_epi16(temp2, temp4); + temp1 = _mm_add_epi16(temp1, temp5); + temp2 = _mm_add_epi16(temp2, temp6); + p2_16x8_2 = _mm_srai_epi16(temp1, 3); + q2_16x8_2 = _mm_srai_epi16(temp2, 3); + + // Second 8 pixels (high 8 bytes of each row) and packing with first 8 pixels + p3_8x16 = _mm_unpackhi_epi8(p3_16x8, zero); + p2_8x16 = _mm_unpackhi_epi8(p2_16x8, zero); + p1_8x16 = _mm_unpackhi_epi8(p1_16x8, zero); + p0_8x16 = _mm_unpackhi_epi8(p0_16x8, zero); + q0_8x16 = _mm_unpackhi_epi8(q0_16x8, zero); + q1_8x16 = _mm_unpackhi_epi8(q1_16x8, zero); + q2_8x16 = _mm_unpackhi_epi8(q2_16x8, zero); + q3_8x16 = _mm_unpackhi_epi8(q3_16x8, zero); + + // p0_1 and q0_1 + temp1 = _mm_add_epi16(p0_8x16, q1_8x16); + temp2 = _mm_add_epi16(p1_8x16, q0_8x16); + temp5 = _mm_add_epi16(temp1, const_val2_16x8); + temp6 = _mm_add_epi16(temp2, const_val2_16x8); + temp3 = 
_mm_slli_epi16(p1_8x16, 1); + temp4 = _mm_slli_epi16(q1_8x16, 1); + temp1 = _mm_add_epi16(temp5, temp3); + temp2 = _mm_add_epi16(temp6, temp4); + temp1 = _mm_srai_epi16(temp1, 2); + temp2 = _mm_srai_epi16(temp2, 2); + p0_16x8_1 = _mm_packus_epi16(p0_16x8_1, temp1); + q0_16x8_1 = _mm_packus_epi16(q0_16x8_1, temp2); + + // p1_2 and q1_2 + temp6 = _mm_add_epi16(temp6, p0_8x16); + temp5 = _mm_add_epi16(temp5, q0_8x16); + temp1 = _mm_add_epi16(temp6, p2_8x16); + temp2 = _mm_add_epi16(temp5, q2_8x16); + temp1 = _mm_srai_epi16(temp1, 2); + temp2 = _mm_srai_epi16(temp2, 2); + p1_16x8_2 = _mm_packus_epi16(p1_16x8_2, temp1); + q1_16x8_2 = _mm_packus_epi16(q1_16x8_2, temp2); + + // p0_2 and q0_2 + temp1 = _mm_add_epi16(temp3, p2_8x16); + temp2 = _mm_add_epi16(temp4, q2_8x16); + temp1 = _mm_add_epi16(temp1, q1_8x16); + temp2 = _mm_add_epi16(temp2, p1_8x16); + temp3 = _mm_add_epi16(p0_8x16, q0_8x16); + temp3 = _mm_slli_epi16(temp3, 1); + temp1 = _mm_add_epi16(temp1, temp3); + temp2 = _mm_add_epi16(temp2, temp3); + temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4)); + temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4)); + temp1 = _mm_srai_epi16(temp1, 3); + temp2 = _mm_srai_epi16(temp2, 3); + p0_16x8_2 = _mm_packus_epi16(p0_16x8_2, temp1); + q0_16x8_2 = _mm_packus_epi16(q0_16x8_2, temp2); + + // p2_2 and q2_2 + temp1 = _mm_add_epi16(temp6, const_val2_16x8); + temp2 = _mm_add_epi16(temp5, const_val2_16x8); + temp3 = _mm_slli_epi16(p2_8x16, 1); + temp4 = _mm_slli_epi16(q2_8x16, 1); + temp3 = _mm_add_epi16(p2_8x16, temp3); + temp4 = _mm_add_epi16(q2_8x16, temp4); + temp5 = _mm_slli_epi16(p3_8x16, 1); + temp6 = _mm_slli_epi16(q3_8x16, 1); + temp1 = _mm_add_epi16(temp1, temp3); + temp2 = _mm_add_epi16(temp2, temp4); + temp1 = _mm_add_epi16(temp1, temp5); + temp2 = _mm_add_epi16(temp2, temp6); + temp1 = _mm_srai_epi16(temp1, 3); + temp2 = _mm_srai_epi16(temp2, 3); + p2_16x8_2 = _mm_packus_epi16(p2_16x8_2, temp1); + q2_16x8_2 = _mm_packus_epi16(q2_16x8_2, temp2); + + // p0 and q0: select p0_1 / q0_1 where the filter enable mask flag1 is set + p0_16x8 = 
_mm_and_si128(p0_16x8, + _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF))); + p0_16x8_1 = _mm_and_si128(p0_16x8_1, flag1_16x8); + p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_1); + q0_16x8 = _mm_and_si128(q0_16x8, + _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF))); + q0_16x8_1 = _mm_and_si128(q0_16x8_1, flag1_16x8); + q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_1); + + // p0 and q0: select p0_2 where flag3 is set and q0_2 where flag4 is set + p0_16x8 = _mm_and_si128(p0_16x8, + _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF))); + p0_16x8_2 = _mm_and_si128(p0_16x8_2, flag3_16x8); + p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_2); + q0_16x8 = _mm_and_si128(q0_16x8, + _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF))); + q0_16x8_2 = _mm_and_si128(q0_16x8_2, flag4_16x8); + q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_2); + + // p1 and q1: select p1_2 where flag3 is set and q1_2 where flag4 is set + p1_16x8 = _mm_and_si128(p1_16x8, + _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF))); + p1_16x8_2 = _mm_and_si128(p1_16x8_2, flag3_16x8); + p1_16x8 = _mm_add_epi8(p1_16x8, p1_16x8_2); + q1_16x8 = _mm_and_si128(q1_16x8, + _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF))); + q1_16x8_2 = _mm_and_si128(q1_16x8_2, flag4_16x8); + q1_16x8 = _mm_add_epi8(q1_16x8, q1_16x8_2); + + // p2 and q2: select p2_2 where flag3 is set and q2_2 where flag4 is set + p2_16x8 = _mm_and_si128(p2_16x8, + _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF))); + p2_16x8_2 = _mm_and_si128(p2_16x8_2, flag3_16x8); + p2_16x8 = _mm_add_epi8(p2_16x8, p2_16x8_2); + q2_16x8 = _mm_and_si128(q2_16x8, + _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF))); + q2_16x8_2 = _mm_and_si128(q2_16x8_2, flag4_16x8); + q2_16x8 = _mm_add_epi8(q2_16x8, q2_16x8_2); + + _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i4_posP2), p2_16x8); + _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i4_posP1), p1_16x8); + _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i4_posP0), p0_16x8); + + _mm_storeu_si128((__m128i *)(pu1_src), q0_16x8); + _mm_storeu_si128((__m128i *)(pu1_src + i4_posQ1), q1_16x8); + _mm_storeu_si128((__m128i *)(pu1_src + i4_posQ2), q2_16x8); + +} + +/*****************************************************************************/ +/* */ +/* 
Function Name : ih264_deblk_luma_vert_bslt4_ssse3() */ +/* */ +/* Description : This function performs filtering of a luma block */ +/* vertical edge when the boundary strength is less than 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* u4_bs - packed Boundary strength array */ +/* pu1_cliptab - tc0_table */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.3 under the */ +/* title "Filtering process for edges for bS less than 4" */ +/* in ITU T Rec H.264. */ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 12 02 2015 Naveen Kumar P Initial version */ +/* */ +/*****************************************************************************/ +void ih264_deblk_luma_vert_bslt4_ssse3(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta, + UWORD32 u4_bs, + const UWORD8 *pu1_cliptab) +{ + UWORD8 u1_Bs, u1_Bs1; + + UWORD32 j = 0; + + __m128i linea, lineb, linec, lined, linee, linef, lineg, lineh; + __m128i int1, int2, int3, int4, high1, high2; + __m128i flag, flag1, i_C, i_C0; + __m128i i_Ap, i_Aq, diff, const1, const2, in_macro, in_macrotemp, temp, + temp1; + __m128i zero = _mm_setzero_si128(); + + for(j = 0; j <= 8 * src_strd; j += 8 * src_strd) + { + //Transpose + linea = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + j)); + lineb = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + src_strd + j)); + linec = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 2 * src_strd + j)); + lined = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 3 * src_strd + j)); + + linea = _mm_unpacklo_epi8(linea, zero); + lineb = _mm_unpacklo_epi8(lineb, zero); + linec = _mm_unpacklo_epi8(linec, zero); + lined = _mm_unpacklo_epi8(lined, zero); + + int1 = _mm_unpacklo_epi16(linea, 
lineb); + lineb = _mm_unpackhi_epi16(linea, lineb); + + int2 = _mm_unpacklo_epi16(linec, lined); + lined = _mm_unpackhi_epi16(linec, lined); + + linea = _mm_unpacklo_epi16(int1, int2); + int1 = _mm_unpackhi_epi16(int1, int2); + + linec = _mm_unpacklo_epi16(lineb, lined); + high1 = _mm_unpackhi_epi16(lineb, lined); + + linee = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 4 * src_strd + j)); + linef = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 5 * src_strd + j)); + lineg = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 6 * src_strd + j)); + lineh = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 7 * src_strd + j)); + + linee = _mm_unpacklo_epi8(linee, zero); + linef = _mm_unpacklo_epi8(linef, zero); + lineg = _mm_unpacklo_epi8(lineg, zero); + lineh = _mm_unpacklo_epi8(lineh, zero); + + int2 = _mm_unpacklo_epi16(linee, linef); + linef = _mm_unpackhi_epi16(linee, linef); + + int3 = _mm_unpacklo_epi16(lineg, lineh); + lineh = _mm_unpackhi_epi16(lineg, lineh); + + linee = _mm_unpacklo_epi16(int2, int3); + int2 = _mm_unpackhi_epi16(int2, int3); + + lineg = _mm_unpacklo_epi16(linef, lineh); + high2 = _mm_unpackhi_epi16(linef, lineh); + + int4 = _mm_unpacklo_epi16(linea, linee); + lineb = _mm_unpackhi_epi16(linea, linee); + + int3 = _mm_unpacklo_epi16(int1, int2); + lined = _mm_unpackhi_epi16(int1, int2); + + int2 = _mm_unpacklo_epi16(linec, lineg); + linef = _mm_unpackhi_epi16(linec, lineg); + + linea = int4; + linec = int3; + linee = int2; + + lineg = _mm_unpacklo_epi16(high1, high2); + lineh = _mm_unpackhi_epi16(high1, high2); + + //end of transpose + + u1_Bs = (u4_bs >> 24) & 0xff; + u1_Bs1 = (u4_bs >> 16) & 0xff; + u4_bs <<= 16; + + flag1 = _mm_set_epi16(u1_Bs1, u1_Bs, u1_Bs1, u1_Bs, u1_Bs1, u1_Bs, + u1_Bs1, u1_Bs); + flag1 = _mm_cmpeq_epi16(flag1, zero); //Set flag to 1s and 0s + flag1 = _mm_xor_si128(flag1, _mm_set1_epi16(0xFFFF)); //Invert for required mask + + i_C0 = _mm_set_epi16(pu1_cliptab[u1_Bs1], pu1_cliptab[u1_Bs], + pu1_cliptab[u1_Bs1], pu1_cliptab[u1_Bs], + 
pu1_cliptab[u1_Bs1], pu1_cliptab[u1_Bs], + pu1_cliptab[u1_Bs1], pu1_cliptab[u1_Bs]); + + diff = _mm_subs_epi16(linec, lined); //Condn 1 + diff = _mm_abs_epi16(diff); + const1 = _mm_set1_epi16(alpha); + flag = _mm_cmpgt_epi16(const1, diff); + + diff = _mm_subs_epi16(linee, lined); //Condtn 2 + diff = _mm_abs_epi16(diff); + const1 = _mm_set1_epi16(beta); + flag = _mm_and_si128(flag, _mm_cmpgt_epi16(const1, diff)); + + diff = _mm_subs_epi16(lineb, linec); //Condtn 3 + diff = _mm_abs_epi16(diff); + flag = _mm_and_si128(flag, _mm_cmpgt_epi16(const1, diff)); //Const 1= Beta from now on + + flag = _mm_and_si128(flag, flag1); //Final flag (ui_B condition + other 3 conditions) + + //Adding Ap<Beta and Aq<Beta + i_Ap = _mm_subs_epi16(linea, linec); + i_Ap = _mm_abs_epi16(i_Ap); + const2 = _mm_cmpgt_epi16(const1, i_Ap); + const2 = _mm_subs_epi16(zero, const2); //Make FFFF=1 and 0000=0 + i_C = _mm_add_epi16(i_C0, const2); + + i_Aq = _mm_subs_epi16(linef, lined); + i_Aq = _mm_abs_epi16(i_Aq); + const2 = _mm_cmpgt_epi16(const1, i_Aq); + const2 = _mm_subs_epi16(zero, const2); + i_C = _mm_add_epi16(i_C, const2); + + //Calculate in_macro + diff = _mm_subs_epi16(lined, linec); + diff = _mm_slli_epi16(diff, 2); + const2 = _mm_subs_epi16(lineb, linee); + diff = _mm_add_epi16(diff, const2); + const2 = _mm_set1_epi16(4); + diff = _mm_add_epi16(diff, const2); + in_macro = _mm_srai_epi16(diff, 3); + + in_macro = _mm_min_epi16(i_C, in_macro); //CLIP3 + i_C = _mm_subs_epi16(zero, i_C); + in_macro = _mm_max_epi16(i_C, in_macro); + + //Compute and store + in_macrotemp = _mm_add_epi16(linec, in_macro); + in_macrotemp = _mm_and_si128(in_macrotemp, flag); + temp = _mm_and_si128(linec, + _mm_xor_si128(flag, _mm_set1_epi16(0xFFFF))); + temp = _mm_add_epi16(temp, in_macrotemp); + //temp= _mm_packus_epi16 (temp, zero); + //_mm_storel_epi64(uc_HorzPixel+i16_posP0+i, in_macrotemp); + + in_macrotemp = _mm_subs_epi16(lined, in_macro); + in_macrotemp = _mm_and_si128(in_macrotemp, flag); + temp1 = 
_mm_and_si128(lined, + _mm_xor_si128(flag, _mm_set1_epi16(0xFFFF))); + temp1 = _mm_add_epi16(temp1, in_macrotemp); + //temp1= _mm_packus_epi16 (temp1, zero); + //_mm_storel_epi64(pu1_src+i, in_macrotemp); + + //If Ap<Beta + flag1 = _mm_cmpgt_epi16(const1, i_Ap); + flag1 = _mm_and_si128(flag, flag1); + in_macrotemp = _mm_add_epi16(linec, lined); + in_macrotemp = _mm_add_epi16(in_macrotemp, _mm_set1_epi16(1)); + in_macrotemp = _mm_srai_epi16(in_macrotemp, 1); + in_macro = _mm_add_epi16(in_macrotemp, linea); + in_macro = _mm_subs_epi16(in_macro, _mm_slli_epi16(lineb, 1)); + in_macro = _mm_srai_epi16(in_macro, 1); + + in_macro = _mm_min_epi16(i_C0, in_macro); //CLIP3 + i_C0 = _mm_subs_epi16(zero, i_C0); + in_macro = _mm_max_epi16(i_C0, in_macro); + + in_macro = _mm_and_si128(in_macro, flag1); + lineb = _mm_add_epi16(lineb, in_macro); + //in_macro= _mm_packus_epi16 (i_p1, zero); + //_mm_storel_epi64(uc_HorzPixel+i16_posP1+i, in_macro); + + flag1 = _mm_cmpgt_epi16(const1, i_Aq); + flag1 = _mm_and_si128(flag, flag1); + in_macro = _mm_add_epi16(in_macrotemp, linef); + in_macro = _mm_subs_epi16(in_macro, _mm_slli_epi16(linee, 1)); + in_macro = _mm_srai_epi16(in_macro, 1); + + i_C0 = _mm_abs_epi16(i_C0); + in_macro = _mm_min_epi16(i_C0, in_macro); //CLIP3 + i_C0 = _mm_subs_epi16(zero, i_C0); + in_macro = _mm_max_epi16(i_C0, in_macro); + + in_macro = _mm_and_si128(in_macro, flag1); + linee = _mm_add_epi16(linee, in_macro); + //in_macro= _mm_packus_epi16 (i_q1, zero); + //_mm_storel_epi64(pu1_src+i16_posQ1+i, in_macro); + linec = temp; + lined = temp1; + //End of filtering + + int1 = _mm_unpacklo_epi16(linea, linee); + linee = _mm_unpackhi_epi16(linea, linee); + + int2 = _mm_unpacklo_epi16(linec, lineg); + lineg = _mm_unpackhi_epi16(linec, lineg); + + linea = _mm_unpacklo_epi16(int1, int2); + int3 = _mm_unpackhi_epi16(int1, int2); + + linec = _mm_unpacklo_epi16(linee, lineg); + lineg = _mm_unpackhi_epi16(linee, lineg); + + int1 = _mm_unpacklo_epi16(lineb, linef); + linef = 
_mm_unpackhi_epi16(lineb, linef); + + int2 = _mm_unpacklo_epi16(lined, lineh); + lineh = _mm_unpackhi_epi16(lined, lineh); + + lineb = _mm_unpacklo_epi16(int1, int2); + int4 = _mm_unpackhi_epi16(int1, int2); + + lined = _mm_unpacklo_epi16(linef, lineh); + lineh = _mm_unpackhi_epi16(linef, lineh); + + int1 = _mm_unpackhi_epi16(linea, lineb); + linea = _mm_unpacklo_epi16(linea, lineb); + + int2 = _mm_unpacklo_epi16(int3, int4); + high1 = _mm_unpackhi_epi16(int3, int4); + + lineb = _mm_unpacklo_epi16(linec, lined); + linef = _mm_unpackhi_epi16(linec, lined); + + lined = _mm_unpacklo_epi16(lineg, lineh); + lineh = _mm_unpackhi_epi16(lineg, lineh); + + linee = int1; + lineg = high1; + linec = int2; + //End of inverse transpose + + //Packs and stores + linea = _mm_packus_epi16(linea, zero); + _mm_storel_epi64((__m128i *)(pu1_src - 3 + j), linea); + + lineb = _mm_packus_epi16(lineb, zero); + _mm_storel_epi64((__m128i *)(pu1_src - 3 + src_strd + j), lineb); + + linec = _mm_packus_epi16(linec, zero); + _mm_storel_epi64((__m128i *)(pu1_src - 3 + 2 * src_strd + j), linec); + + lined = _mm_packus_epi16(lined, zero); + _mm_storel_epi64((__m128i *)(pu1_src - 3 + 3 * src_strd + j), lined); + + linee = _mm_packus_epi16(linee, zero); + _mm_storel_epi64((__m128i *)(pu1_src - 3 + 4 * src_strd + j), linee); + + linef = _mm_packus_epi16(linef, zero); + _mm_storel_epi64((__m128i *)(pu1_src - 3 + 5 * src_strd + j), linef); + + lineg = _mm_packus_epi16(lineg, zero); + _mm_storel_epi64((__m128i *)(pu1_src - 3 + 6 * src_strd + j), lineg); + + lineh = _mm_packus_epi16(lineh, zero); + _mm_storel_epi64((__m128i *)(pu1_src - 3 + 7 * src_strd + j), lineh); + + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_luma_horz_bslt4_ssse3() */ +/* */ +/* Description : This function performs filtering of a luma block */ +/* horizontal edge when boundary strength is less than 4. 
*/ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* u4_bs - packed Boundary strength array */ +/* pu1_cliptab - tc0_table */ +/* */ +/* Globals : None */ +/* */ +/* Processing : This operation is described in Sec. 8.7.2.3 under the */ +/* title "Filtering process for edges for bS less than 4" */ +/* in ITU T Rec H.264. */ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 12 02 2015 Naveen Kumar P Initial version */ +/* */ +/*****************************************************************************/ +void ih264_deblk_luma_horz_bslt4_ssse3(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta, + UWORD32 u4_bs, + const UWORD8 *pu1_cliptab) +{ + WORD16 i16_posP2, i16_posP1, i16_posP0, i16_posQ1, i16_posQ2; + UWORD8 *pu1_HorzPixel; + __m128i zero = _mm_setzero_si128(); + __m128i bs_flag_16x8b, C0_16x8, C0_8x16, C0_hi_8x16, C_8x16, C_hi_8x16; + __m128i q0_16x8, q1_16x8, q2_16x8, p0_16x8, p1_16x8, p2_16x8; + __m128i temp1, temp2; + __m128i Alpha_8x16, Beta_8x16, flag1_16x8, flag2_16x8, flag3_16x8; + __m128i in_macro_16x8, in_macro_hi_16x8; + __m128i const_val4_8x16; + UWORD8 u1_Bs0, u1_Bs1, u1_Bs2, u1_Bs3; + UWORD8 clip0, clip1, clip2, clip3; + + pu1_HorzPixel = pu1_src - (src_strd << 2); + + i16_posQ1 = src_strd; + i16_posQ2 = X2(src_strd); + i16_posP0 = X3(src_strd); + i16_posP1 = X2(src_strd); + i16_posP2 = src_strd; + + q0_16x8 = _mm_loadu_si128((__m128i *)(pu1_src)); + q1_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i16_posQ1)); + + u1_Bs0 = (u4_bs >> 24) & 0xff; + u1_Bs1 = (u4_bs >> 16) & 0xff; + u1_Bs2 = (u4_bs >> 8) & 0xff; + u1_Bs3 = (u4_bs >> 0) & 0xff; + clip0 = pu1_cliptab[u1_Bs0]; + clip1 = pu1_cliptab[u1_Bs1]; + clip2 = pu1_cliptab[u1_Bs2]; + clip3 = pu1_cliptab[u1_Bs3]; 
+ + Alpha_8x16 = _mm_set1_epi16(alpha); + Beta_8x16 = _mm_set1_epi16(beta); + + bs_flag_16x8b = _mm_set_epi8(u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs2, u1_Bs2, + u1_Bs2, u1_Bs2, u1_Bs1, u1_Bs1, u1_Bs1, u1_Bs1, + u1_Bs0, u1_Bs0, u1_Bs0, u1_Bs0); + + C0_16x8 = _mm_set_epi8(clip3, clip3, clip3, clip3, clip2, clip2, clip2, + clip2, clip1, clip1, clip1, clip1, clip0, clip0, + clip0, clip0); + + bs_flag_16x8b = _mm_cmpeq_epi8(bs_flag_16x8b, zero); + bs_flag_16x8b = _mm_xor_si128(bs_flag_16x8b, _mm_set1_epi8(0xFF)); //Invert for required mask + C0_8x16 = _mm_unpacklo_epi8(C0_16x8, zero); + C0_hi_8x16 = _mm_unpackhi_epi8(C0_16x8, zero); + + p1_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP1)); + p0_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP0)); + p2_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP2)); + q2_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i16_posQ2)); + + //Cond1 (ABS(p0 - q0) < alpha) + temp1 = _mm_subs_epu8(q0_16x8, p0_16x8); + temp2 = _mm_subs_epu8(p0_16x8, q0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + + temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1); + + flag1_16x8 = _mm_packs_epi16(temp2, temp1); + flag1_16x8 = _mm_and_si128(flag1_16x8, bs_flag_16x8b); + + //Cond2 (ABS(q1 - q0) < beta) + temp1 = _mm_subs_epu8(q0_16x8, q1_16x8); + temp2 = _mm_subs_epu8(q1_16x8, q0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1); + + flag2_16x8 = _mm_packs_epi16(temp2, temp1); + + flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); + + //Cond3 (ABS(p1 - p0) < beta) + temp1 = _mm_subs_epu8(p0_16x8, p1_16x8); + temp2 = _mm_subs_epu8(p1_16x8, p0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 
= _mm_unpackhi_epi8(temp1, zero); + + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1); + + flag2_16x8 = _mm_packs_epi16(temp2, temp1); + + // !((ABS(p0 - q0) < alpha) || (ABS(q1 - q0) < beta) || (ABS(p1 - p0) < beta)) + flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); + + // (ABS(p2 - p0) < beta) + temp1 = _mm_subs_epu8(p0_16x8, p2_16x8); + temp2 = _mm_subs_epu8(p2_16x8, p0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1); + + flag2_16x8 = _mm_packs_epi16(temp2, temp1); + flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); + + temp2 = _mm_subs_epi16(zero, temp2); + temp1 = _mm_subs_epi16(zero, temp1); + + C_8x16 = _mm_add_epi16(C0_8x16, temp2); + C_hi_8x16 = _mm_add_epi16(C0_hi_8x16, temp1); + + // (ABS(q2 - q0) < beta) + temp1 = _mm_subs_epu8(q0_16x8, q2_16x8); + temp2 = _mm_subs_epu8(q2_16x8, q0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp1 = _mm_unpackhi_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1); + + flag3_16x8 = _mm_packs_epi16(temp2, temp1); + flag3_16x8 = _mm_and_si128(flag1_16x8, flag3_16x8); + + temp2 = _mm_subs_epi16(zero, temp2); + temp1 = _mm_subs_epi16(zero, temp1); + + C_8x16 = _mm_add_epi16(C_8x16, temp2); + C_hi_8x16 = _mm_add_epi16(C_hi_8x16, temp1); + + const_val4_8x16 = _mm_set1_epi16(4); + temp1 = _mm_subs_epi16(_mm_unpacklo_epi8(q0_16x8, zero), + _mm_unpacklo_epi8(p0_16x8, zero)); + temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(p1_16x8, zero), + _mm_unpacklo_epi8(q1_16x8, zero)); + temp1 = _mm_slli_epi16(temp1, 2); + temp1 = _mm_add_epi16(temp1, temp2); + temp1 = _mm_add_epi16(temp1, const_val4_8x16); + in_macro_16x8 = _mm_srai_epi16(temp1, 3); + + temp1 = _mm_subs_epi16(_mm_unpackhi_epi8(q0_16x8, zero), + 
_mm_unpackhi_epi8(p0_16x8, zero)); + temp2 = _mm_subs_epi16(_mm_unpackhi_epi8(p1_16x8, zero), + _mm_unpackhi_epi8(q1_16x8, zero)); + temp1 = _mm_slli_epi16(temp1, 2); + temp1 = _mm_add_epi16(temp1, temp2); + temp1 = _mm_add_epi16(temp1, const_val4_8x16); + in_macro_hi_16x8 = _mm_srai_epi16(temp1, 3); + + in_macro_16x8 = _mm_min_epi16(C_8x16, in_macro_16x8); //CLIP3 + in_macro_hi_16x8 = _mm_min_epi16(C_hi_8x16, in_macro_hi_16x8); //CLIP3 + C_8x16 = _mm_subs_epi16(zero, C_8x16); + C_hi_8x16 = _mm_subs_epi16(zero, C_hi_8x16); + in_macro_16x8 = _mm_max_epi16(C_8x16, in_macro_16x8); //CLIP3 + in_macro_hi_16x8 = _mm_max_epi16(C_hi_8x16, in_macro_hi_16x8); //CLIP3 + + temp1 = _mm_add_epi16(_mm_unpacklo_epi8(p0_16x8, zero), in_macro_16x8); + temp2 = _mm_add_epi16(_mm_unpackhi_epi8(p0_16x8, zero), in_macro_hi_16x8); + + temp1 = _mm_packus_epi16(temp1, temp2); + + temp1 = _mm_and_si128(temp1, flag1_16x8); + temp2 = _mm_and_si128(p0_16x8, + _mm_xor_si128(flag1_16x8, _mm_set1_epi16(0xFFFF))); + + temp1 = _mm_add_epi8(temp1, temp2); + + _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i16_posP0), temp1); + + temp1 = _mm_sub_epi16(_mm_unpacklo_epi8(q0_16x8, zero), in_macro_16x8); + temp2 = _mm_sub_epi16(_mm_unpackhi_epi8(q0_16x8, zero), in_macro_hi_16x8); + + temp1 = _mm_packus_epi16(temp1, temp2); + + temp1 = _mm_and_si128(temp1, flag1_16x8); + temp2 = _mm_and_si128(q0_16x8, + _mm_xor_si128(flag1_16x8, _mm_set1_epi16(0xFFFF))); + + temp1 = _mm_add_epi8(temp1, temp2); + _mm_storeu_si128((__m128i *)(pu1_src), temp1); + + //if(Ap < Beta) + temp1 = _mm_avg_epu16(_mm_unpacklo_epi8(q0_16x8, zero), + _mm_unpacklo_epi8(p0_16x8, zero)); + temp2 = _mm_slli_epi16(_mm_unpacklo_epi8(p1_16x8, zero), 1); + //temp2 = _mm_subs_epi16(zero,temp2); + temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(p2_16x8, zero), temp2); + temp2 = _mm_add_epi16(temp1, temp2); + in_macro_16x8 = _mm_srai_epi16(temp2, 1); + + temp1 = _mm_avg_epu16(_mm_unpackhi_epi8(q0_16x8, zero), + _mm_unpackhi_epi8(p0_16x8, zero)); + temp2 = 
_mm_slli_epi16(_mm_unpackhi_epi8(p1_16x8, zero), 1); + //temp2 = _mm_subs_epi16(zero,temp2); + temp2 = _mm_subs_epi16(_mm_unpackhi_epi8(p2_16x8, zero), temp2); + temp2 = _mm_add_epi16(temp1, temp2); + in_macro_hi_16x8 = _mm_srai_epi16(temp2, 1); + + in_macro_16x8 = _mm_min_epi16(C0_8x16, in_macro_16x8); //CLIP3 + in_macro_hi_16x8 = _mm_min_epi16(C0_hi_8x16, in_macro_hi_16x8); //CLIP3 + C0_8x16 = _mm_subs_epi16(zero, C0_8x16); + C0_hi_8x16 = _mm_subs_epi16(zero, C0_hi_8x16); + in_macro_16x8 = _mm_max_epi16(C0_8x16, in_macro_16x8); //CLIP3 + in_macro_hi_16x8 = _mm_max_epi16(C0_hi_8x16, in_macro_hi_16x8); //CLIP3 + + temp1 = _mm_add_epi16(_mm_unpacklo_epi8(p1_16x8, zero), in_macro_16x8); + temp2 = _mm_add_epi16(_mm_unpackhi_epi8(p1_16x8, zero), in_macro_hi_16x8); + + temp1 = _mm_packus_epi16(temp1, temp2); + + temp1 = _mm_and_si128(temp1, flag2_16x8); + temp2 = _mm_and_si128(p1_16x8, + _mm_xor_si128(flag2_16x8, _mm_set1_epi16(0xFFFF))); + temp1 = _mm_add_epi8(temp1, temp2); + _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i16_posP1), temp1); + + //if(Aq < Beta) + temp1 = _mm_avg_epu16(_mm_unpacklo_epi8(q0_16x8, zero), + _mm_unpacklo_epi8(p0_16x8, zero)); + temp2 = _mm_slli_epi16(_mm_unpacklo_epi8(q1_16x8, zero), 1); + //temp2 = _mm_slli_epi16 (temp2, 1); + temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(q2_16x8, zero), temp2); + temp2 = _mm_add_epi16(temp1, temp2); + in_macro_16x8 = _mm_srai_epi16(temp2, 1); + + temp1 = _mm_avg_epu16(_mm_unpackhi_epi8(q0_16x8, zero), + _mm_unpackhi_epi8(p0_16x8, zero)); + temp2 = _mm_slli_epi16(_mm_unpackhi_epi8(q1_16x8, zero), 1); + //temp2 = _mm_slli_epi16 (temp2, 1); + temp2 = _mm_subs_epi16(_mm_unpackhi_epi8(q2_16x8, zero), temp2); + temp2 = _mm_add_epi16(temp1, temp2); + in_macro_hi_16x8 = _mm_srai_epi16(temp2, 1); + + in_macro_16x8 = _mm_max_epi16(C0_8x16, in_macro_16x8); //CLIP3 + in_macro_hi_16x8 = _mm_max_epi16(C0_hi_8x16, in_macro_hi_16x8); //CLIP3 + C0_8x16 = _mm_subs_epi16(zero, C0_8x16); + C0_hi_8x16 = _mm_subs_epi16(zero, 
C0_hi_8x16); + in_macro_16x8 = _mm_min_epi16(C0_8x16, in_macro_16x8); //CLIP3 + in_macro_hi_16x8 = _mm_min_epi16(C0_hi_8x16, in_macro_hi_16x8); //CLIP3 + + temp1 = _mm_add_epi16(_mm_unpacklo_epi8(q1_16x8, zero), in_macro_16x8); + temp2 = _mm_add_epi16(_mm_unpackhi_epi8(q1_16x8, zero), in_macro_hi_16x8); + + temp1 = _mm_packus_epi16(temp1, temp2); + + temp1 = _mm_and_si128(temp1, flag3_16x8); + temp2 = _mm_and_si128(q1_16x8, + _mm_xor_si128(flag3_16x8, _mm_set1_epi16(0xFFFF))); + temp1 = _mm_add_epi8(temp1, temp2); + + _mm_storeu_si128((__m128i *)(pu1_src + i16_posQ1), temp1); + +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_deblk_luma_vert_bs4_mbaff_ssse3() */ +/* */ +/* Description : This function performs filtering of a luma block */ +/* vertical edge when boundary strength is set to 4. */ +/* */ +/* Inputs : pu1_src - pointer to the src sample q0 */ +/* src_strd - source stride */ +/* alpha - alpha value for the boundary */ +/* beta - beta value for the boundary */ +/* */ +/* Globals : None */ +/* */ +/* Processing : When the function is called twice, this operation is as */ +/* described in Sec. 8.7.2.3 under the title "Filtering */ +/* process for edges for bS equal to 4" in ITU T Rec H.264. 
*/
/*                                                                           */
/*  Outputs       : Filtered samples are written back in place               */
/*                                                                           */
/*  Returns       : None                                                     */
/*                                                                           */
/*  Issues        : None                                                     */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
/*         12 02 2015   Naveen Kumar P  Initial version                      */
/*                                                                           */
/*****************************************************************************/
/*
 * Implementation notes:
 *  - One call handles 8 rows. For every row the 8 bytes at
 *    pu1_src - 4 .. pu1_src + 3 (p3 p2 p1 p0 q0 q1 q2 q3) are loaded.
 *  - The rows are transposed so that each pX_16x8 / qX_16x8 vector holds
 *    one pixel column in its low 8 bytes (one byte per row); the upper
 *    8 bytes are zero.
 *  - All candidate results are computed for every row and the final value
 *    is chosen per row with byte masks.
 */
void ih264_deblk_luma_vert_bs4_mbaff_ssse3(UWORD8 *pu1_src,
                                           WORD32 src_strd,
                                           WORD32 alpha,
                                           WORD32 beta)
{
    __m128i zero = _mm_setzero_si128();
    __m128i q0_16x8, q1_16x8, q2_16x8, q3_16x8;
    __m128i p0_16x8, p1_16x8, p2_16x8, p3_16x8;
    __m128i q0_8x16, q1_8x16, q2_8x16, q3_8x16;
    __m128i p0_8x16, p1_8x16, p2_8x16, p3_8x16;
    __m128i q0_16x8_1;
    __m128i p0_16x8_1;
    __m128i q0_16x8_2, q1_16x8_2, q2_16x8_2;
    __m128i p0_16x8_2, p1_16x8_2, p2_16x8_2;
    __m128i temp1, temp2, temp3, temp4, temp5, temp6;
    __m128i Alpha_8x16, Beta_8x16;
    __m128i flag1_16x8, flag2_16x8, flag3_16x8, flag4_16x8;
    __m128i const_val2_16x8 = _mm_set1_epi16(2);
    __m128i line1, line2, line3, line4, line5, line6, line7, line8;

    Alpha_8x16 = _mm_set1_epi16(alpha);
    Beta_8x16 = _mm_set1_epi16(beta);

    // Load 8 rows of 8 pixels, starting 4 pixels to the left of the edge
    line1 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd));
    line2 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd));
    line3 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd));
    line4 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd));
    line5 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd));
    line6 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd));
    line7 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd));
    line8 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd));

    // 8x8 byte transpose: interleave bytes, then words, then dwords
    temp1 = _mm_unpacklo_epi8(line1, line2);
    temp2 = _mm_unpacklo_epi8(line3, line4);
    temp3 = _mm_unpacklo_epi8(line5, line6);
    temp4 = _mm_unpacklo_epi8(line7, line8);

    line1 = _mm_unpacklo_epi16(temp1, temp2);
    line2 = _mm_unpackhi_epi16(temp1, temp2);
    line3 = _mm_unpacklo_epi16(temp3, temp4);
    line4 = _mm_unpackhi_epi16(temp3, temp4);

    // Each result holds two columns here: p1_8x16 = p3|p2, p0_8x16 = p1|p0,
    // q0_8x16 = q0|q1, q1_8x16 = q2|q3 (the names are reused further below)
    p1_8x16 = _mm_unpacklo_epi32(line1, line3);
    p0_8x16 = _mm_unpackhi_epi32(line1, line3);
    q0_8x16 = _mm_unpacklo_epi32(line2, line4);
    q1_8x16 = _mm_unpackhi_epi32(line2, line4);

    // Split into one column per vector (low 8 bytes), upper 8 bytes zeroed
    p3_16x8 = _mm_unpacklo_epi64(p1_8x16, zero);
    p2_16x8 = _mm_unpackhi_epi64(p1_8x16, zero);
    q2_16x8 = _mm_unpacklo_epi64(q1_8x16, zero);
    q3_16x8 = _mm_unpackhi_epi64(q1_8x16, zero);
    p1_16x8 = _mm_unpacklo_epi64(p0_8x16, zero);
    p0_16x8 = _mm_unpackhi_epi64(p0_8x16, zero);
    q0_16x8 = _mm_unpacklo_epi64(q0_8x16, zero);
    q1_16x8 = _mm_unpackhi_epi64(q0_8x16, zero);

    //Cond1 (ABS(p0 - q0) < alpha)
    // One of the two saturating differences is 0, so their sum is ABS()
    temp1 = _mm_subs_epu8(q0_16x8, p0_16x8);
    temp2 = _mm_subs_epu8(p0_16x8, q0_16x8);
    temp1 = _mm_add_epi8(temp1, temp2);

    temp2 = _mm_unpacklo_epi8(temp1, zero);
    temp1 = _mm_unpackhi_epi8(temp1, zero);

    temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
    temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);

    flag1_16x8 = _mm_packs_epi16(temp2, temp1);

    //Cond2 (ABS(q1 - q0) < beta)
    temp1 = _mm_subs_epu8(q0_16x8, q1_16x8);
    temp2 = _mm_subs_epu8(q1_16x8, q0_16x8);
    temp1 = _mm_add_epi8(temp1, temp2);

    temp2 = _mm_unpacklo_epi8(temp1, zero);
    temp1 = _mm_unpackhi_epi8(temp1, zero);

    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);

    flag2_16x8 = _mm_packs_epi16(temp2, temp1);

    flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);

    //Cond3 (ABS(p1 - p0) < beta)
    temp1 = _mm_subs_epu8(p0_16x8, p1_16x8);
    temp2 = _mm_subs_epu8(p1_16x8, p0_16x8);
    temp1 = _mm_add_epi8(temp1, temp2);

    temp2 = _mm_unpacklo_epi8(temp1, zero);
    temp1 = _mm_unpackhi_epi8(temp1, zero);

    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);

    flag2_16x8 = _mm_packs_epi16(temp2, temp1);

    // flag1: (ABS(p0 - q0) < alpha) && (ABS(q1 - q0) < beta) && (ABS(p1 - p0) < beta)
    flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);

    // (ABS(p0 - q0) < ((alpha >> 2) + 2))
    temp1 = _mm_subs_epu8(p0_16x8, q0_16x8);
    temp2 = _mm_subs_epu8(q0_16x8, p0_16x8);
    temp1 = _mm_add_epi8(temp1, temp2);
    Alpha_8x16 = _mm_srai_epi16(Alpha_8x16, 2);
    Alpha_8x16 = _mm_add_epi16(Alpha_8x16, const_val2_16x8);

    temp2 = _mm_unpacklo_epi8(temp1, zero);
    temp1 = _mm_unpackhi_epi8(temp1, zero);
    temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
    temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);

    // flag2: flag1 && (ABS(p0 - q0) < ((alpha >> 2) + 2))
    flag2_16x8 = _mm_packs_epi16(temp2, temp1);
    flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);

    // (ABS(p2 - p0) < beta)
    temp1 = _mm_subs_epu8(p0_16x8, p2_16x8);
    temp2 = _mm_subs_epu8(p2_16x8, p0_16x8);
    temp1 = _mm_add_epi8(temp1, temp2);

    temp2 = _mm_unpacklo_epi8(temp1, zero);
    temp1 = _mm_unpackhi_epi8(temp1, zero);
    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);

    // flag3: flag2 && (ABS(p2 - p0) < beta) -> strong filter on the p side
    flag3_16x8 = _mm_packs_epi16(temp2, temp1);
    flag3_16x8 = _mm_and_si128(flag3_16x8, flag2_16x8);

    // (ABS(q2 - q0) < beta)
    temp1 = _mm_subs_epu8(q0_16x8, q2_16x8);
    temp2 = _mm_subs_epu8(q2_16x8, q0_16x8);
    temp1 = _mm_add_epi8(temp1, temp2);

    temp2 = _mm_unpacklo_epi8(temp1, zero);
    temp1 = _mm_unpackhi_epi8(temp1, zero);
    temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
    temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);

    // flag4: flag2 && (ABS(q2 - q0) < beta) -> strong filter on the q side
    flag4_16x8 = _mm_packs_epi16(temp2, temp1);
    flag4_16x8 = _mm_and_si128(flag4_16x8, flag2_16x8);

    // Widen the 8 pixels of every column to 16 bit for the filter arithmetic
    p3_8x16 = _mm_unpacklo_epi8(p3_16x8, zero);
    p2_8x16 = _mm_unpacklo_epi8(p2_16x8, zero);
    p1_8x16 = _mm_unpacklo_epi8(p1_16x8, zero);
    p0_8x16 = _mm_unpacklo_epi8(p0_16x8, zero);
    q0_8x16 = _mm_unpacklo_epi8(q0_16x8, zero);
    q1_8x16 = _mm_unpacklo_epi8(q1_16x8, zero);
    q2_8x16 = _mm_unpacklo_epi8(q2_16x8, zero);
    q3_8x16 = _mm_unpacklo_epi8(q3_16x8, zero);

    // p0_1 and q0_1
    // p0_1 = (2 * p1 + p0 + q1 + 2) >> 2, q0_1 = (2 * q1 + q0 + p1 + 2) >> 2
    temp1 = _mm_add_epi16(p0_8x16, q1_8x16);
    temp2 = _mm_add_epi16(p1_8x16, q0_8x16);
    temp5 = _mm_add_epi16(temp1, const_val2_16x8);
    temp6 = _mm_add_epi16(temp2, const_val2_16x8);
    temp3 = _mm_slli_epi16(p1_8x16, 1);
    temp4 = _mm_slli_epi16(q1_8x16, 1);
    temp1 = _mm_add_epi16(temp5, temp3);
    temp2 = _mm_add_epi16(temp6, temp4);
    p0_16x8_1 = _mm_srai_epi16(temp1, 2);
    q0_16x8_1 = _mm_srai_epi16(temp2, 2);

    // p1_2 and q1_2
    // p1_2 = (p2 + p1 + p0 + q0 + 2) >> 2, q1_2 = (q2 + q1 + q0 + p0 + 2) >> 2
    temp6 = _mm_add_epi16(temp6, p0_8x16);
    temp5 = _mm_add_epi16(temp5, q0_8x16);
    temp1 = _mm_add_epi16(temp6, p2_8x16);
    temp2 = _mm_add_epi16(temp5, q2_8x16);
    p1_16x8_2 = _mm_srai_epi16(temp1, 2);
    q1_16x8_2 = _mm_srai_epi16(temp2, 2);

    // p0_2 and q0_2
    // p0_2 = (p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3
    // q0_2 = (q2 + 2 * q1 + 2 * q0 + 2 * p0 + p1 + 4) >> 3
    temp1 = _mm_add_epi16(temp3, p2_8x16);
    temp2 = _mm_add_epi16(temp4, q2_8x16);
    temp1 = _mm_add_epi16(temp1, q1_8x16);
    temp2 = _mm_add_epi16(temp2, p1_8x16);
    temp3 = _mm_add_epi16(p0_8x16, q0_8x16);
    temp3 = _mm_slli_epi16(temp3, 1);
    temp1 = _mm_add_epi16(temp1, temp3);
    temp2 = _mm_add_epi16(temp2, temp3);
    temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4));
    temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4));
    p0_16x8_2 = _mm_srai_epi16(temp1, 3);
    q0_16x8_2 = _mm_srai_epi16(temp2, 3);

    // p2_2 and q2_2
    // p2_2 = (2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3
    // q2_2 = (2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3
    // temp6 / temp5 already hold p1 + p0 + q0 + 2 and q1 + q0 + p0 + 2
    temp1 = _mm_add_epi16(temp6, const_val2_16x8);
    temp2 = _mm_add_epi16(temp5, const_val2_16x8);
    temp3 = _mm_slli_epi16(p2_8x16, 1);
    temp4 = _mm_slli_epi16(q2_8x16, 1);
    temp3 = _mm_add_epi16(p2_8x16, temp3);
    temp4 = _mm_add_epi16(q2_8x16, temp4);
    temp5 = _mm_slli_epi16(p3_8x16, 1);
    temp6 = _mm_slli_epi16(q3_8x16, 1);
    temp1 = _mm_add_epi16(temp1, temp3);
    temp2 = _mm_add_epi16(temp2, temp4);
    temp1 = _mm_add_epi16(temp1, temp5);
    temp2 = _mm_add_epi16(temp2, temp6);
    p2_16x8_2 = _mm_srai_epi16(temp1, 3);
    q2_16x8_2 = _mm_srai_epi16(temp2, 3);

    // Narrow all candidates back to 8 bit (low 8 bytes, upper bytes zero)
    // p0_1 and q0_1
    p0_16x8_1 = _mm_packus_epi16(p0_16x8_1, zero);
    q0_16x8_1 = _mm_packus_epi16(q0_16x8_1, zero);

    // p1_2 and q1_2
    p1_16x8_2 = _mm_packus_epi16(p1_16x8_2, zero);
    q1_16x8_2 = _mm_packus_epi16(q1_16x8_2, zero);

    // p0_2 and q0_2
    p0_16x8_2 = _mm_packus_epi16(p0_16x8_2, zero);
    q0_16x8_2 = _mm_packus_epi16(q0_16x8_2, zero);

    // p2_2 and q2_2
    p2_16x8_2 = _mm_packus_epi16(p2_16x8_2, zero);
    q2_16x8_2 = _mm_packus_epi16(q2_16x8_2, zero);

    // p0 and q0: take the 3-tap result wherever flag1 is set
    p0_16x8 = _mm_and_si128(p0_16x8,
                            _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF)));
    p0_16x8_1 = _mm_and_si128(p0_16x8_1, flag1_16x8);
    p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_1);
    q0_16x8 = _mm_and_si128(q0_16x8,
                            _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF)));
    q0_16x8_1 = _mm_and_si128(q0_16x8_1, flag1_16x8);
    q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_1);

    // p0 and q0: override with the strong-filter result where flag3 / flag4 is set
    p0_16x8 = _mm_and_si128(p0_16x8,
                            _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
    p0_16x8_2 = _mm_and_si128(p0_16x8_2, flag3_16x8);
    p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_2);
    q0_16x8 = _mm_and_si128(q0_16x8,
                            _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
    q0_16x8_2 = _mm_and_si128(q0_16x8_2, flag4_16x8);
    q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_2);

    // p1 and q1
    p1_16x8 = _mm_and_si128(p1_16x8,
                            _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
    p1_16x8_2 = _mm_and_si128(p1_16x8_2, flag3_16x8);
    p1_16x8 = _mm_add_epi8(p1_16x8, p1_16x8_2);
    q1_16x8 = _mm_and_si128(q1_16x8,
                            _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
    q1_16x8_2 = _mm_and_si128(q1_16x8_2, flag4_16x8);
    q1_16x8 = _mm_add_epi8(q1_16x8, q1_16x8_2);

    // p2 and q2
    p2_16x8 = _mm_and_si128(p2_16x8,
                            _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
    p2_16x8_2 = _mm_and_si128(p2_16x8_2, flag3_16x8);
    p2_16x8 = _mm_add_epi8(p2_16x8, p2_16x8_2);
    q2_16x8 = _mm_and_si128(q2_16x8,
                            _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
    q2_16x8_2 = _mm_and_si128(q2_16x8_2, flag4_16x8);
    q2_16x8 = _mm_add_epi8(q2_16x8, q2_16x8_2);

    // Inverse transpose: rebuild the 8 rows from the 8 columns
    temp1 = _mm_unpacklo_epi8(p3_16x8, p2_16x8);
    temp2 = _mm_unpacklo_epi8(p1_16x8, p0_16x8);
    temp3 = _mm_unpacklo_epi8(q0_16x8, q1_16x8);
    temp4 = _mm_unpacklo_epi8(q2_16x8, q3_16x8);

    // p3_8x16 / p2_8x16: left halves of rows 0-3 / 4-7,
    // q2_8x16 / q3_8x16: right halves of rows 0-3 / 4-7
    p3_8x16 = _mm_unpacklo_epi16(temp1, temp2);
    p2_8x16 = _mm_unpackhi_epi16(temp1, temp2);
    q2_8x16 = _mm_unpacklo_epi16(temp3, temp4);
    q3_8x16 = _mm_unpackhi_epi16(temp3, temp4);

    // Every unpack yields two rows; the byte shift moves the second one down
    line1 = _mm_unpacklo_epi32(p3_8x16, q2_8x16);
    line2 = _mm_srli_si128(line1, 8);
    line3 = _mm_unpackhi_epi32(p3_8x16, q2_8x16);
    line4 = _mm_srli_si128(line3, 8);
    line5 = _mm_unpacklo_epi32(p2_8x16, q3_8x16);
    line6 = _mm_srli_si128(line5, 8);
    line7 = _mm_unpackhi_epi32(p2_8x16, q3_8x16);
    line8 = _mm_srli_si128(line7, 8);

    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd), line1);
    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd), line2);
    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd), line3);
    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd), line4);
    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd), line5);
    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd), line6);
    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd), line7);
    _mm_storel_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd), line8);

}

/*****************************************************************************/
/*                                                                           */
/*  Function Name : ih264_deblk_luma_vert_bslt4_mbaff_ssse3()                */
/*                                                                           */
/*  Description   : This function performs filtering of a luma block         */
/*                  vertical edge when boundary strength is less than 4.     */
/*                                                                           */
/*  Inputs        : pu1_src       - pointer to the src sample q0             */
/*                  src_strd      - source stride                            */
/*                  alpha         - alpha value for the boundary             */
/*                  beta          - beta value for the boundary              */
/*                  u4_bs         - packed Boundary strength array           */
/*                  pu1_cliptab   - tc0_table                                */
/*                                                                           */
/*  Globals       : None                                                     */
/*                                                                           */
/*  Processing    : When the function is called twice, this operation is as  */
/*                  described in Sec.
8.7.2.3 under the title "Filtering */ +/* process for edges for bS less than 4" in ITU T Rec H.264.*/ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 12 02 2015 Naveen Kumar P Initial version */ +/* */ +/*****************************************************************************/ +void ih264_deblk_luma_vert_bslt4_mbaff_ssse3(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 alpha, + WORD32 beta, + UWORD32 u4_bs, + const UWORD8 *pu1_cliptab) +{ + __m128i zero = _mm_setzero_si128(); + __m128i bs_flag_16x8b, C0_16x8, C0_8x16, C_8x16; + __m128i q0_16x8, q1_16x8, q2_16x8, q3_16x8; + __m128i p0_16x8, p1_16x8, p2_16x8, p3_16x8; + __m128i temp1, temp2, temp3, temp4; + __m128i Alpha_8x16, Beta_8x16, flag1_16x8, flag2_16x8, flag3_16x8; + __m128i in_macro_16x8; + __m128i const_val4_8x16; + UWORD8 u1_Bs0, u1_Bs1, u1_Bs2, u1_Bs3; + UWORD8 clip0, clip1, clip2, clip3; + __m128i line1, line2, line3, line4, line5, line6, line7, line8; + __m128i q0_16x8_1, q1_16x8_1, q0_16x8_2; + __m128i p0_16x8_1, p1_16x8_1, p0_16x8_2; + + line1 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd)); + line2 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd)); + line3 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd)); + line4 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd)); + line5 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd)); + line6 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd)); + line7 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd)); + line8 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd)); + + temp1 = _mm_unpacklo_epi8(line1, line2); + temp2 = _mm_unpacklo_epi8(line3, line4); + temp3 = _mm_unpacklo_epi8(line5, line6); + temp4 = _mm_unpacklo_epi8(line7, line8); + + line1 = _mm_unpacklo_epi16(temp1, temp2); + line2 = _mm_unpackhi_epi16(temp1, temp2); + line3 = 
_mm_unpacklo_epi16(temp3, temp4); + line4 = _mm_unpackhi_epi16(temp3, temp4); + + temp1 = _mm_unpacklo_epi32(line1, line3); + temp2 = _mm_unpackhi_epi32(line1, line3); + temp3 = _mm_unpacklo_epi32(line2, line4); + temp4 = _mm_unpackhi_epi32(line2, line4); + + p3_16x8 = _mm_unpacklo_epi64(temp1, zero); + p2_16x8 = _mm_unpackhi_epi64(temp1, zero); + q2_16x8 = _mm_unpacklo_epi64(temp4, zero); + q3_16x8 = _mm_unpackhi_epi64(temp4, zero); + p1_16x8 = _mm_unpacklo_epi64(temp2, zero); + p0_16x8 = _mm_unpackhi_epi64(temp2, zero); + q0_16x8 = _mm_unpacklo_epi64(temp3, zero); + q1_16x8 = _mm_unpackhi_epi64(temp3, zero); + + u1_Bs0 = (u4_bs >> 24) & 0xff; + u1_Bs1 = (u4_bs >> 16) & 0xff; + u1_Bs2 = (u4_bs >> 8) & 0xff; + u1_Bs3 = (u4_bs >> 0) & 0xff; + clip0 = pu1_cliptab[u1_Bs0]; + clip1 = pu1_cliptab[u1_Bs1]; + clip2 = pu1_cliptab[u1_Bs2]; + clip3 = pu1_cliptab[u1_Bs3]; + + Alpha_8x16 = _mm_set1_epi16(alpha); + Beta_8x16 = _mm_set1_epi16(beta); + + bs_flag_16x8b = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, u1_Bs3, u1_Bs3, u1_Bs2, + u1_Bs2, u1_Bs1, u1_Bs1, u1_Bs0, u1_Bs0); + + C0_16x8 = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, clip3, clip3, clip2, clip2, + clip1, clip1, clip0, clip0); + + bs_flag_16x8b = _mm_cmpeq_epi8(bs_flag_16x8b, zero); + bs_flag_16x8b = _mm_xor_si128(bs_flag_16x8b, _mm_set1_epi8(0xFF)); //Invert for required mask + C0_8x16 = _mm_unpacklo_epi8(C0_16x8, zero); + + //Cond1 (ABS(p0 - q0) < alpha) + temp1 = _mm_subs_epu8(q0_16x8, p0_16x8); + temp2 = _mm_subs_epu8(p0_16x8, q0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2); + + flag1_16x8 = _mm_packs_epi16(temp2, zero); + flag1_16x8 = _mm_and_si128(flag1_16x8, bs_flag_16x8b); + + //Cond2 (ABS(q1 - q0) < beta) + temp1 = _mm_subs_epu8(q0_16x8, q1_16x8); + temp2 = _mm_subs_epu8(q1_16x8, q0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + + flag2_16x8 = 
_mm_packs_epi16(temp2, zero); + flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); + + //Cond3 (ABS(p1 - p0) < beta) + temp1 = _mm_subs_epu8(p0_16x8, p1_16x8); + temp2 = _mm_subs_epu8(p1_16x8, p0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + + flag2_16x8 = _mm_packs_epi16(temp2, zero); + + // !((ABS(p0 - q0) < alpha) || (ABS(q1 - q0) < beta) || (ABS(p1 - p0) < beta)) + flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); + + // (ABS(p2 - p0) < beta) + temp1 = _mm_subs_epu8(p0_16x8, p2_16x8); + temp2 = _mm_subs_epu8(p2_16x8, p0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + + flag2_16x8 = _mm_packs_epi16(temp2, zero); + flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); + + temp2 = _mm_subs_epi16(zero, temp2); + + C_8x16 = _mm_add_epi16(C0_8x16, temp2); + + // (ABS(q2 - q0) < beta) + temp1 = _mm_subs_epu8(q0_16x8, q2_16x8); + temp2 = _mm_subs_epu8(q2_16x8, q0_16x8); + temp1 = _mm_add_epi8(temp1, temp2); + + temp2 = _mm_unpacklo_epi8(temp1, zero); + temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2); + + flag3_16x8 = _mm_packs_epi16(temp2, zero); + flag3_16x8 = _mm_and_si128(flag1_16x8, flag3_16x8); + + temp2 = _mm_subs_epi16(zero, temp2); + + C_8x16 = _mm_add_epi16(C_8x16, temp2); + + const_val4_8x16 = _mm_set1_epi16(4); + temp1 = _mm_subs_epi16(_mm_unpacklo_epi8(q0_16x8, zero), + _mm_unpacklo_epi8(p0_16x8, zero)); + temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(p1_16x8, zero), + _mm_unpacklo_epi8(q1_16x8, zero)); + temp1 = _mm_slli_epi16(temp1, 2); + temp1 = _mm_add_epi16(temp1, temp2); + temp1 = _mm_add_epi16(temp1, const_val4_8x16); + in_macro_16x8 = _mm_srai_epi16(temp1, 3); + + in_macro_16x8 = _mm_min_epi16(C_8x16, in_macro_16x8); //CLIP3 + C_8x16 = _mm_subs_epi16(zero, C_8x16); + in_macro_16x8 = _mm_max_epi16(C_8x16, in_macro_16x8); //CLIP3 + + // p0 + temp1 = 
_mm_add_epi16(_mm_unpacklo_epi8(p0_16x8, zero), in_macro_16x8); + + temp1 = _mm_packus_epi16(temp1, zero); + + p0_16x8_1 = _mm_and_si128(temp1, flag1_16x8); + p0_16x8_2 = _mm_and_si128( + p0_16x8, _mm_xor_si128(flag1_16x8, _mm_set1_epi16(0xFFFF))); + + p0_16x8_1 = _mm_add_epi8(p0_16x8_1, p0_16x8_2); + + // q0 + temp1 = _mm_sub_epi16(_mm_unpacklo_epi8(q0_16x8, zero), in_macro_16x8); + + temp1 = _mm_packus_epi16(temp1, zero); + + q0_16x8_1 = _mm_and_si128(temp1, flag1_16x8); + q0_16x8_2 = _mm_and_si128( + q0_16x8, _mm_xor_si128(flag1_16x8, _mm_set1_epi16(0xFFFF))); + + q0_16x8_1 = _mm_add_epi8(q0_16x8_1, q0_16x8_2); + + //if(Ap < Beta) + temp1 = _mm_avg_epu16(_mm_unpacklo_epi8(q0_16x8, zero), + _mm_unpacklo_epi8(p0_16x8, zero)); + temp2 = _mm_slli_epi16(_mm_unpacklo_epi8(p1_16x8, zero), 1); + //temp2 = _mm_subs_epi16(zero,temp2); + temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(p2_16x8, zero), temp2); + temp2 = _mm_add_epi16(temp1, temp2); + in_macro_16x8 = _mm_srai_epi16(temp2, 1); + + in_macro_16x8 = _mm_min_epi16(C0_8x16, in_macro_16x8); //CLIP3 + C0_8x16 = _mm_subs_epi16(zero, C0_8x16); + in_macro_16x8 = _mm_max_epi16(C0_8x16, in_macro_16x8); //CLIP3 + + // p1 + temp1 = _mm_add_epi16(_mm_unpacklo_epi8(p1_16x8, zero), in_macro_16x8); + + temp1 = _mm_packus_epi16(temp1, zero); + + p1_16x8_1 = _mm_and_si128(temp1, flag2_16x8); + p1_16x8 = _mm_and_si128(p1_16x8, + _mm_xor_si128(flag2_16x8, _mm_set1_epi16(0xFFFF))); + p1_16x8 = _mm_add_epi8(p1_16x8, p1_16x8_1); + + //if(Aq < Beta) + temp1 = _mm_avg_epu16(_mm_unpacklo_epi8(q0_16x8, zero), + _mm_unpacklo_epi8(p0_16x8, zero)); + temp2 = _mm_slli_epi16(_mm_unpacklo_epi8(q1_16x8, zero), 1); + //temp2 = _mm_slli_epi16 (temp2, 1); + temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(q2_16x8, zero), temp2); + temp2 = _mm_add_epi16(temp1, temp2); + in_macro_16x8 = _mm_srai_epi16(temp2, 1); + + in_macro_16x8 = _mm_max_epi16(C0_8x16, in_macro_16x8); //CLIP3 + C0_8x16 = _mm_subs_epi16(zero, C0_8x16); + in_macro_16x8 = _mm_min_epi16(C0_8x16, 
in_macro_16x8); //CLIP3 + + temp1 = _mm_add_epi16(_mm_unpacklo_epi8(q1_16x8, zero), in_macro_16x8); + + // q1 + temp1 = _mm_packus_epi16(temp1, zero); + + q1_16x8_1 = _mm_and_si128(temp1, flag3_16x8); + q1_16x8 = _mm_and_si128(q1_16x8, + _mm_xor_si128(flag3_16x8, _mm_set1_epi16(0xFFFF))); + q1_16x8 = _mm_add_epi8(q1_16x8, q1_16x8_1); + + temp1 = _mm_unpacklo_epi8(p3_16x8, p2_16x8); + temp2 = _mm_unpacklo_epi8(p1_16x8, p0_16x8_1); + temp3 = _mm_unpacklo_epi8(q0_16x8_1, q1_16x8); + temp4 = _mm_unpacklo_epi8(q2_16x8, q3_16x8); + + line7 = _mm_unpacklo_epi16(temp1, temp2); + temp1 = _mm_unpackhi_epi16(temp1, temp2); + line8 = _mm_unpacklo_epi16(temp3, temp4); + temp2 = _mm_unpackhi_epi16(temp3, temp4); + + line1 = _mm_unpacklo_epi32(line7, line8); + line2 = _mm_srli_si128(line1, 8); + line3 = _mm_unpackhi_epi32(line7, line8); + line4 = _mm_srli_si128(line3, 8); + line5 = _mm_unpacklo_epi32(temp1, temp2); + line6 = _mm_srli_si128(line5, 8); + line7 = _mm_unpackhi_epi32(temp1, temp2); + line8 = _mm_srli_si128(line7, 8); + + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd), line1); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd), line2); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd), line3); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd), line4); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd), line5); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd), line6); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd), line7); + _mm_storel_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd), line8); +} + diff --git a/common/x86/ih264_ihadamard_scaling_sse42.c b/common/x86/ih264_ihadamard_scaling_sse42.c new file mode 100755 index 0000000..895291b --- /dev/null +++ b/common/x86/ih264_ihadamard_scaling_sse42.c @@ -0,0 +1,238 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 
2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_ihadamard_scaling_sse42.c + * + * @brief + * Contains definition of functions for h264 inverse hadamard 4x4 transform and scaling + * + * @author + * Mohit + * + * @par List of Functions: + * - ih264_ihadamard_scaling_4x4_sse42() + * - ih264_ihadamard_scaling_2x2_uv_sse42() + * + * @remarks + * + ******************************************************************************* + */ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264_trans_macros.h" +#include "ih264_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include <immintrin.h> + +/* + ******************************************************************************** + * + * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients + * of a 16x16 intra prediction macroblock, and then performs scaling. 
+ * prediction buffer + * + * @par Description: + * The DC coefficients pass through a 2-stage inverse hadamard transform. + * This inverse transformed content is scaled to based on Qp value. + * + * @param[in] pi2_src + * input 4x4 block of DC coefficients + * + * @param[out] pi2_out + * output 4x4 block + * + * @param[in] pu2_iscal_mat + * pointer to scaling list + * + * @param[in] pu2_weigh_mat + * pointer to weight matrix + * + * @param[in] u4_qp_div_6 + * Floor (qp/6) + * + * @param[in] pi4_tmp + * temporary buffer of size 1*16 + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void ih264_ihadamard_scaling_4x4_sse42(WORD16* pi2_src, WORD16* pi2_out, + const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, WORD32* pi4_tmp) { + __m128i src_r0_r1, src_r2_r3; + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i temp0, temp1, temp2, temp3; + __m128i add_rshift = _mm_set1_epi32((1 << (5 - u4_qp_div_6))); + __m128i mult_val = _mm_set1_epi32(pu2_iscal_mat[0] * pu2_weigh_mat[0]); + + src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row + src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row + //sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r0_r1); + src_r0 = _mm_cvtepi16_epi32(src_r0_r1); + src_r0_r1 = _mm_srli_si128(src_r0_r1, 8); + src_r1 = _mm_cvtepi16_epi32(src_r0_r1); + + src_r2 = _mm_cvtepi16_epi32(src_r2_r3); + src_r2_r3 = _mm_srli_si128(src_r2_r3, 8); + src_r3 = _mm_cvtepi16_epi32(src_r2_r3); + + /* Perform Inverse transform */ + /*-------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + temp0 = 
_mm_unpacklo_epi32(src_r0, src_r1); //a0 b0 a1 b1 + temp2 = _mm_unpacklo_epi32(src_r2, src_r3); //c0 d0 c1 d1 + temp1 = _mm_unpackhi_epi32(src_r0, src_r1); //a2 b2 a3 b3 + temp3 = _mm_unpackhi_epi32(src_r2, src_r3); //c2 d2 c3 d3 + src_r0 = _mm_unpacklo_epi64(temp0, temp2); //a0 b0 c0 d0 + src_r1 = _mm_unpackhi_epi64(temp0, temp2); //a1 b1 c1 d1 + src_r2 = _mm_unpacklo_epi64(temp1, temp3); //a2 b2 c2 d2 + src_r3 = _mm_unpackhi_epi64(temp1, temp3); //a3 b3 c3 d3 + + temp0 = _mm_add_epi32(src_r0, src_r3); + temp1 = _mm_add_epi32(src_r1, src_r2); + temp2 = _mm_sub_epi32(src_r1, src_r2); + temp3 = _mm_sub_epi32(src_r0, src_r3); + + src_r0 = _mm_add_epi32(temp0, temp1); + src_r1 = _mm_add_epi32(temp2, temp3); + src_r2 = _mm_sub_epi32(temp0, temp1); + src_r3 = _mm_sub_epi32(temp3, temp2); + + /*-------------------------------------------------------------*/ + /* IDCT [ Vertical transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + temp0 = _mm_unpacklo_epi32(src_r0, src_r1); //a0 a1 b0 b1 + temp2 = _mm_unpacklo_epi32(src_r2, src_r3); //a2 a3 b2 b3 + temp1 = _mm_unpackhi_epi32(src_r0, src_r1); //c0 c1 d0 d1 + temp3 = _mm_unpackhi_epi32(src_r2, src_r3); //c2 c3 d2 d3 + src_r0 = _mm_unpacklo_epi64(temp0, temp2); //a0 a1 a2 a3 + src_r1 = _mm_unpackhi_epi64(temp0, temp2); //b0 b1 b2 b3 + src_r2 = _mm_unpacklo_epi64(temp1, temp3); //c0 c1 c2 c3 + src_r3 = _mm_unpackhi_epi64(temp1, temp3); //d0 d1 d2 d3 + + temp0 = _mm_add_epi32(src_r0, src_r3); + temp1 = _mm_add_epi32(src_r1, src_r2); + temp2 = _mm_sub_epi32(src_r1, src_r2); + temp3 = _mm_sub_epi32(src_r0, src_r3); + + src_r0 = _mm_add_epi32(temp0, temp1); + src_r1 = _mm_add_epi32(temp2, temp3); + src_r2 = _mm_sub_epi32(temp0, temp1); + src_r3 = _mm_sub_epi32(temp3, temp2); + + src_r0 = _mm_mullo_epi32(src_r0, mult_val); + src_r1 = _mm_mullo_epi32(src_r1, mult_val); + src_r2 = 
_mm_mullo_epi32(src_r2, mult_val); + src_r3 = _mm_mullo_epi32(src_r3, mult_val); + + //Scaling + if (u4_qp_div_6 >= 6) { + src_r0 = _mm_slli_epi32(src_r0, u4_qp_div_6 - 6); + src_r1 = _mm_slli_epi32(src_r1, u4_qp_div_6 - 6); + src_r2 = _mm_slli_epi32(src_r2, u4_qp_div_6 - 6); + src_r3 = _mm_slli_epi32(src_r3, u4_qp_div_6 - 6); + } else { + temp0 = _mm_add_epi32(src_r0, add_rshift); + temp1 = _mm_add_epi32(src_r1, add_rshift); + temp2 = _mm_add_epi32(src_r2, add_rshift); + temp3 = _mm_add_epi32(src_r3, add_rshift); + src_r0 = _mm_srai_epi32(temp0, 6 - u4_qp_div_6); + src_r1 = _mm_srai_epi32(temp1, 6 - u4_qp_div_6); + src_r2 = _mm_srai_epi32(temp2, 6 - u4_qp_div_6); + src_r3 = _mm_srai_epi32(temp3, 6 - u4_qp_div_6); + } + src_r0_r1 = _mm_packs_epi32(src_r0, src_r1); + src_r2_r3 = _mm_packs_epi32(src_r2, src_r3); + + _mm_storeu_si128((__m128i *) (&pi2_out[0]), src_r0_r1); + _mm_storeu_si128((__m128i *) (&pi2_out[8]), src_r2_r3); +} + +void ih264_ihadamard_scaling_2x2_uv_sse42(WORD16* pi2_src, + WORD16* pi2_out, + const UWORD16 *pu2_iscal_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, + WORD32* pi4_tmp) +{ + UNUSED(pi4_tmp); + __m128i src, plane_0, plane_1, temp0, temp1, sign_reg; + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i scale_val = _mm_set1_epi32((WORD32)(pu2_iscal_mat[0] * pu2_weigh_mat[0])); + src = _mm_loadu_si128((__m128i *) pi2_src); //a0 a1 a2 a3 b0 b1 b2 b3 + sign_reg = _mm_cmpgt_epi16(zero_8x16b, src); + plane_0 = _mm_unpacklo_epi16(src, sign_reg); //a0 a1 a2 a3 -- 32 bits + plane_1 = _mm_unpackhi_epi16(src, sign_reg); //b0 b1 b2 b3 -- 32 bits + + temp0 = _mm_hadd_epi32(plane_0, plane_1); //a0+a1 a2+a3 b0+b1 b2+b3 + temp1 = _mm_hsub_epi32(plane_0, plane_1); //a0-a1 a2-a3 b0-b1 b2-b3 + plane_0 = _mm_hadd_epi32(temp0, temp1); //a0+a1+a2+a3 b0+b1+b2+b3 a0-a1+a2-a3 b0-b1+b2-b3 + plane_1 = _mm_hsub_epi32(temp0, temp1); //a0+a1-a2-a3 b0+b1-b2-b3 a0-a1-a2+a3 b0-b1-b2+b3 + temp0 = _mm_unpacklo_epi32(plane_0, plane_1); //a0+a1+a2+a3 
a0+a1-a2-a3 b0+b1+b2+b3 b0+b1-b2-b3 + temp1 = _mm_unpackhi_epi32(plane_0, plane_1); //a0-a1+a2-a3 a0-a1-a2+a3 b0-b1+b2-b3 b0-b1-b2+b3 + + plane_0 = _mm_unpacklo_epi64(temp0, temp1); //a0+a1+a2+a3 a0+a1-a2-a3 a0-a1+a2-a3 a0-a1-a2+a3 + plane_1 = _mm_unpackhi_epi64(temp0, temp1); //b0+b1+b2+b3 b0+b1-b2-b3 b0-b1+b2-b3 b0-b1-b2+b3 + + plane_0 = _mm_shuffle_epi32(plane_0, 0xd8); //a0+a1+a2+a3 a0-a1+a2-a3 a0+a1-a2-a3 a0-a1-a2+a3 + plane_1 = _mm_shuffle_epi32(plane_1, 0xd8); //b0+b1+b2+b3 b0-b1+b2-b3 b0+b1-b2-b3 b0-b1-b2+b3 + + temp0 = _mm_mullo_epi32(scale_val, plane_0); //multiply by pu2_iscal_mat[0] * pu2_weigh_mat[0] + temp1 = _mm_mullo_epi32(scale_val, plane_1); //multiply by pu2_iscal_mat[0] * pu2_weigh_mat[0] + + temp0 = _mm_slli_epi32(temp0, u4_qp_div_6); + temp1 = _mm_slli_epi32(temp1, u4_qp_div_6); + + temp0 = _mm_srai_epi32(temp0, 5); + temp1 = _mm_srai_epi32(temp1, 5); + + temp0 = _mm_packs_epi32(temp0, temp1); //Final values are 16-bits only. + + _mm_storeu_si128((__m128i *) (&pi2_out[0]), temp0); + +} diff --git a/common/x86/ih264_ihadamard_scaling_ssse3.c b/common/x86/ih264_ihadamard_scaling_ssse3.c new file mode 100755 index 0000000..232d9fa --- /dev/null +++ b/common/x86/ih264_ihadamard_scaling_ssse3.c @@ -0,0 +1,200 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_ihadamard_scaling_ssse3.c + * + * @brief + * Contains definition of functions for h264 inverse hadamard 4x4 transform and scaling + * + * @author + * Mohit + * + * @par List of Functions: + * - ih264_ihadamard_scaling_4x4_ssse3() + * + * @remarks + * + ******************************************************************************* + */ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264_trans_macros.h" +#include "ih264_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include <immintrin.h> + +/* + ******************************************************************************** + * + * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients + * of a 16x16 intra prediction macroblock, and then performs scaling. + * prediction buffer + * + * @par Description: + * The DC coefficients pass through a 2-stage inverse hadamard transform. + * This inverse transformed content is scaled to based on Qp value. 
+ * + * @param[in] pi2_src + * input 4x4 block of DC coefficients + * + * @param[out] pi2_out + * output 4x4 block + * + * @param[in] pu2_iscal_mat + * pointer to scaling list + * + * @param[in] pu2_weigh_mat + * pointer to weight matrix + * + * @param[in] u4_qp_div_6 + * Floor (qp/6) + * + * @param[in] pi4_tmp + * temporary buffer of size 1*16 + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void ih264_ihadamard_scaling_4x4_ssse3(WORD16* pi2_src, WORD16* pi2_out, + const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, WORD32* pi4_tmp) { + int val = 0xFFFF; + __m128i src_r0_r1, src_r2_r3, sign_reg, zero_8x16b = _mm_setzero_si128(); + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i temp0, temp1, temp2, temp3; + __m128i add_rshift = _mm_set1_epi32((1 << (5 - u4_qp_div_6))); + __m128i mult_val = _mm_set1_epi32(pu2_iscal_mat[0] * pu2_weigh_mat[0]); + + __m128i mask = _mm_set1_epi32(val); + mult_val = _mm_and_si128(mult_val, mask); + + src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row + src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row + sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r0_r1); + src_r0 = _mm_unpacklo_epi16(src_r0_r1, sign_reg); + src_r1 = _mm_unpackhi_epi16(src_r0_r1, sign_reg); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r2_r3); + src_r2 = _mm_unpacklo_epi16(src_r2_r3, sign_reg); + src_r3 = _mm_unpackhi_epi16(src_r2_r3, sign_reg); + + /* Perform Inverse transform */ + /*-------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + temp0 = _mm_unpacklo_epi32(src_r0, src_r1); //a0 b0 a1 b1 + temp2 
= _mm_unpacklo_epi32(src_r2, src_r3); //c0 d0 c1 d1 + temp1 = _mm_unpackhi_epi32(src_r0, src_r1); //a2 b2 a3 b3 + temp3 = _mm_unpackhi_epi32(src_r2, src_r3); //c2 d2 c3 d3 + src_r0 = _mm_unpacklo_epi64(temp0, temp2); //a0 b0 c0 d0 + src_r1 = _mm_unpackhi_epi64(temp0, temp2); //a1 b1 c1 d1 + src_r2 = _mm_unpacklo_epi64(temp1, temp3); //a2 b2 c2 d2 + src_r3 = _mm_unpackhi_epi64(temp1, temp3); //a3 b3 c3 d3 + + temp0 = _mm_add_epi32(src_r0, src_r3); + temp1 = _mm_add_epi32(src_r1, src_r2); + temp2 = _mm_sub_epi32(src_r1, src_r2); + temp3 = _mm_sub_epi32(src_r0, src_r3); + + src_r0 = _mm_add_epi32(temp0, temp1); + src_r1 = _mm_add_epi32(temp2, temp3); + src_r2 = _mm_sub_epi32(temp0, temp1); + src_r3 = _mm_sub_epi32(temp3, temp2); + + /*-------------------------------------------------------------*/ + /* IDCT [ Vertical transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + temp0 = _mm_unpacklo_epi32(src_r0, src_r1); //a0 a1 b0 b1 + temp2 = _mm_unpacklo_epi32(src_r2, src_r3); //a2 a3 b2 b3 + temp1 = _mm_unpackhi_epi32(src_r0, src_r1); //c0 c1 d0 d1 + temp3 = _mm_unpackhi_epi32(src_r2, src_r3); //c2 c3 d2 d3 + src_r0 = _mm_unpacklo_epi64(temp0, temp2); //a0 a1 a2 a3 + src_r1 = _mm_unpackhi_epi64(temp0, temp2); //b0 b1 b2 b3 + src_r2 = _mm_unpacklo_epi64(temp1, temp3); //c0 c1 c2 c3 + src_r3 = _mm_unpackhi_epi64(temp1, temp3); //d0 d1 d2 d3 + + temp0 = _mm_add_epi32(src_r0, src_r3); + temp1 = _mm_add_epi32(src_r1, src_r2); + temp2 = _mm_sub_epi32(src_r1, src_r2); + temp3 = _mm_sub_epi32(src_r0, src_r3); + + src_r0 = _mm_add_epi32(temp0, temp1); + src_r1 = _mm_add_epi32(temp2, temp3); + src_r2 = _mm_sub_epi32(temp0, temp1); + src_r3 = _mm_sub_epi32(temp3, temp2); + + src_r0 = _mm_and_si128(src_r0, mask); + src_r1 = _mm_and_si128(src_r1, mask); + src_r2 = _mm_and_si128(src_r2, mask); + src_r3 = _mm_and_si128(src_r3, mask); + + src_r0 = 
_mm_madd_epi16(src_r0, mult_val); + src_r1 = _mm_madd_epi16(src_r1, mult_val); + src_r2 = _mm_madd_epi16(src_r2, mult_val); + src_r3 = _mm_madd_epi16(src_r3, mult_val); + + //Scaling + if (u4_qp_div_6 >= 6) { + src_r0 = _mm_slli_epi32(src_r0, u4_qp_div_6 - 6); + src_r1 = _mm_slli_epi32(src_r1, u4_qp_div_6 - 6); + src_r2 = _mm_slli_epi32(src_r2, u4_qp_div_6 - 6); + src_r3 = _mm_slli_epi32(src_r3, u4_qp_div_6 - 6); + } else { + temp0 = _mm_add_epi32(src_r0, add_rshift); + temp1 = _mm_add_epi32(src_r1, add_rshift); + temp2 = _mm_add_epi32(src_r2, add_rshift); + temp3 = _mm_add_epi32(src_r3, add_rshift); + src_r0 = _mm_srai_epi32(temp0, 6 - u4_qp_div_6); + src_r1 = _mm_srai_epi32(temp1, 6 - u4_qp_div_6); + src_r2 = _mm_srai_epi32(temp2, 6 - u4_qp_div_6); + src_r3 = _mm_srai_epi32(temp3, 6 - u4_qp_div_6); + } + src_r0_r1 = _mm_packs_epi32(src_r0, src_r1); + src_r2_r3 = _mm_packs_epi32(src_r2, src_r3); + + _mm_storeu_si128((__m128i *) (&pi2_out[0]), src_r0_r1); + _mm_storeu_si128((__m128i *) (&pi2_out[8]), src_r2_r3); +} diff --git a/common/x86/ih264_inter_pred_filters_ssse3.c b/common/x86/ih264_inter_pred_filters_ssse3.c new file mode 100755 index 0000000..64e364e --- /dev/null +++ b/common/x86/ih264_inter_pred_filters_ssse3.c @@ -0,0 +1,4375 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ih264_inter_pred_filters_ssse3.c */ +/* */ +/* Description : Contains function definitions for inter */ +/* prediction functions in x86 ssse3 intrinsics */ +/* */ +/* List of Functions : ih264_inter_pred_luma_copy_ssse3() */ +/* ih264_inter_pred_luma_horz_ssse3() */ +/* ih264_inter_pred_luma_vert_ssse3() */ +/* ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3() */ +/* ih264_inter_pred_luma_horz_qpel_ssse3() */ +/* ih264_inter_pred_luma_vert_qpel_ssse3() */ +/* ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3() */ +/* ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3() */ +/* ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3() */ +/* ih264_inter_pred_chroma_ssse3() */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 13 02 2015 Kaushik Initial version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +#include <immintrin.h> +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_inter_pred_filters.h" + +/*****************************************************************************/ +/* Constant Data variables */ +/*****************************************************************************/ + +/* coefficients for 6 tap filtering*/ +//const WORD32 ih264_g_six_tap[3] ={1,-5,20}; +/*****************************************************************************/ +/* Function definitions . 
*/ +/*****************************************************************************/ +/*****************************************************************************/ +/* */ +/* Function Name : ih264_inter_pred_luma_copy_ssse3 */ +/* */ +/* Description : This function copies the contents of ht x wd block from */ +/* source to destination. (ht,wd) can be (4,4), (8,4), */ +/* (4,8), (8,8), (16,8), (8,16) or (16,16). */ +/* */ +/* Inputs : puc_src - pointer to source */ +/* puc_dst - pointer to destination */ +/* src_strd - stride for source */ +/* dst_strd - stride for destination */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 13 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_inter_pred_luma_copy_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + __m128i y_0_16x8b, y_1_16x8b, y_2_16x8b, y_3_16x8b; + + UNUSED(pu1_tmp); + UNUSED(dydx); + + WORD32 src_strd2, src_strd3, src_strd4, dst_strd2, dst_strd3, dst_strd4; + + src_strd2 = src_strd << 1; + dst_strd2 = dst_strd << 1; + src_strd4 = src_strd << 2; + dst_strd4 = dst_strd << 2; + src_strd3 = src_strd2 + src_strd; + dst_strd3 = dst_strd2 + dst_strd; + + if(wd == 4) + { + __m128i mask_full_128b, mask_low_32b; + + mask_full_128b = _mm_set1_epi8(0xff); + mask_low_32b = _mm_srli_si128(mask_full_128b, 12); + // mask for first four bytes + + do + { + y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); + y_2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd2)); + y_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd3)); + + _mm_maskmoveu_si128(y_0_16x8b, mask_low_32b, (char*)pu1_dst); + _mm_maskmoveu_si128(y_1_16x8b, mask_low_32b, 
(char*)(pu1_dst + dst_strd)); + _mm_maskmoveu_si128(y_2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2)); + _mm_maskmoveu_si128(y_3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); + + ht -= 4; + pu1_src += src_strd4; + pu1_dst += dst_strd4; + } + while(ht > 0); + } + else if(wd == 8) + { + do + { + y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); + y_2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd2)); + y_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd3)); + + _mm_storel_epi64((__m128i *)pu1_dst, y_0_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y_1_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd2), y_2_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd3), y_3_16x8b); + + ht -= 4; + pu1_src += src_strd4; + pu1_dst += dst_strd4; + } + while(ht > 0); + } + else // wd == 16 + { + WORD32 src_strd5, src_strd6, src_strd7, src_strd8; + WORD32 dst_strd5, dst_strd6, dst_strd7, dst_strd8; + + __m128i y_4_16x8b, y_5_16x8b, y_6_16x8b, y_7_16x8b; + + src_strd5 = src_strd2 + src_strd3; + dst_strd5 = dst_strd2 + dst_strd3; + src_strd6 = src_strd3 << 1; + dst_strd6 = dst_strd3 << 1; + src_strd7 = src_strd3 + src_strd4; + dst_strd7 = dst_strd3 + dst_strd4; + src_strd8 = src_strd << 3; + dst_strd8 = dst_strd << 3; + + do + { + y_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + y_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); + y_2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd2)); + y_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd3)); + y_4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd4)); + y_5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd5)); + y_6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd6)); + y_7_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd7)); + + _mm_storeu_si128((__m128i *)pu1_dst, y_0_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y_1_16x8b); + _mm_storeu_si128((__m128i 
*)(pu1_dst + dst_strd2), y_2_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), y_3_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd4), y_4_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd5), y_5_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd6), y_6_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd7), y_7_16x8b); + + ht -= 8; + pu1_src += src_strd8; + pu1_dst += dst_strd8; + } + while(ht > 0); + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_inter_pred_luma_horz_ssse3 */ +/* */ +/* Description : This function applies a horizontal 6-tap filter on */ +/* ht x wd block as mentioned in sec. 8.4.2.2.1 titled */ +/* "Luma sample interpolation process". (ht,wd) can be */ +/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). */ +/* */ +/* Inputs : pu1_src - pointer to source */ +/* pu1_dst - pointer to destination */ +/* src_strd - stride for source */ +/* dst_strd - stride for destination */ +/* ht - height of the block */ +/* wd - width of the block */ +/* pu1_tmp, dydx - unused by this function */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 13 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_inter_pred_luma_horz_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b; + __m128i const_val16_8x16b; + + UNUSED(pu1_tmp); + UNUSED(dydx); + + pu1_src -= 2; // the filter input starts from x[-2] (till x[3]) + + coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 + 
//c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20 + const_val16_8x16b = _mm_set1_epi16(16); //rounding offset added before the final >> 5 + + if(wd == 4) + { + __m128i src_r0_16x8b, src_r1_16x8b, src_r0r1_16x8b; + __m128i src_r0_sht_16x8b, src_r1_sht_16x8b; + + __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b; + __m128i res_r0r1_16x8b; + + __m128i mask_full_16x8b, mask_low32b; + + mask_full_16x8b = _mm_set1_epi8(0xff); + mask_low32b = _mm_srli_si128(mask_full_16x8b, 12); // mask for first four bytes + + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... + + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4 + res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 + + src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6 + res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3 + + src_r0_16x8b = 
_mm_srli_si128(src_r0_16x8b, 4); //a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 0 0 0 0 + + src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8 + res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5 + + res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b); + res_r0r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0r1_t3_8x16b); + res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t3_8x16b); //a0*c0+a1*c1+a2*c2+a3*c3+a4*c4+a5*c5 + 16; + //a1*c0+a2*c1+a3*c2+a4*c3+a5*c4+a6*c5 + 16; + //a2*c0+a3*c1+a4*c2+a5*c3+a6*c4+a7*c5 + 16; + //a3*c0+a4*c1+a5*c2+a6*c3+a7*c4+a8*c5 + 16; + //b0*c0+b1*c1+b2*c2+b3*c3+b4*c4+b5*c5 + 16; + //b1*c0+b2*c1+b3*c2+b4*c3+b5*c4+b6*c5 + 16; + //b2*c0+b3*c1+b4*c2+b5*c3+b6*c4+b7*c5 + 16; + //b3*c0+b4*c1+b5*c2+b6*c3+b7*c4+b8*c5 + 16; + + res_r0r1_t1_8x16b = _mm_srai_epi16(res_r0r1_t1_8x16b, 5); //shifting right by 5 bits. + + res_r0r1_16x8b = _mm_packus_epi16(res_r0r1_t1_8x16b, res_r0r1_t1_8x16b); //saturate to 0..255: bytes 0-3 = row 0, bytes 4-7 = row 1 + + _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)pu1_dst); //write the 4 pixels of row 0 + res_r0r1_16x8b = _mm_srli_si128(res_r0r1_16x8b, 4); + _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd)); //write the 4 pixels of row 1 + + ht -= 2; + pu1_src += src_strd << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + else if(wd == 8) + { + __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b; + __m128i src_r0_t1_16x8b, src_r1_t1_16x8b; + + __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b; + __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b; + + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... 
+ + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1 + res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 + + res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3 + res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3 + //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3 + + src_r0_16x8b = 
_mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12 + + res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5 + res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5 + //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5 + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b); + res_r0_t3_8x16b = _mm_add_epi16(res_r0_t3_8x16b, const_val16_8x16b); + res_r1_t3_8x16b = _mm_add_epi16(res_r1_t3_8x16b, const_val16_8x16b); + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b); + + res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5); //shifting right by 5 bits. 
+ res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5); + + src_r0_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r0_t1_8x16b); + src_r1_16x8b = _mm_packus_epi16(res_r1_t1_8x16b, res_r1_t1_8x16b); + + _mm_storel_epi64((__m128i *)pu1_dst, src_r0_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), src_r1_16x8b); + + ht -= 2; + pu1_src += src_strd << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + else // wd == 16 + { + __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b; + __m128i src_r0_t1_16x8b, src_r1_t1_16x8b; + + __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b; + __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b; + + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row0 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... + //b0 is the same as a8. Similarly, the other bn pixels are the same as the a(n+8) pixels. + + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1 + res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 
0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 + + res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3 + res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3 + //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12 + + res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5 + res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5 + //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5 + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b); + res_r0_t3_8x16b = _mm_add_epi16(res_r0_t3_8x16b, const_val16_8x16b); + res_r1_t3_8x16b = 
_mm_add_epi16(res_r1_t3_8x16b, const_val16_8x16b); + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b); + + res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5); //shifting right by 5 bits. + res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5); + + src_r0_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r1_t1_8x16b); //saturate to 0..255: low 8 bytes from pu1_src half, high 8 bytes from pu1_src + 8 half + _mm_storeu_si128((__m128i *)pu1_dst, src_r0_16x8b); + + ht--; + pu1_src += src_strd; + pu1_dst += dst_strd; + } + while(ht > 0); + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_inter_pred_luma_vert_ssse3 */ +/* */ +/* Description : This function applies a vertical 6-tap filter on */ +/* ht x wd block as mentioned in sec. 8.4.2.2.1 titled */ +/* "Luma sample interpolation process". (ht,wd) can be */ +/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). */ +/* */ +/* Inputs : pu1_src - pointer to source */ +/* pu1_dst - pointer to destination */ +/* src_strd - stride for source */ +/* dst_strd - stride for destination */ +/* ht - height of the block */ +/* wd - width of the block */ +/* pu1_tmp, dydx - unused by this function */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 13 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_inter_pred_luma_vert_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b; + __m128i src_r5_16x8b, src_r6_16x8b; + __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b; + + __m128i res_16x8b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b; + + __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b; + __m128i const_val16_8x16b; + + UNUSED(pu1_tmp); + UNUSED(dydx); + + coeff0_1_16x8b = 
_mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 + //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20 + const_val16_8x16b = _mm_set1_epi16(16); //rounding offset added before the final >> 5 + + pu1_src -= src_strd << 1; // the filter input starts from x[-2] (till x[3]) + + if(wd == 4) + { + __m128i mask_low32b; + + mask_low32b = _mm_set1_epi8(0xff); + + //Prologue: Load all the pred rows except sixth and seventh row + // for the first and second row processing. + src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r3_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + + mask_low32b = _mm_srli_si128(mask_low32b, 12); // mask for first four bytes + + src_r0_16x8b = _mm_unpacklo_epi32(src_r0_16x8b, src_r1_16x8b); + src_r1_16x8b = _mm_unpacklo_epi32(src_r1_16x8b, src_r2_16x8b); + src_r2_16x8b = _mm_unpacklo_epi32(src_r2_16x8b, src_r3_16x8b); + src_r3_16x8b = _mm_unpacklo_epi32(src_r3_16x8b, src_r4_16x8b); + + do + { + src_r5_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); + + src_r4_16x8b = _mm_unpacklo_epi32(src_r4_16x8b, src_r5_16x8b); + src_r5_16x8b = _mm_unpacklo_epi32(src_r5_16x8b, src_r6_16x8b); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = 
_mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. + res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b); + + _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)pu1_dst); + res_16x8b = _mm_srli_si128(res_16x8b, 4); + _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd)); + + src_r0_16x8b = src_r2_16x8b; //slide the row window down by two rows for the next iteration + src_r1_16x8b = src_r3_16x8b; + src_r2_16x8b = src_r4_16x8b; + src_r3_16x8b = src_r5_16x8b; + src_r4_16x8b = src_r6_16x8b; + + ht -= 2; + pu1_src += src_strd << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + + else if(wd == 8) + { + //Prologue: Load all the pred rows except sixth and seventh row + // for the first and second row processing. + src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r3_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + + src_r0_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); + src_r1_16x8b = _mm_unpacklo_epi64(src_r1_16x8b, src_r2_16x8b); + src_r2_16x8b = _mm_unpacklo_epi64(src_r2_16x8b, src_r3_16x8b); + src_r3_16x8b = _mm_unpacklo_epi64(src_r3_16x8b, src_r4_16x8b); + + do + { + src_r5_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); + + src_r4_16x8b = _mm_unpacklo_epi64(src_r4_16x8b, src_r5_16x8b); + src_r5_16x8b = _mm_unpacklo_epi64(src_r5_16x8b, src_r6_16x8b); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b); + 
src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. + res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b); + + _mm_storel_epi64((__m128i *)pu1_dst, res_16x8b); + + src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. + res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b); + + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b); + + src_r0_16x8b = src_r2_16x8b; + src_r1_16x8b = src_r3_16x8b; + src_r2_16x8b = src_r4_16x8b; + src_r3_16x8b = src_r5_16x8b; + src_r4_16x8b = src_r6_16x8b; + + ht -= 2; + pu1_src += src_strd << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + else // wd == 16 + { + __m128i res_t0_8x16b; + + //Prologue: Load all the pred rows except sixth and seventh row + // for the first and second row processing. 
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src_r3_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src_r4_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + + do + { + src_r5_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src_r6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. + + src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. 
+ + res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. + + src_r0r1_16x8b = _mm_unpackhi_epi8(src_r1_16x8b, src_r2_16x8b); + src_r2r3_16x8b = _mm_unpackhi_epi8(src_r3_16x8b, src_r4_16x8b); + src_r4r5_16x8b = _mm_unpackhi_epi8(src_r5_16x8b, src_r6_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. 
+ + res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b); + + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res_16x8b); + + src_r0_16x8b = src_r2_16x8b; + src_r1_16x8b = src_r3_16x8b; + src_r2_16x8b = src_r4_16x8b; + src_r3_16x8b = src_r5_16x8b; + src_r4_16x8b = src_r6_16x8b; + + ht -= 2; + pu1_src += src_strd << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3 */ +/* */ +/* Description : This function implements a two stage cascaded six tap */ +/* filter, horizontally and then vertically on ht x wd */ +/* block as mentioned in sec. 8.4.2.2.1 titled "Luma sample */ +/* interpolation process". (ht,wd) can be (4,4), (8,4), */ +/* (4,8), (8,8), (16,8), (8,16) or (16,16). */ +/* */ +/* Inputs : pu1_src - pointer to source */ +/* pu1_dst - pointer to destination */ +/* src_strd - stride for source */ +/* dst_strd - stride for destination */ +/* ht - height of the block */ +/* wd - width of the block */ +/* pu1_tmp - pointer to temporary buffer */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 13 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + UNUSED(dydx); + + if(wd == 4) + { + WORD16 *pi2_temp; + + pu1_tmp += 4; + pu1_src -= src_strd << 1; + pi2_temp = (WORD16 *)pu1_tmp; + pu1_src -= 2; // the filter input starts from x[-2] (till x[3]) + + // Horizontal 6-tap filtering + { + WORD32 ht_tmp = ht + 4; + + __m128i src_r0_16x8b, src_r1_16x8b; + __m128i src_r0_sht_16x8b, src_r1_sht_16x8b; + __m128i src_r0r1_t1_16x8b; + __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, 
res_r0r1_t3_8x16b; + __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b; + + coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 + //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20 + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... + + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4 + res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 + + src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6 + res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a4 a5 a5 a6 
a6 a7 a7 a8 0 0 0 0 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 0 0 0 0 + + src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8 + res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //b4*c4+b5*c5 b5*c4+b6*c5 b4*c6+b7*c5 b7*c4+b8*c5 + res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b); + res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t3_8x16b, res_r0r1_t1_8x16b); + + _mm_storeu_si128((__m128i *)pi2_temp, res_r0r1_t1_8x16b); + + ht_tmp -= 2; + pu1_src += src_strd << 1; + pi2_temp += 8; + } + while(ht_tmp > 0); + + src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + + src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b,4); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 + res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b,4); //a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 0 0 0 0 + res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + + res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b); + res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t3_8x16b, res_r0r1_t1_8x16b); + + _mm_storel_epi64((__m128i *)pi2_temp, res_r0r1_t1_8x16b); + } + + pi2_temp = (WORD16 *)pu1_tmp; + + // Vertical 6-tap filtering + { + __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b, + src_r4_8x16b; + __m128i src_r5_8x16b, src_r6_8x16b; + __m128i 
src_t1_8x16b, src_t2_8x16b; + + __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b; + __m128i res_8x16b, res_16x8b; + + __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b; + __m128i const_val512_4x32b, mask_low32b; + + mask_low32b = _mm_set1_epi8(0xff); + + coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001); + coeff2_3_8x16b = _mm_set1_epi32(0x00140014); + coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB); + + mask_low32b = _mm_srli_si128(mask_low32b, 12); + const_val512_4x32b = _mm_set1_epi32(512); + + src_r0_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp)); + src_r1_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 4)); + src_r2_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 8)); + src_r3_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 12)); + src_r4_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 16)); + pi2_temp += 20; + + do + { + src_r5_8x16b = _mm_loadl_epi64((__m128i *)pi2_temp); + src_r6_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 4)); + + src_r0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b); + src_t1_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b); + src_t2_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_t1_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_t2_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + + res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b); + src_t1_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b); + src_t2_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_t1_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_t2_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = 
_mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + + res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)pu1_dst); + res_16x8b = _mm_srli_si128(res_16x8b, 4); + _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd)); + + src_r0_8x16b = src_r2_8x16b; + src_r1_8x16b = src_r3_8x16b; + src_r2_8x16b = src_r4_8x16b; + src_r3_8x16b = src_r5_8x16b; + src_r4_8x16b = src_r6_8x16b; + + ht -= 2; + pi2_temp += 8; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + } + else if(wd == 8) + { + WORD16 *pi2_temp; + + pu1_tmp += 4; + pu1_src -= src_strd << 1; + pi2_temp = (WORD16 *)pu1_tmp; + pu1_src -= 2; // the filter input starts from x[-2] (till x[3]) + + // Horizontal 6-tap filtering + { + WORD32 ht_tmp = ht + 4; + + __m128i src_r0_16x8b, src_r1_16x8b; + __m128i src_r0_sht_16x8b, src_r1_sht_16x8b; + __m128i src_r0_t1_16x8b, src_r1_t1_16x8b; + __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b; + __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b; + __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b; + + coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 + //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20 + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... 
+ + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11 b12 b13 b14 b15 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1 + res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10 + + res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3 + res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3 + //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3 + + 
src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12 + + res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5 + res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5 + //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5 + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b); + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b); + + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b); + + _mm_storeu_si128((__m128i *)pi2_temp, res_r0_t1_8x16b); + _mm_storeu_si128((__m128i *)(pi2_temp + 8), res_r1_t1_8x16b); + + ht_tmp -= 2; + pu1_src += src_strd << 1; + pi2_temp += 16; + } + while(ht_tmp > 0); + + src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b,src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b,coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1 + + src_r0_16x8b = 
_mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0 + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 + res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0 + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12 + res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5 + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b); + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b); + + _mm_storeu_si128((__m128i *)pi2_temp, res_r0_t1_8x16b); + } + + pi2_temp = (WORD16 *)pu1_tmp; + + // Vertical 6-tap filtering + { + __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b, + src_r4_8x16b; + __m128i src_r5_8x16b, src_r6_8x16b; + __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b; + + __m128i res_t1_4x32b, res_t2_4x32b, res_t3_4x32b; + __m128i res_c0_4x32b, res_c1_4x32b; + __m128i res_8x16b, res_16x8b; + + __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b; + __m128i const_val512_4x32b; + + coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001); + coeff2_3_8x16b = _mm_set1_epi32(0x00140014); + coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB); + + const_val512_4x32b = _mm_set1_epi32(512); + + src_r0_8x16b = _mm_loadu_si128((__m128i *)pi2_temp); + src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 8)); + src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 16)); + src_r3_8x16b = 
_mm_loadu_si128((__m128i *)(pi2_temp + 24)); + src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 32)); + pi2_temp += 40; + + do + { + src_r5_8x16b = _mm_loadu_si128((__m128i *)pi2_temp); + src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 8)); + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + _mm_storel_epi64((__m128i *)pu1_dst, res_16x8b); + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = 
_mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b); + + src_r0_8x16b = src_r2_8x16b; + src_r1_8x16b = src_r3_8x16b; + src_r2_8x16b = src_r4_8x16b; + src_r3_8x16b = src_r5_8x16b; + src_r4_8x16b = src_r6_8x16b; + + ht -= 2; + pi2_temp += 16; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + } + else // wd == 16 + { + WORD16 *pi2_temp; + WORD32 ht_tmp; + + pu1_tmp += 4; + pu1_src -= src_strd << 1; + pi2_temp = (WORD16 *)pu1_tmp; + pu1_src -= 2; // the filter input starts from x[-2] (till x[3]) + + // Horizontal 6-tap filtering + { + ht_tmp = ht + 5; + + __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b; + __m128i src_r0_t1_16x8b, src_r1_t1_16x8b; + + __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b; + __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b; + + __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b; + + 
coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 + //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20 + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row0 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... + //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels. + + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1 + res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 + src_r1_t1_16x8b = 
_mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10 + + res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3 + res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3 + //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12 + + res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5 + res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5 + //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5 + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b); + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b); + + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b); + + _mm_storeu_si128((__m128i *)pi2_temp, res_r0_t1_8x16b); + _mm_storeu_si128((__m128i *)(pi2_temp + 8), res_r1_t1_8x16b); + + ht_tmp--; + pu1_src += src_strd; + pi2_temp += 16; + } + while(ht_tmp > 0); + } + + pi2_temp = (WORD16 *)pu1_tmp; + + // Vertical 6-tap filtering + { + WORD16 
*pi2_temp2; + UWORD8 *pu1_dst2; + WORD32 ht_tmp; + + __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b, src_r4_8x16b; + __m128i src_r5_8x16b, src_r6_8x16b; + __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b; + + __m128i res_t1_4x32b, res_t2_4x32b, res_t3_4x32b; + __m128i res_c0_4x32b, res_c1_4x32b; + __m128i res_8x16b, res_16x8b; + + __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b; + __m128i const_val512_4x32b; + + coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001); + coeff2_3_8x16b = _mm_set1_epi32(0x00140014); + coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB); + + const_val512_4x32b = _mm_set1_epi32(512); + + pi2_temp2 = pi2_temp + 8; + pu1_dst2 = pu1_dst + 8; + ht_tmp = ht; + + /**********************************************************/ + /* Do first height x 8 block */ + /**********************************************************/ + src_r0_8x16b = _mm_loadu_si128((__m128i *)pi2_temp); + src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 16)); + src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 32)); + src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 48)); + src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 64)); + pi2_temp += 80; + + do + { + src_r5_8x16b = _mm_loadu_si128((__m128i *)pi2_temp); + src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 16)); + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r0r1_8x16b = 
_mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + _mm_storel_epi64((__m128i *)pu1_dst, res_16x8b); + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, 
res_t3_4x32b); + res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b); + + src_r0_8x16b = src_r2_8x16b; + src_r1_8x16b = src_r3_8x16b; + src_r2_8x16b = src_r4_8x16b; + src_r3_8x16b = src_r5_8x16b; + src_r4_8x16b = src_r6_8x16b; + + ht_tmp -= 2; + pi2_temp += 32; + pu1_dst += dst_strd << 1; + } + while(ht_tmp > 0); + + /**********************************************************/ + /* Do second ht x 8 block */ + /**********************************************************/ + src_r0_8x16b = _mm_loadu_si128((__m128i *)pi2_temp2); + src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16)); + src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 32)); + src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 48)); + src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 64)); + pi2_temp2 += 80; + + do + { + src_r5_8x16b = _mm_loadu_si128((__m128i *)pi2_temp2); + src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16)); + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, 
coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + _mm_storel_epi64((__m128i *)pu1_dst2, res_16x8b); + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + _mm_storel_epi64((__m128i *)(pu1_dst2 + 
dst_strd), res_16x8b); + + src_r0_8x16b = src_r2_8x16b; + src_r1_8x16b = src_r3_8x16b; + src_r2_8x16b = src_r4_8x16b; + src_r3_8x16b = src_r5_8x16b; + src_r4_8x16b = src_r6_8x16b; + + ht -= 2; + pi2_temp2 += 32; + pu1_dst2 += dst_strd << 1; + } + while(ht > 0); + } + } +} +

/*
 * Multiply-accumulate stage of the horizontal quarter-pel filter.
 *
 * Each argument holds 8 byte pairs (one pair per output pixel) that are
 * multiplied by the tap pairs (c0,c1), (c2,c3) and (c4,c5) respectively, with
 * c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20. The three partial sums are added,
 * 16 is added for rounding and the result is shifted right by 5.
 * Returns 8 signed 16-bit values; they are not yet clipped to 8 bits.
 */
static __m128i ih264_horz_qpel_mac_ssse3(__m128i src_0_1_16x8b,
                                         __m128i src_2_3_16x8b,
                                         __m128i src_4_5_16x8b)
{
    const __m128i coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 ... (bytes 0x01, 0xFB)
    const __m128i coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 ... (bytes 0x14, 0x14)
    const __m128i coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 ... (bytes 0xFB, 0x01)
    const __m128i const_val16_8x16b = _mm_set1_epi16(16);

    __m128i res_t1_8x16b = _mm_maddubs_epi16(src_0_1_16x8b, coeff0_1_16x8b);  //x0*c0+x1*c1 per pixel
    __m128i res_t2_8x16b = _mm_maddubs_epi16(src_2_3_16x8b, coeff2_3_16x8b);  //x2*c2+x3*c3 per pixel
    __m128i res_t3_8x16b = _mm_maddubs_epi16(src_4_5_16x8b, coeff4_5_16x8b);  //x4*c4+x5*c5 per pixel

    res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
    res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
    res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);

    return _mm_srai_epi16(res_t1_8x16b, 5);                     //shifting right by 5 bits
}

/*
 * Filters 8 horizontally adjacent pixels of one row.
 *
 * src_16x8b holds 16 consecutive bytes a0..a15 of the row; output pixel i
 * (i = 0..7) is computed from a(i)..a(i+5), so only a0..a12 are consumed.
 * Returns the 8 results of ih264_horz_qpel_mac_ssse3() as 16-bit values.
 */
static __m128i ih264_horz_qpel_filt_row_ssse3(__m128i src_16x8b)
{
    __m128i src_sht_16x8b = _mm_srli_si128(src_16x8b, 1);                     //a1 a2 a3 a4 .... a15 0
    __m128i src_0_1_16x8b = _mm_unpacklo_epi8(src_16x8b, src_sht_16x8b);      //a0 a1 a1 a2 a2 a3 .... a7 a8
    __m128i src_2_3_16x8b, src_4_5_16x8b;

    src_16x8b = _mm_srli_si128(src_16x8b, 2);                                 //a2 a3 a4 a5 .... a15 0 0
    src_sht_16x8b = _mm_srli_si128(src_sht_16x8b, 2);                         //a3 a4 a5 a6 .... a15 0 0 0
    src_2_3_16x8b = _mm_unpacklo_epi8(src_16x8b, src_sht_16x8b);              //a2 a3 a3 a4 a4 a5 .... a9 a10

    src_16x8b = _mm_srli_si128(src_16x8b, 2);                                 //a4 a5 a6 a7 .... a15 0 0 0 0
    src_sht_16x8b = _mm_srli_si128(src_sht_16x8b, 2);                         //a5 a6 a7 a8 .... a15 0 0 0 0 0
    src_4_5_16x8b = _mm_unpacklo_epi8(src_16x8b, src_sht_16x8b);              //a4 a5 a5 a6 a6 a7 .... a11 a12

    return ih264_horz_qpel_mac_ssse3(src_0_1_16x8b, src_2_3_16x8b, src_4_5_16x8b);
}

/*
 * Writes the four lowest bytes of res_16x8b to pu1_dst[0..3].
 *
 * Used instead of _mm_maskmoveu_si128(): that intrinsic is a byte-masked
 * store with a non-temporal hint, which is far more expensive than a normal
 * store for a 4-byte row. Byte-wise writes make no alignment assumption
 * about pu1_dst.
 */
static void ih264_horz_qpel_store_4pix_ssse3(UWORD8 *pu1_dst, __m128i res_16x8b)
{
    unsigned int pix_4x8b = (unsigned int)_mm_cvtsi128_si32(res_16x8b);

    pu1_dst[0] = (UWORD8)(pix_4x8b & 0xff);
    pu1_dst[1] = (UWORD8)((pix_4x8b >> 8) & 0xff);
    pu1_dst[2] = (UWORD8)((pix_4x8b >> 16) & 0xff);
    pu1_dst[3] = (UWORD8)((pix_4x8b >> 24) & 0xff);
}

/*****************************************************************************/
/*                                                                           */
/*  Function Name : ih264_inter_pred_luma_horz_qpel_ssse3                    */
/*                                                                           */
/*  Description   : Six-tap (1,-5,20,20,-5,1) horizontal filter on a ht x wd */
/*                  block; each result is rounded, clipped to 8 bits and     */
/*                  averaged with an integer-position source pixel to give   */
/*                  the horizontal quarter-pel sample, as mentioned in sec.  */
/*                  8.4.2.2.1 titled "Luma sample interpolation process".    */
/*                  (ht,wd) can be (4,4), (8,4), (4,8), (8,8), (16,8),       */
/*                  (8,16) or (16,16).                                       */
/*                                                                           */
/*  Inputs        : pu1_src  - pointer to source                             */
/*                  pu1_dst  - pointer to destination                        */
/*                  src_strd - stride for source                             */
/*                  dst_strd - stride for destination                        */
/*                  ht       - height of the block                           */
/*                  wd       - width of the block                            */
/*                  pu1_tmp  - pointer to temporary buffer (unused)          */
/*                  dydx     - x and y reference offset for q-pel            */
/*                             calculations; only (dydx & 3) is used         */
/*                                                                           */
/*  Notes         : pu1_src + ((dydx & 3) >> 1) is the pixel averaged with   */
/*                  the filtered value. wd 4 and 8 handle two rows per       */
/*                  iteration; any other wd takes the 16-wide path. Each     */
/*                  row is fetched with 16-byte loads starting at            */
/*                  pu1_src - 2 (plus pu1_src + 6 for the 16-wide path).     */
/*                                                                           */
/*  Issues        : None                                                     */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes                              */
/*         13 02 2015   Kaushik         Initial Version                      */
/*                      Senthoor                                             */
/*                                                                           */
/*****************************************************************************/
void ih264_inter_pred_luma_horz_qpel_ssse3(UWORD8 *pu1_src,
                                           UWORD8 *pu1_dst,
                                           WORD32 src_strd,
                                           WORD32 dst_strd,
                                           WORD32 ht,
                                           WORD32 wd,
                                           UWORD8* pu1_tmp,
                                           WORD32 dydx)
{
    WORD32 x_offset;
    UWORD8 *pu1_pred1;

    UNUSED(pu1_tmp);

    x_offset = dydx & 3;

    // integer-position pixels that get averaged with the half-pel result
    pu1_pred1 = pu1_src + (x_offset >> 1);

    pu1_src -= 2; // the filter input starts from x[-2] (till x[3])

    if(wd == 4)
    {
        //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
        //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
        //Both rows share one register: row0 in the low half, row1 in the high half.

        do
        {
            __m128i src_r0_16x8b, src_r1_16x8b;
            __m128i src_0_1_16x8b, src_2_3_16x8b, src_4_5_16x8b;
            __m128i pred_r0_16x8b, pred_r1_16x8b, pred_r0r1_16x8b;
            __m128i res_r0r1_8x16b, res_r0r1_16x8b;

            src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                       //a0 a1 a2 a3 .... a15
            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));          //b0 b1 b2 b3 .... b15

            src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b,
                                             _mm_srli_si128(src_r0_16x8b, 1));        //a0 a1 a1 a2 a2 a3 .... a7 a8
            src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b,
                                             _mm_srli_si128(src_r1_16x8b, 1));        //b0 b1 b1 b2 b2 b3 .... b7 b8

            src_0_1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);           //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4

            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4);                           //a2 a3 a3 a4 .... a7 a8 0 0 0 0
            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4);                           //b2 b3 b3 b4 .... b7 b8 0 0 0 0
            src_2_3_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);           //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6

            src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4);                           //a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 0 0 0 0
            src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4);                           //b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 0 0 0 0
            src_4_5_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);           //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8

            pred_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_pred1);
            pred_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred1 + src_strd));
            pred_r0r1_16x8b = _mm_unpacklo_epi32(pred_r0_16x8b, pred_r1_16x8b);       //row0 pixels 0-3, row1 pixels 0-3, ...

            res_r0r1_8x16b = ih264_horz_qpel_mac_ssse3(src_0_1_16x8b,
                                                       src_2_3_16x8b,
                                                       src_4_5_16x8b);                //4 results of row0, 4 results of row1

            res_r0r1_16x8b = _mm_packus_epi16(res_r0r1_8x16b, res_r0r1_8x16b);        //clip to 8 bits
            res_r0r1_16x8b = _mm_avg_epu8(pred_r0r1_16x8b, res_r0r1_16x8b);           //computing q-pel

            ih264_horz_qpel_store_4pix_ssse3(pu1_dst, res_r0r1_16x8b);
            res_r0r1_16x8b = _mm_srli_si128(res_r0r1_16x8b, 4);                       //bring row1 to the low 4 bytes
            ih264_horz_qpel_store_4pix_ssse3(pu1_dst + dst_strd, res_r0r1_16x8b);

            ht -= 2;
            pu1_src += src_strd << 1;
            pu1_pred1 += src_strd << 1;
            pu1_dst += dst_strd << 1;
        }
        while(ht > 0);
    }
    else if(wd == 8)
    {
        //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
        //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....

        do
        {
            __m128i src_r0_16x8b, src_r1_16x8b;
            __m128i pred_r0_16x8b, pred_r1_16x8b;
            __m128i res_r0_8x16b, res_r1_8x16b;
            __m128i res_r0_16x8b, res_r1_16x8b;

            src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                       //a0 a1 a2 a3 .... a15
            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));          //b0 b1 b2 b3 .... b15

            pred_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_pred1);
            pred_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred1 + src_strd));

            res_r0_8x16b = ih264_horz_qpel_filt_row_ssse3(src_r0_16x8b);
            res_r1_8x16b = ih264_horz_qpel_filt_row_ssse3(src_r1_16x8b);

            res_r0_16x8b = _mm_packus_epi16(res_r0_8x16b, res_r0_8x16b);              //clip to 8 bits
            res_r1_16x8b = _mm_packus_epi16(res_r1_8x16b, res_r1_8x16b);

            res_r0_16x8b = _mm_avg_epu8(pred_r0_16x8b, res_r0_16x8b);                 //computing q-pel
            res_r1_16x8b = _mm_avg_epu8(pred_r1_16x8b, res_r1_16x8b);

            _mm_storel_epi64((__m128i *)pu1_dst, res_r0_16x8b);
            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_r1_16x8b);

            ht -= 2;
            pu1_src += src_strd << 1;
            pu1_pred1 += src_strd << 1;
            pu1_dst += dst_strd << 1;
        }
        while(ht > 0);
    }
    else // wd == 16
    {
        //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
        //Row0 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
        //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels.

        do
        {
            __m128i src_lo_16x8b, src_hi_16x8b;
            __m128i pred_16x8b;
            __m128i res_lo_8x16b, res_hi_8x16b;
            __m128i res_16x8b;

            src_lo_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                       //a0 a1 a2 a3 .... a15
            src_hi_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));                 //b0 b1 b2 b3 .... b15

            pred_16x8b = _mm_loadu_si128((__m128i *)pu1_pred1);

            res_lo_8x16b = ih264_horz_qpel_filt_row_ssse3(src_lo_16x8b);              //output pixels 0..7
            res_hi_8x16b = ih264_horz_qpel_filt_row_ssse3(src_hi_16x8b);              //output pixels 8..15

            res_16x8b = _mm_packus_epi16(res_lo_8x16b, res_hi_8x16b);                 //clip to 8 bits
            res_16x8b = _mm_avg_epu8(pred_16x8b, res_16x8b);                          //computing q-pel

            _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b);

            ht--;
            pu1_src += src_strd;
            pu1_pred1 += src_strd;
            pu1_dst += dst_strd;
        }
        while(ht > 0);
    }
}

+/*****************************************************************************/ +/* */ +/* Function Name : ih264_inter_pred_luma_vert_qpel_ssse3 */ +/* */ +/* 
Description : This function implements a six-tap filter vertically on */ +/* ht x wd block and averages the values with the source */ +/* pixels to calculate vertical quarter-pel as mentioned in */ +/* sec. 8.4.2.2.1 titled "Luma sample interpolation */ +/* process". (ht,wd) can be (4,4), (8,4), (4,8), (8,8), */ +/* (16,8), (8,16) or (16,16). */ +/* */ +/* Inputs : puc_src - pointer to source */ +/* puc_dst - pointer to destination */ +/* src_strd - stride for source */ +/* dst_strd - stride for destination */ +/* ht - height of the block */ +/* wd - width of the block */ +/* pu1_tmp - pointer to temporary buffer */ +/* dydx - x and y reference offset for q-pel */ +/* calculations */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 13 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_inter_pred_luma_vert_qpel_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + WORD32 y_offset; + UWORD8 *pu1_pred1; + + UNUSED(pu1_tmp); + + __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b; + __m128i src_r5_16x8b, src_r6_16x8b; + __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b; + __m128i res_16x8b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b; + + __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b; + __m128i const_val16_8x16b; + + y_offset = dydx & 0xf; + + coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 + //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20 + + pu1_pred1 = pu1_src + (y_offset >> 3) * src_strd; + + const_val16_8x16b = _mm_set1_epi16(16); + + pu1_src -= src_strd 
<< 1; // the filter input starts from x[-2] (till x[3]) + + if(wd == 4) + { + __m128i mask_low32b; + + mask_low32b = _mm_set1_epi8(0xff); + + //Epilogue: Load all the pred rows except sixth and seventh row + // for the first and second row processing. + src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r3_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + + mask_low32b = _mm_srli_si128(mask_low32b, 12); // mask for first four bytes + + src_r0_16x8b = _mm_unpacklo_epi32(src_r0_16x8b, src_r1_16x8b); + src_r1_16x8b = _mm_unpacklo_epi32(src_r1_16x8b, src_r2_16x8b); + src_r2_16x8b = _mm_unpacklo_epi32(src_r2_16x8b, src_r3_16x8b); + src_r3_16x8b = _mm_unpacklo_epi32(src_r3_16x8b, src_r4_16x8b); + + do + { + src_r5_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); + + src_r4_16x8b = _mm_unpacklo_epi32(src_r4_16x8b, src_r5_16x8b); + src_r5_16x8b = _mm_unpacklo_epi32(src_r5_16x8b, src_r6_16x8b); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_pred1); + src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred1 + src_strd)); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + + src_r0r1_16x8b = 
_mm_unpacklo_epi32(src_r0_16x8b,src_r1_16x8b); + + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. + + res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b); + + res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel + + _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)pu1_dst); + res_16x8b = _mm_srli_si128(res_16x8b, 4); + _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd)); + + src_r0_16x8b = src_r2_16x8b; + src_r1_16x8b = src_r3_16x8b; + src_r2_16x8b = src_r4_16x8b; + src_r3_16x8b = src_r5_16x8b; + src_r4_16x8b = src_r6_16x8b; + + ht -= 2; + pu1_src += src_strd << 1; + pu1_pred1 += src_strd << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + + else if(wd == 8) + { + //Epilogue: Load all the pred rows except sixth and seventh row + // for the first and second row processing. + src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r3_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + pu1_src += src_strd; + + src_r0_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); + src_r1_16x8b = _mm_unpacklo_epi64(src_r1_16x8b, src_r2_16x8b); + src_r2_16x8b = _mm_unpacklo_epi64(src_r2_16x8b, src_r3_16x8b); + src_r3_16x8b = _mm_unpacklo_epi64(src_r3_16x8b, src_r4_16x8b); + + do + { + src_r5_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); + + src_r4_16x8b = _mm_unpacklo_epi64(src_r4_16x8b, src_r5_16x8b); + src_r5_16x8b = _mm_unpacklo_epi64(src_r5_16x8b, src_r6_16x8b); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = 
_mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + src_r0r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_pred1); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. + + res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b); + res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel + + _mm_storel_epi64((__m128i *)pu1_dst, res_16x8b); + + src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + src_r0r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred1 + src_strd)); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. 
+ + res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b); + res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel + + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b); + + src_r0_16x8b = src_r2_16x8b; + src_r1_16x8b = src_r3_16x8b; + src_r2_16x8b = src_r4_16x8b; + src_r3_16x8b = src_r5_16x8b; + src_r4_16x8b = src_r6_16x8b; + + ht -= 2; + pu1_src += src_strd << 1; + pu1_pred1 += src_strd << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + else // wd == 16 + { + __m128i res_t0_8x16b; + + //Epilogue: Load all the pred rows except sixth and seventh row + // for the first and second row processing. + src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src_r3_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src_r4_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + + do + { + src_r5_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src_r6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + + res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. 
+ + src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + src_r0r1_16x8b = _mm_loadu_si128((__m128i *)pu1_pred1); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. + + res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b); + res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel + + _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + + res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. 
+ + src_r0r1_16x8b = _mm_unpackhi_epi8(src_r1_16x8b, src_r2_16x8b); + src_r2r3_16x8b = _mm_unpackhi_epi8(src_r3_16x8b, src_r4_16x8b); + src_r4r5_16x8b = _mm_unpackhi_epi8(src_r5_16x8b, src_r6_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + src_r0r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred1 + src_strd)); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. + + res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b); + res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel + + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res_16x8b); + + src_r0_16x8b = src_r2_16x8b; + src_r1_16x8b = src_r3_16x8b; + src_r2_16x8b = src_r4_16x8b; + src_r3_16x8b = src_r5_16x8b; + src_r4_16x8b = src_r6_16x8b; + + ht -= 2; + pu1_src += src_strd << 1; + pu1_pred1 += src_strd << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3 */ +/* */ +/* Description : This function implements a six-tap filter vertically and */ +/* horizontally on ht x wd block separately and averages */ +/* the two sets of values to calculate values at (1/4,1/4), */ +/* (1/4, 3/4), (3/4, 1/4) or (3/4, 3/4) as mentioned in */ +/* sec. 8.4.2.2.1 titled "Luma sample interpolation */ +/* process". (ht,wd) can be (4,4), (8,4), (4,8), (8,8), */ +/* (16,8), (8,16) or (16,16). 
*/ +/* */ +/* Inputs : puc_src - pointer to source */ +/* puc_dst - pointer to destination */ +/* src_strd - stride for source */ +/* dst_strd - stride for destination */ +/* ht - height of the block */ +/* wd - width of the block */ +/* pu1_tmp - pointer to temporary buffer */ +/* dydx - x and y reference offset for q-pel */ +/* calculations */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 13 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + WORD32 ht_temp; + UWORD8 *pu1_pred_vert,*pu1_pred_horiz; + UWORD8 *pu1_tmp1, *pu1_tmp2; + WORD32 x_offset, y_offset; + + pu1_tmp1 = pu1_tmp; + + dydx &= 0xf; + ht_temp = ht; + x_offset = dydx & 0x3; + y_offset = dydx >> 2; + pu1_tmp2 = pu1_tmp1; + + pu1_pred_vert = pu1_src + (x_offset >> 1) - 2*src_strd; + pu1_pred_horiz = pu1_src + (y_offset >> 1) * src_strd - 2; + //the filter input starts from x[-2] (till x[3]) + + __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b; + __m128i const_val16_8x16b; + + coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 + //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20 + const_val16_8x16b = _mm_set1_epi16(16); + + if(wd == 4) + { + //vertical q-pel filter + { + __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b; + __m128i src_r5_16x8b, src_r6_16x8b; + __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b; + + __m128i res_r0r1_16x8b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b; + + //epilogue: Load all 
the pred rows except sixth and seventh row for the + //first and second row processing. + src_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + + src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + src_r0_16x8b = _mm_unpacklo_epi32(src_r0_16x8b, src_r1_16x8b); + + src_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + src_r1_16x8b = _mm_unpacklo_epi32(src_r1_16x8b, src_r2_16x8b); + + src_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + src_r2_16x8b = _mm_unpacklo_epi32(src_r2_16x8b, src_r3_16x8b); + + src_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + src_r3_16x8b = _mm_unpacklo_epi32(src_r3_16x8b, src_r4_16x8b); + + //Core Loop: Process all the rows. + do + { + src_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert)); + src_r4_16x8b = _mm_unpacklo_epi32(src_r4_16x8b, src_r5_16x8b); + + src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert + src_strd)); + src_r5_16x8b = _mm_unpacklo_epi32(src_r5_16x8b, src_r6_16x8b); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b); + + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. 
+ res_r0r1_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b); + + _mm_storel_epi64((__m128i *)pu1_tmp1, res_r0r1_16x8b); + + src_r0_16x8b = src_r2_16x8b; + src_r1_16x8b = src_r3_16x8b; + src_r2_16x8b = src_r4_16x8b; + src_r3_16x8b = src_r5_16x8b; + src_r4_16x8b = src_r6_16x8b; + + ht_temp -= 2; + pu1_pred_vert += src_strd << 1; + pu1_tmp1 += 8; + } + while(ht_temp > 0); + } + + //horizontal q-pel filter + { + __m128i src_r0_16x8b, src_r1_16x8b; + __m128i src_r0_sht_16x8b, src_r1_sht_16x8b; + __m128i src_r0r1_vpel_16x8b, src_r0r1_t1_16x8b; + + __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b; + __m128i res_r0r1_16x8b; + + __m128i mask_low32b; + + mask_low32b = _mm_set1_epi8(0xff); + mask_low32b = _mm_srli_si128(mask_low32b, 12); + + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... + + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_pred_horiz); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 + + src_r0r1_vpel_16x8b = _mm_loadl_epi64((__m128i *)pu1_tmp2); + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4 + res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 + src_r1_16x8b = 
_mm_srli_si128(src_r1_16x8b, 4); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 + + src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6 + res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 0 0 0 0 + + src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8 + res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //b4*c4+b5*c5 b5*c4+b6*c5 b4*c6+b7*c5 b7*c4+b8*c5 + + res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b); + res_r0r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0r1_t3_8x16b); + res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t3_8x16b); //a0*c0+a1*c1+a2*c2+a3*c3+a4*a4+a5*c5 + 15; + //a1*c0+a2*c1+a2*c2+a3*c3+a5*a4+a6*c5 + 15; + //a2*c0+a3*c1+a4*c2+a5*c3+a6*a4+a7*c5 + 15; + //a3*c0+a4*c1+a5*c2+a6*c3+a6*a4+a8*c5 + 15; + //b0*c0+b1*c1+b2*c2+b3*c3+b4*b4+b5*c5 + 15; + //b1*c0+b2*c1+b2*c2+b3*c3+b5*b4+b6*c5 + 15; + //b2*c0+b3*c1+b4*c2+b5*c3+b6*b4+b7*c5 + 15; + //b3*c0+b4*c1+b5*c2+b6*c3+b6*b4+b8*c5 + 15; + + res_r0r1_t1_8x16b = _mm_srai_epi16(res_r0r1_t1_8x16b, 5); //shifting right by 5 bits. 
+ + res_r0r1_16x8b = _mm_packus_epi16(res_r0r1_t1_8x16b,res_r0r1_t1_8x16b); + + res_r0r1_16x8b = _mm_avg_epu8(res_r0r1_16x8b,src_r0r1_vpel_16x8b); + + _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)pu1_dst); + res_r0r1_16x8b = _mm_srli_si128(res_r0r1_16x8b, 4); + _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd)); + + ht -= 2; + pu1_pred_horiz += src_strd << 1; + pu1_tmp2 += 8; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + } + else if(wd == 8) + { + //vertical q-pel filter + { + __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b; + __m128i src_r4_16x8b, src_r5_16x8b, src_r6_16x8b; + __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b; + + __m128i res_16x8b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b; + + //epilogue: Load all the pred rows except sixth and seventh row for the + //first and second row processing. + src_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + + src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + src_r0_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); + + src_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + src_r1_16x8b = _mm_unpacklo_epi64(src_r1_16x8b, src_r2_16x8b); + + src_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + src_r2_16x8b = _mm_unpacklo_epi64(src_r2_16x8b, src_r3_16x8b); + + src_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + src_r3_16x8b = _mm_unpacklo_epi64(src_r3_16x8b, src_r4_16x8b); + + //Core Loop: Process all the rows. 
+ do + { + src_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert)); + src_r4_16x8b = _mm_unpacklo_epi64(src_r4_16x8b, src_r5_16x8b); + + src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert + src_strd)); + src_r5_16x8b = _mm_unpacklo_epi64(src_r5_16x8b, src_r6_16x8b); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b); + + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. + res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b); + + _mm_storel_epi64((__m128i *)(pu1_tmp1), res_16x8b); + + src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b); + + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. 
+ res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b); + + _mm_storel_epi64((__m128i *)(pu1_tmp1 + 8), res_16x8b); + + src_r0_16x8b = src_r2_16x8b; + src_r1_16x8b = src_r3_16x8b; + src_r2_16x8b = src_r4_16x8b; + src_r3_16x8b = src_r5_16x8b; + src_r4_16x8b = src_r6_16x8b; + + ht_temp -= 2; + pu1_pred_vert += src_strd << 1; + pu1_tmp1 += 16; + } + while(ht_temp > 0); + } + + //horizontal q-pel filter + { + __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b; + __m128i src_r0_t1_16x8b, src_r1_t1_16x8b; + __m128i src_r0_vpel_16x8b, src_r1_vpel_16x8b; + + __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b; + __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b, res_16x8b; + + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... + + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz)); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 + + src_r0_vpel_16x8b = _mm_loadl_epi64((__m128i *)(pu1_tmp2)); //a2 a3 a4 a5 a6 a7 a8....a15 0 or + //a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_vpel_16x8b = _mm_loadl_epi64((__m128i *)(pu1_tmp2 + 8)); + //b2 b3 b4 b5 b6 b7 b8....b15 0 or + //b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1 + res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 
b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10 + + res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3 + res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3 + //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12 + + res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5 + res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5 + //b8*c4+b9*c5 b9*c4+b10*c5 
b10*c4+b11*c5 b11*c4+b12*c5 + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b); + res_r0_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0_t3_8x16b); + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b); + res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5); //shifting right by 5 bits. + + res_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r0_t1_8x16b); + res_16x8b = _mm_avg_epu8(res_16x8b, src_r0_vpel_16x8b); + + _mm_storel_epi64((__m128i *)(pu1_dst), res_16x8b); + + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b); + res_r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r1_t3_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b); + res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5); //shifting right by 5 bits. + + res_16x8b = _mm_packus_epi16(res_r1_t1_8x16b, res_r1_t1_8x16b); + res_16x8b = _mm_avg_epu8(res_16x8b,src_r1_vpel_16x8b); + + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b); + + ht -= 2; + pu1_pred_horiz += src_strd << 1; + pu1_dst += dst_strd << 1; + pu1_tmp2 += 16; + } + while(ht > 0); + } + } + else // wd == 16 + { + //vertical q-pel filter + { + __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b; + __m128i src_r4_16x8b, src_r5_16x8b, src_r6_16x8b; + __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b; + + __m128i res_t0_8x16b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b; + __m128i res_16x8b; + + //epilogue: Load all the pred rows except sixth and seventh row for the + //first and second row processing. 
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + src_r2_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + src_r3_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + src_r4_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert)); + pu1_pred_vert = pu1_pred_vert + src_strd; + + //Core Loop: Process all the rows. + do + { + src_r5_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert)); + src_r6_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert + src_strd)); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. 
+ + src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b); + src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b); + src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. + + res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b); + + _mm_storeu_si128((__m128i *)(pu1_tmp1), res_16x8b); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. 
+ + src_r0r1_16x8b = _mm_unpackhi_epi8(src_r1_16x8b, src_r2_16x8b); + src_r2r3_16x8b = _mm_unpackhi_epi8(src_r3_16x8b, src_r4_16x8b); + src_r4r5_16x8b = _mm_unpackhi_epi8(src_r5_16x8b, src_r6_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b); + res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits. + + res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b); + + _mm_storeu_si128((__m128i *)(pu1_tmp1 + 16), res_16x8b); + + src_r0_16x8b = src_r2_16x8b; + src_r1_16x8b = src_r3_16x8b; + src_r2_16x8b = src_r4_16x8b; + src_r3_16x8b = src_r5_16x8b; + src_r4_16x8b = src_r6_16x8b; + + ht_temp -= 2; + pu1_pred_vert += src_strd << 1; + pu1_tmp1 += 32; + } + while(ht_temp > 0); + } + //horizontal q-pel filter + { + __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b; + __m128i src_r0_t1_16x8b, src_r1_t1_16x8b; + __m128i src_vpel_16x8b; + + __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b; + __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b; + __m128i res_16x8b; + + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row0 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... + //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels. 
+ + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz)); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 + src_vpel_16x8b = _mm_loadu_si128((__m128i *)(pu1_tmp2)); + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1 + res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10 + + res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3 + res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3 + //b6*c2+b7*c3 b7*c2+b8*c3 
b8*c2+b9*c3 b9*c2+b10*c3 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12 + + res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5 + res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5 + //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5 + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b); + res_r0_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0_t3_8x16b); + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b); + res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5); //shifting right by 5 bits. + + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, const_val16_8x16b); + res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5); //shifting right by 5 bits. 
+ + res_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r1_t1_8x16b); + + res_16x8b = _mm_avg_epu8(res_16x8b, src_vpel_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst), res_16x8b); + + ht --; + pu1_pred_horiz += src_strd; + pu1_dst += dst_strd; + pu1_tmp2 += 16; + } + while(ht > 0); + } + } +} +
/*****************************************************************************/
/*                                                                           */
/*  Function Name : ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3          */
/*                                                                           */
/*  Description   : This function implements a six-tap filter vertically and */
/*                  horizontally on ht x wd block separately and averages    */
/*                  the two sets of values to calculate values at (1/4,1/2), */
/*                  or (3/4, 1/2) as mentioned in sec. 8.4.2.2.1 titled      */
/*                  "Luma sample interpolation process". (ht,wd) can be      */
/*                  (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16).   */
/*                                                                           */
/*  Processing    : Pass 1 (vertical): the taps (1,-5,20,20,-5,1) are applied*/
/*                  down each of (wd + 5) columns starting two pixels to the */
/*                  left of the block. The unrounded, unscaled sums are      */
/*                  written as WORD16 to pu1_tmp, (wd + 5) entries per row.  */
/*                  Pass 2 (horizontal): the same taps are applied across    */
/*                  each intermediate row; the result is (sum + 512) >> 10,  */
/*                  saturated to 8 bits.                                     */
/*                  The vertical half-pel sample is (tmp + 16) >> 5,         */
/*                  saturated to 8 bits, read from intermediate column       */
/*                  2 + ((dydx & 3) >> 1). The output is the rounded-up      */
/*                  average (_mm_avg_epu8) of the two.                       */
/*                                                                           */
/*  Inputs        : pu1_src  - pointer to source (top-left of the block)     */
/*                  pu1_dst  - pointer to destination                        */
/*                  src_strd - stride for source                             */
/*                  dst_strd - stride for destination                        */
/*                  ht       - height of the block                           */
/*                  wd       - width of the block; any value other than 4    */
/*                             or 8 takes the 16-wide path                   */
/*                  pu1_tmp  - pointer to temporary buffer, accessed as      */
/*                             WORD16 (assumed suitably aligned - confirm    */
/*                             against the caller)                           */
/*                  dydx     - x and y reference offset for q-pel            */
/*                             calculations; only (dydx & 0x3) is used here  */
/*                                                                           */
/*  Notes         : - For wd 4 and 8 the vertical pass produces two rows per */
/*                    iteration, so ht is expected to be even.               */
/*                  - For wd 8 (16) the vertical pass loads 16 (32) source   */
/*                    bytes per row and stores 16 (24) WORD16 per row while  */
/*                    advancing by 13 (21); the last row therefore writes    */
/*                    3 WORD16 past ht * (wd + 5). pu1_tmp and the source    */
/*                    must leave room for this.                              */
/*                  - The loops are do-while, so ht is expected to be > 0.   */
/*                                                                           */
/*  Issues        : None                                                     */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes                              */
/*         13 02 2015   Kaushik         Initial Version                      */
/*                      Senthoor                                             */
/*                                                                           */
/*****************************************************************************/
void ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3(UWORD8 *pu1_src,
                                                     UWORD8 *pu1_dst,
                                                     WORD32 src_strd,
                                                     WORD32 dst_strd,
                                                     WORD32 ht,
                                                     WORD32 wd,
                                                     UWORD8* pu1_tmp,
                                                     WORD32 dydx)
{
    WORD32 ht_temp;
    WORD32 x_offset;
    WORD32 off0, off1, off2, off3, off4, off5;
    WORD16 *pi2_temp1, *pi2_temp2, *pi2_temp3;

    ht_temp = ht;
    x_offset = dydx & 0x3;
    pi2_temp1 = (WORD16 *)pu1_tmp;            //write cursor of the vertical pass
    pi2_temp2 = pi2_temp1;                    //read cursor of the horizontal pass
    pi2_temp3 = pi2_temp1 + (x_offset >> 1);  //read cursor for the vertical half-pel sample

    pu1_src -= 2 * src_strd;
    pu1_src -= 2;
    pi2_temp3 += 2;
    //the filter input starts from x[-2] (till x[3])

    if(wd == 4)
    {
        //vertical half-pel: 9 intermediate columns per row (8 by SIMD + 1 scalar)
        {
            __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
            __m128i src_r5_16x8b, src_r6_16x8b;
            __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;

            __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;

            __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;

            coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
            coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
            coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
                                                          //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20

            //Offsets of the six vertical taps of column 8, relative to the
            //row pu1_src points at when the scalar filter below runs (the
            //newest of the six rows). Written as multiplications so that no
            //shift is ever applied to a possibly negative value.
            off0 = 8 - 5 * src_strd;
            off1 = 8 - 4 * src_strd;
            off2 = 8 - 3 * src_strd;
            off3 = 8 - 2 * src_strd;
            off4 = 8 - src_strd;
            off5 = 8;

            //prologue: Load all the pred rows except sixth and seventh row for the
            //first and second row processing.
            src_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
            pu1_src = pu1_src + src_strd;

            src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
            pu1_src = pu1_src + src_strd;

            src_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
            pu1_src = pu1_src + src_strd;

            src_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
            pu1_src = pu1_src + src_strd;

            src_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
            pu1_src = pu1_src + src_strd;

            //Core Loop: Process all the rows, two output rows per iteration.
            do
            {
                src_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));

                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);

                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);

                res_t1_8x16b = _mm_add_epi16(res_t2_8x16b, res_t1_8x16b);
                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);

                //columns 0..7 of the intermediate row
                _mm_storeu_si128((__m128i *)(pi2_temp1), res_t1_8x16b);

                //column 8 of the intermediate row: scalar six-tap filter
                //a + f - 5 * (b + e) + 20 * (c + d)
                pi2_temp1[8] = pu1_src[off0] + pu1_src[off5]
                             - 5 * (pu1_src[off1] + pu1_src[off4])
                             + 20 * (pu1_src[off2] + pu1_src[off3]);

                pu1_src = pu1_src + src_strd;
                pi2_temp1 = pi2_temp1 + 9;

                src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));

                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b);
                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b);
                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b);

                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);

                res_t1_8x16b = _mm_add_epi16(res_t2_8x16b, res_t1_8x16b);
                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);

                _mm_storeu_si128((__m128i *)(pi2_temp1), res_t1_8x16b);

                //column 8 of the second intermediate row
                pi2_temp1[8] = pu1_src[off0] + pu1_src[off5]
                             - 5 * (pu1_src[off1] + pu1_src[off4])
                             + 20 * (pu1_src[off2] + pu1_src[off3]);

                ht_temp -= 2;
                pu1_src = pu1_src + src_strd;
                pi2_temp1 = pi2_temp1 + 9;

                //slide the row window down by two rows
                src_r0_16x8b = src_r2_16x8b;
                src_r1_16x8b = src_r3_16x8b;
                src_r2_16x8b = src_r4_16x8b;
                src_r3_16x8b = src_r5_16x8b;
                src_r4_16x8b = src_r6_16x8b;
            }
            while(ht_temp > 0);
        }

        //horizontal q-pel
        {
            __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b;
            __m128i src_r3_8x16b, src_r4_8x16b, src_r5_8x16b;
            __m128i src_r0r1_c0_8x16b, src_r2r3_c0_8x16b, src_r4r5_c0_8x16b;
            __m128i src_hpel_16x8b, src_hpel_8x16b;

            __m128i res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
            __m128i res_8x16b, res_16x8b;

            __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
            __m128i const_val512_4x32b, const_val16_8x16b;
            __m128i mask_low32b;

            mask_low32b = _mm_set1_epi8(0xff);

            coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);  //c0 c1 as 16-bit lanes:  1, -5
            coeff2_3_8x16b = _mm_set1_epi32(0x00140014);  //c2 c3 as 16-bit lanes: 20, 20
            coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);  //c4 c5 as 16-bit lanes: -5,  1

            //only the lowest four bytes stay set: selects the 4 output pixels
            mask_low32b = _mm_srli_si128(mask_low32b, 12);

            const_val512_4x32b = _mm_set1_epi32(512);
            const_val16_8x16b = _mm_set1_epi16(16);

            do
            {
                //t0..t3 and t1..t8 of the current intermediate row; the
                //remaining tap inputs are byte-shifted copies of t1..t8
                src_r0_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2));
                src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 1));
                src_r2_8x16b = _mm_srli_si128(src_r1_8x16b, 2);
                src_r3_8x16b = _mm_srli_si128(src_r1_8x16b, 4);
                src_r4_8x16b = _mm_srli_si128(src_r1_8x16b, 6);
                src_r5_8x16b = _mm_srli_si128(src_r1_8x16b, 8);

                src_r0r1_c0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
                src_r2r3_c0_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
                src_r4r5_c0_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);

                res_t1_4x32b = _mm_madd_epi16(src_r0r1_c0_8x16b, coeff0_1_8x16b);
                res_t2_4x32b = _mm_madd_epi16(src_r2r3_c0_8x16b, coeff2_3_8x16b);
                res_t3_4x32b = _mm_madd_epi16(src_r4r5_c0_8x16b, coeff4_5_8x16b);

                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
                res_t3_4x32b = _mm_add_epi32(const_val512_4x32b, res_t3_4x32b);
                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);  //(sum + 512) >> 10

                res_8x16b = _mm_packs_epi32(res_t1_4x32b, res_t1_4x32b);
                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);

                //vertical half-pel sample: (tmp + 16) >> 5, saturated to 8 bits
                src_hpel_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp3));
                src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
                src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5);  //shifting right by 5 bits.
                src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);

                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);

                //write exactly 4 bytes of output
                _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)pu1_dst);

                ht--;
                pi2_temp2 = pi2_temp2 + 4 + 5;
                pi2_temp3 = pi2_temp3 + 4 + 5;
                pu1_dst = pu1_dst + dst_strd;
            }
            while(ht > 0);
        }
    }
    else if(wd == 8)
    {
        // vertical half-pel: 13 intermediate columns per row are needed; 16
        // are computed and stored, the 3 extra ones are overwritten by the
        // next row (or land past the last row)
        {
            __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
            __m128i src_r5_16x8b, src_r6_16x8b;
            __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;

            __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;

            __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;

            coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
            coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
            coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5

            //prologue: Load all the pred rows except sixth and seventh row for the
            //first and second row processing.
            src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
            pu1_src = pu1_src + src_strd;

            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
            pu1_src = pu1_src + src_strd;

            src_r2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
            pu1_src = pu1_src + src_strd;

            src_r3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
            pu1_src = pu1_src + src_strd;

            src_r4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
            pu1_src = pu1_src + src_strd;

            //Core Loop: Process all the rows, two output rows per iteration.
            do
            {
                src_r5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
                src_r6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));

                //first row, columns 0..7
                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);

                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);

                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);

                _mm_storeu_si128((__m128i *)(pi2_temp1), res_t1_8x16b);

                //first row, columns 8..15
                src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
                src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
                src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);

                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);

                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);

                _mm_storeu_si128((__m128i *)(pi2_temp1 + 8), res_t1_8x16b);

                //second row, columns 0..7 (row stride in the temp buffer is 13)
                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b);
                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b);
                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b);

                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);

                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);

                _mm_storeu_si128((__m128i *)(pi2_temp1 + 8 + 5), res_t1_8x16b);

                //second row, columns 8..15
                src_r0r1_16x8b = _mm_unpackhi_epi8(src_r1_16x8b, src_r2_16x8b);
                src_r2r3_16x8b = _mm_unpackhi_epi8(src_r3_16x8b, src_r4_16x8b);
                src_r4r5_16x8b = _mm_unpackhi_epi8(src_r5_16x8b, src_r6_16x8b);

                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);

                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);

                _mm_storeu_si128((__m128i *)(pi2_temp1 + 8 + 5 + 8), res_t1_8x16b);

                //slide the row window down by two rows
                src_r0_16x8b = src_r2_16x8b;
                src_r1_16x8b = src_r3_16x8b;
                src_r2_16x8b = src_r4_16x8b;
                src_r3_16x8b = src_r5_16x8b;
                src_r4_16x8b = src_r6_16x8b;

                ht_temp -= 2;
                pu1_src = pu1_src + 2 * src_strd;
                pi2_temp1 = pi2_temp1 + (13 << 1);
            }
            while(ht_temp > 0);
        }
        // horizontal q-pel
        {
            __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
            __m128i src_r4_8x16b, src_r5_8x16b;
            __m128i src_r0r1_c0_8x16b, src_r2r3_c0_8x16b, src_r4r5_c0_8x16b;
            __m128i src_r0r1_c1_8x16b, src_r2r3_c1_8x16b, src_r4r5_c1_8x16b;
            __m128i src_hpel_8x16b, src_hpel_16x8b;

            __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
            __m128i res_8x16b, res_16x8b;

            __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
            __m128i const_val512_4x32b, const_val16_8x16b;

            coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);  //c0 c1 as 16-bit lanes:  1, -5
            coeff2_3_8x16b = _mm_set1_epi32(0x00140014);  //c2 c3 as 16-bit lanes: 20, 20
            coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);  //c4 c5 as 16-bit lanes: -5,  1

            const_val512_4x32b = _mm_set1_epi32(512);
            const_val16_8x16b = _mm_set1_epi16(16);

            do
            {
                //six overlapping windows of the intermediate row, one per tap
                src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
                src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 1));
                src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 2));
                src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 3));
                src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 4));
                src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 5));

                src_r0r1_c0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
                src_r2r3_c0_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
                src_r4r5_c0_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);

                src_r0r1_c1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
                src_r2r3_c1_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
                src_r4r5_c1_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);

                //output pixels 0..3
                res_t1_4x32b = _mm_madd_epi16(src_r0r1_c0_8x16b, coeff0_1_8x16b);
                res_t2_4x32b = _mm_madd_epi16(src_r2r3_c0_8x16b, coeff2_3_8x16b);
                res_t3_4x32b = _mm_madd_epi16(src_r4r5_c0_8x16b, coeff4_5_8x16b);

                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);

                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);  //(sum + 512) >> 10

                //output pixels 4..7
                res_t1_4x32b = _mm_madd_epi16(src_r0r1_c1_8x16b, coeff0_1_8x16b);
                res_t2_4x32b = _mm_madd_epi16(src_r2r3_c1_8x16b, coeff2_3_8x16b);
                res_t3_4x32b = _mm_madd_epi16(src_r4r5_c1_8x16b, coeff4_5_8x16b);

                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);

                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);

                res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
                res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);

                //vertical half-pel sample: (tmp + 16) >> 5, saturated to 8 bits
                src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3));
                src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
                src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5);  //shifting right by 5 bits.
                src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);

                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);

                _mm_storel_epi64((__m128i *)(pu1_dst), res_16x8b);

                ht--;
                pi2_temp2 = pi2_temp2 + 8 + 5;
                pi2_temp3 = pi2_temp3 + 8 + 5;
                pu1_dst = pu1_dst + dst_strd;
            }
            while(ht > 0);
        }
    }
    else // wd == 16
    {
        // vertical half-pel: 21 intermediate columns per row are needed; 24
        // are computed and stored, the 3 extra ones are overwritten by the
        // next row (or land past the last row). One output row per iteration.
        {
            __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b;
            __m128i src_r4_16x8b, src_r5_16x8b;
            __m128i src_r0_c2_16x8b, src_r1_c2_16x8b, src_r2_c2_16x8b, src_r3_c2_16x8b;
            __m128i src_r4_c2_16x8b, src_r5_c2_16x8b;
            __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;

            __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;

            __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;

            coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01);  //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
            coeff2_3_16x8b = _mm_set1_epi32(0x14141414);  //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
            coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB);  //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5

            //prologue: load the first five rows; "_c2" holds source bytes 16..31
            src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
            src_r0_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
            pu1_src = pu1_src + src_strd;
            src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
            src_r1_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
            pu1_src = pu1_src + src_strd;
            src_r2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
            src_r2_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
            pu1_src = pu1_src + src_strd;
            src_r3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
            src_r3_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
            pu1_src = pu1_src + src_strd;
            src_r4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
            src_r4_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
            pu1_src = pu1_src + src_strd;

            //Core Loop: Process all the rows.
            do
            {
                src_r5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
                src_r5_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));

                //columns 0..7
                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);

                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);

                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);

                _mm_storeu_si128((__m128i *)(pi2_temp1), res_t1_8x16b);

                //columns 8..15
                src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
                src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
                src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);

                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);

                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);

                _mm_storeu_si128((__m128i *)(pi2_temp1 + 8), res_t1_8x16b);

                //columns 16..23 (only 16..20 are consumed by the horizontal pass)
                src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_c2_16x8b, src_r1_c2_16x8b);
                src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_c2_16x8b, src_r3_c2_16x8b);
                src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_c2_16x8b, src_r5_c2_16x8b);

                res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
                res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
                res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);

                res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
                res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);

                _mm_storeu_si128((__m128i *)(pi2_temp1 + 16), res_t1_8x16b);

                //slide the row window down by one row
                src_r0_16x8b = src_r1_16x8b;
                src_r1_16x8b = src_r2_16x8b;
                src_r2_16x8b = src_r3_16x8b;
                src_r3_16x8b = src_r4_16x8b;
                src_r4_16x8b = src_r5_16x8b;

                src_r0_c2_16x8b = src_r1_c2_16x8b;
                src_r1_c2_16x8b = src_r2_c2_16x8b;
                src_r2_c2_16x8b = src_r3_c2_16x8b;
                src_r3_c2_16x8b = src_r4_c2_16x8b;
                src_r4_c2_16x8b = src_r5_c2_16x8b;

                ht_temp--;
                pu1_src = pu1_src + src_strd;
                pi2_temp1 = pi2_temp1 + 16 + 5;
            }
            while(ht_temp > 0);
        }
        // horizontal q-pel
        {
            __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
            __m128i src_r4_8x16b, src_r5_8x16b;
            __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
            __m128i src_hpel1_8x16b, src_hpel2_8x16b, src_hpel_16x8b;

            __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
            __m128i res_c0_8x16b, res_c1_8x16b, res_16x8b;

            __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
            __m128i const_val512_4x32b, const_val16_8x16b;

            coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);  //c0 c1 as 16-bit lanes:  1, -5
            coeff2_3_8x16b = _mm_set1_epi32(0x00140014);  //c2 c3 as 16-bit lanes: 20, 20
            coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);  //c4 c5 as 16-bit lanes: -5,  1

            const_val512_4x32b = _mm_set1_epi32(512);
            const_val16_8x16b = _mm_set1_epi16(16);

            do
            {
                //left half of the row: output pixels 0..7
                src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
                src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 1));
                src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 2));
                src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 3));
                src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 4));
                src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 5));

                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);

                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);

                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
                res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);  //(sum + 512) >> 10

                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);

                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);

                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
                res_t3_4x32b = _mm_add_epi32(const_val512_4x32b, res_t3_4x32b);
                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);

                res_c0_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);

                //right half of the row: output pixels 8..15
                src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8));
                src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 1));
                src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 2));
                src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 3));
                src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 4));
                src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 5));

                src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
                src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
                src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);

                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);

                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
                res_t3_4x32b = _mm_add_epi32(const_val512_4x32b, res_t3_4x32b);
                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
                res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);

                src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
                src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
                src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);

                res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
                res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
                res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);

                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
                res_t3_4x32b = _mm_add_epi32(const_val512_4x32b, res_t3_4x32b);
                res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
                res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);

                res_c1_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
                res_16x8b = _mm_packus_epi16(res_c0_8x16b, res_c1_8x16b);

                //vertical half-pel samples: (tmp + 16) >> 5, saturated to 8 bits
                src_hpel1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3));
                src_hpel1_8x16b = _mm_add_epi16(src_hpel1_8x16b, const_val16_8x16b);
                src_hpel1_8x16b = _mm_srai_epi16(src_hpel1_8x16b, 5);  //shifting right by 5 bits.

                src_hpel2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3 + 8));
                src_hpel2_8x16b = _mm_add_epi16(src_hpel2_8x16b, const_val16_8x16b);
                src_hpel2_8x16b = _mm_srai_epi16(src_hpel2_8x16b, 5);  //shifting right by 5 bits.

                src_hpel_16x8b = _mm_packus_epi16(src_hpel1_8x16b, src_hpel2_8x16b);
                res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);

                _mm_storeu_si128((__m128i *)(pu1_dst), res_16x8b);

                ht--;
                pi2_temp2 = pi2_temp2 + 16 + 5;
                pi2_temp3 = pi2_temp3 + 16 + 5;
                pu1_dst = pu1_dst + dst_strd;
            }
            while(ht > 0);
        }
    }
}
+ +/*****************************************************************************/ +/* */ +/* Function Name : ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3 */ +/* */ +/* Description : This function implements a six-tap filter vertically and */ +/* horizontally on ht x wd block separately and averages */ +/* the two sets of values to calculate values at (1/2,1/4), */ +/* or (1/2, 3/4) as mentioned in sec. 8.4.2.2.1 titled */ +/* "Luma sample interpolation process". (ht,wd) can be */ +/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16).
*/ +/* */ +/* Inputs : pu1_src - pointer to source */ +/* pu1_dst - pointer to destination */ +/* src_strd - stride for source */ +/* dst_strd - stride for destination */ +/* ht - height of the block */ +/* wd - width of the block */ +/* pu1_tmp - pointer to temporary buffer (WORD16 scratch) */ +/* dydx - x and y reference offset for q-pel */ +/* calculations; only (dydx & 0xf) >> 2 is read */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 13 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd, + UWORD8* pu1_tmp, + WORD32 dydx) +{ + WORD32 ht_temp; + WORD32 y_offset; + WORD16 *pi2_temp1,*pi2_temp2,*pi2_temp3; + + y_offset = (dydx & 0xf) >> 2; /* bits 2-3 of dydx */ + pi2_temp1 = (WORD16 *)pu1_tmp; /* write cursor of the horizontal pass */ + pi2_temp2 = pi2_temp1; /* read cursor of the vertical pass */ + pi2_temp3 = pi2_temp1 + (y_offset >> 1) * wd; /* one scratch row (wd WORD16) down when y_offset >> 1 is 1 */ + + ht_temp = ht + 5; /* rows left for the horizontal pass; 5 extra rows feed the 6-tap vertical filter */ + pu1_src -= src_strd << 1; /* 2 rows up */ + pu1_src -= 2; /* 2 columns left */ + pi2_temp3 += wd << 1; /* skip 2 scratch rows: pi2_temp3 now addresses the horizontal row that is averaged into the output */ + //the filter input starts from x[-2] (till x[3]) + + if(wd == 4) + { + // horizontal half-pel: unrounded 6-tap sums are kept as WORD16 in the scratch + { + __m128i src_r0_16x8b, src_r1_16x8b, src_r0r1_t1_16x8b; + __m128i src_r0_sht_16x8b, src_r1_sht_16x8b; + __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b; + __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b; + + coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 + + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... 
+ + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4 + res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 + + src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6 + res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 0 0 0 0 + + src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8 + res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5 + + res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b); + 
res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t3_8x16b); + + + _mm_storeu_si128((__m128i *)(pi2_temp1), res_r0r1_t1_8x16b); /* 4 sums of row a, then 4 sums of row b */ + + ht_temp -= 2; + pu1_src = pu1_src + (src_strd << 1); + pi2_temp1 = pi2_temp1 + (4 << 1); + } + while(ht_temp > 0); + } + // vertical q-pel + { + __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b; + __m128i src_r4_8x16b, src_r5_8x16b, src_r6_8x16b; + __m128i src_r0r1_c0_8x16b, src_r2r3_c0_8x16b, src_r4r5_c0_8x16b; + __m128i src_hpel_16x8b, src_hpel_8x16b; + + __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b; + __m128i res_8x16b, res_16x8b; + + __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b; + __m128i const_val512_4x32b, const_val16_8x16b; + __m128i mask_low32b; + + mask_low32b = _mm_set1_epi8(0xff); + const_val512_4x32b = _mm_set1_epi32(512); + const_val16_8x16b = _mm_set1_epi16(16); + mask_low32b = _mm_srli_si128(mask_low32b, 12); /* only the lowest 4 bytes stay set */ + + coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001); /* 16-bit taps 1, -5 */ + coeff2_3_8x16b = _mm_set1_epi32(0x00140014); /* 16-bit taps 20, 20 */ + coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB); /* 16-bit taps -5, 1 */ + + src_r0_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2)); + src_r1_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 4)); + src_r2_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 8)); + src_r3_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 12)); + src_r4_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 16)); + pi2_temp2 += 20; /* 5 preloaded rows of 4 */ + + do + { + src_r5_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2)); + src_r6_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 4)); + + src_r0r1_c0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_c0_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_c0_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_c0_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_c0_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_c0_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, 
res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); /* first row of the pair: (sum + 512) >> 10 */ + + src_r0r1_c0_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_c0_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_c0_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_c0_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_c0_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_c0_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); /* second row of the pair */ + + res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + src_hpel_8x16b = _mm_loadu_si128((__m128i *)pi2_temp3); /* horizontal sums of both rows */ + src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b); + src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits. 
+ src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b); + + res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b); /* average of the vertically filtered value and the horizontal half-pel value */ + + _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char *)(pu1_dst)); /* 4 bytes of the first row */ + res_16x8b = _mm_srli_si128(res_16x8b, 4); /* second row's 4 bytes move to the bottom */ + _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char *)(pu1_dst + dst_strd)); + + src_r0_8x16b = src_r2_8x16b; + src_r1_8x16b = src_r3_8x16b; + src_r2_8x16b = src_r4_8x16b; + src_r3_8x16b = src_r5_8x16b; + src_r4_8x16b = src_r6_8x16b; /* row window moved down by 2 */ + + ht -= 2; + pi2_temp2 = pi2_temp2 + (4 << 1); + pi2_temp3 = pi2_temp3 + (4 << 1); + pu1_dst = pu1_dst + (dst_strd << 1); + } + while(ht > 0); + } + } + else if(wd == 8) + { + // horizontal half-pel + { + __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b; + __m128i src_r0_t1_16x8b, src_r1_t1_16x8b; + + __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b; + __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b; + + __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b; + + coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 + + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... 
+ + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src)); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1 + res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 + + res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3 + res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3 + //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3 + + src_r0_16x8b = 
_mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12 + + res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5 + res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5 + //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5 + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b); + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b); + + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b); + + _mm_storeu_si128((__m128i *)(pi2_temp1), res_r0_t1_8x16b); + _mm_storeu_si128((__m128i *)(pi2_temp1 + 8), res_r1_t1_8x16b); /* row b lands one scratch row (8 WORD16) after row a */ + + ht_temp -= 2; + pu1_src = pu1_src + (src_strd << 1); + pi2_temp1 = pi2_temp1 + (8 << 1); + } + while(ht_temp > 0); + } + // vertical q-pel + { + __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b; + __m128i src_r4_8x16b, src_r5_8x16b, src_r6_8x16b; + __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b; + __m128i src_hpel_8x16b, src_hpel_16x8b; + + __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b; + __m128i res_8x16b, res_16x8b; + + __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b; + __m128i const_val512_4x32b, const_val16_8x16b; + + coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001); /* 16-bit taps 1, -5 */ + coeff2_3_8x16b 
= _mm_set1_epi32(0x00140014); /* 16-bit taps 20, 20 */ + coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB); /* 16-bit taps -5, 1 */ + + const_val512_4x32b = _mm_set1_epi32(512); + const_val16_8x16b = _mm_set1_epi16(16); + + src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2)); + src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8)); + src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16)); + src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 24)); + src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 32)); + pi2_temp2 += 40; /* 5 preloaded rows of 8 */ + + do + { + src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2)); + src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8)); + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); /* columns 0-3: (sum + 512) >> 10 */ + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); /* columns 4-7 */ + + res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b); + res_16x8b = 
_mm_packus_epi16(res_8x16b, res_8x16b); + + src_hpel_8x16b = _mm_loadu_si128((__m128i *)pi2_temp3); + src_hpel_8x16b = _mm_add_epi16(const_val16_8x16b, src_hpel_8x16b); + src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits. + src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b); + + res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b); + + _mm_storel_epi64((__m128i *)(pu1_dst), res_16x8b); /* first row of the pair */ + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3 + 8)); /* next scratch row */ + src_hpel_8x16b = _mm_add_epi16(const_val16_8x16b, src_hpel_8x16b); + src_hpel_8x16b = 
_mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits. + src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b); + + res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b); + + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b); /* second row of the pair */ + + src_r0_8x16b = src_r2_8x16b; + src_r1_8x16b = src_r3_8x16b; + src_r2_8x16b = src_r4_8x16b; + src_r3_8x16b = src_r5_8x16b; + src_r4_8x16b = src_r6_8x16b; + + ht -= 2; + pi2_temp2 = pi2_temp2 + (8 << 1); + pi2_temp3 = pi2_temp3 + (8 << 1); + pu1_dst = pu1_dst + (dst_strd << 1); + } + while(ht > 0); + } + } + else // wd == 16 + { + UWORD8 *pu1_dst1; + WORD16 *pi2_temp4,*pi2_temp5; + + pu1_dst1 = pu1_dst + 8; /* right 8 columns of the destination */ + pi2_temp4 = pi2_temp2 + 8; /* right 8 columns of the scratch rows */ + pi2_temp5 = pi2_temp3 + 8; /* right 8 columns of the horizontal row to average */ + + // horizontal half-pel + { + __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b; + __m128i src_r0_t1_16x8b, src_r1_t1_16x8b; + + __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b; + __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b; + + __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b; + + coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 + + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row0 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... + //b0 is same as a8. Similarly other bn pixels are same as a(n+8) pixels. 
+ + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src)); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1 + res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 + + res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3 + res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3 + //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 
2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12 + + res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5 + res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5 + //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5 + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b); + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b); + + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b); + + _mm_storeu_si128((__m128i *)(pi2_temp1), res_r0_t1_8x16b); + _mm_storeu_si128((__m128i *)(pi2_temp1 + 8), res_r1_t1_8x16b); + + ht_temp--; /* one 16-wide row per iteration */ + pu1_src = pu1_src + src_strd; + pi2_temp1 = pi2_temp1 + 16; + } + while(ht_temp > 0); + } + // vertical q-pel + { + __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b, src_r4_8x16b; + __m128i src_r5_8x16b, src_r6_8x16b; + __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b; + __m128i src_hpel_8x16b, src_hpel_16x8b; + + __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b; + __m128i res_8x16b, res_16x8b; + + __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b; + __m128i const_val512_4x32b, const_val16_8x16b; + + coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001); /* 16-bit taps 1, -5 */ + coeff2_3_8x16b = _mm_set1_epi32(0x00140014); /* 16-bit taps 20, 20 */ + 
coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB); /* 16-bit taps -5, 1 */ + + const_val512_4x32b = _mm_set1_epi32(512); + const_val16_8x16b = _mm_set1_epi16(16); + + /**********************************************************/ + /* Do first height x 8 block */ + /**********************************************************/ + src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2)); + src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16)); + src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 32)); + src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 48)); + src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 64)); + pi2_temp2 += 80; /* 5 preloaded rows of 16 */ + + ht_temp = ht; /* ht itself is left untouched for the second half's loop */ + do + { + src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2)); + src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16)); + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); 
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3)); + src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b); + src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits. + src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b); + + res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst), res_16x8b); + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + src_hpel_8x16b = _mm_loadu_si128((__m128i 
*)(pi2_temp3 + 16)); + src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b); + src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits. + src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b); + + res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b); + + src_r0_8x16b = src_r2_8x16b; + src_r1_8x16b = src_r3_8x16b; + src_r2_8x16b = src_r4_8x16b; + src_r3_8x16b = src_r5_8x16b; + src_r4_8x16b = src_r6_8x16b; + + ht_temp -= 2; + pi2_temp3 = pi2_temp3 + (16 << 1); + pi2_temp2 = pi2_temp2 + (16 << 1); + pu1_dst = pu1_dst + (dst_strd << 1); + } + while(ht_temp > 0); + + /**********************************************************/ + /* Do second height x 8 block */ + /**********************************************************/ + src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4)); + src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 16)); + src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 32)); + src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 48)); + src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 64)); + pi2_temp4 += 80; + + do + { + src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4)); + src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 16)); + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, 
src_r1_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp5)); + src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b); + src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits. + src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b); + + res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst1), res_16x8b); + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, 
coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp5 + 16)); + src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b); + src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits. + src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b); + + res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst1 + dst_strd), res_16x8b); + + src_r0_8x16b = src_r2_8x16b; + src_r1_8x16b = src_r3_8x16b; + src_r2_8x16b = src_r4_8x16b; + src_r3_8x16b = src_r5_8x16b; + src_r4_8x16b = src_r6_8x16b; + + ht -= 2; + pi2_temp5 = pi2_temp5 + (16 << 1); + pi2_temp4 = pi2_temp4 + (16 << 1); + pu1_dst1 = pu1_dst1 + (dst_strd << 1); + } + while(ht > 0); + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_inter_pred_chroma_ssse3 */ +/* */ +/* Description : This function implements a four-tap 2D filter as */ +/* mentioned in sec. 8.4.2.2.2 titled "Chroma sample */ +/* "interpolation process". (ht,wd) can be (2,2), (4,2), */ +/* (2,4), (4,4), (8,4), (4,8) or (8,8). 
*/ +/* */ +/* Inputs : pu1_src - pointer to source */ +/* pu1_dst - pointer to destination */ +/* src_strd - stride for source */ +/* dst_strd - stride for destination */ +/* dx - x position of destination value (used as the horizontal weight, paired with 8 - dx) */ +/* dy - y position of destination value (used as the vertical weight, paired with 8 - dy) */ +/* ht - height of the block */ +/* wd - width of the block: 2 and 4 have dedicated paths, any other value takes the wd == 8 path */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 13 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_inter_pred_chroma_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 dx, + WORD32 dy, + WORD32 ht, + WORD32 wd) +{ + WORD32 i, j, A, B, C, D; /* A..D: weights of the four neighbouring samples used for every output byte */ + + i = 8 - dx; + j = 8 - dy; + + A = i * j; + B = dx * j; + C = i * dy; + D = dx * dy; /* A + B + C + D == 64 for any dx, dy, hence the (x + 32) >> 6 rounding used below */ + + if(wd == 2) + { + WORD32 tmp1, tmp2, tmp3, tmp4; /* scalar path: two output rows of four bytes are produced per loop iteration */ + + do + { + //U + tmp1 = A * pu1_src[0] + B * pu1_src[2] + C * pu1_src[src_strd] + D * pu1_src[src_strd + 2]; + tmp2 = A * pu1_src[2] + B * pu1_src[4] + C * pu1_src[src_strd + 2] + D * pu1_src[src_strd + 4]; + //V + tmp3 = A * pu1_src[1] + B * pu1_src[3] + C * pu1_src[src_strd + 1] + D * pu1_src[src_strd + 3]; + tmp4 = A * pu1_src[3] + B * pu1_src[5] + C * pu1_src[src_strd + 3] + D * pu1_src[src_strd + 5]; + + tmp1 = (tmp1 + 32) >> 6; + tmp2 = (tmp2 + 32) >> 6; + tmp3 = (tmp3 + 32) >> 6; + tmp4 = (tmp4 + 32) >> 6; + + pu1_dst[0] = CLIP_U8(tmp1); + pu1_dst[2] = CLIP_U8(tmp2); + pu1_dst[1] = CLIP_U8(tmp3); + pu1_dst[3] = CLIP_U8(tmp4); + + pu1_src += src_strd; + pu1_dst += dst_strd; + + tmp1 = A * pu1_src[0] + B * pu1_src[2] + C * pu1_src[src_strd] + D * pu1_src[src_strd + 2]; + tmp2 = A * pu1_src[2] + B * pu1_src[4] + C * pu1_src[src_strd + 2] + D * pu1_src[src_strd + 4]; + tmp3 = A * pu1_src[1] + B * pu1_src[3] + C * pu1_src[src_strd + 1] + D * pu1_src[src_strd + 3]; + tmp4 = A * pu1_src[3] + B * pu1_src[5] + C * pu1_src[src_strd + 3] + D * pu1_src[src_strd + 5]; + + tmp1 = 
(tmp1 + 32) >> 6; + tmp2 = (tmp2 + 32) >> 6; + tmp3 = (tmp3 + 32) >> 6; + tmp4 = (tmp4 + 32) >> 6; + + pu1_dst[0] = CLIP_U8(tmp1); + pu1_dst[2] = CLIP_U8(tmp2); + pu1_dst[1] = CLIP_U8(tmp3); + pu1_dst[3] = CLIP_U8(tmp4); + + ht -= 2; + pu1_src += src_strd; + pu1_dst += dst_strd; + } + while(ht > 0); + + /* NOTE(review): the block that follows is a disabled (commented-out) SSSE3 variant of the wd == 2 path; only the scalar loop above executes. */ /* + WORD32 AB, CD; + + __m128i src_r1_16x8b, src_r2_16x8b, src_r3_16x8b; + __m128i src_r1r2_16x8b, src_r2r3_16x8b; + __m128i res_AB_8x16b, res_CD_8x16b, res_8x16b, res_16x8b; + __m128i mask_low32b; + + __m128i coeffAB_16x8b, coeffCD_16x8b, round_add32_8x16b; + __m128i const_shuff_16x8b; + + AB = (B << 8) + A; + CD = (D << 8) + C; + + coeffAB_16x8b = _mm_set1_epi16(AB); + coeffCD_16x8b = _mm_set1_epi16(CD); + + round_add32_8x16b = _mm_set1_epi16(32); + + mask_low32b = _mm_set1_epi8(0xff); + src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); //u1[0] v1[0] u1[1] v1[1] u1[2] v1[2] u1[3] v1[3] + pu1_src += src_strd; + + const_shuff_16x8b = _mm_setr_epi32(0x03010200, 0x05030402, 0x0b090a08, 0x0d0b0c0a); + mask_low32b = _mm_srli_si128(mask_low32b, 12); + + do + { + src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); //u2[0] v2[0] u2[1] v2[1] u1[2] v2[2] u2[3] v2[3] + src_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); //u3[0] v3[0] u3[1] v3[1] u3[2] v3[2] u3[3] v3[3] + + src_r1r2_16x8b = _mm_unpacklo_epi64(src_r1_16x8b, src_r2_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi64(src_r2_16x8b, src_r3_16x8b); + + src_r1r2_16x8b = _mm_shuffle_epi8(src_r1r2_16x8b, const_shuff_16x8b); //u1[0] u1[1] v1[0] v1[1] u1[1] u1[2] v1[1] v1[2] + //u2[0] u2[1] v2[0] v2[1] u2[1] u2[2] v2[1] v2[2] + src_r2r3_16x8b = _mm_shuffle_epi8(src_r2r3_16x8b, const_shuff_16x8b); //u2[0] u2[1] v2[0] v2[1] u2[1] u2[2] v2[1] v2[2] + //u3[0] u3[1] v3[0] v3[1] u3[1] u3[2] v3[1] v3[2] + res_AB_8x16b = _mm_maddubs_epi16(src_r1r2_16x8b, coeffAB_16x8b); + res_CD_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeffCD_16x8b); + + res_8x16b = _mm_add_epi16(res_AB_8x16b, round_add32_8x16b); + res_8x16b = 
_mm_add_epi16(res_8x16b, res_CD_8x16b); + res_8x16b = _mm_srai_epi16(res_8x16b, 6); + res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b); + + _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char *)pu1_dst); + + ht -= 2; + pu1_src += src_strd << 1; + res_16x8b = _mm_srli_si128(res_16x8b, 4); + src_r1_16x8b = src_r3_16x8b; + + _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char *)(pu1_dst + dst_strd)); + + pu1_dst += dst_strd << 1; + } + while(ht > 0); + */ + } + else if(wd == 4) + { + WORD32 AB, CD; + + __m128i src_r1_16x8b, src_r2_16x8b, src_r3_16x8b; + __m128i res1_AB_8x16b, res1_CD_8x16b, res1_8x16b, res1_16x8b; + __m128i res2_AB_8x16b, res2_CD_8x16b, res2_8x16b, res2_16x8b; + + __m128i coeffAB_16x8b, coeffCD_16x8b, round_add32_8x16b; + __m128i const_shuff_16x8b; + + AB = (B << 8) + A; + CD = (D << 8) + C; /* two byte weights per 16-bit lane (A low / B high, C low / D high) so _mm_maddubs_epi16 forms A*s0 + B*s1 and C*s0 + D*s1 per byte pair */ + + coeffAB_16x8b = _mm_set1_epi16(AB); + coeffCD_16x8b = _mm_set1_epi16(CD); + + round_add32_8x16b = _mm_set1_epi16(32); + + const_shuff_16x8b = _mm_setr_epi32(0x03010200, 0x05030402, 0x07050604, 0x09070806); /* byte order 0,2,1,3, 2,4,3,5, ...: pairs every source byte with the byte two positions later */ + + src_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src_r1_16x8b = _mm_shuffle_epi8(src_r1_16x8b, const_shuff_16x8b); + pu1_src += src_strd; + + do + { + src_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src_r3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); + + src_r2_16x8b = _mm_shuffle_epi8(src_r2_16x8b, const_shuff_16x8b); + src_r3_16x8b = _mm_shuffle_epi8(src_r3_16x8b, const_shuff_16x8b); + + res1_AB_8x16b = _mm_maddubs_epi16(src_r1_16x8b, coeffAB_16x8b); + res1_CD_8x16b = _mm_maddubs_epi16(src_r2_16x8b, coeffCD_16x8b); + res2_AB_8x16b = _mm_maddubs_epi16(src_r2_16x8b, coeffAB_16x8b); + res2_CD_8x16b = _mm_maddubs_epi16(src_r3_16x8b, coeffCD_16x8b); + + res1_8x16b = _mm_add_epi16(res1_AB_8x16b, res1_CD_8x16b); + res2_8x16b = _mm_add_epi16(res2_AB_8x16b, res2_CD_8x16b); + res1_8x16b = _mm_add_epi16(res1_8x16b, round_add32_8x16b); + res2_8x16b = _mm_add_epi16(res2_8x16b, round_add32_8x16b); + + res1_8x16b = _mm_srai_epi16(res1_8x16b, 6);
+ res2_8x16b = _mm_srai_epi16(res2_8x16b, 6); + + res1_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b); + res2_16x8b = _mm_packus_epi16(res2_8x16b, res2_8x16b); + + _mm_storel_epi64((__m128i *)pu1_dst, res1_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + + src_r1_16x8b = src_r3_16x8b; /* the bottom row of this pair becomes the top row of the next pair */ + + ht -= 2; + pu1_src += src_strd << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + else // wd == 8 + { /* each do-while iteration below stores four rows of 16 bytes, alternating the r1 / r2 registers as previous and current row; ht is reduced by 4 per iteration */ + WORD32 AB, CD; + + __m128i src_r1l_16x8b, src_r2l_16x8b; + __m128i src_r1h_16x8b, src_r2h_16x8b; + + __m128i res_l_AB_8x16b, res_l_CD_8x16b; + __m128i res_h_AB_8x16b, res_h_CD_8x16b; + __m128i res_l_8x16b, res_h_8x16b, res_16x8b; + + __m128i coeffAB_16x8b, coeffCD_16x8b, round_add32_8x16b; + __m128i const_shuff_16x8b; + + AB = (B << 8) + A; + CD = (D << 8) + C; + + coeffAB_16x8b = _mm_set1_epi16(AB); + coeffCD_16x8b = _mm_set1_epi16(CD); + + round_add32_8x16b = _mm_set1_epi16(32); + + const_shuff_16x8b = _mm_setr_epi32(0x03010200, 0x05030402, 0x07050604, 0x09070806); + + src_r1l_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src_r1h_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); + + src_r1l_16x8b = _mm_shuffle_epi8(src_r1l_16x8b, const_shuff_16x8b); + src_r1h_16x8b = _mm_shuffle_epi8(src_r1h_16x8b, const_shuff_16x8b); + + pu1_src += src_strd; + + do + { + //row 1 + src_r2l_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src_r2h_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); + + src_r2l_16x8b = _mm_shuffle_epi8(src_r2l_16x8b, const_shuff_16x8b); + src_r2h_16x8b = _mm_shuffle_epi8(src_r2h_16x8b, const_shuff_16x8b); + + res_l_AB_8x16b = _mm_maddubs_epi16(src_r1l_16x8b, coeffAB_16x8b); + res_h_AB_8x16b = _mm_maddubs_epi16(src_r1h_16x8b, coeffAB_16x8b); + res_l_CD_8x16b = _mm_maddubs_epi16(src_r2l_16x8b, coeffCD_16x8b); + res_h_CD_8x16b = _mm_maddubs_epi16(src_r2h_16x8b, coeffCD_16x8b); + + res_l_8x16b = _mm_add_epi16(res_l_AB_8x16b, round_add32_8x16b); + res_h_8x16b = _mm_add_epi16(res_h_AB_8x16b, round_add32_8x16b); + res_l_8x16b = 
_mm_add_epi16(res_l_8x16b, res_l_CD_8x16b); + res_h_8x16b = _mm_add_epi16(res_h_8x16b, res_h_CD_8x16b); + + res_l_8x16b = _mm_srai_epi16(res_l_8x16b, 6); + res_h_8x16b = _mm_srai_epi16(res_h_8x16b, 6); + + res_16x8b = _mm_packus_epi16(res_l_8x16b, res_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b); + + pu1_src += src_strd; + pu1_dst += dst_strd; + + //row 2 + src_r1l_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src_r1h_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); + + src_r1l_16x8b = _mm_shuffle_epi8(src_r1l_16x8b, const_shuff_16x8b); + src_r1h_16x8b = _mm_shuffle_epi8(src_r1h_16x8b, const_shuff_16x8b); + + res_l_AB_8x16b = _mm_maddubs_epi16(src_r2l_16x8b, coeffAB_16x8b); + res_h_AB_8x16b = _mm_maddubs_epi16(src_r2h_16x8b, coeffAB_16x8b); + res_l_CD_8x16b = _mm_maddubs_epi16(src_r1l_16x8b, coeffCD_16x8b); + res_h_CD_8x16b = _mm_maddubs_epi16(src_r1h_16x8b, coeffCD_16x8b); + + res_l_8x16b = _mm_add_epi16(res_l_AB_8x16b, round_add32_8x16b); + res_h_8x16b = _mm_add_epi16(res_h_AB_8x16b, round_add32_8x16b); + res_l_8x16b = _mm_add_epi16(res_l_8x16b, res_l_CD_8x16b); + res_h_8x16b = _mm_add_epi16(res_h_8x16b, res_h_CD_8x16b); + + res_l_8x16b = _mm_srai_epi16(res_l_8x16b, 6); + res_h_8x16b = _mm_srai_epi16(res_h_8x16b, 6); + + res_16x8b = _mm_packus_epi16(res_l_8x16b, res_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b); + + pu1_src += src_strd; + pu1_dst += dst_strd; + + //row 3 + src_r2l_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src_r2h_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); + + src_r2l_16x8b = _mm_shuffle_epi8(src_r2l_16x8b, const_shuff_16x8b); + src_r2h_16x8b = _mm_shuffle_epi8(src_r2h_16x8b, const_shuff_16x8b); + + res_l_AB_8x16b = _mm_maddubs_epi16(src_r1l_16x8b, coeffAB_16x8b); + res_h_AB_8x16b = _mm_maddubs_epi16(src_r1h_16x8b, coeffAB_16x8b); + res_l_CD_8x16b = _mm_maddubs_epi16(src_r2l_16x8b, coeffCD_16x8b); + res_h_CD_8x16b = _mm_maddubs_epi16(src_r2h_16x8b, coeffCD_16x8b); + + res_l_8x16b = 
_mm_add_epi16(res_l_AB_8x16b, round_add32_8x16b); + res_h_8x16b = _mm_add_epi16(res_h_AB_8x16b, round_add32_8x16b); + res_l_8x16b = _mm_add_epi16(res_l_8x16b, res_l_CD_8x16b); + res_h_8x16b = _mm_add_epi16(res_h_8x16b, res_h_CD_8x16b); + + res_l_8x16b = _mm_srai_epi16(res_l_8x16b, 6); + res_h_8x16b = _mm_srai_epi16(res_h_8x16b, 6); + + res_16x8b = _mm_packus_epi16(res_l_8x16b, res_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b); + + pu1_src += src_strd; + pu1_dst += dst_strd; + + //row 4 + src_r1l_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src_r1h_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); + + src_r1l_16x8b = _mm_shuffle_epi8(src_r1l_16x8b, const_shuff_16x8b); + src_r1h_16x8b = _mm_shuffle_epi8(src_r1h_16x8b, const_shuff_16x8b); + + res_l_AB_8x16b = _mm_maddubs_epi16(src_r2l_16x8b, coeffAB_16x8b); + res_h_AB_8x16b = _mm_maddubs_epi16(src_r2h_16x8b, coeffAB_16x8b); + res_l_CD_8x16b = _mm_maddubs_epi16(src_r1l_16x8b, coeffCD_16x8b); + res_h_CD_8x16b = _mm_maddubs_epi16(src_r1h_16x8b, coeffCD_16x8b); + + res_l_8x16b = _mm_add_epi16(res_l_AB_8x16b, round_add32_8x16b); + res_h_8x16b = _mm_add_epi16(res_h_AB_8x16b, round_add32_8x16b); + res_l_8x16b = _mm_add_epi16(res_l_8x16b, res_l_CD_8x16b); + res_h_8x16b = _mm_add_epi16(res_h_8x16b, res_h_CD_8x16b); + + res_l_8x16b = _mm_srai_epi16(res_l_8x16b, 6); + res_h_8x16b = _mm_srai_epi16(res_h_8x16b, 6); + + res_16x8b = _mm_packus_epi16(res_l_8x16b, res_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b); + + ht -= 4; + pu1_src += src_strd; + pu1_dst += dst_strd; + } + while(ht > 0); + } +} diff --git a/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c b/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c new file mode 100755 index 0000000..d43c8e2 --- /dev/null +++ b/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c @@ -0,0 +1,437 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_iquant_itrans_recon_dc_ssse3.c + * + * @brief + * Contains function definitions for inverse quantization, inverse + * transform and reconstruction + * + * @author + * Mohit [100664] + * + * @par List of Functions: + * - ih264_iquant_itrans_recon_4x4_dc_ssse3() + * - ih264_iquant_itrans_recon_8x8_dc_ssse3() + * - ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3() + * + * @remarks + * None + * + ******************************************************************************* + */ +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264_trans_macros.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include <immintrin.h> + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized residue and + * prediction buffer for dc input pattern only, i.e. only the (0,0) element of the input + * 4x4 block is non-zero.
For complete function, refer ih264_iquant_itrans_recon_ssse3.c + * + * @par Description: + * The quantized residue is first inverse quantized, then inverse transformed. + * This inverse transformed content is added to the prediction buffer to recon- + * struct the end output + * + * @param[in] pi2_src + * quantized 4x4 block + * + * @param[in] pu1_pred + * prediction 4x4 block + * + * @param[out] pu1_out + * reconstructed 4x4 block + * + * @param[in] src_strd + * quantization buffer stride + * + * @param[in] pred_strd, + * Prediction buffer stride + * + * @param[in] out_strd + * recon buffer Stride + * + * @param[in] pu2_scaling_list + * pointer to scaling list + * + * @param[in] pu2_norm_adjust + * pointer to inverse scale matrix + * + * @param[in] u4_qp_div_6 + * Floor (qp/6) + * + * @param[in] pi4_tmp + * temporary buffer of size 1*16 + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ /* NOTE(review): the parameter list in the comment above does not match the signature below. There is no src_strd parameter; element 0 of pu2_iscal_mat and of pu2_weigh_mat is passed to INV_QUANT; pi2_tmp is not referenced directly in this body; when iq_start_idx is non-zero the DC term is taken from pi2_dc_ld_addr[0] instead of the value produced by INV_QUANT. */ +void ih264_iquant_itrans_recon_4x4_dc_ssse3(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscal_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, + WORD16 *pi2_tmp, + WORD32 iq_start_idx, + WORD16 *pi2_dc_ld_addr) +{ + UWORD32 *pu4_out = (UWORD32 *)pu1_out; /* each output row is written below as one 32-bit store through this alias; NOTE(review): this assumes such stores to pu1_out are acceptable on the target - confirm alignment / aliasing */ + WORD32 q0 = pi2_src[0]; + WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 
1 << (3 - u4_qp_div_6) : 0; /* the u4_qp_div_6 < 4 guard keeps the shift count in the range 0..3 */ + INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4); + + if (iq_start_idx != 0 ) + q0 = pi2_dc_ld_addr[0]; // Restoring dc value for intra case + + i_macro = ((q0 + 32) >> 6); + + __m128i predload_r,pred_r0, pred_r1, pred_r2, pred_r3; + __m128i sign_reg; + __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero + __m128i temp4, temp5, temp6, temp7; + __m128i value_add = _mm_set1_epi16(i_macro); + + zero_8x16b = _mm_setzero_si128(); // all bits reset to zero (repeats the initialisation made at the declaration above) + //Load pred buffer + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r0 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p00 p01 p02 p03 0 0 0 0 -- all 16 bits + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p10 p11 p12 p13 0 0 0 0 -- all 16 bits + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[2*pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r2 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p20 p21 p22 p23 0 0 0 0 -- all 16 bits + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[3*pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r3 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p30 p31 p32 p33 0 0 0 0 -- all 16 bits + + pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); //p00 p01 p02 p03 p10 p11 p12 p13 + pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); //p20 p21 p22p p23 p30 p31 p32 p33 + + temp4 = _mm_add_epi16(value_add, pred_r0); + temp5 = _mm_add_epi16(value_add, pred_r2); + /*------------------------------------------------------------------*/ + //Clipping the results to 8 bits + sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b); // sign check: all-ones mask for lanes > 0, so the AND below zeroes lanes that are <= 0 + temp4 = _mm_and_si128(temp4, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b); // sign check + temp5 = _mm_and_si128(temp5, sign_reg); + + temp4 = 
_mm_packus_epi16(temp4,temp5); + temp5 = _mm_srli_si128(temp4,4); + temp6 = _mm_srli_si128(temp5,4); + temp7 = _mm_srli_si128(temp6,4); /* temp4..temp7 now carry output rows 0..3 in their low 32 bits */ + + *pu4_out = _mm_cvtsi128_si32(temp4); + pu1_out += out_strd; + pu4_out = (UWORD32 *)(pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(temp5); + pu1_out += out_strd; + pu4_out = (UWORD32 *)(pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(temp6); + pu1_out += out_strd; + pu4_out = (UWORD32 *)(pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(temp7); +} +/** + ******************************************************************************* + * + * @brief + * This function performs inverse quant and Inverse transform type Ci4 for 8x8 block + * for dc input pattern only, i.e. only the (0,0) element of the input 8x8 block is + * non-zero. For complete function, refer ih264_iquant_itrans_recon_ssse3.c + * + * @par Description: + * Performs inverse transform Ci8 and adds the residue to get the + * reconstructed block. In this DC-only variant the body dequantizes pi2_src[0] + * with INV_QUANT, rounds it with (x + 32) >> 6 and adds that single value to + * every sample of the 8x8 prediction block before clipping to 8 bits. + * + * @param[in] pi2_src + * Input 8x8 coefficients (only pi2_src[0] is read) + * + * @param[in] pu1_pred + * Prediction 8x8 block + * + * @param[out] pu1_out + * Output 8x8 block (eight rows of eight bytes are stored) + * + * @param[in] pred_strd + * Prediction stride + * + * @param[in] out_strd + * Output Stride + * + * @param[in] pu2_iscale_mat + * element 0 is passed to INV_QUANT + * + * @param[in] pu2_weigh_mat + * element 0 is passed to INV_QUANT + * + * @param[in] qp_div + * QP/6 + * + * @param[in] pi2_tmp + * temporary buffer; not referenced directly in this function body + * + * @param[in] iq_start_idx + * not referenced in this function body + * + * @param[in] pi2_dc_ld_addr + * not referenced in this function body + * + * @returns Void + * + * @remarks + * None + * + ******************************************************************************* + */ + +void ih264_iquant_itrans_recon_8x8_dc_ssse3 (WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscale_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 qp_div, + WORD16 *pi2_tmp, + WORD32 iq_start_idx, + WORD16 *pi2_dc_ld_addr) +{ + WORD32 q0 = pi2_src[0]; 
+ WORD16 i_macro, rnd_fact = (qp_div < 6) ? 1 << (5 - qp_div) : 0; + INV_QUANT(q0, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6); + i_macro = ((q0 + 32) >> 6); /* one rounded DC offset, added to all 64 prediction samples below */ + + __m128i predload_r,pred_r0, pred_r1, pred_r2, pred_r3,pred_r4,pred_r5,pred_r6,pred_r7; + __m128i sign_reg; + __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero + __m128i temp1,temp2,temp3,temp4, temp5, temp6, temp7,temp8; + __m128i value_add = _mm_set1_epi16(i_macro); + + //Load pred buffer row 0 + predload_r = _mm_loadl_epi64((__m128i *)(&pu1_pred[0])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r0 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 1 + predload_r = _mm_loadl_epi64((__m128i *)(&pu1_pred[pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 2 + predload_r = _mm_loadl_epi64( + (__m128i *)(&pu1_pred[2 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r2 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 3 + predload_r = _mm_loadl_epi64( + (__m128i *)(&pu1_pred[3 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r3 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 4 + predload_r = _mm_loadl_epi64( + (__m128i *)(&pu1_pred[4 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r4 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 5 + predload_r = _mm_loadl_epi64( + (__m128i *)(&pu1_pred[5 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bit + pred_r5 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 6 + predload_r 
= _mm_loadl_epi64( + (__m128i *)(&pu1_pred[6 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r6 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 7 + predload_r = _mm_loadl_epi64( + (__m128i *)(&pu1_pred[7 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r7 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + + temp1 = _mm_add_epi16(value_add, pred_r0); + + temp2 = _mm_add_epi16(value_add, pred_r1); + + temp3 = _mm_add_epi16(value_add, pred_r2); + + temp4 = _mm_add_epi16(value_add, pred_r3); + + temp5 = _mm_add_epi16(value_add, pred_r4); + + temp6 = _mm_add_epi16(value_add, pred_r5); + + temp7 = _mm_add_epi16(value_add, pred_r6); + + temp8 = _mm_add_epi16(value_add, pred_r7); + /*------------------------------------------------------------------*/ + //Clipping the results to 8 bits + sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); // sign check + temp1 = _mm_and_si128(temp1, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp2, zero_8x16b); // sign check + temp2 = _mm_and_si128(temp2, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp3, zero_8x16b); // sign check + temp3 = _mm_and_si128(temp3, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b); // sign check + temp4 = _mm_and_si128(temp4, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b); // sign check + temp5 = _mm_and_si128(temp5, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp6, zero_8x16b); // sign check + temp6 = _mm_and_si128(temp6, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp7, zero_8x16b); // sign check + temp7 = _mm_and_si128(temp7, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp8, zero_8x16b); // sign check + temp8 = _mm_and_si128(temp8, sign_reg); + + temp1 = _mm_packus_epi16(temp1, zero_8x16b); + temp2 = _mm_packus_epi16(temp2, zero_8x16b); + temp3 = _mm_packus_epi16(temp3, zero_8x16b); + temp4 = _mm_packus_epi16(temp4, zero_8x16b); + temp5 = 
_mm_packus_epi16(temp5, zero_8x16b); + temp6 = _mm_packus_epi16(temp6, zero_8x16b); + temp7 = _mm_packus_epi16(temp7, zero_8x16b); + temp8 = _mm_packus_epi16(temp8, zero_8x16b); + + _mm_storel_epi64((__m128i *)(&pu1_out[0]), temp1); + _mm_storel_epi64((__m128i *)(&pu1_out[out_strd]), temp2); + _mm_storel_epi64((__m128i *)(&pu1_out[2 * out_strd]), temp3); + _mm_storel_epi64((__m128i *)(&pu1_out[3 * out_strd]), temp4); + _mm_storel_epi64((__m128i *)(&pu1_out[4 * out_strd]), temp5); + _mm_storel_epi64((__m128i *)(&pu1_out[5 * out_strd]), temp6); + _mm_storel_epi64((__m128i *)(&pu1_out[6 * out_strd]), temp7); + _mm_storel_epi64((__m128i *)(&pu1_out[7 * out_strd]), temp8); +} + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized chroma residue and + * prediction buffer + * + * @par Description: + * The quantized residue is first inverse quantized, then inverse transformed. + * This inverse transformed content is added to the prediction buffer to recon- + * struct the end output. In this DC-only variant the body reads only + * pi2_dc_src[0], rounds it with (x + 32) >> 6 and adds it to the even-indexed + * prediction bytes of each of the four rows; only bytes 0, 2, 4 and 6 of each + * output row are written. + * + * @param[in] pi2_src + * quantized 4x4 block; not referenced in this function body + * + * @param[in] pu1_pred + * prediction block (the even-indexed bytes of four rows are used) + * + * @param[out] pu1_out + * reconstructed block (bytes 0, 2, 4 and 6 of four rows are written) + * + * @param[in] pred_strd, + * Prediction buffer stride + * + * @param[in] out_strd + * recon buffer Stride + * + * @param[in] pu2_iscal_mat + * not referenced in this function body + * + * @param[in] pu2_weigh_mat + * not referenced in this function body + * + * @param[in] u4_qp_div_6 + * Floor (qp/6); not referenced in this function body + * + * @param[in] pi2_tmp + * temporary buffer; not referenced in this function body + * + * @param[in] pi2_dc_src + * pi2_dc_src[0] is the DC value, used without dequantization + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscal_mat, + const UWORD16 
*pu2_weigh_mat, + UWORD32 u4_qp_div_6, + WORD16 *pi2_tmp, + WORD16 *pi2_dc_src) + { + WORD16 q0 = pi2_dc_src[0]; // DC value won't be dequantized for chroma inverse transform + WORD16 i_macro = ((q0 + 32) >> 6); + + __m128i pred_r0, pred_r1, pred_r2, pred_r3, sign_reg; + __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero + __m128i chroma_mask = _mm_set1_epi16 (0xFF); /* 0x00FF in every 16-bit lane: keeps the even-indexed byte of each byte pair */ + __m128i value_add = _mm_set1_epi16(i_macro); + + //Load pred buffer + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits + + pred_r0 = _mm_and_si128(pred_r0, chroma_mask); + pred_r1 = _mm_and_si128(pred_r1, chroma_mask); + pred_r2 = _mm_and_si128(pred_r2, chroma_mask); + pred_r3 = _mm_and_si128(pred_r3, chroma_mask); + + pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); //p00 p01 p02 p03 p10 p11 p12 p13 + pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); //p20 p21 p22p p23 p30 p31 p32 p33 + + pred_r0 = _mm_add_epi16(value_add, pred_r0); + pred_r2 = _mm_add_epi16(value_add, pred_r2); + + /*------------------------------------------------------------------*/ + //Clipping the results to 8 bits + sign_reg = _mm_cmpgt_epi16(pred_r0, zero_8x16b); // sign check + pred_r0 = _mm_and_si128(pred_r0, sign_reg); + sign_reg = _mm_cmpgt_epi16(pred_r2, zero_8x16b); + pred_r2 = _mm_and_si128(pred_r2, sign_reg); + + pred_r0 = _mm_packus_epi16(pred_r0, pred_r2); + pred_r1 = _mm_srli_si128(pred_r0, 4); + pred_r2 = _mm_srli_si128(pred_r1, 4); + pred_r3 = _mm_srli_si128(pred_r2, 4); + + pred_r0 = _mm_unpacklo_epi8(pred_r0, zero_8x16b); //p00 p01 p02 p03 -- all 16 bits + pred_r1 = _mm_unpacklo_epi8(pred_r1, 
zero_8x16b); //p10 p11 p12 p13 -- all 16 bits + pred_r2 = _mm_unpacklo_epi8(pred_r2, zero_8x16b); //p20 p21 p22 p23 -- all 16 bits + pred_r3 = _mm_unpacklo_epi8(pred_r3, zero_8x16b); //p30 p31 p32 p33 -- all 16 bits + + chroma_mask = _mm_unpacklo_epi64(chroma_mask, zero_8x16b); //1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 -- 8 bits; the masked stores below therefore write only bytes 0, 2, 4 and 6 of each row + + _mm_maskmoveu_si128(pred_r0, chroma_mask, (char *)(&pu1_out[0])); + _mm_maskmoveu_si128(pred_r1, chroma_mask, (char *)(&pu1_out[out_strd])); + _mm_maskmoveu_si128(pred_r2, chroma_mask, (char *)(&pu1_out[2*out_strd])); + _mm_maskmoveu_si128(pred_r3, chroma_mask, (char *)(&pu1_out[3*out_strd])); +} + + diff --git a/common/x86/ih264_iquant_itrans_recon_sse42.c b/common/x86/ih264_iquant_itrans_recon_sse42.c new file mode 100755 index 0000000..2a4ea3f --- /dev/null +++ b/common/x86/ih264_iquant_itrans_recon_sse42.c @@ -0,0 +1,554 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_iquant_itrans_recon_sse42.c + * + * @brief + * Contains function definitions for inverse quantization, inverse + * transform and reconstruction + * + * @author + * Mohit [100664] + * + * @par List of Functions: + * - ih264_iquant_itrans_recon_4x4_sse42() + * - ih264_iquant_itrans_recon_chroma_4x4_sse42() + * + * @remarks + * None + * + ******************************************************************************* + */ +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264_trans_macros.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include <immintrin.h> + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized residue and + * prediction buffer + * + * @par Description: + * The quantized residue is first inverse quantized, then inverse transformed.
+ * This inverse transformed content is added to the prediction buffer to recon- + * struct the end output + * + * @param[in] pi2_src + * quantized 4x4 block + * + * @param[in] pu1_pred + * prediction 4x4 block + * + * @param[out] pu1_out + * reconstructed 4x4 block + * + * @param[in] src_strd + * quantization buffer stride + * + * @param[in] pred_strd, + * Prediction buffer stride + * + * @param[in] out_strd + * recon buffer Stride + * + * @param[in] pu2_scaling_list + * pointer to scaling list + * + * @param[in] pu2_norm_adjust + * pointer to inverse scale matrix + * + * @param[in] u4_qp_div_6 + * Floor (qp/6) + * + * @param[in] pi4_tmp + * temporary buffer of size 1*16 + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void ih264_iquant_itrans_recon_4x4_sse42(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscal_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, + WORD16 *pi2_tmp, + WORD32 iq_start_idx, + WORD16 *pi2_dc_ld_addr) + { + UWORD32 *pu4_out = (UWORD32 *) pu1_out; + __m128i src_r0_r1, src_r2_r3; + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i scalemat_r0_r1, scalemat_r2_r3; + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i sign_reg, dequant_r0_r1, dequant_r2_r3; + __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero + __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + __m128i resq_r0, resq_r1, resq_r2, resq_r3; + __m128i add_rshift = _mm_set1_epi32((1 << (3 - u4_qp_div_6))); + __m128i value_32 = _mm_set1_epi32(32); + + /*************************************************************/ + /* Dequantization of coefficients. 
Will be replaced by SIMD */ + /* operations on platform */ + /*************************************************************/ + src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row + src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row + scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row + scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row + dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); //q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits + dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); //q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits + + temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result + temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result + + temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long + temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long + temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long + temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long + + src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); // a10 0 a11 0 a12 0 a13 0 -- 16 bit long + src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); // a20 0 a21 0 a22 0 a23 0 -- 16 bit long + src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); // a30 0 a31 0 a32 0 a33 0 -- 16 bit long + + temp4 = 
_mm_madd_epi16(src_r0, temp4); //a00*b00*q00 a10*b10*q10 a20*b20*q20 a30*b30 q30 -- 32 bits long + temp5 = _mm_madd_epi16(src_r1, temp5); + temp6 = _mm_madd_epi16(src_r2, temp6); + temp7 = _mm_madd_epi16(src_r3, temp7); + + if (u4_qp_div_6 >= 4) { + resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4); + resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4); + resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4); + resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4); + } else { + temp4 = _mm_add_epi32(temp4, add_rshift); + temp5 = _mm_add_epi32(temp5, add_rshift); + temp6 = _mm_add_epi32(temp6, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6); + resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6); + resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6); + resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6); + } + + if (iq_start_idx == 1) + resq_r0 = _mm_insert_epi32(resq_r0,(WORD32)pi2_dc_ld_addr[0],0); + /* Perform Inverse transform */ + /*-------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); //a0 b0 a1 b1 + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); //c0 d0 c1 d1 + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); //a2 b2 a3 b3 + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); //c2 d2 c3 d3 + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); //a0 b0 c0 d0 + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); //a1 b1 c1 d1 + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); //a2 b2 c2 d2 + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); //a3 b3 c3 d3 + //Transform starts -- horizontal transform + /*------------------------------------------------------------------*/ + /* z0 = w0 + w2 */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1 = w0 - w2 */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2 
= (w1 >> 1) - w3 */ + temp2 = _mm_srai_epi32(resq_r1, 1); //(w1>>1) + temp2 = _mm_sub_epi32(temp2, resq_r3); //(w1>>1) - w3 + /* z3 = w1 + (w3 >> 1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); //(w3>>1) + w1 + temp3 = _mm_add_epi32(temp3, resq_r1); + /*----------------------------------------------------------*/ + /* x0 = z0 + z3 */ + resq_r0 = _mm_add_epi32(temp0, temp3); + /* x1 = z1 + z2 */ + resq_r1 = _mm_add_epi32(temp1, temp2); + /* x2 = z1 - z2 */ + resq_r2 = _mm_sub_epi32(temp1, temp2); + /* x3 = z0 - z3 */ + resq_r3 = _mm_sub_epi32(temp0, temp3); + // Matrix transpose + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); //a0 a1 b0 b1 + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); //a2 a3 b2 b3 + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); //c0 c1 d0 d1 + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); //c2 c3 d2 d3 + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); //a0 a1 a2 a3 + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); //b0 b1 b2 b3 + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); //c0 c1 c2 c3 + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); //d0 d1 d2 d3 + //Transform ends -- horizontal transform + + //Load pred buffer + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits + + pred_r0 = _mm_cvtepu8_epi32(pred_r0); //p00 p01 p02 p03 -- all 32 bits + pred_r1 = _mm_cvtepu8_epi32(pred_r1); //p10 p11 p12 p13 -- all 32 bits + pred_r2 = _mm_cvtepu8_epi32(pred_r2); //p20 p21 p22 p23 -- all 32 bits + pred_r3 = _mm_cvtepu8_epi32(pred_r3); //p30 p31 p32 p33 -- all 32 bits + + 
/*--------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to same buffer */ + /*--------------------------------------------------------------*/ + /* z0j = y0j + y2j */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1j = y0j - y2j */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2j = (y1j>>1) - y3j */ + temp2 = _mm_srai_epi32(resq_r1, 1); //(y1j>>1) + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3j = y1j + (y3j>>1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); //(y3j>>1) + temp3 = _mm_add_epi32(temp3, resq_r1); + + /* x0j = z0j + z3j */ + temp4 = _mm_add_epi32(temp0, temp3); + temp4 = _mm_add_epi32(temp4, value_32); + temp4 = _mm_srai_epi32(temp4, 6); + temp4 = _mm_add_epi32(temp4, pred_r0); + /* x1j = z1j + z2j */ + temp5 = _mm_add_epi32(temp1, temp2); + temp5 = _mm_add_epi32(temp5, value_32); + temp5 = _mm_srai_epi32(temp5, 6); + temp5 = _mm_add_epi32(temp5, pred_r1); + /* x2j = z1j - z2j */ + temp6 = _mm_sub_epi32(temp1, temp2); + temp6 = _mm_add_epi32(temp6, value_32); + temp6 = _mm_srai_epi32(temp6, 6); + temp6 = _mm_add_epi32(temp6, pred_r2); + /* x3j = z0j - z3j */ + temp7 = _mm_sub_epi32(temp0, temp3); + temp7 = _mm_add_epi32(temp7, value_32); + temp7 = _mm_srai_epi32(temp7, 6); + temp7 = _mm_add_epi32(temp7, pred_r3); + + // 32-bit to 16-bit conversion + temp0 = _mm_packs_epi32(temp4, temp5); + temp1 = _mm_packs_epi32(temp6, temp7); + /*------------------------------------------------------------------*/ + //Clipping the results to 8 bits + sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b); // sign check + temp0 = _mm_and_si128(temp0, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); + temp1 = _mm_and_si128(temp1, sign_reg); + + resq_r0 = _mm_packus_epi16(temp0, temp1); + resq_r1 = _mm_srli_si128(resq_r0, 4); + resq_r2 = _mm_srli_si128(resq_r1, 4); + resq_r3 = _mm_srli_si128(resq_r2, 4); + + *pu4_out = _mm_cvtsi128_si32(resq_r0); 
+ pu1_out += out_strd; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(resq_r1); + pu1_out += out_strd; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(resq_r2); + pu1_out += out_strd; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(resq_r3); +} + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized chroma resiude and + * prediction buffer + * + * @par Description: + * The quantized residue is first inverse quantized, then inverse transformed. + * This inverse transformed content is added to the prediction buffer to recon- + * struct the end output + * + * @param[in] pi2_src + * quantized 4x4 block + * + * @param[in] pu1_pred + * prediction 4x4 block + * + * @param[out] pu1_out + * reconstructed 4x4 block + * + * @param[in] src_strd + * quantization buffer stride + * + * @param[in] pred_strd, + * Prediction buffer stride + * + * @param[in] out_strd + * recon buffer Stride + * + * @param[in] pu2_scaling_list + * pointer to scaling list + * + * @param[in] pu2_norm_adjust + * pointer to inverse scale matrix + * + * @param[in] u4_qp_div_6 + * Floor (qp/6) + * + * @param[in] pi4_tmp + * temporary buffer of size 1*16 + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void ih264_iquant_itrans_recon_chroma_4x4_sse42(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscal_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, + WORD16 *pi2_tmp, + WORD16 *pi2_dc_ld_addr) + { + __m128i src_r0_r1, src_r2_r3; + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i scalemat_r0_r1, scalemat_r2_r3; + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i sign_reg, dequant_r0_r1, dequant_r2_r3; + __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero + 
__m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + __m128i resq_r0, resq_r1, resq_r2, resq_r3; + __m128i add_rshift = _mm_set1_epi32((1 << (3 - u4_qp_div_6))); + __m128i value_32 = _mm_set1_epi32(32); + __m128i chroma_mask = _mm_set1_epi16 (0xFF); + /*************************************************************/ + /* Dequantization of coefficients. Will be replaced by SIMD */ + /* operations on platform */ + /*************************************************************/ + src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row + src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row + scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row + scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row + dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); //q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits + dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); //q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits + + temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result + temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result + + temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long + temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long + temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long + temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long + + src_r0 = _mm_unpacklo_epi16(src_r0_r1, 
zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); // a10 0 a11 0 a12 0 a13 0 -- 16 bit long + src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); // a20 0 a21 0 a22 0 a23 0 -- 16 bit long + src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); // a30 0 a31 0 a32 0 a33 0 -- 16 bit long + + temp4 = _mm_madd_epi16(src_r0, temp4); //a00*b00*q00 a10*b10*q10 a20*b20*q20 a30*b30 q30 -- 32 bits long + temp5 = _mm_madd_epi16(src_r1, temp5); + temp6 = _mm_madd_epi16(src_r2, temp6); + temp7 = _mm_madd_epi16(src_r3, temp7); + + if (u4_qp_div_6 >= 4) { + resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4); + resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4); + resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4); + resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4); + } else { + temp4 = _mm_add_epi32(temp4, add_rshift); + temp5 = _mm_add_epi32(temp5, add_rshift); + temp6 = _mm_add_epi32(temp6, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6); + resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6); + resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6); + resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6); + } + + resq_r0 = _mm_insert_epi32(resq_r0,(WORD32)pi2_dc_ld_addr[0],0); + /* Perform Inverse transform */ + /*-------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); //a0 b0 a1 b1 + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); //c0 d0 c1 d1 + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); //a2 b2 a3 b3 + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); //c2 d2 c3 d3 + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); //a0 b0 c0 d0 + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); //a1 b1 c1 d1 + resq_r2 = _mm_unpacklo_epi64(temp2, 
temp4); //a2 b2 c2 d2 + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); //a3 b3 c3 d3 + //Transform starts -- horizontal transform + /*------------------------------------------------------------------*/ + /* z0 = w0 + w2 */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1 = w0 - w2 */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2 = (w1 >> 1) - w3 */ + temp2 = _mm_srai_epi32(resq_r1, 1); //(w1>>1) + temp2 = _mm_sub_epi32(temp2, resq_r3); //(w1>>1) - w3 + /* z3 = w1 + (w3 >> 1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); //(w3>>1) + w1 + temp3 = _mm_add_epi32(temp3, resq_r1); + /*----------------------------------------------------------*/ + /* x0 = z0 + z3 */ + resq_r0 = _mm_add_epi32(temp0, temp3); + /* x1 = z1 + z2 */ + resq_r1 = _mm_add_epi32(temp1, temp2); + /* x2 = z1 - z2 */ + resq_r2 = _mm_sub_epi32(temp1, temp2); + /* x3 = z0 - z3 */ + resq_r3 = _mm_sub_epi32(temp0, temp3); + // Matrix transpose + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); //a0 a1 b0 b1 + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); //a2 a3 b2 b3 + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); //c0 c1 d0 d1 + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); //c2 c3 d2 d3 + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); //a0 a1 a2 a3 + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); //b0 b1 b2 b3 + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); //c0 c1 c2 c3 + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); //d0 d1 d2 d3 + //Transform ends -- horizontal transform + + //Load pred buffer + pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits + 
+ pred_r0 = _mm_and_si128(pred_r0, chroma_mask); + pred_r1 = _mm_and_si128(pred_r1, chroma_mask); + pred_r2 = _mm_and_si128(pred_r2, chroma_mask); + pred_r3 = _mm_and_si128(pred_r3, chroma_mask); + + pred_r0 = _mm_cvtepu16_epi32(pred_r0); //p00 p01 p02 p03 -- all 32 bits + pred_r1 = _mm_cvtepu16_epi32(pred_r1); //p10 p11 p12 p13 -- all 32 bits + pred_r2 = _mm_cvtepu16_epi32(pred_r2); //p20 p21 p22 p23 -- all 32 bits + pred_r3 = _mm_cvtepu16_epi32(pred_r3); //p30 p31 p32 p33 -- all 32 bits + + /*--------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to same buffer */ + /*--------------------------------------------------------------*/ + /* z0j = y0j + y2j */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1j = y0j - y2j */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2j = (y1j>>1) - y3j */ + temp2 = _mm_srai_epi32(resq_r1, 1); //(y1j>>1) + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3j = y1j + (y3j>>1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); //(y3j>>1) + temp3 = _mm_add_epi32(temp3, resq_r1); + + /* x0j = z0j + z3j */ + temp4 = _mm_add_epi32(temp0, temp3); + temp4 = _mm_add_epi32(temp4, value_32); + temp4 = _mm_srai_epi32(temp4, 6); + temp4 = _mm_add_epi32(temp4, pred_r0); + /* x1j = z1j + z2j */ + temp5 = _mm_add_epi32(temp1, temp2); + temp5 = _mm_add_epi32(temp5, value_32); + temp5 = _mm_srai_epi32(temp5, 6); + temp5 = _mm_add_epi32(temp5, pred_r1); + /* x2j = z1j - z2j */ + temp6 = _mm_sub_epi32(temp1, temp2); + temp6 = _mm_add_epi32(temp6, value_32); + temp6 = _mm_srai_epi32(temp6, 6); + temp6 = _mm_add_epi32(temp6, pred_r2); + /* x3j = z0j - z3j */ + temp7 = _mm_sub_epi32(temp0, temp3); + temp7 = _mm_add_epi32(temp7, value_32); + temp7 = _mm_srai_epi32(temp7, 6); + temp7 = _mm_add_epi32(temp7, pred_r3); + + // 32-bit to 16-bit conversion + temp0 = _mm_packs_epi32(temp4, temp5); + temp1 = _mm_packs_epi32(temp6, temp7); + 
/*------------------------------------------------------------------*/ + //Clipping the results to 8 bits + sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b); // sign check + temp0 = _mm_and_si128(temp0, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); + temp1 = _mm_and_si128(temp1, sign_reg); + + resq_r0 = _mm_packus_epi16(temp0, temp1); + resq_r1 = _mm_srli_si128(resq_r0, 4); + resq_r2 = _mm_srli_si128(resq_r1, 4); + resq_r3 = _mm_srli_si128(resq_r2, 4); + + resq_r0 = _mm_cvtepu8_epi16(resq_r0); //p00 p01 p02 p03 -- all 16 bits + resq_r1 = _mm_cvtepu8_epi16(resq_r1); //p10 p11 p12 p13 -- all 16 bits + resq_r2 = _mm_cvtepu8_epi16(resq_r2); //p20 p21 p22 p23 -- all 16 bits + resq_r3 = _mm_cvtepu8_epi16(resq_r3); //p30 p31 p32 p33 -- all 16 bits + + chroma_mask = _mm_unpacklo_epi64(chroma_mask, zero_8x16b); + + _mm_maskmoveu_si128(resq_r0, chroma_mask, (char *)(&pu1_out[0])); + _mm_maskmoveu_si128(resq_r1, chroma_mask, (char *)(&pu1_out[out_strd])); + _mm_maskmoveu_si128(resq_r2, chroma_mask, (char *)(&pu1_out[2*out_strd])); + _mm_maskmoveu_si128(resq_r3, chroma_mask, (char *)(&pu1_out[3*out_strd])); +} diff --git a/common/x86/ih264_iquant_itrans_recon_ssse3.c b/common/x86/ih264_iquant_itrans_recon_ssse3.c new file mode 100755 index 0000000..ca1397e --- /dev/null +++ b/common/x86/ih264_iquant_itrans_recon_ssse3.c @@ -0,0 +1,1035 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_iquant_itrans_recon_ssse3.c + * + * @brief + * Contains function definitions for inverse quantization, inverse + * transform and reconstruction + * + * @author + * Mohit [100664] + * + * @par List of Functions: + * - ih264_iquant_itrans_recon_4x4_ssse3() + * - ih264_iquant_itrans_recon_8x8_ssse3() + * + * @remarks + * None + * + ******************************************************************************* + */ +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264_trans_macros.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_trans_data.h" +#include "ih264_size_defs.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include <immintrin.h> + +/* + ******************************************************************************** + * + * @brief This function reconstructs a 4x4 sub block from quantized residue and + * prediction buffer + * + * @par Description: + * The quantized residue is first inverse quantized, then inverse transformed. 
+ * This inverse transformed content is added to the prediction buffer to recon- + * struct the end output + * + * @param[in] pi2_src + * quantized 4x4 block + * + * @param[in] pu1_pred + * prediction 4x4 block + * + * @param[out] pu1_out + * reconstructed 4x4 block + * + * @param[in] src_strd + * quantization buffer stride + * + * @param[in] pred_strd, + * Prediction buffer stride + * + * @param[in] out_strd + * recon buffer Stride + * + * @param[in] pu2_scaling_list + * pointer to scaling list + * + * @param[in] pu2_norm_adjust + * pointer to inverse scale matrix + * + * @param[in] u4_qp_div_6 + * Floor (qp/6) + * + * @param[in] pi4_tmp + * temporary buffer of size 1*16 + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void ih264_iquant_itrans_recon_4x4_ssse3(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscal_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 u4_qp_div_6, + WORD16 *pi2_tmp, + WORD32 iq_start_idx, + WORD16 *pi2_dc_ld_addr) +{ + UWORD32 *pu4_out = (UWORD32 *) pu1_out; + __m128i src_r0_r1, src_r2_r3; + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i scalemat_r0_r1, scalemat_r2_r3, predload_r; + __m128i pred_r0, pred_r1, pred_r2, pred_r3; + __m128i sign_reg, dequant_r0_r1, dequant_r2_r3; + __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero + __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; + __m128i resq_r0, resq_r1, resq_r2, resq_r3; + __m128i add_rshift = _mm_set1_epi32((1 << (3 - u4_qp_div_6))); + __m128i value_32 = _mm_set1_epi32(32); + + /*************************************************************/ + /* Dequantization of coefficients. 
Will be replaced by SIMD */ + /* operations on platform */ + /*************************************************************/ + src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row + src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row + scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row + scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row + dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); //q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits + dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); //q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits + + temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result + temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result + + temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long + temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long + temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long + temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long + + src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); // a10 0 a11 0 a12 0 a13 0 -- 16 bit long + src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); // a20 0 a21 0 a22 0 a23 0 -- 16 bit long + src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); // a30 0 a31 0 a32 0 a33 0 -- 16 bit long + + temp4 = 
_mm_madd_epi16(src_r0, temp4); //a00*b00*q00 a10*b10*q10 a20*b20*q20 a30*b30 q30 -- 32 bits long + temp5 = _mm_madd_epi16(src_r1, temp5); + temp6 = _mm_madd_epi16(src_r2, temp6); + temp7 = _mm_madd_epi16(src_r3, temp7); + + if (u4_qp_div_6 >= 4) { + resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4); + resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4); + resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4); + resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4); + } else { + temp4 = _mm_add_epi32(temp4, add_rshift); + temp5 = _mm_add_epi32(temp5, add_rshift); + temp6 = _mm_add_epi32(temp6, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6); + resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6); + resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6); + resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6); + } + + if (iq_start_idx == 1) + { + resq_r0 = _mm_insert_epi16(resq_r0,(WORD32)pi2_src[0],0); + if (pi2_src[0] >= 0) + resq_r0 = _mm_insert_epi16(resq_r0,0,1); + else + resq_r0 = _mm_insert_epi16(resq_r0,-1,1); + } + /* Perform Inverse transform */ + /*-------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*-------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 + * b0 b1 b2 b3 + * c0 c1 c2 c3 + * d0 d1 d2 d3 + */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); //a0 b0 a1 b1 + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); //c0 d0 c1 d1 + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); //a2 b2 a3 b3 + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); //c2 d2 c3 d3 + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); //a0 b0 c0 d0 + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); //a1 b1 c1 d1 + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); //a2 b2 c2 d2 + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); //a3 b3 c3 d3 + //Transform starts -- horizontal transform + /*------------------------------------------------------------------*/ + /* z0 = w0 + w2 
*/ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1 = w0 - w2 */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2 = (w1 >> 1) - w3 */ + temp2 = _mm_srai_epi32(resq_r1, 1); //(w1>>1) + temp2 = _mm_sub_epi32(temp2, resq_r3); //(w1>>1) - w3 + /* z3 = w1 + (w3 >> 1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); //(w3>>1) + w1 + temp3 = _mm_add_epi32(temp3, resq_r1); + /*----------------------------------------------------------*/ + /* x0 = z0 + z3 */ + resq_r0 = _mm_add_epi32(temp0, temp3); + /* x1 = z1 + z2 */ + resq_r1 = _mm_add_epi32(temp1, temp2); + /* x2 = z1 - z2 */ + resq_r2 = _mm_sub_epi32(temp1, temp2); + /* x3 = z0 - z3 */ + resq_r3 = _mm_sub_epi32(temp0, temp3); + // Matrix transpose + /* + * a0 b0 c0 d0 + * a1 b1 c1 d1 + * a2 b2 c2 d2 + * a3 b3 c3 d3 + */ + temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); //a0 a1 b0 b1 + temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); //a2 a3 b2 b3 + temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); //c0 c1 d0 d1 + temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); //c2 c3 d2 d3 + resq_r0 = _mm_unpacklo_epi64(temp1, temp3); //a0 a1 a2 a3 + resq_r1 = _mm_unpackhi_epi64(temp1, temp3); //b0 b1 b2 b3 + resq_r2 = _mm_unpacklo_epi64(temp2, temp4); //c0 c1 c2 c3 + resq_r3 = _mm_unpackhi_epi64(temp2, temp4); //d0 d1 d2 d3 + //Transform ends -- horizontal transform + + zero_8x16b = _mm_setzero_si128(); // all bits reset to zero + //Load pred buffer + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r0 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p00 p01 p02 p03 0 0 0 0 -- all 16 bits + + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p10 p11 p12 p13 0 0 0 0 -- all 16 bits + + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r2 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p20 p21 p22 
p23 0 0 0 0 -- all 16 bits + + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r3 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p30 p31 p32 p33 0 0 0 0 -- all 16 bits + pred_r0 = _mm_unpacklo_epi16(pred_r0, zero_8x16b); //p00 p01 p02 p03 -- 32 bits sign extended + pred_r1 = _mm_unpacklo_epi16(pred_r1, zero_8x16b); //p10 p11 p12 p13 -- 32 bits sign extended + pred_r2 = _mm_unpacklo_epi16(pred_r2, zero_8x16b); //p20 p21 p22 p23 -- 32 bits sign extended + pred_r3 = _mm_unpacklo_epi16(pred_r3, zero_8x16b); //p30 p31 p32 p33 -- 32 bits sign extended + + /*--------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to same buffer */ + /*--------------------------------------------------------------*/ + /* z0j = y0j + y2j */ + temp0 = _mm_add_epi32(resq_r0, resq_r2); + /* z1j = y0j - y2j */ + temp1 = _mm_sub_epi32(resq_r0, resq_r2); + /* z2j = (y1j>>1) - y3j */ + temp2 = _mm_srai_epi32(resq_r1, 1); //(y1j>>1) + temp2 = _mm_sub_epi32(temp2, resq_r3); + /* z3j = y1j + (y3j>>1) */ + temp3 = _mm_srai_epi32(resq_r3, 1); //(y3j>>1) + temp3 = _mm_add_epi32(temp3, resq_r1); + + /* x0j = z0j + z3j */ + temp4 = _mm_add_epi32(temp0, temp3); + temp4 = _mm_add_epi32(temp4, value_32); + temp4 = _mm_srai_epi32(temp4, 6); + temp4 = _mm_add_epi32(temp4, pred_r0); + /* x1j = z1j + z2j */ + temp5 = _mm_add_epi32(temp1, temp2); + temp5 = _mm_add_epi32(temp5, value_32); + temp5 = _mm_srai_epi32(temp5, 6); + temp5 = _mm_add_epi32(temp5, pred_r1); + /* x2j = z1j - z2j */ + temp6 = _mm_sub_epi32(temp1, temp2); + temp6 = _mm_add_epi32(temp6, value_32); + temp6 = _mm_srai_epi32(temp6, 6); + temp6 = _mm_add_epi32(temp6, pred_r2); + /* x3j = z0j - z3j */ + temp7 = _mm_sub_epi32(temp0, temp3); + temp7 = _mm_add_epi32(temp7, value_32); + temp7 = _mm_srai_epi32(temp7, 6); + temp7 = _mm_add_epi32(temp7, 
pred_r3); + + // 32-bit to 16-bit conversion + temp0 = _mm_packs_epi32(temp4, temp5); + temp1 = _mm_packs_epi32(temp6, temp7); + /*------------------------------------------------------------------*/ + //Clipping the results to 8 bits + sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b); // sign check + temp0 = _mm_and_si128(temp0, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); + temp1 = _mm_and_si128(temp1, sign_reg); + + resq_r0 = _mm_packus_epi16(temp0, temp1); + resq_r1 = _mm_srli_si128(resq_r0, 4); + resq_r2 = _mm_srli_si128(resq_r1, 4); + resq_r3 = _mm_srli_si128(resq_r2, 4); + + *pu4_out = _mm_cvtsi128_si32(resq_r0); + pu1_out += out_strd; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(resq_r1); + pu1_out += out_strd; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(resq_r2); + pu1_out += out_strd; + pu4_out = (UWORD32 *) (pu1_out); + *(pu4_out) = _mm_cvtsi128_si32(resq_r3); +} +/** + ******************************************************************************* + * + * @brief + * This function performs inverse quant and Inverse transform type Ci4 for 8x8 block + * + * @par Description: + * Performs inverse transform Ci8 and adds the residue to get the + * reconstructed block + * + * @param[in] pi2_src + * Input 8x8coefficients + * + * @param[in] pu1_pred + * Prediction 8x8 block + * + * @param[out] pu1_recon + * Output 8x8 block + * + * @param[in] q_div + * QP/6 + * + * @param[in] q_rem + * QP%6 + * + * @param[in] q_lev + * Quantizer level + * + * @param[in] u4_src_stride + * Input stride + * + * @param[in] u4_pred_stride, + * Prediction stride + * + * @param[in] u4_out_stride + * Output Stride + * + * @param[in] pi4_tmp + * temporary buffer of size 1*64 + * the tmp for each block + * + * @param[in] pu4_iquant_mat + * Pointer to the inverse quantization matrix + * + * @returns Void + * + * @remarks + * None + * + ******************************************************************************* + */ + +void 
ih264_iquant_itrans_recon_8x8_ssse3(WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscale_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 qp_div, + WORD16 *pi2_tmp, + WORD32 iq_start_idx, + WORD16 *pi2_dc_ld_addr) +{ + __m128i src_r0; + __m128i scalemat_r0; + __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero + // __m128i one_8x16b = _mm_set1_epi8(255); // all bits set to 1 + // __m128i one_zero_mask = _mm_unpacklo_epi16(one_8x16b, zero_8x16b); // 1 0 1 0 1 0 1 0 --- 16 bits size + __m128i value_32 = _mm_set1_epi32(32); + __m128i add_rshift = _mm_set1_epi32((1 << (5 - qp_div))); + __m128i dequant_r0; + __m128i predload_r; + __m128i pred_r0_1, pred_r1_1, pred_r2_1, pred_r3_1, pred_r4_1, pred_r5_1, + pred_r6_1, pred_r7_1; + __m128i sign_reg; + __m128i src_r0_1, src_r0_2; + __m128i scalemat_r0_1, scalemat_r0_2; + __m128i temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; + __m128i temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, + temp18, temp19, temp20; + // To store dequantization results + __m128i resq_r0_1, resq_r0_2, resq_r1_1, resq_r1_2, resq_r2_1, resq_r2_2, + resq_r3_1, resq_r3_2, resq_r4_1, resq_r4_2, resq_r5_1, resq_r5_2, + resq_r6_1, resq_r6_2, resq_r7_1, resq_r7_2; + + /*************************************************************/ + /* Dequantization of coefficients. Will be replaced by SIMD */ + /* operations on platform. 
Note : DC coeff is not scaled */ + /*************************************************************/ + + // Row 0 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a04 a05 a06 a07 -- the source matrix 0th row + scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat)); //b00 b01 b02 b03 b04 b05 b06 b07 -- the scaling matrix 0th row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[0])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + + temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long + + if (qp_div >= 6) { + resq_r0_1 = _mm_slli_epi32(temp5, qp_div - 6); + resq_r0_2 = _mm_slli_epi32(temp7, qp_div - 6); + } else { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r0_1 = _mm_srai_epi32(temp5, 6 - qp_div); + resq_r0_2 = _mm_srai_epi32(temp7, 6 - qp_div); + } + resq_r0_1 = _mm_packs_epi32(resq_r0_1, resq_r0_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 1 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 1st row + scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 8)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 1st row 
+ dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[8])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long + if (qp_div >= 6) { + resq_r1_1 = _mm_slli_epi32(temp5, qp_div - 6); + resq_r1_2 = _mm_slli_epi32(temp7, qp_div - 6); + } else { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r1_1 = _mm_srai_epi32(temp5, 6 - qp_div); + resq_r1_2 = _mm_srai_epi32(temp7, 6 - qp_div); + } + resq_r1_1 = _mm_packs_epi32(resq_r1_1, resq_r1_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 2 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 16)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 2nd row + scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 16)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 2nd row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[16])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 
b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long + if (qp_div >= 6) { + resq_r2_1 = _mm_slli_epi32(temp5, qp_div - 6); + resq_r2_2 = _mm_slli_epi32(temp7, qp_div - 6); + } else { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r2_1 = _mm_srai_epi32(temp5, 6 - qp_div); + resq_r2_2 = _mm_srai_epi32(temp7, 6 - qp_div); + } + resq_r2_1 = _mm_packs_epi32(resq_r2_1, resq_r2_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 3 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 24)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 3rd row + scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 24)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 3rd row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[24])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 
a02*b02*q2 a03*b03*q3 - 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long + if (qp_div >= 6) { + resq_r3_1 = _mm_slli_epi32(temp5, qp_div - 6); + resq_r3_2 = _mm_slli_epi32(temp7, qp_div - 6); + } else { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r3_1 = _mm_srai_epi32(temp5, 6 - qp_div); + resq_r3_2 = _mm_srai_epi32(temp7, 6 - qp_div); + } + resq_r3_1 = _mm_packs_epi32(resq_r3_1, resq_r3_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 4 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 32)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 4th row + scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 32)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 4th row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[32])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long + if (qp_div >= 6) { + resq_r4_1 = _mm_slli_epi32(temp5, qp_div - 6); + resq_r4_2 = _mm_slli_epi32(temp7, qp_div - 6); + + } else { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, 
add_rshift); + resq_r4_1 = _mm_srai_epi32(temp5, 6 - qp_div); + resq_r4_2 = _mm_srai_epi32(temp7, 6 - qp_div); + } + resq_r4_1 = _mm_packs_epi32(resq_r4_1, resq_r4_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 5 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 40)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 5th row + scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 40)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 5th row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[40])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long + if (qp_div >= 6) { + resq_r5_1 = _mm_slli_epi32(temp5, qp_div - 6); + resq_r5_2 = _mm_slli_epi32(temp7, qp_div - 6); + //resq_r5_1 = _mm_and_si128(resq_r5_1,one_zero_mask); + //resq_r5_2 = _mm_and_si128(resq_r5_2,one_zero_mask); + } else { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r5_1 = _mm_srai_epi32(temp5, 6 - qp_div); + resq_r5_2 = _mm_srai_epi32(temp7, 6 - qp_div); + } + resq_r5_1 = _mm_packs_epi32(resq_r5_1, resq_r5_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 
a07*b07*q7 -- 16 bit long + // Row 6 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 48)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 6th row + scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 48)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 6th row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[48])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long + if (qp_div >= 6) { + resq_r6_1 = _mm_slli_epi32(temp5, qp_div - 6); + resq_r6_2 = _mm_slli_epi32(temp7, qp_div - 6); + //resq_r6_1 = _mm_and_si128(resq_r6_1,one_zero_mask); + //resq_r6_2 = _mm_and_si128(resq_r6_2,one_zero_mask); + } else { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r6_1 = _mm_srai_epi32(temp5, 6 - qp_div); + resq_r6_2 = _mm_srai_epi32(temp7, 6 - qp_div); + //resq_r6_1 = _mm_and_si128(resq_r6_1,one_zero_mask); + //resq_r6_2 = _mm_and_si128(resq_r6_2,one_zero_mask); + } + resq_r6_1 = _mm_packs_epi32(resq_r6_1, resq_r6_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + // Row 7 processing + src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 56)); //a00 a01 a02 a03 a04 a05 a06 a07 
a08 -- the source matrix 7th row + scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 56)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 7th row + dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[56])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits + src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long + src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long + temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result + scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long + scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long + temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long + temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long + if (qp_div >= 6) { + resq_r7_1 = _mm_slli_epi32(temp5, qp_div - 6); + resq_r7_2 = _mm_slli_epi32(temp7, qp_div - 6); + } else { + temp5 = _mm_add_epi32(temp5, add_rshift); + temp7 = _mm_add_epi32(temp7, add_rshift); + resq_r7_1 = _mm_srai_epi32(temp5, 6 - qp_div); + resq_r7_2 = _mm_srai_epi32(temp7, 6 - qp_div); + } + resq_r7_1 = _mm_packs_epi32(resq_r7_1, resq_r7_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long + /* Perform Inverse transform */ + /*--------------------------------------------------------------------*/ + /* IDCT [ Horizontal transformation ] */ + /*--------------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 a1 a2 a3 a4 a5 a6 a7 + * b0 b1 b2 b3 b4 b5 b6 b7 + * c0 c1 c2 c3 c4 c5 c6 c7 + * d0 d1 d2 d3 d4 d5 d6 d7 + */ + temp1 = _mm_unpacklo_epi16(resq_r0_1, resq_r1_1); //a0 b0 a1 b1 a2 b2 a3 b3 + temp3 = 
_mm_unpacklo_epi16(resq_r2_1, resq_r3_1); //c0 d0 c1 d1 c2 d2 c3 d3 + temp2 = _mm_unpackhi_epi16(resq_r0_1, resq_r1_1); //a4 b4 a5 b5 a6 b6 a7 b7 + temp4 = _mm_unpackhi_epi16(resq_r2_1, resq_r3_1); //c4 d4 c5 d5 c6 d6 c7 d7 + resq_r0_1 = _mm_unpacklo_epi32(temp1, temp3); //a0 b0 c0 d0 a1 b1 c1 d1 + resq_r1_1 = _mm_unpackhi_epi32(temp1, temp3); //a2 b2 c2 d2 a3 b3 c3 d3 + resq_r2_1 = _mm_unpacklo_epi32(temp2, temp4); //a4 b4 c4 d4 a5 b5 c5 d5 + resq_r3_1 = _mm_unpackhi_epi32(temp2, temp4); //a6 b6 c6 d6 a7 b7 c7 d7 + /* + * e0 e1 e2 e3 e4 e5 e6 e7 + * f0 f1 f2 f3 f4 f5 f6 f7 + * g0 g1 g2 g3 g4 g5 g6 g7 + * h0 h1 h2 h3 h4 h5 h6 h7 + */ + temp1 = _mm_unpacklo_epi16(resq_r4_1, resq_r5_1); //e0 f0 e1 f1 e2 f2 e2 f3 + temp3 = _mm_unpacklo_epi16(resq_r6_1, resq_r7_1); //g0 h0 g1 h1 g2 h2 g3 h3 + temp2 = _mm_unpackhi_epi16(resq_r4_1, resq_r5_1); //e4 f4 e5 f5 e6 f6 e7 f7 + temp4 = _mm_unpackhi_epi16(resq_r6_1, resq_r7_1); //g4 h4 g5 h5 g6 h6 g7 h7 + resq_r4_1 = _mm_unpacklo_epi32(temp1, temp3); //e0 f0 g0 h0 e1 f1 g1 h1 + resq_r5_1 = _mm_unpackhi_epi32(temp1, temp3); //e2 f2 g2 h2 e3 f3 g3 h3 + resq_r6_1 = _mm_unpacklo_epi32(temp2, temp4); //e4 f4 g4 h4 e5 f5 g5 h5 + resq_r7_1 = _mm_unpackhi_epi32(temp2, temp4); //e6 f6 g6 h6 e7 f7 g7 h7 + /* + * a0 b0 c0 d0 a1 b1 c1 d1 + * a2 b2 c2 d2 a3 b3 c3 d3 + * a4 b4 c4 d4 a5 b5 c5 d5 + * a6 b6 c6 d6 a7 b7 c7 d7 + * e0 f0 g0 h0 e1 f1 g1 h1 + * e2 f2 g2 h2 e3 f3 g3 h3 + * e4 f4 g4 h4 e5 f5 g5 h5 + * e6 f6 g6 h6 e7 f7 g7 h7 + */ + resq_r0_2 = _mm_unpacklo_epi64(resq_r0_1, resq_r4_1); //a0 b0 c0 d0 e0 f0 g0 h0 + resq_r1_2 = _mm_unpackhi_epi64(resq_r0_1, resq_r4_1); //a1 b1 c1 d1 e1 f1 g1 h1 + resq_r2_2 = _mm_unpacklo_epi64(resq_r1_1, resq_r5_1); //a2 b2 c2 d2 e2 f2 g2 h2 + resq_r3_2 = _mm_unpackhi_epi64(resq_r1_1, resq_r5_1); //a3 b3 c3 d3 e3 f3 g3 h3 + resq_r4_2 = _mm_unpacklo_epi64(resq_r2_1, resq_r6_1); //a4 b4 c4 d4 e4 f4 g4 h4 + resq_r5_2 = _mm_unpackhi_epi64(resq_r2_1, resq_r6_1); //a5 b5 c5 d5 e5 f5 g5 h5 + resq_r6_2 = 
_mm_unpacklo_epi64(resq_r3_1, resq_r7_1); //a6 b6 c6 d6 e6 f6 g6 h6 + resq_r7_2 = _mm_unpackhi_epi64(resq_r3_1, resq_r7_1); //a7 b7 c7 d7 e7 f7 g7 h7 + + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r1_2); + resq_r1_1 = _mm_unpacklo_epi16(resq_r1_2, sign_reg); //a1 b1 c1 d1 -- 32 bit + resq_r1_2 = _mm_unpackhi_epi16(resq_r1_2, sign_reg); //e1 f1 g1 h1 -- 32 bit + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r3_2); + resq_r3_1 = _mm_unpacklo_epi16(resq_r3_2, sign_reg); //a3 b3 c3 d3 -- 32 bit + resq_r3_2 = _mm_unpackhi_epi16(resq_r3_2, sign_reg); //e3 f3 g3 h3 -- 32 bit + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r5_2); + resq_r5_1 = _mm_unpacklo_epi16(resq_r5_2, sign_reg); //a5 b5 c5 d5 -- 32 bit + resq_r5_2 = _mm_unpackhi_epi16(resq_r5_2, sign_reg); //e5 f5 g5 h5 -- 32 bit + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r7_2); + resq_r7_1 = _mm_unpacklo_epi16(resq_r7_2, sign_reg); //a7 b7 c7 d7 -- 32 bit + resq_r7_2 = _mm_unpackhi_epi16(resq_r7_2, sign_reg); //e7 f7 g7 h7 -- 32 bit + //Transform starts -- horizontal transform + /*------------------------------------------------------------------*/ + /* y0 = w0 + w4 */ + temp1 = _mm_add_epi16(resq_r0_2, resq_r4_2); + /* y2 = w0 - w4 */ + temp3 = _mm_sub_epi16(resq_r0_2, resq_r4_2); + /* y1 = -w3 + w5 - w7 - (w7 >> 1) */ + temp2 = _mm_sub_epi32(resq_r5_1, resq_r3_1); //-w3+w5 + temp10 = _mm_sub_epi32(resq_r5_2, resq_r3_2); + temp4 = _mm_sub_epi32(temp2, resq_r7_1); //-w3+w5-w7 + temp12 = _mm_sub_epi32(temp10, resq_r7_2); + temp5 = _mm_srai_epi32(resq_r7_1, 1); //w7>>1 + temp13 = _mm_srai_epi32(resq_r7_2, 1); + temp2 = _mm_sub_epi32(temp4, temp5); //-w3+w5-w7 -(w7>>1) + temp10 = _mm_sub_epi32(temp12, temp13); + temp2 = _mm_packs_epi32(temp2, temp10); + /* y3 = w1 + w7 - w3 - (w3 >> 1) */ + temp4 = _mm_add_epi32(resq_r1_1, resq_r7_1); //w1+w7 + temp12 = _mm_add_epi32(resq_r1_2, resq_r7_2); + temp4 = _mm_sub_epi32(temp4, resq_r3_1); //w1+w7-w3 + temp12 = _mm_sub_epi32(temp12, resq_r3_2); + temp5 = 
_mm_srai_epi32(resq_r3_1, 1); //w3>>1 + temp13 = _mm_srai_epi32(resq_r3_2, 1); + temp4 = _mm_sub_epi32(temp4, temp5); //w1+w7-w3-(w3>>1) + temp12 = _mm_sub_epi32(temp12, temp13); + temp4 = _mm_packs_epi32(temp4, temp12); + /* y4 = (w2 >> 1) - w6 */ + temp5 = _mm_srai_epi16(resq_r2_2, 1); //w2>>1 + temp5 = _mm_sub_epi16(temp5, resq_r6_2); //(w2>>1)-w6 + /* y5 = -w1 + w7 + w5 + (w5 >> 1) */ + temp6 = _mm_sub_epi32(resq_r7_1, resq_r1_1); //w7-w1 + temp14 = _mm_sub_epi32(resq_r7_2, resq_r1_2); + temp6 = _mm_add_epi32(temp6, resq_r5_1); //w7-w1+w5 + temp14 = _mm_add_epi32(temp14, resq_r5_2); + temp7 = _mm_srai_epi32(resq_r5_1, 1); //w5>>1 + temp15 = _mm_srai_epi32(resq_r5_2, 1); + temp6 = _mm_add_epi32(temp6, temp7); //w7-w1_w5+(w5>>1) + temp14 = _mm_add_epi32(temp14, temp15); + temp6 = _mm_packs_epi32(temp6, temp14); + /* y6 = w2 + (w6 >> 1) */ + temp7 = _mm_srai_epi16(resq_r6_2, 1); //w6>>1 + temp7 = _mm_add_epi16(temp7, resq_r2_2); //(w6>>1)+w2 + /* y7 = w3 + w5 + w1 + (w1 >> 1) */ + temp8 = _mm_add_epi32(resq_r3_1, resq_r5_1); //w3+w5 + temp16 = _mm_add_epi32(resq_r3_2, resq_r5_2); + temp8 = _mm_add_epi32(temp8, resq_r1_1); //w3+w5+w1 + temp16 = _mm_add_epi32(temp16, resq_r1_2); + temp17 = _mm_srai_epi32(resq_r1_1, 1); //w1>>1 + temp18 = _mm_srai_epi32(resq_r1_2, 1); + temp8 = _mm_add_epi32(temp8, temp17); //w3+w5+w1+(w1>>1) + temp16 = _mm_add_epi32(temp16, temp18); + temp8 = _mm_packs_epi32(temp8, temp16); + /*------------------------------------------------------------------*/ + /*------------------------------------------------------------------*/ + /* z0 = y0 + y6 */ + resq_r0_1 = _mm_add_epi16(temp1, temp7); + /* z1 = y1 + (y7 >> 2) */ + resq_r1_1 = _mm_srai_epi16(temp8, 2); + resq_r1_1 = _mm_add_epi16(resq_r1_1, temp2); + /* z2 = y2 + y4 */ + resq_r2_1 = _mm_add_epi16(temp3, temp5); + /* z3 = y3 + (y5 >> 2) */ + resq_r3_1 = _mm_srai_epi16(temp6, 2); + resq_r3_1 = _mm_add_epi16(resq_r3_1, temp4); + /* z4 = y2 - y4 */ + resq_r4_1 = _mm_sub_epi16(temp3, temp5); + 
/* z5 = (y3 >> 2) - y5 */ + resq_r5_1 = _mm_srai_epi16(temp4, 2); + resq_r5_1 = _mm_sub_epi16(resq_r5_1, temp6); + /* z6 = y0 - y6 */ + resq_r6_1 = _mm_sub_epi16(temp1, temp7); + /* z7 = y7 - (y1 >> 2) */ + resq_r7_1 = _mm_srai_epi16(temp2, 2); + resq_r7_1 = _mm_sub_epi16(temp8, resq_r7_1); + /*------------------------------------------------------------------*/ + /*------------------------------------------------------------------*/ + /* x0 = z0 + z7 */ + temp1 = _mm_add_epi16(resq_r0_1, resq_r7_1); + /* x1 = z2 + z5 */ + temp2 = _mm_add_epi16(resq_r2_1, resq_r5_1); + /* x2 = z4 + z3 */ + temp3 = _mm_add_epi16(resq_r4_1, resq_r3_1); + /* x3 = z6 + z1 */ + temp4 = _mm_add_epi16(resq_r6_1, resq_r1_1); + /* x4 = z6 - z1 */ + temp5 = _mm_sub_epi16(resq_r6_1, resq_r1_1); + /* x5 = z4 - z3 */ + temp6 = _mm_sub_epi16(resq_r4_1, resq_r3_1); + /* x6 = z2 - z5 */ + temp7 = _mm_sub_epi16(resq_r2_1, resq_r5_1); + /* x7 = z0 - z7 */ + temp8 = _mm_sub_epi16(resq_r0_1, resq_r7_1); + /*------------------------------------------------------------------*/ + // Matrix transpose + /* + * a0 b0 c0 d0 e0 f0 g0 h0 + * a1 b1 c1 d1 e1 f1 g1 h1 + * a2 b2 c2 d2 e2 f2 g2 h2 + * a3 b3 c3 d3 e3 f3 g3 h3 + */ + temp17 = _mm_unpacklo_epi16(temp1, temp2); //a0 a1 b0 b1 c0 c1 d0 d1 + temp19 = _mm_unpacklo_epi16(temp3, temp4); //a2 a3 b2 b3 c2 c3 d2 d3 + temp18 = _mm_unpackhi_epi16(temp1, temp2); //e0 e1 f0 f1 g0 g1 h0 h1 + temp20 = _mm_unpackhi_epi16(temp3, temp4); //e2 e3 f2 f3 g2 g3 h2 h3 + + resq_r0_1 = _mm_unpacklo_epi32(temp17, temp19); //a0 a1 a2 a3 b0 b1 b2 b3 + resq_r1_1 = _mm_unpackhi_epi32(temp17, temp19); //c0 c1 c2 c3 d0 d1 d2 d3 + resq_r2_1 = _mm_unpacklo_epi32(temp18, temp20); //e0 e1 e2 e3 f0 f1 f2 f3 + resq_r3_1 = _mm_unpackhi_epi32(temp18, temp20); //g0 g2 g2 g3 h0 h1 h2 h3 + /* + * a4 b4 c4 d4 e4 f4 g4 h4 + * a5 b5 c5 d5 e5 f5 g5 h5 + * a6 b6 c6 d6 e6 f6 g6 h6 + * a7 b7 c7 d7 e7 f7 g7 h7 + */ + temp17 = _mm_unpacklo_epi16(temp5, temp6); //a4 a5 b4 b5 c4 c5 d4 d5 + temp19 = 
_mm_unpacklo_epi16(temp7, temp8); //a6 a7 b6 b7 c6 c7 d6 d7 + temp18 = _mm_unpackhi_epi16(temp5, temp6); //e4 e5 f4 f5 g4 g5 h4 h5 + temp20 = _mm_unpackhi_epi16(temp7, temp8); //e6 e7 f6 f7 g6 g7 h6 h7 + + resq_r4_1 = _mm_unpacklo_epi32(temp17, temp19); //a4 a5 a6 a7 b4 b5 b6 b7 + resq_r5_1 = _mm_unpackhi_epi32(temp17, temp19); //c4 c5 c6 c7 d4 d5 d6 d7 + resq_r6_1 = _mm_unpacklo_epi32(temp18, temp20); //e4 e5 e6 e7 f4 f5 f6 f7 + resq_r7_1 = _mm_unpackhi_epi32(temp18, temp20); //g4 g5 g6 g7 h4 h5 h6 h7 + /* a0 a1 a2 a3 b0 b1 b2 b3 + * c0 c1 c2 c3 d0 d1 d2 d3 + * e0 e1 e2 e3 f0 f1 f2 f3 + * g0 g2 g2 g3 h0 h1 h2 h3 + * a4 a5 a6 a7 b4 b5 b6 b7 + * c4 c5 c6 c7 d4 d5 d6 d7 + * e4 e5 e6 e7 f4 f5 f6 f7 + * g4 g5 g6 g7 h4 h5 h6 h7 + */ + resq_r0_2 = _mm_unpacklo_epi64(resq_r0_1, resq_r4_1); //a0 a1 a2 a3 a4 a5 a6 a7 + resq_r1_2 = _mm_unpackhi_epi64(resq_r0_1, resq_r4_1); //b0 b1 b2 b3 b4 b5 b6 b7 + resq_r2_2 = _mm_unpacklo_epi64(resq_r1_1, resq_r5_1); //c0 c1 c2 c3 c4 c5 c6 c7 + resq_r3_2 = _mm_unpackhi_epi64(resq_r1_1, resq_r5_1); //d0 d1 d2 d3 d4 d5 d6 d7 + resq_r4_2 = _mm_unpacklo_epi64(resq_r2_1, resq_r6_1); //e0 e1 e2 e3 e4 e5 e6 e7 + resq_r5_2 = _mm_unpackhi_epi64(resq_r2_1, resq_r6_1); //f0 f1 f2 f3 f4 f5 f6 f7 + resq_r6_2 = _mm_unpacklo_epi64(resq_r3_1, resq_r7_1); //g0 g1 g2 g3 g4 g5 g6 g7 + resq_r7_2 = _mm_unpackhi_epi64(resq_r3_1, resq_r7_1); //h0 h1 h2 h3 h4 h5 h6 h7 + + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r1_2); + resq_r1_1 = _mm_unpacklo_epi16(resq_r1_2, sign_reg); //a1 b1 c1 d1 -- 32 bit + resq_r1_2 = _mm_unpackhi_epi16(resq_r1_2, sign_reg); //e1 f1 g1 h1 -- 32 bit + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r3_2); + resq_r3_1 = _mm_unpacklo_epi16(resq_r3_2, sign_reg); //a3 b3 c3 d3 -- 32 bit + resq_r3_2 = _mm_unpackhi_epi16(resq_r3_2, sign_reg); //e3 f3 g3 h3 -- 32 bit + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r5_2); + resq_r5_1 = _mm_unpacklo_epi16(resq_r5_2, sign_reg); //a5 b5 c5 d5 -- 32 bit + resq_r5_2 = _mm_unpackhi_epi16(resq_r5_2, 
sign_reg); //e5 f5 g5 h5 -- 32 bit + sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r7_2); + resq_r7_1 = _mm_unpacklo_epi16(resq_r7_2, sign_reg); //a7 b7 c7 d7 -- 32 bit + resq_r7_2 = _mm_unpackhi_epi16(resq_r7_2, sign_reg); //e7 f7 g7 h7 -- 32 bit + + zero_8x16b = _mm_setzero_si128(); // all bits reset to zero + //Load pred buffer row 0 + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r0_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 1 + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r1_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 2 + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r2_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 3 + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r3_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 4 + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[4 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r4_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 5 + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[5 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bit + pred_r5_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 6 + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[6 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r6_1 = 
_mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + //Load pred buffer row 7 + predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[7 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r7_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits + + /*--------------------------------------------------------------------*/ + /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */ + /* */ + /* Add the prediction and store it back to reconstructed frame buffer */ + /* [Prediction buffer itself in this case] */ + /*--------------------------------------------------------------------*/ + + /* y0j = w0j + w4j */ + temp1 = _mm_add_epi16(resq_r0_2, resq_r4_2); + /* y2j = w0j - w4j */ + temp3 = _mm_sub_epi16(resq_r0_2, resq_r4_2); + /* y1j = -w3j + w5j - w7j - (w7j >> 1) */ + temp2 = _mm_sub_epi32(resq_r5_1, resq_r3_1); //-w3+w5 + temp10 = _mm_sub_epi32(resq_r5_2, resq_r3_2); + temp4 = _mm_sub_epi32(temp2, resq_r7_1); //-w3+w5-w7 + temp12 = _mm_sub_epi32(temp10, resq_r7_2); + temp5 = _mm_srai_epi32(resq_r7_1, 1); //w7>>1 + temp13 = _mm_srai_epi32(resq_r7_2, 1); + temp2 = _mm_sub_epi32(temp4, temp5); //-w3+w5-w7 -(w7>>1) + temp10 = _mm_sub_epi32(temp12, temp13); + temp2 = _mm_packs_epi32(temp2, temp10); + /* y3j = w1j + w7j - w3j - (w3j >> 1) */ + temp4 = _mm_add_epi32(resq_r1_1, resq_r7_1); //w1+w7 + temp12 = _mm_add_epi32(resq_r1_2, resq_r7_2); + temp4 = _mm_sub_epi32(temp4, resq_r3_1); //w1+w7-w3 + temp12 = _mm_sub_epi32(temp12, resq_r3_2); + temp5 = _mm_srai_epi32(resq_r3_1, 1); //w3>>1 + temp13 = _mm_srai_epi32(resq_r3_2, 1); + temp4 = _mm_sub_epi32(temp4, temp5); //w1+w7-w3-(w3>>1) + temp12 = _mm_sub_epi32(temp12, temp13); + temp4 = _mm_packs_epi32(temp4, temp12); + /* y4j = (w2j >> 1) - w6j */ + temp5 = _mm_srai_epi16(resq_r2_2, 1); //w2>>1 + temp5 = _mm_sub_epi16(temp5, resq_r6_2); //(w2>>1)-w6 + /* y5j = -w1j + w7j + w5j + (w5j >> 1) */ + temp6 = _mm_sub_epi32(resq_r7_1, 
resq_r1_1); //w7-w1 + temp14 = _mm_sub_epi32(resq_r7_2, resq_r1_2); + temp6 = _mm_add_epi32(temp6, resq_r5_1); //w7-w1+w5 + temp14 = _mm_add_epi32(temp14, resq_r5_2); + temp7 = _mm_srai_epi32(resq_r5_1, 1); //w5>>1 + temp15 = _mm_srai_epi32(resq_r5_2, 1); + temp6 = _mm_add_epi32(temp6, temp7); //w7-w1_w5+(w5>>1) + temp14 = _mm_add_epi32(temp14, temp15); + temp6 = _mm_packs_epi32(temp6, temp14); + /* y6j = w2j + (w6j >> 1) */ + temp7 = _mm_srai_epi16(resq_r6_2, 1); //w6>>1 + temp7 = _mm_add_epi16(temp7, resq_r2_2); //(w6>>1)+w2 + /* y7j = w3j + w5j + w1j + (w1j >> 1) */ + temp8 = _mm_add_epi32(resq_r3_1, resq_r5_1); //w3+w5 + temp16 = _mm_add_epi32(resq_r3_2, resq_r5_2); + temp8 = _mm_add_epi32(temp8, resq_r1_1); //w3+w5+w1 + temp16 = _mm_add_epi32(temp16, resq_r1_2); + temp17 = _mm_srai_epi32(resq_r1_1, 1); //w1>>1 + temp18 = _mm_srai_epi32(resq_r1_2, 1); + temp8 = _mm_add_epi32(temp8, temp17); //w3+w5+w1+(w1>>1) + temp16 = _mm_add_epi32(temp16, temp18); + temp8 = _mm_packs_epi32(temp8, temp16); + /*------------------------------------------------------------------*/ + /*------------------------------------------------------------------*/ + /* z0j = y0j + y6j */ + resq_r0_1 = _mm_add_epi16(temp1, temp7); + /* z1j = y1j + (y7j >> 2) */ + resq_r1_1 = _mm_srai_epi16(temp8, 2); + resq_r1_1 = _mm_add_epi16(resq_r1_1, temp2); + /* z2j = y2j + y4j */ + resq_r2_1 = _mm_add_epi16(temp3, temp5); + /* z3j = y3j + (y5j >> 2) */ + resq_r3_1 = _mm_srai_epi16(temp6, 2); + resq_r3_1 = _mm_add_epi16(resq_r3_1, temp4); + /* z4j = y2j - y4j */ + resq_r4_1 = _mm_sub_epi16(temp3, temp5); + /* z5j = (y3j >> 2) - y5j */ + resq_r5_1 = _mm_srai_epi16(temp4, 2); + resq_r5_1 = _mm_sub_epi16(resq_r5_1, temp6); + /* z6j = y0j - y6j */ + resq_r6_1 = _mm_sub_epi16(temp1, temp7); + /* z7j = y7j - (y1j >> 2) */ + resq_r7_1 = _mm_srai_epi16(temp2, 2); + resq_r7_1 = _mm_sub_epi16(temp8, resq_r7_1); + /*------------------------------------------------------------------*/ + + 
/*------------------------------------------------------------------*/ + /* x0j = z0j + z7j */ + temp1 = _mm_add_epi16(resq_r0_1, resq_r7_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp1); + temp10 = _mm_unpacklo_epi16(temp1, sign_reg); + temp11 = _mm_unpackhi_epi16(temp1, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp1 = _mm_add_epi16(temp10, pred_r0_1); + /* x1j = z2j + z5j */ + temp2 = _mm_add_epi16(resq_r2_1, resq_r5_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp2); + temp10 = _mm_unpacklo_epi16(temp2, sign_reg); + temp11 = _mm_unpackhi_epi16(temp2, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp2 = _mm_add_epi16(temp10, pred_r1_1); + /* x2j = z4j + z3j */ + temp3 = _mm_add_epi16(resq_r4_1, resq_r3_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp3); + temp10 = _mm_unpacklo_epi16(temp3, sign_reg); + temp11 = _mm_unpackhi_epi16(temp3, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp3 = _mm_add_epi16(temp10, pred_r2_1); + /* x3j = z6j + z1j */ + temp4 = _mm_add_epi16(resq_r6_1, resq_r1_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp4); + temp10 = _mm_unpacklo_epi16(temp4, sign_reg); + temp11 = _mm_unpackhi_epi16(temp4, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp4 = _mm_add_epi16(temp10, pred_r3_1); + /* x4j = z6j - z1j */ + temp5 = 
_mm_sub_epi16(resq_r6_1, resq_r1_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp5); + temp10 = _mm_unpacklo_epi16(temp5, sign_reg); + temp11 = _mm_unpackhi_epi16(temp5, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp5 = _mm_add_epi16(temp10, pred_r4_1); + /* x5j = z4j - z3j */ + temp6 = _mm_sub_epi16(resq_r4_1, resq_r3_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp6); + temp10 = _mm_unpacklo_epi16(temp6, sign_reg); + temp11 = _mm_unpackhi_epi16(temp6, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp6 = _mm_add_epi16(temp10, pred_r5_1); + /* x6j = z2j - z5j */ + temp7 = _mm_sub_epi16(resq_r2_1, resq_r5_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp7); + temp10 = _mm_unpacklo_epi16(temp7, sign_reg); + temp11 = _mm_unpackhi_epi16(temp7, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp7 = _mm_add_epi16(temp10, pred_r6_1); + /* x7j = z0j - z7j */ + temp8 = _mm_sub_epi16(resq_r0_1, resq_r7_1); + sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp8); + temp10 = _mm_unpacklo_epi16(temp8, sign_reg); + temp11 = _mm_unpackhi_epi16(temp8, sign_reg); + temp10 = _mm_add_epi32(temp10, value_32); + temp11 = _mm_add_epi32(temp11, value_32); + temp10 = _mm_srai_epi32(temp10, 6); + temp11 = _mm_srai_epi32(temp11, 6); + temp10 = _mm_packs_epi32(temp10, temp11); + temp8 = _mm_add_epi16(temp10, pred_r7_1); + /*------------------------------------------------------------------*/ + //Clipping the results to 8 bits + sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); // 
sign check + temp1 = _mm_and_si128(temp1, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp2, zero_8x16b); // sign check + temp2 = _mm_and_si128(temp2, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp3, zero_8x16b); // sign check + temp3 = _mm_and_si128(temp3, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b); // sign check + temp4 = _mm_and_si128(temp4, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b); // sign check + temp5 = _mm_and_si128(temp5, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp6, zero_8x16b); // sign check + temp6 = _mm_and_si128(temp6, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp7, zero_8x16b); // sign check + temp7 = _mm_and_si128(temp7, sign_reg); + sign_reg = _mm_cmpgt_epi16(temp8, zero_8x16b); // sign check + temp8 = _mm_and_si128(temp8, sign_reg); + + resq_r0_2 = _mm_packus_epi16(temp1, zero_8x16b); + resq_r1_2 = _mm_packus_epi16(temp2, zero_8x16b); + resq_r2_2 = _mm_packus_epi16(temp3, zero_8x16b); + resq_r3_2 = _mm_packus_epi16(temp4, zero_8x16b); + resq_r4_2 = _mm_packus_epi16(temp5, zero_8x16b); + resq_r5_2 = _mm_packus_epi16(temp6, zero_8x16b); + resq_r6_2 = _mm_packus_epi16(temp7, zero_8x16b); + resq_r7_2 = _mm_packus_epi16(temp8, zero_8x16b); + + _mm_storel_epi64((__m128i *) (&pu1_out[0]), resq_r0_2); + _mm_storel_epi64((__m128i *) (&pu1_out[out_strd]), resq_r1_2); + _mm_storel_epi64((__m128i *) (&pu1_out[2 * out_strd]), resq_r2_2); + _mm_storel_epi64((__m128i *) (&pu1_out[3 * out_strd]), resq_r3_2); + _mm_storel_epi64((__m128i *) (&pu1_out[4 * out_strd]), resq_r4_2); + _mm_storel_epi64((__m128i *) (&pu1_out[5 * out_strd]), resq_r5_2); + _mm_storel_epi64((__m128i *) (&pu1_out[6 * out_strd]), resq_r6_2); + _mm_storel_epi64((__m128i *) (&pu1_out[7 * out_strd]), resq_r7_2); +} + diff --git a/common/x86/ih264_luma_intra_pred_filters_ssse3.c b/common/x86/ih264_luma_intra_pred_filters_ssse3.c new file mode 100755 index 0000000..5a35372 --- /dev/null +++ b/common/x86/ih264_luma_intra_pred_filters_ssse3.c @@ -0,0 +1,2282 @@ 
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_luma_intra_pred_filters_ssse3.c + * + * @brief + * Contains function definitions for luma intra prediction filters in x86 + * intrinsics + * + * @author + * Ittiam + * + * @par List of Functions: + * - ih264_intra_pred_luma_4x4_mode_vert_ssse3 + * - ih264_intra_pred_luma_4x4_mode_horz_ssse3 + * - ih264_intra_pred_luma_4x4_mode_dc_ssse3 + * - ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3 + * - ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3 + * - ih264_intra_pred_luma_4x4_mode_vert_r_ssse3 + * - ih264_intra_pred_luma_4x4_mode_horz_d_ssse3 + * - ih264_intra_pred_luma_4x4_mode_vert_l_ssse3 + * - ih264_intra_pred_luma_4x4_mode_horz_u_ssse3 + * - ih264_intra_pred_luma_8x8_mode_vert_ssse3 + * - ih264_intra_pred_luma_8x8_mode_horz_ssse3 + * - ih264_intra_pred_luma_8x8_mode_dc_ssse3 + * - ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3 + * - ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3 + * - ih264_intra_pred_luma_8x8_mode_vert_r_ssse3 + * - ih264_intra_pred_luma_8x8_mode_horz_d_ssse3 + * - 
ih264_intra_pred_luma_8x8_mode_vert_l_ssse3 + * - ih264_intra_pred_luma_8x8_mode_horz_u_ssse3 + * - ih264_intra_pred_luma_16x16_mode_vert_ssse3 + * - ih264_intra_pred_luma_16x16_mode_horz_ssse3 + * - ih264_intra_pred_luma_16x16_mode_dc_ssse3 + * - ih264_intra_pred_luma_16x16_mode_plane_ssse3 + * + * @remarks + * None + * + ****************************************************************************** + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stddef.h> +#include <string.h> +#include <immintrin.h> + +/* User include files */ +#include "ih264_defs.h" +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_intra_pred_filters.h" + + + +/******************* LUMA INTRAPREDICTION *******************/ + +/******************* 4x4 Modes *******************/ + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_4x4_mode_vert_ssse3 + * + * @brief + * Perform Intra prediction for luma_4x4 mode:vertical + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:vertical ,described in sec 8.3.1.2.1 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264_intra_pred_luma_4x4_mode_vert_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top; + WORD32 dst_strd2, 
dst_strd3; + + __m128i top_16x8b; + __m128i mask_full_128b, mask_low_32b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + mask_full_128b = _mm_set1_epi8(0xff); + + pu1_top = pu1_src + BLK_SIZE + 1; + + mask_low_32b = _mm_srli_si128(mask_full_128b, 12); + + top_16x8b = _mm_loadl_epi64((__m128i *)pu1_top); + + dst_strd2 = dst_strd << 1; + dst_strd3 = dst_strd + dst_strd2; + + _mm_maskmoveu_si128(top_16x8b, mask_low_32b, (char*)pu1_dst); + _mm_maskmoveu_si128(top_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd)); + _mm_maskmoveu_si128(top_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2)); + _mm_maskmoveu_si128(top_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_4x4_mode_horz_ssse3 + * + * @brief + * Perform Intra prediction for luma_4x4 mode:horizontal + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264_intra_pred_luma_4x4_mode_horz_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left; + WORD32 dst_strd2, dst_strd3; + WORD32 val1, val2; + + __m128i left_16x8b; + __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b; + __m128i mask_full_128b, mask_low_32b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + mask_full_128b = _mm_set1_epi8(0xff); + + pu1_left = pu1_src + BLK_SIZE - 1; + + mask_low_32b = _mm_srli_si128(mask_full_128b, 12); + 
left_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3)); + + val1 = _mm_extract_epi16(left_16x8b, 1); + val2 = _mm_extract_epi16(left_16x8b, 0); + + row1_16x8b = _mm_set1_epi8(val1 >> 8); + row2_16x8b = _mm_set1_epi8(val1 & 0xff); + row3_16x8b = _mm_set1_epi8(val2 >> 8); + row4_16x8b = _mm_set1_epi8(val2 & 0xff); + + dst_strd2 = dst_strd << 1; + dst_strd3 = dst_strd + dst_strd2; + + _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst); + _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd)); + _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2)); + _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_4x4_mode_dc_ssse3 + * + * @brief + * Perform Intra prediction for luma_4x4 mode:DC + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:DC ,described in sec 8.3.1.2.3 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_dc_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 u1_useleft; /* availability of left predictors (only for DC) */ + UWORD8 u1_usetop; /* availability of top predictors (only for DC) */ + UWORD8 *pu1_left, *pu1_top; + WORD32 dc_val, flag; + WORD32 dst_strd2, dst_strd3; + + __m128i mask_full_128b, mask_low_32b; + __m128i dcval_16x8b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + mask_full_128b = _mm_set1_epi8(0xff); + + u1_useleft = BOOLEAN(ngbr_avail & 
LEFT_MB_AVAILABLE_MASK); + u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK); + + pu1_left = pu1_src + BLK_SIZE - 1; + pu1_top = pu1_src + BLK_SIZE + 1; + + mask_low_32b = _mm_srli_si128(mask_full_128b, 12); + + flag = u1_useleft + u1_usetop; + + if(flag) + { + WORD32 shft, ofst = 0; + + __m128i left_16x8b, top_16x8b, val_16x8b, tmp_8x16b, zero_vector; + + if(u1_useleft) + { + left_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3)); + ofst += 2; + } + else + left_16x8b = _mm_setzero_si128(); + + zero_vector = _mm_setzero_si128(); + + if(u1_usetop) + { + top_16x8b = _mm_loadl_epi64((__m128i *)pu1_top); + ofst += 2; + } + else + top_16x8b = _mm_setzero_si128(); + + shft = flag + 1; + val_16x8b = _mm_unpacklo_epi32(left_16x8b, top_16x8b); + tmp_8x16b = _mm_sad_epu8(val_16x8b, zero_vector); + + dc_val = _mm_extract_epi16(tmp_8x16b, 0); + dc_val = (dc_val + ofst) >> shft; + } + else + dc_val = 128; + + dst_strd2 = dst_strd << 1; + dst_strd3 = dst_strd + dst_strd2; + + dcval_16x8b = _mm_set1_epi8(dc_val); + + _mm_maskmoveu_si128(dcval_16x8b, mask_low_32b, (char*)pu1_dst); + _mm_maskmoveu_si128(dcval_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd)); + _mm_maskmoveu_si128(dcval_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2)); + _mm_maskmoveu_si128(dcval_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3 + * + * @brief + * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this 
function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top; + WORD32 dst_strd2, dst_strd3; + + __m128i top_16x8b, top_8x16b, top_sh_8x16b; + __m128i res1_8x16b, res2_8x16b, res_16x8b; + __m128i zero_vector, const_2_8x16b; + __m128i mask_full_128b, mask_low_32b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_top = pu1_src + BLK_SIZE + 1; + + top_16x8b = _mm_loadl_epi64((__m128i *)pu1_top); + zero_vector = _mm_setzero_si128(); + top_8x16b = _mm_unpacklo_epi8(top_16x8b, zero_vector); //t0 t1 t2 t3 t4 t5 t6 t7 + + mask_full_128b = _mm_set1_epi8(0xff); + top_sh_8x16b = _mm_srli_si128(top_8x16b, 2); //t1 t2 t3 t4 t5 t6 t7 0 + const_2_8x16b = _mm_set1_epi16(2); + + top_sh_8x16b = _mm_shufflehi_epi16(top_sh_8x16b, 0xa4); //t1 t2 t3 t4 t5 t6 t7 t7 + res1_8x16b = _mm_add_epi16(top_8x16b, top_sh_8x16b); + mask_low_32b = _mm_srli_si128(mask_full_128b, 12); + res2_8x16b = _mm_srli_si128(res1_8x16b, 2); + + res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b); + res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b); + res1_8x16b = _mm_srai_epi16(res1_8x16b, 2); + + dst_strd2 = dst_strd << 1; + dst_strd3 = dst_strd + dst_strd2; + + res_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b); + _mm_maskmoveu_si128(res_16x8b, mask_low_32b, (char*)pu1_dst); + res_16x8b = _mm_srli_si128(res_16x8b, 1); + _mm_maskmoveu_si128(res_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd)); + res_16x8b = _mm_srli_si128(res_16x8b, 1); + _mm_maskmoveu_si128(res_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2)); + res_16x8b = _mm_srli_si128(res_16x8b, 1); + _mm_maskmoveu_si128(res_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); +} + +/** + ******************************************************************************* + * + * 
ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3 + * + * @brief + * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left; + WORD32 dst_strd2, dst_strd3; + + __m128i top_left_16x8b, top_left_8x16b; + __m128i top_left_sh_16x8b, top_left_sh_8x16b; + __m128i res1_8x16b, res2_8x16b; + __m128i res1_16x8b, res2_16x8b; + __m128i zero_vector, const_2_8x16b; + __m128i mask_full_128b, mask_low_32b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_left = pu1_src + BLK_SIZE - 1; + + top_left_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 3)); //l3 l2 l1 l0 tl t0 t1 t2... + zero_vector = _mm_setzero_si128(); + top_left_sh_16x8b = _mm_srli_si128(top_left_16x8b, 1); //l2 l1 l0 tl t0 t1 t2 t3... + + top_left_8x16b = _mm_unpacklo_epi8(top_left_16x8b, zero_vector); + top_left_sh_8x16b = _mm_unpacklo_epi8(top_left_sh_16x8b, zero_vector); + + mask_full_128b = _mm_set1_epi8(0xff); + res1_8x16b = _mm_add_epi16(top_left_8x16b, top_left_sh_8x16b); //l3+l2 l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3... + const_2_8x16b = _mm_set1_epi16(2); + res2_8x16b = _mm_srli_si128(res1_8x16b, 2); //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3... 
+ + res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b); + mask_low_32b = _mm_srli_si128(mask_full_128b, 12); + res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b); //l3+2*l2+l1+2 l2+2*l1+l0+2... + res1_8x16b = _mm_srai_epi16(res1_8x16b, 2); + res1_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b); + + dst_strd2 = dst_strd << 1; + dst_strd3 = dst_strd + dst_strd2; + + res2_16x8b = _mm_srli_si128(res1_16x8b, 3); + _mm_maskmoveu_si128(res2_16x8b, mask_low_32b, (char*)pu1_dst); + res2_16x8b = _mm_srli_si128(res1_16x8b, 2); + _mm_maskmoveu_si128(res2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd)); + res2_16x8b = _mm_srli_si128(res1_16x8b, 1); + _mm_maskmoveu_si128(res2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2)); + _mm_maskmoveu_si128(res1_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_4x4_mode_vert_r_ssse3 + * + * @brief + * Perform Intra prediction for luma_4x4 mode:Vertical_Right + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_vert_r_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left; + WORD32 dst_strd2, dst_strd3; + + __m128i val_16x8b, temp_16x8b; + __m128i w11_a1_16x8b, w11_a2_16x8b; + __m128i w121_a1_8x16b, w121_a2_8x16b, w121_sh_8x16b; + __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b; + 
__m128i zero_vector, const_2_8x16b; + __m128i mask_full_128b, mask_low_32b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + mask_full_128b = _mm_set1_epi8(0xff); + mask_low_32b = _mm_srli_si128(mask_full_128b, 12); + + pu1_left = pu1_src + BLK_SIZE - 1; + + val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 2)); + zero_vector = _mm_setzero_si128(); + + w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector); //l2 l1 l0 tl t0 t1 t2 t3 + w11_a1_16x8b = _mm_srli_si128(val_16x8b, 3); + w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l1 l0 tl t0 t1 t2 t3 0 + w11_a2_16x8b = _mm_srli_si128(val_16x8b, 4); + + w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3 t3 + row1_16x8b = _mm_avg_epu8(w11_a1_16x8b, w11_a2_16x8b); + w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3 t3 0 + + const_2_8x16b = _mm_set1_epi16(2); + w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l2+2*l1+l0 l1+2*l0+tl ... 
+ w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b); + w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2); + + w121_sh_8x16b = _mm_shufflelo_epi16(w121_a1_8x16b, 0xe1); + w121_sh_8x16b = _mm_srli_si128(w121_sh_8x16b, 2); + + row4_16x8b = _mm_packus_epi16(w121_sh_8x16b, w121_sh_8x16b); + temp_16x8b = _mm_slli_si128(w121_a1_8x16b, 13); + row2_16x8b = _mm_srli_si128(row4_16x8b, 1); + row3_16x8b = _mm_alignr_epi8(row1_16x8b, temp_16x8b, 15); + + dst_strd2 = dst_strd << 1; + dst_strd3 = dst_strd + dst_strd2; + + _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst); + _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd)); + _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2)); + _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_4x4_mode_horz_d_ssse3 + * + * @brief + * Perform Intra prediction for luma_4x4 mode:Horizontal_Down + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_horz_d_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left; + WORD32 dst_strd2, dst_strd3; + WORD32 val_121_t0t1; + + __m128i val_16x8b, val_sh_16x8b; + __m128i w11_16x8b; + __m128i w121_a1_8x16b, w121_a2_8x16b, w121_16x8b; + __m128i row1_16x8b, row2_16x8b,
row3_16x8b, row4_16x8b; + + __m128i zero_vector, const_2_8x16b; + __m128i mask_full_128b, mask_low_32b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + mask_full_128b = _mm_set1_epi8(0xff); + mask_low_32b = _mm_srli_si128(mask_full_128b, 12); + + pu1_left = pu1_src + BLK_SIZE - 1; + + val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3)); + zero_vector = _mm_setzero_si128(); + val_sh_16x8b = _mm_srli_si128(val_16x8b, 1); + w11_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b); + + w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector); //l3 l2 l1 l0 tl t0 t1 t2 + w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l2 l1 l0 tl t0 t1 t2 0 + w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l3+l2 l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2 + w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2 0 + + zero_vector = _mm_setzero_si128(); + const_2_8x16b = _mm_set1_epi16(2); + + w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l3+2*l2+l1 l2+2*l1+l0 l1+2*l0+tl ... 
+ w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b); + w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2); + + w121_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b); + + row4_16x8b = _mm_unpacklo_epi8(w11_16x8b, w121_16x8b); + val_121_t0t1 = _mm_extract_epi16(w121_16x8b, 2); + row4_16x8b = _mm_insert_epi16(row4_16x8b, val_121_t0t1, 4); + + dst_strd2 = dst_strd << 1; + dst_strd3 = dst_strd + dst_strd2; + + row1_16x8b = _mm_srli_si128(row4_16x8b, 6); + row2_16x8b = _mm_srli_si128(row4_16x8b, 4); + row3_16x8b = _mm_srli_si128(row4_16x8b, 2); + + _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst); + _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd)); + _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2)); + _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_4x4_mode_vert_l_ssse3 + * + * @brief + * Perform Intra prediction for luma_4x4 mode:Vertical_Left + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_vert_l_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top; + WORD32 dst_strd2, dst_strd3; + + __m128i val_16x8b, val_sh_16x8b; + __m128i w121_a1_8x16b, w121_a2_8x16b; + __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b; + + 
__m128i zero_vector, const_2_8x16b; + __m128i mask_full_128b, mask_low_32b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + mask_full_128b = _mm_set1_epi8(0xff); + mask_low_32b = _mm_srli_si128(mask_full_128b, 12); + + pu1_top = pu1_src +BLK_SIZE + 1; + + val_16x8b = _mm_loadl_epi64((__m128i *)pu1_top); + zero_vector = _mm_setzero_si128(); + val_sh_16x8b = _mm_srli_si128(val_16x8b, 1); + row1_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b); + + w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector); //t0 t1 t2 t3 t4 t5... + w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //t1 t2 t3 t4 t5 t6... + w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //t0+t1 t1+t2 t2+t3 t3+t4 t4+t5... + w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //t1+t2 t2+t3 t3+t4 t4+t5 t5+t6... + + zero_vector = _mm_setzero_si128(); + const_2_8x16b = _mm_set1_epi16(2); + + w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //t0+2*t1+t2 t1+2*t2+t3 t2+2*t3+t4... + w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b); + w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2); + + row2_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b); + + dst_strd2 = dst_strd << 1; + dst_strd3 = dst_strd + dst_strd2; + + row3_16x8b = _mm_srli_si128(row1_16x8b, 1); + row4_16x8b = _mm_srli_si128(row2_16x8b, 1); + + _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst); + _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd)); + _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2)); + _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_4x4_mode_horz_u_ssse3 + * + * @brief + * Perform Intra prediction for luma_4x4 mode:Horizontal_Up + * + * @par Description: + * Perform Intra prediction for luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9 + * + * @param[in] pu1_src + * UWORD8 pointer to the 
source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_4x4_mode_horz_u_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left; + WORD32 dst_strd2, dst_strd3; + + __m128i val_16x8b, val_sh_16x8b; + __m128i w11_16x8b; + __m128i w121_a1_8x16b, w121_a2_8x16b, w121_16x8b; + __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b; + + __m128i zero_vector, const_2_8x16b, rev_16x8b; + __m128i mask_full_128b, mask_low_32b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + mask_full_128b = _mm_set1_epi8(0xff); + mask_low_32b = _mm_srli_si128(mask_full_128b, 12); + + pu1_left = pu1_src + BLK_SIZE - 1; + + zero_vector = _mm_setzero_si128(); + rev_16x8b = _mm_setr_epi8(3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + + val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3)); //l3 l2 l1 l0 0 0 0... + val_16x8b = _mm_shuffle_epi8(val_16x8b, rev_16x8b); //l0 l1 l2 l3 l3 l3 l3... + + val_sh_16x8b = _mm_srli_si128(val_16x8b, 1); + w11_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b); + + w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector); //l0 l1 l2 l3 l3 l3... + w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l1 l2 l3 l3 l3 l3... + + w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l0+t1 l1+l2 l2+l3 2*l3 2*l3... + w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l1+t2 l2+l3 2*l3 2*l3 2*l3... + + zero_vector = _mm_setzero_si128(); + const_2_8x16b = _mm_set1_epi16(2); + + w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l0+2*l1+l2 l1+2*l2+l3 l2+3*l3 4*l3 4*l3... 
+ w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b); + w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2); + + w121_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b); + + dst_strd2 = dst_strd << 1; + dst_strd3 = dst_strd + dst_strd2; + + row1_16x8b = _mm_unpacklo_epi8(w11_16x8b, w121_16x8b); + row2_16x8b = _mm_srli_si128(row1_16x8b, 2); + row3_16x8b = _mm_srli_si128(row1_16x8b, 4); + row4_16x8b = _mm_srli_si128(row1_16x8b, 6); + + _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst); + _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd)); + _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2)); + _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); +} + +/******************* 8x8 Modes *******************/ + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_8x8_mode_vert_ssse3 + * + * @brief + * Perform Intra prediction for luma_8x8 mode:vertical + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264_intra_pred_luma_8x8_mode_vert_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top = NULL; + __m128i top_8x8b; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + BLK8x8SIZE + 1; + + top_8x8b = _mm_loadl_epi64((__m128i *)pu1_top); + + _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), top_8x8b); + 
_mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), top_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), top_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), top_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), top_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), top_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), top_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), top_8x8b); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_8x8_mode_horz_ssse3 + * + * @brief + * Perform Intra prediction for luma_8x8 mode:horizontal + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:horizontal ,described in sec 8.3.2.2.3 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264_intra_pred_luma_8x8_mode_horz_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left = pu1_src + BLK8x8SIZE - 1; + __m128i row1_8x8b, row2_8x8b, row3_8x8b, row4_8x8b; + __m128i row5_8x8b, row6_8x8b, row7_8x8b, row8_8x8b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + row1_8x8b = _mm_set1_epi8(pu1_left[0]); + row2_8x8b = _mm_set1_epi8(pu1_left[-1]); + row3_8x8b = _mm_set1_epi8(pu1_left[-2]); + row4_8x8b = _mm_set1_epi8(pu1_left[-3]); + row5_8x8b = _mm_set1_epi8(pu1_left[-4]); + row6_8x8b = _mm_set1_epi8(pu1_left[-5]); + row7_8x8b = _mm_set1_epi8(pu1_left[-6]); + row8_8x8b = _mm_set1_epi8(pu1_left[-7]); + + _mm_storel_epi64((__m128i *)(pu1_dst + 0 *
dst_strd), row1_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), row2_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), row3_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), row4_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), row5_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), row6_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), row7_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), row8_8x8b); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_8x8_mode_dc_ssse3 + * + * @brief + * Perform Intra prediction for luma_8x8 mode:DC + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.4 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_8x8_mode_dc_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 u1_useleft; /* availability of left predictors (only for DC) */ + UWORD8 u1_usetop; /* availability of top predictors (only for DC) */ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + __m128i dc_val_8x8b; + WORD32 dc_val = 0; + UNUSED(src_strd); + + u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK); + u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK); + pu1_top = pu1_src + BLK8x8SIZE + 1; + pu1_left = pu1_src + BLK8x8SIZE - 1; + + if(u1_useleft || u1_usetop) + { + WORD32 shft = 2; + __m128i 
val_8x8b, zero_8x8b, sum_8x16b; + + zero_8x8b = _mm_setzero_si128(); + + if(u1_useleft) + { + val_8x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 7)); + sum_8x16b = _mm_sad_epu8(zero_8x8b, val_8x8b); + + shft++; + dc_val += 4; + dc_val += _mm_extract_epi16(sum_8x16b, 0); + } + if(u1_usetop) + { + val_8x8b = _mm_loadl_epi64((__m128i *)pu1_top); + sum_8x16b = _mm_sad_epu8(zero_8x8b, val_8x8b); + + shft++; + dc_val += 4; + dc_val += _mm_extract_epi16(sum_8x16b, 0); + } + dc_val = dc_val >> shft; + } + else + dc_val = 128; + + dc_val_8x8b = _mm_set1_epi8(dc_val); + + _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), dc_val_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), dc_val_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), dc_val_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), dc_val_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), dc_val_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), dc_val_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), dc_val_8x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), dc_val_8x8b); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3 + * + * @brief + * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.5 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3(UWORD8 
*pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + __m128i top_16x8; + __m128i out_15x16; + __m128i a0_8x16, a1_8x16, a2_8x16; + __m128i temp1, temp2; + __m128i res1_8x16, res2_8x16; + __m128i zero = _mm_setzero_si128(); + __m128i const_val2_8x16 = _mm_set1_epi16(2); + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_top = pu1_src + BLK8x8SIZE + 1; + + top_16x8 = _mm_loadu_si128((__m128i *)(pu1_top)); + + temp1 = _mm_srli_si128(top_16x8, 1); + temp2 = _mm_srli_si128(top_16x8, 2); + a0_8x16 = _mm_unpacklo_epi8(top_16x8, zero); + a1_8x16 = _mm_unpacklo_epi8(temp1, zero); + a2_8x16 = _mm_unpacklo_epi8(temp2, zero); + + a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); + a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); + res1_8x16 = _mm_srai_epi16(a0_8x16, 2); + + temp2 = _mm_srli_si128(top_16x8, 2); + temp1 = _mm_srli_si128(top_16x8, 1); + a2_8x16 = _mm_unpackhi_epi8(temp2, zero); + a0_8x16 = _mm_unpackhi_epi8(top_16x8, zero); + a2_8x16 = _mm_shufflehi_epi16(a2_8x16, 0x14); + a1_8x16 = _mm_unpackhi_epi8(temp1, zero); + + a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); + a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); + res2_8x16 = _mm_srai_epi16(a0_8x16, 2); + + out_15x16 = _mm_packus_epi16(res1_8x16, res2_8x16); + + _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), out_15x16); + out_15x16 = _mm_srli_si128(out_15x16, 1); + _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), out_15x16); + out_15x16 = _mm_srli_si128(out_15x16, 1); + _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), out_15x16); + out_15x16 = _mm_srli_si128(out_15x16, 1); + _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out_15x16); + out_15x16 = _mm_srli_si128(out_15x16, 1); + _mm_storel_epi64((__m128i 
*)(pu1_dst + 4 * dst_strd), out_15x16); + out_15x16 = _mm_srli_si128(out_15x16, 1); + _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), out_15x16); + out_15x16 = _mm_srli_si128(out_15x16, 1); + _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), out_15x16); + out_15x16 = _mm_srli_si128(out_15x16, 1); + _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out_15x16); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3 + * + * @brief + * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.6 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + __m128i top_8x8, left_16x8; + __m128i out_15x16; + __m128i a0_8x16, a1_8x16, a2_8x16; + __m128i temp1, temp2; + __m128i res1_8x16, res2_8x16; + __m128i zero = _mm_setzero_si128(); + __m128i const_val2_8x16 = _mm_set1_epi16(2); + __m128i str_8x8; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_left = pu1_src + BLK8x8SIZE - 1; + pu1_top = pu1_src + BLK8x8SIZE + 1; + + left_16x8 = _mm_loadu_si128((__m128i *)(pu1_left - 7)); + + temp1 = _mm_srli_si128(left_16x8, 1); + temp2 = _mm_srli_si128(left_16x8, 2); + a0_8x16 = 
_mm_unpacklo_epi8(left_16x8, zero); + a1_8x16 = _mm_unpacklo_epi8(temp1, zero); + a2_8x16 = _mm_unpacklo_epi8(temp2, zero); + + a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); + a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); + res1_8x16 = _mm_srai_epi16(a0_8x16, 2); + + top_8x8 = _mm_loadu_si128((__m128i *)(pu1_top - 1)); + + temp1 = _mm_srli_si128(top_8x8, 1); + temp2 = _mm_srli_si128(top_8x8, 2); + a0_8x16 = _mm_unpacklo_epi8(top_8x8, zero); + a1_8x16 = _mm_unpacklo_epi8(temp1, zero); + a2_8x16 = _mm_unpacklo_epi8(temp2, zero); + + a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); + a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); + res2_8x16 = _mm_srai_epi16(a0_8x16, 2); + + out_15x16 = _mm_packus_epi16(res1_8x16, res2_8x16); + + str_8x8 = _mm_srli_si128(out_15x16, 7); + _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out_15x16, 6); + _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out_15x16, 5); + _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out_15x16, 4); + _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out_15x16, 3); + _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out_15x16, 2); + _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out_15x16, 1); + _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8); + _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out_15x16); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_8x8_mode_vert_r_ssse3 + * + * @brief + * Perform Intra prediction for luma_8x8 mode:Vertical_Right + * + * @par Description: + * Perform 
Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.7 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_8x8_mode_vert_r_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + __m128i top_8x8, left_16x8; + __m128i out1_16x16, out2_16x16; + __m128i a0_8x16, a1_8x16, a2_8x16; + __m128i temp1, temp2; + __m128i res1_8x16, res2_8x16, res3_8x16; + __m128i zero = _mm_setzero_si128(); + __m128i const_val2_8x16 = _mm_set1_epi16(2); + __m128i str_8x8; + __m128i mask = _mm_set1_epi32(0xFFFF); + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_left = pu1_src + BLK8x8SIZE - 1; + pu1_top = pu1_src + BLK8x8SIZE + 1; + + left_16x8 = _mm_loadu_si128((__m128i *)(pu1_left - 6)); + + temp1 = _mm_srli_si128(left_16x8, 1); + temp2 = _mm_srli_si128(left_16x8, 2); + a0_8x16 = _mm_unpacklo_epi8(left_16x8, zero); + a1_8x16 = _mm_unpacklo_epi8(temp1, zero); + a2_8x16 = _mm_unpacklo_epi8(temp2, zero); + + a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); + a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); + res1_8x16 = _mm_srai_epi16(a0_8x16, 2); + + top_8x8 = _mm_loadu_si128((__m128i *)(pu1_top - 1)); + + temp1 = _mm_srli_si128(top_8x8, 1); + temp2 = _mm_srli_si128(top_8x8, 2); + a0_8x16 = _mm_unpacklo_epi8(top_8x8, zero); + a1_8x16 = _mm_unpacklo_epi8(temp1, zero); 
+ a2_8x16 = _mm_unpacklo_epi8(temp2, zero); + + res3_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); + + a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); + a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); + res2_8x16 = _mm_srai_epi16(a0_8x16, 2); + + str_8x8 = _mm_packus_epi16(res3_8x16, zero); + _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8); + + temp1 = _mm_and_si128(res1_8x16, mask); + temp1 = _mm_packs_epi32(temp1, temp1); + out1_16x16 = _mm_packus_epi16(temp1, res2_8x16); + + res1_8x16 = _mm_slli_si128(res1_8x16, 2); + temp1 = _mm_and_si128(res1_8x16, mask); + temp1 = _mm_packs_epi32(temp1, temp1); + out2_16x16 = _mm_packus_epi16(temp1, res3_8x16); + + str_8x8 = _mm_srli_si128(out1_16x16, 7); + _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8); + + str_8x8 = _mm_srli_si128(out2_16x16, 7); + _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8); + + str_8x8 = _mm_srli_si128(out1_16x16, 6); + _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8); + + str_8x8 = _mm_srli_si128(out2_16x16, 6); + _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8); + + str_8x8 = _mm_srli_si128(out1_16x16, 5); + _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8); + + str_8x8 = _mm_srli_si128(out2_16x16, 5); + _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8); + + str_8x8 = _mm_srli_si128(out1_16x16, 4); + _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), str_8x8); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_8x8_mode_horz_d_ssse3 + * + * @brief + * Perform Intra prediction for luma_8x8 mode:Horizontal_Down + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.8 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination +
* + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_8x8_mode_horz_d_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + __m128i pels_16x16; + __m128i temp1, temp2, temp3, temp4; + __m128i a0_8x16, a1_8x16, a2_8x16; + __m128i zero = _mm_setzero_si128(); + __m128i const_val2_8x16 = _mm_set1_epi16(2); + __m128i res1_8x16, res2_8x16; + __m128i out1_16x16, out2_16x16; + __m128i str_8x8; + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_left = pu1_src + BLK8x8SIZE - 1; + + pels_16x16 = _mm_loadu_si128((__m128i *)(pu1_left - 7)); + + temp1 = _mm_srli_si128(pels_16x16, 1); + temp2 = _mm_srli_si128(pels_16x16, 2); + a0_8x16 = _mm_unpacklo_epi8(pels_16x16, zero); + a1_8x16 = _mm_unpacklo_epi8(temp1, zero); + a2_8x16 = _mm_unpacklo_epi8(temp2, zero); + + res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); + + a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); + a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); + res2_8x16 = _mm_srai_epi16(a0_8x16, 2); + + temp3 = _mm_unpacklo_epi16(res1_8x16, res2_8x16); + temp4 = _mm_unpackhi_epi16(res1_8x16, res2_8x16); + out2_16x16 = _mm_packus_epi16(temp3, temp4); + + a0_8x16 = _mm_unpackhi_epi8(pels_16x16, zero); + a1_8x16 = _mm_unpackhi_epi8(temp1, zero); + a2_8x16 = _mm_unpackhi_epi8(temp2, zero); + + a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); + a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); + res2_8x16 = _mm_srai_epi16(a0_8x16, 2); + + 
out1_16x16 = _mm_packus_epi16(res2_8x16, zero); + temp1 = _mm_srli_si128(out2_16x16, 8); + out1_16x16 = _mm_unpacklo_epi64(temp1, out1_16x16); + + str_8x8 = _mm_srli_si128(out1_16x16, 6); + _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out1_16x16, 4); + _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out1_16x16, 2); + _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8); + _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out1_16x16); + + str_8x8 = _mm_srli_si128(out2_16x16, 6); + _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out2_16x16, 4); + _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out2_16x16, 2); + _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8); + _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out2_16x16); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_8x8_mode_vert_l_ssse3 + * + * @brief + * Perform Intra prediction for luma_8x8 mode:Vertical_Left + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.9 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ + +void ih264_intra_pred_luma_8x8_mode_vert_l_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ + __m128i top_16x16; + __m128i temp1, 
temp2; + __m128i a0_8x16, a1_8x16, a2_8x16; + __m128i zero = _mm_setzero_si128(); + __m128i const_val2_8x16 = _mm_set1_epi16(2); + __m128i res1_8x16, res2_8x16, res3_8x16, res4_8x16; + __m128i out1_16x16, out2_16x16; + UNUSED(src_strd); + UNUSED(ngbr_avail); + pu1_top = pu1_src + BLK8x8SIZE + 1; + + top_16x16 = _mm_loadu_si128((__m128i *)(pu1_top)); + temp1 = _mm_srli_si128(top_16x16, 1); + temp2 = _mm_srli_si128(top_16x16, 2); + a0_8x16 = _mm_unpacklo_epi8(top_16x16, zero); + a1_8x16 = _mm_unpacklo_epi8(temp1, zero); + a2_8x16 = _mm_unpacklo_epi8(temp2, zero); + + res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); + + a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); + a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); + res2_8x16 = _mm_srai_epi16(a0_8x16, 2); + + a0_8x16 = _mm_unpackhi_epi8(top_16x16, zero); + a1_8x16 = _mm_unpackhi_epi8(temp1, zero); + a2_8x16 = _mm_unpackhi_epi8(temp2, zero); + + res3_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); + + a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); + a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); + res4_8x16 = _mm_srai_epi16(a0_8x16, 2); + + out1_16x16 = _mm_packus_epi16(res1_8x16, res3_8x16); + out2_16x16 = _mm_packus_epi16(res2_8x16, res4_8x16); + + _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), out1_16x16); + _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), out2_16x16); + out1_16x16 = _mm_srli_si128(out1_16x16, 1); + out2_16x16 = _mm_srli_si128(out2_16x16, 1); + _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), out1_16x16); + _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out2_16x16); + out1_16x16 = _mm_srli_si128(out1_16x16, 1); + out2_16x16 = _mm_srli_si128(out2_16x16, 1); + _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), out1_16x16); + _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), out2_16x16); + out1_16x16 = 
_mm_srli_si128(out1_16x16, 1); + out2_16x16 = _mm_srli_si128(out2_16x16, 1); + _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), out1_16x16); + _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out2_16x16); +} + +/** + ******************************************************************************* + * + * ih264_intra_pred_luma_8x8_mode_horz_u_ssse3 + * + * @brief + * Perform Intra prediction for luma_8x8 mode:Horizontal_Up + * + * @par Description: + * Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.10 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_8x8_mode_horz_u_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ + __m128i left_16x16; + __m128i temp1, temp2; + __m128i a0_8x16, a1_8x16, a2_8x16; + __m128i zero = _mm_setzero_si128(); + __m128i const_val2_8x16 = _mm_set1_epi16(2); + __m128i res1_8x16, res2_8x16; + __m128i out1_16x16; + __m128i str_8x8; + __m128i shuffle_16x16; + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_left = pu1_src + BLK8x8SIZE - 1; + shuffle_16x16 = _mm_set_epi8(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, + 0x0F); + + left_16x16 = _mm_loadu_si128((__m128i *)(pu1_left - 7)); + temp1 = _mm_srli_si128(left_16x16, 1); + a0_8x16 = _mm_unpacklo_epi8(left_16x16, zero); + a0_8x16 = _mm_slli_si128(a0_8x16, 2); + a1_8x16 = _mm_unpacklo_epi8(left_16x16, zero); + a0_8x16 = _mm_shufflelo_epi16(a0_8x16, 
0xE5); + a2_8x16 = _mm_unpacklo_epi8(temp1, zero); + + res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); + + a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); + a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); + a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); + res2_8x16 = _mm_srai_epi16(a0_8x16, 2); + + temp1 = _mm_unpacklo_epi16(res1_8x16, res2_8x16); + temp2 = _mm_unpackhi_epi16(res1_8x16, res2_8x16); + out1_16x16 = _mm_packus_epi16(temp1, temp2); + out1_16x16 = _mm_shuffle_epi8(out1_16x16, shuffle_16x16); + + str_8x8 = _mm_srli_si128(out1_16x16, 1); + _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out1_16x16, 3); + _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out1_16x16, 5); + _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(out1_16x16, 7); + _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8); + temp1 = _mm_set1_epi8(pu1_left[-7]); + str_8x8 = _mm_unpacklo_epi64(str_8x8, temp1); + str_8x8 = _mm_srli_si128(str_8x8, 2); + _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(str_8x8, 2); + _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(str_8x8, 2); + _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8); + str_8x8 = _mm_srli_si128(str_8x8, 2); + _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), str_8x8); + +} + + +/******************* 16x16 Modes *******************/ + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_16x16_mode_vert_ssse3 + * + * @brief + * Perform Intra prediction for luma_16x16 mode:Vertical + * + * @par Description: + * Perform Intra prediction for luma_16x16 mode:Vertical, described in sec 8.3.3.1 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the 
destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels (Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_16x16_mode_vert_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_top; + WORD32 dst_strd2, dst_strd3, dst_strd4; + + __m128i top_16x8b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_top = pu1_src + MB_SIZE + 1; + + dst_strd2 = dst_strd << 1; + dst_strd4 = dst_strd << 2; + + top_16x8b = _mm_loadu_si128((__m128i *)pu1_top); + + dst_strd3 = dst_strd + dst_strd2; + + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b); + pu1_dst += dst_strd4; + + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b); + pu1_dst += dst_strd4; + + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b); + pu1_dst += dst_strd4; + + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b); +} + +/** + ******************************************************************************* + * + 
*ih264_intra_pred_luma_16x16_mode_horz_ssse3 + * + * @brief + * Perform Intra prediction for luma_16x16 mode:Horizontal + * + * @par Description: + * Perform Intra prediction for luma_16x16 mode:Horizontal, described in sec 8.3.3.2 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_16x16_mode_horz_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left; + WORD32 dst_strd2, dst_strd3, dst_strd4; + WORD32 val1, val2; + + __m128i val_16x8b; + __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_left = pu1_src + MB_SIZE - 1; + + dst_strd4 = dst_strd << 2; + + val_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 15)); + + dst_strd2 = dst_strd << 1; + dst_strd3 = dst_strd4 - dst_strd; + + val1 = _mm_extract_epi16(val_16x8b, 7); + val2 = _mm_extract_epi16(val_16x8b, 6); + + row1_16x8b = _mm_set1_epi8(val1 >> 8); + row2_16x8b = _mm_set1_epi8(val1 & 0xff); + row3_16x8b = _mm_set1_epi8(val2 >> 8); + row4_16x8b = _mm_set1_epi8(val2 & 0xff); + + _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b); + + val1 = _mm_extract_epi16(val_16x8b, 5); + val2 = _mm_extract_epi16(val_16x8b, 4); + + pu1_dst += dst_strd4; + row1_16x8b = _mm_set1_epi8(val1 >> 8); + row2_16x8b = _mm_set1_epi8(val1 & 0xff); + row3_16x8b = _mm_set1_epi8(val2 >> 8); + 
row4_16x8b = _mm_set1_epi8(val2 & 0xff); + + _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b); + + val1 = _mm_extract_epi16(val_16x8b, 3); + val2 = _mm_extract_epi16(val_16x8b, 2); + + pu1_dst += dst_strd4; + row1_16x8b = _mm_set1_epi8(val1 >> 8); + row2_16x8b = _mm_set1_epi8(val1 & 0xff); + row3_16x8b = _mm_set1_epi8(val2 >> 8); + row4_16x8b = _mm_set1_epi8(val2 & 0xff); + + _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b); + + val1 = _mm_extract_epi16(val_16x8b, 1); + val2 = _mm_extract_epi16(val_16x8b, 0); + + pu1_dst += dst_strd4; + row1_16x8b = _mm_set1_epi8(val1 >> 8); + row2_16x8b = _mm_set1_epi8(val1 & 0xff); + row3_16x8b = _mm_set1_epi8(val2 >> 8); + row4_16x8b = _mm_set1_epi8(val2 & 0xff); + + _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_16x16_mode_dc_ssse3 + * + * @brief + * Perform Intra prediction for luma_16x16 mode:DC + * + * @par Description: + * Perform Intra prediction for luma_16x16 mode:DC, described in sec 8.3.3.3 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels + * + * @returns + * +
* @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_16x16_mode_dc_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + WORD8 u1_useleft, u1_usetop; + WORD32 dc_val; + + WORD32 dst_strd2, dst_strd3, dst_strd4; + + __m128i dc_val_16x8b; + + UNUSED(src_strd); + + u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK); + u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK); + + if(u1_useleft || u1_usetop) + { + WORD32 shft; + __m128i val_16x8b, zero_16x8b, sum_8x16b; + + dc_val = 0; + shft = 3; + + zero_16x8b = _mm_setzero_si128(); + + if(u1_useleft) + { + UWORD8 *pu1_left; + + pu1_left = pu1_src + MB_SIZE - 1; + + val_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 15)); + sum_8x16b = _mm_sad_epu8(zero_16x8b, val_16x8b); + + shft++; + dc_val += 8; + dc_val += _mm_extract_epi16(sum_8x16b, 0); + dc_val += _mm_extract_epi16(sum_8x16b, 4); + } + if(u1_usetop) + { + UWORD8 *pu1_top; + + pu1_top = pu1_src + MB_SIZE + 1; + + val_16x8b = _mm_loadu_si128((__m128i *)pu1_top); + sum_8x16b = _mm_sad_epu8(zero_16x8b, val_16x8b); + + shft++; + dc_val += 8; + dc_val += _mm_extract_epi16(sum_8x16b, 0); + dc_val += _mm_extract_epi16(sum_8x16b, 4); + } + dc_val = dc_val >> shft; + } + else + dc_val = 128; + + dc_val_16x8b = _mm_set1_epi8(dc_val); + + dst_strd2 = dst_strd << 1; + dst_strd4 = dst_strd << 2; + dst_strd3 = dst_strd + dst_strd2; + + _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b); + pu1_dst += dst_strd4; + + _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + 
dst_strd3), dc_val_16x8b); + pu1_dst += dst_strd4; + + _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b); + pu1_dst += dst_strd4; + + _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b); +} + +/** + ******************************************************************************* + * + *ih264_intra_pred_luma_16x16_mode_plane_ssse3 + * + * @brief + * Perform Intra prediction for luma_16x16 mode:PLANE + * + * @par Description: + * Perform Intra prediction for luma_16x16 mode:PLANE, described in sec 8.3.3.4 + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[out] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] src_strd + * integer source stride + * + * @param[in] dst_strd + * integer destination stride + * + * @param[in] ngbr_avail + * availability of neighbouring pixels(Not used in this function) + * + * @returns + * + * @remarks + * None + * + *******************************************************************************/ +void ih264_intra_pred_luma_16x16_mode_plane_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 ngbr_avail) +{ + UWORD8 *pu1_left, *pu1_top; + WORD32 a, b, c; + + __m128i rev_8x16b, mul_8x16b, zero_16x8b; + + UNUSED(src_strd); + UNUSED(ngbr_avail); + + pu1_top = pu1_src + MB_SIZE + 1; + pu1_left = pu1_src + MB_SIZE - 1; + + rev_8x16b = _mm_setr_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); + //used to reverse the order of 16-bit values in a vector + + mul_8x16b = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + zero_16x8b = _mm_setzero_si128(); + + 
//calculating a, b and c + { + WORD32 h, v; + + __m128i h_val1_16x8b, h_val2_16x8b; + __m128i h_val1_8x16b, h_val2_8x16b, h_val_4x32b; + __m128i v_val1_16x8b, v_val2_16x8b; + __m128i v_val1_8x16b, v_val2_8x16b, v_val_4x32b; + __m128i hv_val_4x32b; + + a = (pu1_top[15] + pu1_left[-15]) << 4; + + h_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top + 8)); + h_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top - 1)); + v_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 15)); + v_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 6)); + + h_val1_8x16b = _mm_unpacklo_epi8(h_val1_16x8b, zero_16x8b); + h_val2_8x16b = _mm_unpacklo_epi8(h_val2_16x8b, zero_16x8b); + v_val1_8x16b = _mm_unpacklo_epi8(v_val1_16x8b, zero_16x8b); + v_val2_8x16b = _mm_unpacklo_epi8(v_val2_16x8b, zero_16x8b); + + h_val2_8x16b = _mm_shuffle_epi8(h_val2_8x16b, rev_8x16b); + v_val1_8x16b = _mm_shuffle_epi8(v_val1_8x16b, rev_8x16b); + + h_val1_8x16b = _mm_sub_epi16(h_val1_8x16b, h_val2_8x16b); + v_val1_8x16b = _mm_sub_epi16(v_val1_8x16b, v_val2_8x16b); + + h_val_4x32b = _mm_madd_epi16(mul_8x16b, h_val1_8x16b); + v_val_4x32b = _mm_madd_epi16(mul_8x16b, v_val1_8x16b); + + hv_val_4x32b = _mm_hadd_epi32(h_val_4x32b, v_val_4x32b); + hv_val_4x32b = _mm_hadd_epi32(hv_val_4x32b, hv_val_4x32b); + + h = _mm_extract_epi16(hv_val_4x32b, 0); + v = _mm_extract_epi16(hv_val_4x32b, 2); + h = (h << 16) >> 16; + v = (v << 16) >> 16; + + b = ((h << 2) + h + 32) >> 6; + c = ((v << 2) + v + 32) >> 6; + } + + //using a, b and c to compute the fitted plane values + { + __m128i const_8x16b, b_8x16b, c_8x16b, c2_8x16b; + __m128i res1_l_8x16b, res1_h_8x16b; + __m128i res2_l_8x16b, res2_h_8x16b; + __m128i res1_sh_l_8x16b, res1_sh_h_8x16b, res1_16x8b; + __m128i res2_sh_l_8x16b, res2_sh_h_8x16b, res2_16x8b; + + b_8x16b = _mm_set1_epi16(b); + c_8x16b = _mm_set1_epi16(c); + c2_8x16b = _mm_set1_epi16(c << 1); + const_8x16b = _mm_set1_epi16(a - c*7 + 16); + + res1_h_8x16b = _mm_mullo_epi16(mul_8x16b, b_8x16b); + //contains {b*1, b*2, 
b*3,... b*8} + + res1_l_8x16b = _mm_shuffle_epi8(res1_h_8x16b, rev_8x16b); + res1_l_8x16b = _mm_srli_si128(res1_l_8x16b, 2); + res1_l_8x16b = _mm_sub_epi16(zero_16x8b, res1_l_8x16b); + //contains {-b*7, -b*6,... -b*1, b*0} + + // rows 1, 2 + res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, const_8x16b); + res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, const_8x16b); + res2_h_8x16b = _mm_add_epi16(res1_h_8x16b, c_8x16b); + res2_l_8x16b = _mm_add_epi16(res1_l_8x16b, c_8x16b); + + res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); + res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); + res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); + res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); + + res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); + res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + + // rows 3, 4 + res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); + res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); + res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); + res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); + + res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); + res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); + res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); + res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); + + pu1_dst += dst_strd << 1; + + res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); + res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + + // rows 5, 6 + res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); + res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); + res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); + res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); + + res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); + res1_sh_l_8x16b = 
_mm_srai_epi16(res1_l_8x16b, 5); + res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); + res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); + + pu1_dst += dst_strd << 1; + + res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); + res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + + // rows 7, 8 + res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); + res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); + res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); + res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); + + res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); + res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); + res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); + res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); + + pu1_dst += dst_strd << 1; + + res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); + res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + + // rows 9, 10 + res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); + res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); + res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); + res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); + + res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); + res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); + res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); + res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); + + pu1_dst += dst_strd << 1; + + res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); + res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + + // rows 11, 12 + res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); + res1_l_8x16b = 
_mm_add_epi16(res1_l_8x16b, c2_8x16b); + res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); + res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); + + res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); + res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); + res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); + res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); + + pu1_dst += dst_strd << 1; + + res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); + res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + + // rows 13, 14 + res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); + res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); + res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); + res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); + + res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); + res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); + res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); + res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); + + pu1_dst += dst_strd << 1; + + res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); + res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + + // rows 15, 16 + res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); + res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); + res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); + res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); + + res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); + res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); + res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); + res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); + + pu1_dst += dst_strd << 1; + + res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); + res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, 
res2_sh_h_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); + } +} diff --git a/common/x86/ih264_mem_fns_ssse3.c b/common/x86/ih264_mem_fns_ssse3.c new file mode 100755 index 0000000..8ca1f3e --- /dev/null +++ b/common/x86/ih264_mem_fns_ssse3.c @@ -0,0 +1,169 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_mem_fns_atom_intr.c + * + * @brief + * Functions used for memory operations + * + * @author + * Ittiam + * + * @par List of Functions: + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include "ih264_typedefs.h" +#include "ih264_mem_fns.h" + +#include <immintrin.h> + +/** + ******************************************************************************* + * + * @brief + * memcpy of a 8,16 or 32 bytes + * + * @par Description: + * Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes + * + * @param[in] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] pu1_src + * UWORD8 pointer to the source + * + * @param[in] num_bytes + * number of bytes to copy + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + + + + +void ih264_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes) +{ + int col; + for(col = num_bytes; col >= 8; col -= 8) + { + __m128i src_temp16x8b; + src_temp16x8b = _mm_loadl_epi64((__m128i *)(pu1_src)); + pu1_src += 8; + _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b); + pu1_dst += 8; + } +} + +/** + ******************************************************************************* + * + * @brief + * memset of a 8,16 or 32 bytes + * + * @par Description: + * Does memset of 8bit data for 8,16 or 32 number of bytes + * + * @param[in] pu1_dst + * UWORD8 pointer to the destination + * + * @param[in] value + * UWORD8 value used for memset + * + * 
@param[in] num_bytes + * number of bytes to set + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + + +void ih264_memset_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes) +{ + int col; + __m128i src_temp16x8b; + src_temp16x8b = _mm_set1_epi8(value); + for(col = num_bytes; col >= 8; col -= 8) + { + _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b); + pu1_dst += 8; + } +} + +/** + ******************************************************************************* + * + * @brief + * memset of 16bit data of a 8,16 or 32 bytes + * + * @par Description: + * Does memset of 16bit data for 8,16 or 32 number of bytes + * + * @param[in] pu2_dst + * UWORD8 pointer to the destination + * + * @param[in] value + * UWORD16 value used for memset + * + * @param[in] num_words + * number of words to set + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + + +void ih264_memset_16bit_mul_8_ssse3(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words) +{ + int col; + __m128i src_temp16x8b; + src_temp16x8b = _mm_set1_epi16(value); + for(col = num_words; col >= 8; col -= 8) + { + _mm_storeu_si128((__m128i *)(pu2_dst), src_temp16x8b); + pu2_dst += 8; + } +} + diff --git a/common/x86/ih264_padding_ssse3.c b/common/x86/ih264_padding_ssse3.c new file mode 100755 index 0000000..6dadd39 --- /dev/null +++ b/common/x86/ih264_padding_ssse3.c @@ -0,0 +1,335 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_padding_ssse3.c +* +* @brief +* Contains function definitions for Padding +* +* @author +* Srinivas T +* +* @par List of Functions: +* - ih264_pad_left_luma_ssse3() +* - ih264_pad_left_chroma_ssse3() +* - ih264_pad_right_luma_ssse3() +* - ih264_pad_right_chroma_ssse3() +* +* @remarks +* None +* +******************************************************************************* +*/ + +#include <string.h> +#include <assert.h> +#include "ih264_typedefs.h" +#include "ih264_platform_macros.h" +#include "ih264_mem_fns.h" +#include "ih264_debug.h" + +#include <immintrin.h> + + +/** +******************************************************************************* +* +* @brief +* Padding (luma block) at the left of a 2d array +* +* @par Description: +* The left column of a 2d array is replicated for pad_size times at the left +* +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] src_strd +* integer source stride +* +* @param[in] ht +* integer height of the array +* +* @param[in] pad_size +* integer -padding size of the array +* +* @returns +* +* @remarks +* None +*
+******************************************************************************* +*/ + +void ih264_pad_left_luma_ssse3(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 ht, + WORD32 pad_size) +{ + WORD32 row; + WORD32 i; + UWORD8 *pu1_dst; + __m128i const0_16x8b; + + const0_16x8b = _mm_setzero_si128(); + + ASSERT(pad_size % 8 == 0); + + for(row = 0; row < ht; row++) + { + __m128i src_temp0_16x8b; + + src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_dst = pu1_src - pad_size; + src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); + for(i = 0; i < pad_size; i += 8) + { + _mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b); + } + pu1_src += src_strd; + } + +} + + + +/** +******************************************************************************* +* +* @brief +* Padding (chroma block) at the left of a 2d array +* +* @par Description: +* The left column of a 2d array is replicated for pad_size times at the left +* +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] src_strd +* integer source stride +* +* @param[in] ht +* integer height of the array +* +* @param[in] wd +* integer width of the array (each colour component) +* +* @param[in] pad_size +* integer -padding size of the array +* +* @param[in] ht +* integer height of the array +* +* @param[in] wd +* integer width of the array +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ + +void ih264_pad_left_chroma_ssse3(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 ht, + WORD32 pad_size) +{ + WORD32 row; + WORD32 col; + UWORD8 *pu1_dst; + __m128i const0_16x8b, const1_16x8b; + const0_16x8b = _mm_setzero_si128(); + const1_16x8b = _mm_set1_epi8(1); + const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b); + + ASSERT(pad_size % 8 == 0); + for(row = 0; row < ht; row++) + { + __m128i src_temp0_16x8b; + + src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_dst = pu1_src - pad_size; + 
src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); + + for(col = 0; col < pad_size; col += 8) + { + _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); + } + pu1_src += src_strd; + } + +} + + + +/** +******************************************************************************* +* +* @brief +* Padding (luma block) at the right of a 2d array +* +* @par Description: +* The right column of a 2d array is replicated for pad_size times at the right +* +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] src_strd +* integer source stride +* +* @param[in] ht +* integer height of the array +* +* @param[in] wd +* integer width of the array +* +* @param[in] pad_size +* integer -padding size of the array +* +* @param[in] ht +* integer height of the array +* +* @param[in] wd +* integer width of the array +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ + +void ih264_pad_right_luma_ssse3(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 ht, + WORD32 pad_size) +{ + WORD32 row; + WORD32 col; + UWORD8 *pu1_dst; + __m128i const0_16x8b; + + ASSERT(pad_size % 8 == 0); + + for(row = 0; row < ht; row++) + { + __m128i src_temp0_16x8b; + + src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 1)); + const0_16x8b = _mm_setzero_si128(); + pu1_dst = pu1_src; + src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); + for(col = 0; col < pad_size; col += 8) + { + _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); + } + pu1_src += src_strd; + } + +} + + + +/** +******************************************************************************* +* +* @brief +* Padding (chroma block) at the right of a 2d array +* +* @par Description: +* The right column of a 2d array is replicated for pad_size times at the right +* +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] src_strd +* integer source stride +* +* @param[in] ht +* integer 
height of the array +* +* @param[in] wd +* integer width of the array (each colour component) +* +* @param[in] pad_size +* integer -padding size of the array +* +* @param[in] ht +* integer height of the array +* +* @param[in] wd +* integer width of the array +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ + +void ih264_pad_right_chroma_ssse3(UWORD8 *pu1_src, + WORD32 src_strd, + WORD32 ht, + WORD32 pad_size) +{ + WORD32 row; + WORD32 col; + UWORD8 *pu1_dst; + __m128i const0_16x8b, const1_16x8b; + const0_16x8b = _mm_setzero_si128(); + const1_16x8b = _mm_set1_epi8(1); + const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b); + + ASSERT(pad_size % 8 == 0); + + for(row = 0; row < ht; row++) + { + __m128i src_temp0_16x8b; + + src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 2)); + pu1_dst = pu1_src; + src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); + for(col = 0; col < pad_size; col += 8) + { + _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); + } + + pu1_src += src_strd; + } +} + diff --git a/common/x86/ih264_platform_macros.h b/common/x86/ih264_platform_macros.h new file mode 100755 index 0000000..e4b9821 --- /dev/null +++ b/common/x86/ih264_platform_macros.h @@ -0,0 +1,114 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264_platform_macros.h +* +* @brief +* Platform specific Macro definitions used in the codec +* +* @author +* Ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + + +#ifndef _IH264_PLATFORM_MACROS_H_ +#define _IH264_PLATFORM_MACROS_H_ + +#include <immintrin.h> + + +#define CLIP_U8(x) CLIP3(0, 255, (x)) +#define CLIP_S8(x) CLIP3(-128, 127, (x)) + +#define CLIP_U10(x) CLIP3(0, 1023, (x)) +#define CLIP_S10(x) CLIP3(-512, 511, (x)) + +#define CLIP_U12(x) CLIP3(0, 4095, (x)) +#define CLIP_S12(x) CLIP3(-2048, 2047, (x)) + +#define CLIP_U16(x) CLIP3(0, 65535, (x)) +#define CLIP_S16(x) CLIP3(-32768, 32767, (x)) + +#define MEM_ALIGN16 __attribute__ ((aligned (16))) + +#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0) +#define SHR(x,y) (((y) < 32) ? 
((x) >> (y)) : 0) + +#define SHR_NEG(val,shift) ((shift>0)?(val>>shift):(val<<(-shift))) +#define SHL_NEG(val,shift) ((shift<0)?(val>>(-shift)):(val<<shift)) + + +#define ITT_BIG_ENDIAN(x) ((x << 24)) | \ + ((x & 0x0000ff00) << 8) | \ + ((x & 0x00ff0000) >> 8) | \ + ((UWORD32)x >> 24); + + +#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < nop_cnt; nop_i++);} + +#define PLD(a) + +static __inline UWORD32 CLZ(UWORD32 u4_word) +{ + if(u4_word) + return(__builtin_clz(u4_word)); + else + return 32; +} + +static __inline UWORD32 CTZ(UWORD32 u4_word) +{ + if(0 == u4_word) + return 31; + else + { + unsigned int index; + index = __builtin_ctz(u4_word); + return (UWORD32)index; + } +} + +#define DATA_SYNC() __sync_synchronize() + + + +//#define INLINE __inline +#define INLINE + +#define PREFETCH_ENABLE 1 + +#if PREFETCH_ENABLE +#define PREFETCH(ptr, type) _mm_prefetch(ptr, type); +#else +#define PREFETCH(ptr, type) +#endif + +#define MEM_ALIGN8 __attribute__ ((aligned (8))) +#define MEM_ALIGN16 __attribute__ ((aligned (16))) +#define MEM_ALIGN32 __attribute__ ((aligned (32))) + +#endif /* _IH264_PLATFORM_MACROS_H_ */ diff --git a/common/x86/ih264_resi_trans_quant_sse42.c b/common/x86/ih264_resi_trans_quant_sse42.c new file mode 100755 index 0000000..c267651 --- /dev/null +++ b/common/x86/ih264_resi_trans_quant_sse42.c @@ -0,0 +1,984 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264_resi_trans_quant_sse42.c + * + * @brief + * Contains function definitions single stage forward transform for H.264 + * It will calculate the residue, do the cf and then do quantization + * + * @author + * Mohit [100664] + * + * @par List of Functions: + * - ih264_resi_trans_quant_4x4_sse42() + * - ih264_resi_trans_quant_chroma_4x4_sse42() + * + * @remarks + * None + * + ******************************************************************************* + */ +/* System include files */ +#include <stddef.h> + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264_macros.h" +#include "ih264_trans_macros.h" +#include "ih264_trans_data.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include <immintrin.h> +/** + ******************************************************************************* + * + * @brief + * This function performs forward transform and quantization on a 4*4 block + * + * @par Description: + * The function accepts source buffer and estimation buffer. From these, it + * computes the residue. This residue is then transformed and quantized. + * The transform and quantization are computed in place. They use the residue + * buffer for this.
 *
 * @param[in] pu1_src
 *  Pointer to source sub-block
 *
 * @param[in] pu1_pred
 *  Pointer to prediction sub-block
 *
 * @param[out] pi2_out
 *  Pointer to the 16 quantized coefficients, written contiguously
 *
 * @param[in] src_strd
 *  Source stride
 *
 * @param[in] pred_strd
 *  Prediction stride
 *
 * @param[in] pu2_scale_matrix
 *  Pointer to Forward Quant Scale Matrix
 *
 * @param[in] pu2_threshold_matrix
 *  Pointer to Forward Quant Threshold Matrix (not referenced by this
 *  implementation)
 *
 * @param[in] u4_qbits
 *  QP_BITS_h264_4x4 + floor(QP/6)
 *
 * @param[in] u4_round_factor
 *  Quantization Round factor
 *
 * @param[out] pu1_nnz
 *  Total non-zero coefficients in the current sub-block
 *
 * @param[out] pi2_alt_dc_addr
 *  Receives the DC coefficient of the transformed block, taken before
 *  quantization
 *
 * @returns
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void ih264_resi_trans_quant_4x4_sse42(UWORD8 *pu1_src, UWORD8 *pu1_pred,
                                      WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
                                      const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix,
                                      UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD8 *pu1_nnz,
                                      WORD16 *pi2_alt_dc_addr)
{
    WORD32 tmp_dc, u4_zero_coeff, u4_nonzero_coeff = 0;
    WORD32 mask0, mask1;
    __m128i sum0, sum1, sum2, cmp0, cmp1;
    __m128i rnd_fact = _mm_set1_epi32(u4_round_factor);
    __m128i temp_2 = _mm_set1_epi16(2);
    __m128i temp_1 = _mm_set1_epi16(1);
    __m128i src_r0, src_r1, src_r2, src_r3;
    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
    __m128i temp0, temp1, temp2, temp3;
    __m128i zero_8x16b = _mm_setzero_si128();          // all bits reset to zero
    __m128i sign_reg0, sign_reg2;
    __m128i scalemat_r0_r1, scalemat_r2_r3;
    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
    // Each row load fetches 8 bytes; only the first four pixels of a row are used below
    src_r0 = _mm_loadl_epi64((__m128i *) (&pu1_src[0])); //a00 a01 a02 a03 0 0 0 0 0 0 0 0 -- all 8 bits
    src_r1 = _mm_loadl_epi64((__m128i *) (&pu1_src[src_strd])); //a10 a11 a12 a13 0 0 0 0 0 0 0 0 -- all 8 bits
    src_r2 = _mm_loadl_epi64((__m128i *) (&pu1_src[2 * src_strd])); //a20 a21 a22 a23 0 0 0 0 0 0 0 0 -- all 8 bits
    src_r3 = _mm_loadl_epi64((__m128i *) (&pu1_src[3 * src_strd])); //a30 a31 a32 a33 0 0 0 0 0 0 0 0 -- all 8 bits

    // widen the source pixels to 16 bits
    src_r0 = _mm_cvtepu8_epi16(src_r0);
    src_r1 = _mm_cvtepu8_epi16(src_r1);
    src_r2 = _mm_cvtepu8_epi16(src_r2);
    src_r3 = _mm_cvtepu8_epi16(src_r3);

    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits

    pred_r0 = _mm_cvtepu8_epi16(pred_r0); //p00 p01 p02 p03 -- all 16 bits
    pred_r1 = _mm_cvtepu8_epi16(pred_r1); //p10 p11 p12 p13 -- all 16 bits
    pred_r2 = _mm_cvtepu8_epi16(pred_r2); //p20 p21 p22 p23 -- all 16 bits
    pred_r3 = _mm_cvtepu8_epi16(pred_r3); //p30 p31 p32 p33 -- all 16 bits

    // residue = source - prediction
    src_r0 = _mm_sub_epi16(src_r0, pred_r0);
    src_r1 = _mm_sub_epi16(src_r1, pred_r1);
    src_r2 = _mm_sub_epi16(src_r2, pred_r2);
    src_r3 = _mm_sub_epi16(src_r3, pred_r3);

    /* Perform Forward transform */
    /*-------------------------------------------------------------*/
    /* DCT [ Horizontal transformation ]                          */
    /*-------------------------------------------------------------*/
    // Matrix transpose
    /*
     *  a0 a1 a2 a3
     *  b0 b1 b2 b3
     *  c0 c1 c2 c3
     *  d0 d1 d2 d3
     */
    temp0 = _mm_unpacklo_epi16(src_r0, src_r1); //a0 b0 a1 b1 a2 b2 a3 b3
    temp2 = _mm_unpacklo_epi16(src_r2, src_r3); //c0 d0 c1 d1 c2 d2 c3 d3
    temp1 = _mm_unpacklo_epi32(temp0, temp2); //a0 b0 c0 d0 a1 b1 c1 d1
    temp3 = _mm_unpackhi_epi32(temp0, temp2); //a2 b2 c2 d2 a3 b3 c3 d3

    src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b); //a0 b0 c0 d0
    src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b); //a1 b1 c1 d1
    src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b); //a2 b2 c2 d2
    src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b); //a3 b3 c3 d3

    /*----------------------------------------------------------*/
    /* x0 = z0 + z3                                             */
    temp0 = _mm_add_epi16(src_r0, src_r3);
    /* x1 = z1 + z2                                             */
    temp1 = _mm_add_epi16(src_r1, src_r2);
    /* x2 = z1 - z2                                             */
    temp2 = _mm_sub_epi16(src_r1, src_r2);
    /* x3 = z0 - z3                                             */
    temp3 = _mm_sub_epi16(src_r0, src_r3);

    /* z0 = x0 + x1                                             */
    src_r0 = _mm_add_epi16(temp0, temp1);
    /* z1 = (x3 << 1) + x2                                      */
    src_r1 = _mm_slli_epi16(temp3, 1); //(x3<<1)
    src_r1 = _mm_add_epi16(src_r1, temp2);
    /* z2 = x0 - x1                                             */
    src_r2 = _mm_sub_epi16(temp0, temp1);
    /* z3 = x3 - (x2 << 1)                                      */
    src_r3 = _mm_slli_epi16(temp2, 1); //(x2<<1)
    src_r3 = _mm_sub_epi16(temp3, src_r3);

    // Matrix transpose
    /*
     *  a0 b0 c0 d0
     *  a1 b1 c1 d1
     *  a2 b2 c2 d2
     *  a3 b3 c3 d3
     */
    temp0 = _mm_unpacklo_epi16(src_r0, src_r1); //a0 a1 b0 b1 c0 c1 d0 d1
    temp2 = _mm_unpacklo_epi16(src_r2, src_r3); //a2 a3 b2 b3 c2 c3 d2 d3
    temp1 = _mm_unpacklo_epi32(temp0, temp2); //a0 a1 a2 a3 b0 b1 b2 b3
    temp3 = _mm_unpackhi_epi32(temp0, temp2); //c0 c1 c2 c3 d0 d1 d2 d3

    src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b); //a0 a1 a2 a3
    src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b); //b0 b1 b2 b3
    src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b); //c0 c1 c2 c3
    src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b); //d0 d1 d2 d3

    // Second pass: the same butterfly applied to the transposed data
    /*----------------------------------------------------------*/
    /* x0 = z0 + z3                                             */
    temp0 = _mm_add_epi16(src_r0, src_r3);
    /* x1 = z1 + z2                                             */
    temp1 = _mm_add_epi16(src_r1, src_r2);
    /* x2 = z1 - z2                                             */
    temp2 = _mm_sub_epi16(src_r1, src_r2);
    /* x3 = z0 - z3                                             */
    temp3 = _mm_sub_epi16(src_r0, src_r3);

    /* z0 = x0 + x1                                             */
    src_r0 = _mm_add_epi16(temp0, temp1);
    /* z1 = (x3 << 1) + x2                                      */
    src_r1 = _mm_slli_epi16(temp3, 1); //(x3<<1)
    src_r1 = _mm_add_epi16(src_r1, temp2);
    /* z2 = x0 - x1                                             */
    src_r2 = _mm_sub_epi16(temp0, temp1);
    /* z3 = x3 - (x2 << 1)                                      */
    src_r3 = _mm_slli_epi16(temp2, 1); //(x2<<1)
    src_r3 = _mm_sub_epi16(temp3, src_r3);

    // Hand the unquantized DC coefficient back to the caller
    tmp_dc = _mm_extract_epi16(src_r0,0); //a0
    *pi2_alt_dc_addr = tmp_dc;

    src_r0 = _mm_unpacklo_epi64(src_r0, src_r1); //a0 a1 a2 a3 b0 b1 b2 b3
    src_r2 = _mm_unpacklo_epi64(src_r2, src_r3); //c0 c1 c2 c3 d0 d1 d2 d3
    // sign_reg = 1 + 2 * (coeff < 0 ? -1 : 0): -1 for negative coefficients, +1 otherwise
    sign_reg0 = _mm_cmpgt_epi16(zero_8x16b,src_r0);
    sign_reg2 = _mm_cmpgt_epi16(zero_8x16b,src_r2);

    sign_reg0 = _mm_mullo_epi16(temp_2,sign_reg0);
    sign_reg2 = _mm_mullo_epi16(temp_2,sign_reg2);

    sign_reg0 = _mm_add_epi16(temp_1,sign_reg0);
    sign_reg2 = _mm_add_epi16(temp_1,sign_reg2);

    // Quantization works on magnitudes; the sign is re-applied afterwards
    src_r0 = _mm_abs_epi16(src_r0);
    src_r2 = _mm_abs_epi16(src_r2);

    // widen magnitudes and scale factors to 32 bits, one 4-element row per register
    src_r1 = _mm_srli_si128(src_r0, 8);
    src_r0 = _mm_cvtepu16_epi32(src_r0);
    src_r1 = _mm_cvtepu16_epi32(src_r1);
    src_r3 = _mm_srli_si128(src_r2, 8);
    src_r2 = _mm_cvtepu16_epi32(src_r2);
    src_r3 = _mm_cvtepu16_epi32(src_r3);

    temp0 = _mm_cvtepu16_epi32(scalemat_r0_r1);
    scalemat_r0_r1 = _mm_srli_si128(scalemat_r0_r1, 8);
    temp2 = _mm_cvtepu16_epi32(scalemat_r2_r3);
    scalemat_r2_r3 = _mm_srli_si128(scalemat_r2_r3, 8);
    temp1 = _mm_cvtepu16_epi32(scalemat_r0_r1);
    temp3 = _mm_cvtepu16_epi32(scalemat_r2_r3);

    // level = (|coeff| * scale + round_factor) >> qbits
    temp0 = _mm_mullo_epi32(temp0, src_r0);
    temp1 = _mm_mullo_epi32(temp1, src_r1);
    temp2 = _mm_mullo_epi32(temp2, src_r2);
    temp3 = _mm_mullo_epi32(temp3, src_r3);

    temp0 = _mm_add_epi32(temp0,rnd_fact);
    temp1 = _mm_add_epi32(temp1,rnd_fact);
    temp2 = _mm_add_epi32(temp2,rnd_fact);
    temp3 = _mm_add_epi32(temp3,rnd_fact);

    temp0 = _mm_srli_epi32(temp0,u4_qbits);
    temp1 = _mm_srli_epi32(temp1,u4_qbits);
    temp2 = _mm_srli_epi32(temp2,u4_qbits);
    temp3 = _mm_srli_epi32(temp3,u4_qbits);

    // back to 16 bits (signed saturation), rows 0-1 in temp0 and rows 2-3 in temp2
    temp0 = _mm_packs_epi32 (temp0,temp1);
    temp2 = _mm_packs_epi32 (temp2,temp3);

    // restore the sign of every coefficient
    temp0 = _mm_sign_epi16(temp0, sign_reg0);
    temp2 = _mm_sign_epi16(temp2, sign_reg2);

    _mm_storeu_si128((__m128i *) (&pi2_out[0]), temp0);
    _mm_storeu_si128((__m128i *) (&pi2_out[8]), temp2);

    // Count the zero levels: each 16-bit lane that equals zero sets two mask bits
    cmp0 = _mm_cmpeq_epi16(temp0, zero_8x16b);
    cmp1 = _mm_cmpeq_epi16(temp2, zero_8x16b);

    mask0 = _mm_movemask_epi8(cmp0);
    mask1 = _mm_movemask_epi8(cmp1);
    u4_zero_coeff = 0;
    if(mask0)
    {
        if(mask0 == 0xffff)
            u4_zero_coeff+=8; // all eight levels of rows 0-1 are zero
        else
        {
            // turn the compare result into 0/1 per lane and sum the eight lanes
            cmp0 = _mm_and_si128(temp_1, cmp0);
            sum0 = _mm_hadd_epi16(cmp0, zero_8x16b);
            sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
            sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
            u4_zero_coeff += _mm_cvtsi128_si32(sum2);
        }
    }
    if(mask1)
    {
        if(mask1 == 0xffff)
            u4_zero_coeff+=8; // all eight levels of rows 2-3 are zero
        else
        {
            cmp1 = _mm_and_si128(temp_1, cmp1);
            sum0 = _mm_hadd_epi16(cmp1, zero_8x16b);
            sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
            sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
            u4_zero_coeff += _mm_cvtsi128_si32(sum2);
        }
    }

    /* Return total nonzero coefficients in the current sub block */
    u4_nonzero_coeff = 16 - u4_zero_coeff;
    *pu1_nnz =  u4_nonzero_coeff;
}

/**
 *******************************************************************************
 *
 * @brief
 *   This function performs forward transform and quantization on a 4*4 chroma block
 *
 * @par Description:
 *   The function accepts source buffer and estimation buffer. From these, it
 *   computes the residue. This residue is then transformed and quantized.
 *   The transform and quantization are computed in place. They use the residue
 *   buffer for this.
 *
 * @param[in] pu1_src
 *  Pointer to source sub-block. Each of the four rows is read as 8 bytes and
 *  only the even-indexed bytes of a row are used (see chroma_mask below).
 *
 * @param[in] pu1_pred
 *  Pointer to prediction sub-block, read with the same layout as pu1_src
 *
 * @param[out] pi2_out
 *  Receives the 16 quantized coefficients (written as two groups of eight)
 *
 * @param[in] src_strd
 *  Source stride
 *
 * @param[in] pred_strd
 *  Prediction stride
 *
 * @param[in] pu2_scale_matrix
 *  Pointer to Forward Quant Scale Matrix (16 entries are read)
 *
 * @param[in] pu2_threshold_matrix
 *  Pointer to Forward Quant Threshold Matrix (not referenced by this
 *  implementation)
 *
 * @param[in] u4_qbits
 *  QP_BITS_h264_4x4 + floor(QP/6)
 *
 * @param[in] u4_round_factor
 *  Quantization Round factor
 *
 * @param[out] pu1_nnz
 *  Total non-zero coefficients in the current sub-block
 *
 * @param[out] pi2_alt_dc_addr
 *  Receives the transformed DC coefficient as it is before quantization
 *
 * @returns
 *  None
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void ih264_resi_trans_quant_chroma_4x4_sse42(UWORD8 *pu1_src,UWORD8 *pu1_pred,WORD16 *pi2_out,
                                            WORD32 src_strd,WORD32 pred_strd,
                                            const UWORD16 *pu2_scale_matrix,
                                            const UWORD16 *pu2_threshold_matrix,
                                            UWORD32 u4_qbits,UWORD32 u4_round_factor,
                                            UWORD8 *pu1_nnz, WORD16 *pi2_alt_dc_addr)
{
    WORD32 tmp_dc, u4_zero_coeff, u4_nonzero_coeff = 0;
    WORD32 mask0, mask1;
    __m128i cmp0, cmp1, sum0, sum1, sum2;
    __m128i rnd_fact = _mm_set1_epi32(u4_round_factor);
    __m128i temp_2 = _mm_set1_epi16(2);
    __m128i temp_1 = _mm_set1_epi16(1);
    __m128i src_r0, src_r1, src_r2, src_r3;
    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
    __m128i temp0, temp1, temp2, temp3;
    __m128i zero_8x16b = _mm_setzero_si128();       // all bits reset to zero
    __m128i sign_reg0, sign_reg2;
    __m128i scalemat_r0_r1, scalemat_r2_r3;
    __m128i chroma_mask = _mm_set1_epi16 (0xFF);    // keeps the low byte of every 16-bit lane

    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row

    /* 8 bytes are loaded per row; the samples of interest sit in bytes 0, 2, 4, 6 */
    /* (presumably interleaved chroma - confirm against the caller)                */
    src_r0 = _mm_loadl_epi64((__m128i *) (&pu1_src[0])); //row 0
    src_r1 = _mm_loadl_epi64((__m128i *) (&pu1_src[src_strd])); //row 1
    src_r2 = _mm_loadl_epi64((__m128i *) (&pu1_src[2 * src_strd])); //row 2
    src_r3 = _mm_loadl_epi64((__m128i *) (&pu1_src[3 * src_strd])); //row 3

    /* Masking with 0x00FF drops the odd bytes and leaves each kept sample */
    /* zero-extended to 16 bits, so no separate widening step is needed    */
    src_r0 = _mm_and_si128(src_r0, chroma_mask);
    src_r1 = _mm_and_si128(src_r1, chroma_mask);
    src_r2 = _mm_and_si128(src_r2, chroma_mask);
    src_r3 = _mm_and_si128(src_r3, chroma_mask);

    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //row 0
    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //row 1
    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //row 2
    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //row 3

    pred_r0 = _mm_and_si128(pred_r0, chroma_mask);
    pred_r1 = _mm_and_si128(pred_r1, chroma_mask);
    pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
    pred_r3 = _mm_and_si128(pred_r3, chroma_mask);

    /* Residue = source - prediction, 16 bits per sample */
    src_r0 = _mm_sub_epi16(src_r0, pred_r0);
    src_r1 = _mm_sub_epi16(src_r1, pred_r1);
    src_r2 = _mm_sub_epi16(src_r2, pred_r2);
    src_r3 = _mm_sub_epi16(src_r3, pred_r3);

    /* Perform Forward transform */
    /*-------------------------------------------------------------*/
    /* DCT [ Horizontal transformation ]                          */
    /*-------------------------------------------------------------*/
    // Matrix transpose
    /*
     *  a0 a1 a2 a3
     *  b0 b1 b2 b3
     *  c0 c1 c2 c3
     *  d0 d1 d2 d3
     */
    temp0 = _mm_unpacklo_epi16(src_r0, src_r1);                 //a0 b0 a1 b1 a2 b2 a3 b3
    temp2 = _mm_unpacklo_epi16(src_r2, src_r3);                 //c0 d0 c1 d1 c2 d2 c3 d3
    temp1 = _mm_unpacklo_epi32(temp0, temp2);                   //a0 b0 c0 d0 a1 b1 c1 d1
    temp3 = _mm_unpackhi_epi32(temp0, temp2);                   //a2 b2 c2 d2 a3 b3 c3 d3

    src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b);             //a0 b0 c0 d0
    src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b);             //a1 b1 c1 d1
    src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b);             //a2 b2 c2 d2
    src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b);             //a3 b3 c3 d3

    /*----------------------------------------------------------*/
    /* x0 = z0 + z3                                             */
    temp0 = _mm_add_epi16(src_r0, src_r3);
    /* x1 = z1 + z2                                             */
    temp1 = _mm_add_epi16(src_r1, src_r2);
    /* x2 = z1 - z2                                             */
    temp2 = _mm_sub_epi16(src_r1, src_r2);
    /* x3 = z0 - z3                                             */
    temp3 = _mm_sub_epi16(src_r0, src_r3);

    /* z0 = x0 + x1                                             */
    src_r0 = _mm_add_epi16(temp0, temp1);
    /* z1 = (x3 << 1) + x2                                      */
    src_r1 = _mm_slli_epi16(temp3, 1);                          //(x3<<1)
    src_r1 = _mm_add_epi16(src_r1, temp2);
    /* z2 = x0 - x1                                             */
    src_r2 = _mm_sub_epi16(temp0, temp1);
    /* z3 = x3 - (x2 << 1)                                      */
    src_r3 = _mm_slli_epi16(temp2, 1);                          //(x2<<1)
    src_r3 = _mm_sub_epi16(temp3, src_r3);

    /*-------------------------------------------------------------*/
    /* Second pass: transpose back and repeat the same butterfly   */
    /*-------------------------------------------------------------*/
    // Matrix transpose
    /*
     *  a0 b0 c0 d0
     *  a1 b1 c1 d1
     *  a2 b2 c2 d2
     *  a3 b3 c3 d3
     */
    temp0 = _mm_unpacklo_epi16(src_r0, src_r1);                 //a0 a1 b0 b1 c0 c1 d0 d1
    temp2 = _mm_unpacklo_epi16(src_r2, src_r3);                 //a2 a3 b2 b3 c2 c3 d2 d3
    temp1 = _mm_unpacklo_epi32(temp0, temp2);                   //a0 a1 a2 a3 b0 b1 b2 b3
    temp3 = _mm_unpackhi_epi32(temp0, temp2);                   //c0 c1 c2 c3 d0 d1 d2 d3

    src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b);             //a0 a1 a2 a3
    src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b);             //b0 b1 b2 b3
    src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b);             //c0 c1 c2 c3
    src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b);             //d0 d1 d2 d3

    /*----------------------------------------------------------*/
    /* x0 = z0 + z3                                             */
    temp0 = _mm_add_epi16(src_r0, src_r3);
    /* x1 = z1 + z2                                             */
    temp1 = _mm_add_epi16(src_r1, src_r2);
    /* x2 = z1 - z2                                             */
    temp2 = _mm_sub_epi16(src_r1, src_r2);
    /* x3 = z0 - z3                                             */
    temp3 = _mm_sub_epi16(src_r0, src_r3);

    /* z0 = x0 + x1                                             */
    src_r0 = _mm_add_epi16(temp0, temp1);
    /* z1 = (x3 << 1) + x2                                      */
    src_r1 = _mm_slli_epi16(temp3, 1);                          //(x3<<1)
    src_r1 = _mm_add_epi16(src_r1, temp2);
    /* z2 = x0 - x1                                             */
    src_r2 = _mm_sub_epi16(temp0, temp1);
    /* z3 = x3 - (x2 << 1)                                      */
    src_r3 = _mm_slli_epi16(temp2, 1);                          //(x2<<1)
    src_r3 = _mm_sub_epi16(temp3, src_r3);

    /* Hand the transformed DC term to the caller before it is quantized */
    tmp_dc = _mm_extract_epi16(src_r0,0);                       //a0
    *pi2_alt_dc_addr = tmp_dc;

    src_r0 = _mm_unpacklo_epi64(src_r0, src_r1);                //a0 a1 a2 a3 b0 b1 b2 b3
    src_r2 = _mm_unpacklo_epi64(src_r2, src_r3);                //c0 c1 c2 c3 d0 d1 d2 d3

    /* Build a +1 / -1 multiplier per coefficient: the compare gives -1 for */
    /* negative lanes and 0 otherwise, so 1 + 2 * cmp is -1 or +1           */
    sign_reg0 = _mm_cmpgt_epi16(zero_8x16b,src_r0);
    sign_reg2 = _mm_cmpgt_epi16(zero_8x16b,src_r2);

    sign_reg0 = _mm_mullo_epi16(temp_2,sign_reg0);
    sign_reg2 = _mm_mullo_epi16(temp_2,sign_reg2);

    sign_reg0 = _mm_add_epi16(temp_1,sign_reg0);
    sign_reg2 = _mm_add_epi16(temp_1,sign_reg2);

    /* Quantization works on magnitudes; the sign is re-applied afterwards */
    src_r0 = _mm_abs_epi16(src_r0);
    src_r2 = _mm_abs_epi16(src_r2);

    /* Widen magnitudes to 32 bits, four coefficients per register */
    src_r1 = _mm_srli_si128(src_r0, 8);
    src_r0 = _mm_cvtepu16_epi32(src_r0);
    src_r1 = _mm_cvtepu16_epi32(src_r1);
    src_r3 = _mm_srli_si128(src_r2, 8);
    src_r2 = _mm_cvtepu16_epi32(src_r2);
    src_r3 = _mm_cvtepu16_epi32(src_r3);

    /* Widen the scale matrix the same way (the scalemat registers are */
    /* shifted in place, so they no longer hold the full rows below)   */
    temp0 = _mm_cvtepu16_epi32(scalemat_r0_r1);
    scalemat_r0_r1 = _mm_srli_si128(scalemat_r0_r1, 8);
    temp2 = _mm_cvtepu16_epi32(scalemat_r2_r3);
    scalemat_r2_r3 = _mm_srli_si128(scalemat_r2_r3, 8);
    temp1 = _mm_cvtepu16_epi32(scalemat_r0_r1);
    temp3 = _mm_cvtepu16_epi32(scalemat_r2_r3);

    /* level = (|coeff| * scale + round) >> qbits */
    temp0 = _mm_mullo_epi32(temp0, src_r0);
    temp1 = _mm_mullo_epi32(temp1, src_r1);
    temp2 = _mm_mullo_epi32(temp2, src_r2);
    temp3 = _mm_mullo_epi32(temp3, src_r3);

    temp0 = _mm_add_epi32(temp0,rnd_fact);
    temp1 = _mm_add_epi32(temp1,rnd_fact);
    temp2 = _mm_add_epi32(temp2,rnd_fact);
    temp3 = _mm_add_epi32(temp3,rnd_fact);

    temp0 = _mm_srli_epi32(temp0,u4_qbits);
    temp1 = _mm_srli_epi32(temp1,u4_qbits);
    temp2 = _mm_srli_epi32(temp2,u4_qbits);
    temp3 = _mm_srli_epi32(temp3,u4_qbits);

    /* Back to 16 bits (signed saturation) and restore the signs */
    temp0 = _mm_packs_epi32 (temp0,temp1);
    temp2 = _mm_packs_epi32 (temp2,temp3);

    temp0 = _mm_sign_epi16(temp0, sign_reg0);
    temp2 = _mm_sign_epi16(temp2, sign_reg2);

    _mm_storeu_si128((__m128i *) (&pi2_out[0]), temp0);
    _mm_storeu_si128((__m128i *) (&pi2_out[8]), temp2);

    /* Count zero coefficients: cmpN is 0xFFFF in every lane that quantized */
    /* to zero, and movemask yields two mask bits per such lane             */
    cmp0 = _mm_cmpeq_epi16(temp0, zero_8x16b);
    cmp1 = _mm_cmpeq_epi16(temp2, zero_8x16b);

    mask0 = _mm_movemask_epi8(cmp0);
    mask1 = _mm_movemask_epi8(cmp1);
    u4_zero_coeff = 0;
    if(mask0)
    {
        if(mask0 == 0xffff)
            u4_zero_coeff+=8;       /* all eight lanes are zero */
        else
        {
            /* Turn each zero lane into a 1 and sum the eight lanes */
            cmp0 = _mm_and_si128(temp_1, cmp0);
            sum0 = _mm_hadd_epi16(cmp0, zero_8x16b);
            sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
            sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
            u4_zero_coeff += _mm_cvtsi128_si32(sum2);
        }
    }
    if(mask1)
    {
        if(mask1 == 0xffff)
            u4_zero_coeff+=8;       /* all eight lanes are zero */
        else
        {
            cmp1 = _mm_and_si128(temp_1, cmp1);
            sum0 = _mm_hadd_epi16(cmp1, zero_8x16b);
            sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
            sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
            u4_zero_coeff += _mm_cvtsi128_si32(sum2);
        }
    }

    /* Return total nonzero coefficients in the current sub block */
    u4_nonzero_coeff = 16 - u4_zero_coeff;
    *pu1_nnz = u4_nonzero_coeff;

}

+ + +/** + ******************************************************************************* + * + * @brief + * This function performs forward hadamard transform and quantization on a 4*4 block + * + * @par Description: + * The function accepts source buffer and estimation buffer. From these, it + * computes the residue. 
This residue is then transformed and quantized.
 *  The transform and quantization are computed in place. They use the residue
 *  buffer for this.
 *
 * @note
 *  The description above appears to be carried over from the residual
 *  transform routines. This function itself computes no residue: it reads 16
 *  values from pi2_src, applies a 4x4 Hadamard transform (two passes of
 *  add/subtract butterflies), halves the result, quantizes it with a single
 *  scale value and writes 16 values to pi2_dst.
 *
 * @param[in] pi2_src
 *  Pointer to the 16 input values (read as rows 0-1 and rows 2-3)
 *
 * @param[out] pi2_dst
 *  Receives the 16 quantized values
 *
 * @param[in] pu2_scale_matrix
 *  Pointer to Forward Quant Scale Matrix; only element [0] is used
 *
 * @param[in] pu2_threshold_matrix
 *  Pointer to Forward Quant Threshold Matrix (not referenced by this
 *  implementation)
 *
 * @param[in] u4_qbits
 *  QP_BITS_h264_4x4 + floor(QP/6)
 *
 * @param[in] u4_round_factor
 *  Quantization Round factor
 *
 * @param[out] pu1_nnz
 *  pu1_nnz[0] receives the number of non-zero quantized values
 *
 * @returns
 *  None
 *
 * @remarks
 *  None
 *
 */

void ih264_hadamard_quant_4x4_sse42(WORD16 *pi2_src, WORD16 *pi2_dst,
                          const UWORD16 *pu2_scale_matrix,
                          const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
                          UWORD32 u4_round_factor,UWORD8 *pu1_nnz
                          )
{
    WORD32 u4_zero_coeff,u4_nonzero_coeff=0;
    __m128i cmp0, cmp1, sum0, sum1, sum2;
    WORD32 mask0, mask1;
    __m128i src_r0_r1, src_r2_r3, sign_reg;
    __m128i src_r0, src_r1, src_r2, src_r3;
    __m128i zero_8x16b = _mm_setzero_si128();
    __m128i temp0, temp1, temp2, temp3;
    __m128i sign_reg0, sign_reg1, sign_reg2, sign_reg3;
    __m128i temp_1 = _mm_set1_epi16(1);
    __m128i rnd_fact = _mm_set1_epi32(u4_round_factor);
    __m128i scale_val = _mm_set1_epi32(pu2_scale_matrix[0]);

    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row

    /* Sign-extend to 32 bits: interleaving each value with its own sign */
    /* mask (0xFFFF for negative lanes, 0 otherwise) fills the upper half */
    sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r0_r1);
    src_r0 = _mm_unpacklo_epi16(src_r0_r1, sign_reg);   //a0 a1 a2 a3
    src_r1 = _mm_unpackhi_epi16(src_r0_r1, sign_reg);   //b0 b1 b2 b3
    sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r2_r3);
    src_r2 = _mm_unpacklo_epi16(src_r2_r3, sign_reg);   //c0 c1 c2 c3
    src_r3 = _mm_unpackhi_epi16(src_r2_r3, sign_reg);   //d0 d1 d2 d3

    /* Perform Forward transform */
    /*-------------------------------------------------------------*/
    /* Forward DC transform [ Horizontal transformation ]          */
    /*-------------------------------------------------------------*/
    // Matrix transpose
    /*
     *  a0 a1 a2 a3
     *  b0 b1 b2 b3
     *  c0 c1 c2 c3
     *  d0 d1 d2 d3
     */
    temp0 = _mm_unpacklo_epi32(src_r0, src_r1);         //a0 b0 a1 b1
    temp2 = _mm_unpacklo_epi32(src_r2, src_r3);         //c0 d0 c1 d1
    temp1 = _mm_unpackhi_epi32(src_r0, src_r1);         //a2 b2 a3 b3
    temp3 = _mm_unpackhi_epi32(src_r2, src_r3);         //c2 d2 c3 d3
    src_r0 = _mm_unpacklo_epi64(temp0, temp2);          //a0 b0 c0 d0
    src_r1 = _mm_unpackhi_epi64(temp0, temp2);          //a1 b1 c1 d1
    src_r2 = _mm_unpacklo_epi64(temp1, temp3);          //a2 b2 c2 d2
    src_r3 = _mm_unpackhi_epi64(temp1, temp3);          //a3 b3 c3 d3

    /* Butterfly, first stage: x0 = z0+z3, x1 = z1+z2, x2 = z1-z2, x3 = z0-z3 */
    temp0 = _mm_add_epi32(src_r0, src_r3);
    temp1 = _mm_add_epi32(src_r1, src_r2);
    temp2 = _mm_sub_epi32(src_r1, src_r2);
    temp3 = _mm_sub_epi32(src_r0, src_r3);

    /* Second stage: x0+x1, x2+x3, x0-x1, x3-x2 */
    src_r0 = _mm_add_epi32(temp0, temp1);
    src_r1 = _mm_add_epi32(temp2, temp3);
    src_r2 = _mm_sub_epi32(temp0, temp1);
    src_r3 = _mm_sub_epi32(temp3, temp2);

    /*-------------------------------------------------------------*/
    /* Forward DC transform [ Vertical transformation ]            */
    /*-------------------------------------------------------------*/
    // Matrix transpose
    /*
     *  a0 b0 c0 d0
     *  a1 b1 c1 d1
     *  a2 b2 c2 d2
     *  a3 b3 c3 d3
     */
    temp0 = _mm_unpacklo_epi32(src_r0, src_r1);         //a0 a1 b0 b1
    temp2 = _mm_unpacklo_epi32(src_r2, src_r3);         //a2 a3 b2 b3
    temp1 = _mm_unpackhi_epi32(src_r0, src_r1);         //c0 c1 d0 d1
    temp3 = _mm_unpackhi_epi32(src_r2, src_r3);         //c2 c3 d2 d3
    src_r0 = _mm_unpacklo_epi64(temp0, temp2);          //a0 a1 a2 a3
    src_r1 = _mm_unpackhi_epi64(temp0, temp2);          //b0 b1 b2 b3
    src_r2 = _mm_unpacklo_epi64(temp1, temp3);          //c0 c1 c2 c3
    src_r3 = _mm_unpackhi_epi64(temp1, temp3);          //d0 d1 d2 d3

    temp0 = _mm_add_epi32(src_r0, src_r3);
    temp1 = _mm_add_epi32(src_r1, src_r2);
    temp2 = _mm_sub_epi32(src_r1, src_r2);
    temp3 = _mm_sub_epi32(src_r0, src_r3);

    src_r0 = _mm_add_epi32(temp0, temp1);
    src_r1 = _mm_add_epi32(temp2, temp3);
    src_r2 = _mm_sub_epi32(temp0, temp1);
    src_r3 = _mm_sub_epi32(temp3, temp2);

    /* Halve the transformed values (arithmetic shift keeps the sign) */
    src_r0 = _mm_srai_epi32(src_r0, 1);
    src_r1 = _mm_srai_epi32(src_r1, 1);
    src_r2 = _mm_srai_epi32(src_r2, 1);
    src_r3 = _mm_srai_epi32(src_r3, 1);

    // Quantization
    sign_reg0 = _mm_cmpgt_epi32(zero_8x16b, src_r0);    //Find sign of each value for later restoration
    sign_reg1 = _mm_cmpgt_epi32(zero_8x16b, src_r1);
    sign_reg2 = _mm_cmpgt_epi32(zero_8x16b, src_r2);
    sign_reg3 = _mm_cmpgt_epi32(zero_8x16b, src_r3);

    sign_reg0 = _mm_packs_epi32(sign_reg0, sign_reg1);  //Sign = -1 for negative values, 0 otherwise
    sign_reg2 = _mm_packs_epi32(sign_reg2, sign_reg3);

    sign_reg0 = _mm_slli_epi16(sign_reg0, 1);           //Sign = -2 for negative values, 0 otherwise
    sign_reg2 = _mm_slli_epi16(sign_reg2, 1);

    sign_reg0 = _mm_add_epi16(temp_1,sign_reg0);        //Sign = -1 for negative values, +1 otherwise
    sign_reg2 = _mm_add_epi16(temp_1,sign_reg2);

    src_r0 = _mm_abs_epi32(src_r0);                     //Absolute values
    src_r1 = _mm_abs_epi32(src_r1);
    src_r2 = _mm_abs_epi32(src_r2);
    src_r3 = _mm_abs_epi32(src_r3);

    temp0 = _mm_mullo_epi32(scale_val, src_r0);         //multiply by pu2_scale_matrix[0]
    temp1 = _mm_mullo_epi32(scale_val, src_r1);
    temp2 = _mm_mullo_epi32(scale_val, src_r2);
    temp3 = _mm_mullo_epi32(scale_val, src_r3);

    temp0 = _mm_add_epi32(temp0,rnd_fact);              //Add round factor
    temp1 = _mm_add_epi32(temp1,rnd_fact);
    temp2 = _mm_add_epi32(temp2,rnd_fact);
    temp3 = _mm_add_epi32(temp3,rnd_fact);

    temp0 = _mm_srli_epi32(temp0,u4_qbits);             //Right shift by qbits; operands are magnitudes, so a logical shift is used
    temp1 = _mm_srli_epi32(temp1,u4_qbits);
    temp2 = _mm_srli_epi32(temp2,u4_qbits);
    temp3 = _mm_srli_epi32(temp3,u4_qbits);

    temp0 = _mm_packs_epi32 (temp0,temp1);              //Narrow to 16 bits with signed saturation
    temp2 = _mm_packs_epi32 (temp2,temp3);

    temp0 = _mm_sign_epi16(temp0, sign_reg0);           //Sign restoration
    temp2 = _mm_sign_epi16(temp2, sign_reg2);

    _mm_storeu_si128((__m128i *) (&pi2_dst[0]), temp0);
    _mm_storeu_si128((__m128i *) (&pi2_dst[8]), temp2);

    /* Count zero outputs: cmpN is 0xFFFF in every lane that quantized to */
    /* zero, and movemask yields two mask bits per such lane              */
    cmp0 = _mm_cmpeq_epi16(temp0, zero_8x16b);
    cmp1 = _mm_cmpeq_epi16(temp2, zero_8x16b);

    mask0 = _mm_movemask_epi8(cmp0);
    mask1 = _mm_movemask_epi8(cmp1);
    u4_zero_coeff = 0;
    if(mask0)
    {
        if(mask0 == 0xffff)
            u4_zero_coeff+=8;       /* all eight lanes are zero */
        else
        {
            /* Turn each zero lane into a 1 and sum the eight lanes */
            cmp0 = _mm_and_si128(temp_1, cmp0);
            sum0 = _mm_hadd_epi16(cmp0, zero_8x16b);
            sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
            sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
            u4_zero_coeff += _mm_cvtsi128_si32(sum2);
        }
    }
    if(mask1)
    {
        if(mask1 == 0xffff)
            u4_zero_coeff+=8;       /* all eight lanes are zero */
        else
        {
            cmp1 = _mm_and_si128(temp_1, cmp1);
            sum0 = _mm_hadd_epi16(cmp1, zero_8x16b);
            sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
            sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
            u4_zero_coeff += _mm_cvtsi128_si32(sum2);
        }
    }

    /* Return total nonzero coefficients in the current sub block */
    u4_nonzero_coeff = 16 - u4_zero_coeff;
    pu1_nnz[0] = u4_nonzero_coeff;
}

+ + +/** + ******************************************************************************* + * + * @brief + * This function performs forward hadamard transform and quantization on a 2*2 block + * for both U and V planes + * + * @par Description: + * The function accepts source buffer and estimation buffer. From these, it + * computes the residue. This is residue is then transformed and quantized. + * The transform and quantization are in placed computed. They use the residue + * buffer for this. 
+ * + * @param[in] pu1_src + * Pointer to source sub-block + * + * @param[in] pu1_pred + * Pointer to prediction sub-block + * + * @param[in] pi2_out + * Pointer to residual sub-block + * + * @param[in] src_strd + * Source stride + * + * @param[in] pred_strd + * Prediction stride + * + * @param[in] dst_strd + * Destination stride + * + * @param[in] u4_qbits + * QP_BITS_h264_4x4 + floor(QP/6) + * + * @param[in] pu2_threshold_matrix + * Pointer to Forward Quant Threshold Matrix + * + * @param[in] pu2_scale_matrix + * Pointer to Forward Quant Scale Matrix + * + * @param[in] u4_round_factor + * Quantization Round factor + * + * @param[out] pu1_nnz + * Total non-zero coefficients in the current sub-block + * + * @returns + * + * @remarks + * NNZ for dc is populated at 0 and 5th position of pu1_nnz + * + */ + +void ih264_hadamard_quant_2x2_uv_sse42(WORD16 *pi2_src, WORD16 *pi2_dst, + const UWORD16 *pu2_scale_matrix, + const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits, + UWORD32 u4_round_factor,UWORD8 *pu1_nnz) +{ + WORD32 val, nonzero_coeff_0, nonzero_coeff_1=0; + nonzero_coeff_0 = 0; + __m128i cmp, cmp0, cmp1; + __m128i sum0, sum1; + WORD32 mask, mask0, mask1; + __m128i src, plane_0, plane_1, temp0, temp1, sign_reg; + __m128i zero_8x16b = _mm_setzero_si128(); + __m128i scale_val = _mm_set1_epi32(pu2_scale_matrix[0]); + __m128i sign_reg0, sign_reg1; + __m128i temp_1 = _mm_set1_epi16(1); + __m128i rnd_fact = _mm_set1_epi32(u4_round_factor); + + src = _mm_loadu_si128((__m128i *)pi2_src); //a0 a1 a2 a3 b0 b1 b2 b3 + sign_reg = _mm_cmpgt_epi16(zero_8x16b, src); + plane_0 = _mm_unpacklo_epi16(src, sign_reg); //a0 a1 a2 a3 -- 32 bits + plane_1 = _mm_unpackhi_epi16(src, sign_reg); //b0 b1 b2 b3 -- 32 bits + + temp0 = _mm_hadd_epi32(plane_0, plane_1); //a0+a1 a2+a3 b0+b1 b2+b3 + temp1 = _mm_hsub_epi32(plane_0, plane_1); //a0-a1 a2-a3 b0-b1 b2-b3 + + plane_0 = _mm_hadd_epi32(temp0, temp1); //a0+a1+a2+a3 b0+b1+b2+b3 a0-a1+a2-a3 b0-b1+b2-b3 + plane_1 = _mm_hsub_epi32(temp0, 
temp1); //a0+a1-a2-a3 b0+b1-b2-b3 a0-a1-a2+a3 b0-b1-b2+b3 + + temp0 = _mm_unpacklo_epi32(plane_0, plane_1); //a0+a1+a2+a3 a0+a1-a2-a3 b0+b1+b2+b3 b0+b1-b2-b3 + temp1 = _mm_unpackhi_epi32(plane_0, plane_1); //a0-a1+a2-a3 a0-a1-a2+a3 b0-b1+b2-b3 b0-b1-b2+b3 + + plane_0 = _mm_unpacklo_epi64(temp0, temp1); //a0+a1+a2+a3 a0+a1-a2-a3 a0-a1+a2-a3 a0-a1-a2+a3 + plane_1 = _mm_unpackhi_epi64(temp0, temp1); //b0+b1+b2+b3 b0+b1-b2-b3 b0-b1+b2-b3 b0-b1-b2+b3 + + plane_0 = _mm_shuffle_epi32(plane_0, 0xd8); //a0+a1+a2+a3 a0-a1+a2-a3 a0+a1-a2-a3 a0-a1-a2+a3 + plane_1 = _mm_shuffle_epi32(plane_1, 0xd8); //b0+b1+b2+b3 b0-b1+b2-b3 b0+b1-b2-b3 b0-b1-b2+b3 + // Quantization + sign_reg0 = _mm_cmpgt_epi32(zero_8x16b, plane_0); //Find sign of each value for later restoration + sign_reg1 = _mm_cmpgt_epi32(zero_8x16b, plane_1); + + sign_reg0 = _mm_packs_epi32(sign_reg0, sign_reg1); //Sign = -1 or 0 depending on <0 or >0 respectively + sign_reg0 = _mm_slli_epi16(sign_reg0, 1); //Sign = -2 or 0 depending on <0 or >0 respectively + sign_reg0 = _mm_add_epi16(temp_1,sign_reg0); //Sign = -1 or 1 depending on <0 or >0 respectively + + plane_0 = _mm_abs_epi32(plane_0); //Absolute values + plane_1 = _mm_abs_epi32(plane_1); + + temp0 = _mm_mullo_epi32(scale_val, plane_0); //multiply by pu2_scale_matrix[0] + temp1 = _mm_mullo_epi32(scale_val, plane_1); //multiply by pu2_scale_matrix[0] + + temp0 = _mm_add_epi32(temp0,rnd_fact); //Add round factor + temp1 = _mm_add_epi32(temp1,rnd_fact); + + temp0 = _mm_srli_epi32(temp0,u4_qbits); //RIght shift by qbits, unsigned variable, so shift right immediate works + temp1 = _mm_srli_epi32(temp1,u4_qbits); + + temp0 = _mm_packs_epi32 (temp0,temp1); //Final values are 16-bits only. 
+ temp0 = _mm_sign_epi16(temp0, sign_reg0); //Sign restoration + + _mm_storeu_si128((__m128i *) (&pi2_dst[0]), temp0); + + cmp = _mm_cmpeq_epi16(temp0, zero_8x16b); + mask = _mm_movemask_epi8(cmp); + mask0 = mask & 0xff; + mask1 = mask>>8; + if(mask0) + { + if(mask0 == 0xff) + nonzero_coeff_0 += 4; + else + { + cmp0 = _mm_and_si128(temp_1, cmp); + sum0 = _mm_hadd_epi16(cmp0, zero_8x16b); + sum1 = _mm_hadd_epi16(sum0, zero_8x16b); + val = _mm_cvtsi128_si32(sum1); + val = val & 0xffff; + nonzero_coeff_0 += val; + } + } + if(mask1) + { + if(mask1 == 0xff) + nonzero_coeff_1 += 4; + else + { + cmp1 = _mm_srli_si128(cmp, 8); + cmp1 = _mm_and_si128(temp_1, cmp1); + sum0 = _mm_hadd_epi16(cmp1, zero_8x16b); + sum1 = _mm_hadd_epi16(sum0, zero_8x16b); + nonzero_coeff_1 += _mm_cvtsi128_si32(sum1); + } + } + + pu1_nnz[0] = 4 - nonzero_coeff_0; + pu1_nnz[1] = 4 - nonzero_coeff_1; + +} diff --git a/common/x86/ih264_weighted_pred_sse42.c b/common/x86/ih264_weighted_pred_sse42.c new file mode 100755 index 0000000..b1684b7 --- /dev/null +++ b/common/x86/ih264_weighted_pred_sse42.c @@ -0,0 +1,1349 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ih264_weighted_pred_intr_sse42.c */ +/* */ +/* Description : Contains function definitions for weighted */ +/* prediction functions in x86 sse4 intrinsics */ +/* */ +/* List of Functions : ih264_default_weighted_pred_luma_sse42() */ +/* ih264_default_weighted_pred_chroma_sse42() */ +/* ih264_weighted_pred_luma_sse42() */ +/* ih264_weighted_pred_chroma_sse42() */ +/* ih264_weighted_bipred_luma_sse42() */ +/* ih264_weighted_bipred_chroma_sse42() */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 30 01 2015 Kaushik Initial version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +#include <immintrin.h> +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_weighted_pred.h" + +/*****************************************************************************/ +/* Function definitions . */ +/*****************************************************************************/ +/*****************************************************************************/ +/* */ +/* Function Name : ih264_default_weighted_pred_luma_sse42 */ +/* */ +/* Description : This function performs the default weighted prediction */ +/* as described in sec 8.4.2.3.1 titled "Default weighted */ +/* sample prediction process" for luma. The function gets */ +/* two ht x wd blocks, calculates their rounded-average and */ +/* stores it in the destination block. (ht,wd) can be */ +/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). 
*/ +/* */ +/* Inputs : pu1_src1 - Pointer to source 1 */ +/* pu1_src2 - Pointer to source 2 */ +/* pu1_dst - Pointer to destination */ +/* src_strd1 - stride for source 1 */ +/* src_strd1 - stride for source 2 */ +/* dst_strd - stride for destination */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 04 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_default_weighted_pred_luma_sse42(UWORD8 *pu1_src1, + UWORD8 *pu1_src2, + UWORD8 *pu1_dst, + WORD32 src_strd1, + WORD32 src_strd2, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd) +{ + __m128i y0_0_16x8b, y0_1_16x8b, y0_2_16x8b, y0_3_16x8b; + __m128i y1_0_16x8b, y1_1_16x8b, y1_2_16x8b, y1_3_16x8b; + + if(wd == 4) + { + __m128i mask_full_16x8b, mask_ll4B_16x8b; + + mask_full_16x8b = _mm_set1_epi8(0xff); + mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12); + // mask for first four bytes + + do + { + y0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1); + y0_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1)); + y0_2_16x8b = _mm_loadl_epi64( + (__m128i *)(pu1_src1 + (src_strd1 << 1))); + y0_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1 * 3)); + + y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2); + y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2)); + y1_2_16x8b = _mm_loadl_epi64( + (__m128i *)(pu1_src2 + (src_strd2 << 1))); + y1_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2 * 3)); + + y0_0_16x8b = _mm_avg_epu8(y0_0_16x8b, y1_0_16x8b); + y0_1_16x8b = _mm_avg_epu8(y0_1_16x8b, y1_1_16x8b); + y0_2_16x8b = _mm_avg_epu8(y0_2_16x8b, y1_2_16x8b); + y0_3_16x8b = _mm_avg_epu8(y0_3_16x8b, y1_3_16x8b); + + _mm_maskmoveu_si128(y0_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst); + _mm_maskmoveu_si128(y0_1_16x8b, mask_ll4B_16x8b, + (char*)(pu1_dst + dst_strd)); + 
_mm_maskmoveu_si128(y0_2_16x8b, mask_ll4B_16x8b, + (char*)(pu1_dst + (dst_strd << 1))); + _mm_maskmoveu_si128(y0_3_16x8b, mask_ll4B_16x8b, + (char*)(pu1_dst + dst_strd * 3)); + + ht -= 4; + pu1_src1 += src_strd1 << 2; + pu1_src2 += src_strd2 << 2; + pu1_dst += dst_strd << 2; + } + while(ht > 0); + } + else if(wd == 8) + { + do + { + y0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1); + y0_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1)); + y0_2_16x8b = _mm_loadl_epi64( + (__m128i *)(pu1_src1 + (src_strd1 << 1))); + y0_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1 * 3)); + + y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2); + y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2)); + y1_2_16x8b = _mm_loadl_epi64( + (__m128i *)(pu1_src2 + (src_strd2 << 1))); + y1_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2 * 3)); + + y0_0_16x8b = _mm_avg_epu8(y0_0_16x8b, y1_0_16x8b); + y0_1_16x8b = _mm_avg_epu8(y0_1_16x8b, y1_1_16x8b); + y0_2_16x8b = _mm_avg_epu8(y0_2_16x8b, y1_2_16x8b); + y0_3_16x8b = _mm_avg_epu8(y0_3_16x8b, y1_3_16x8b); + + _mm_storel_epi64((__m128i *)pu1_dst, y0_0_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y0_1_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd << 1)), y0_2_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd * 3), y0_3_16x8b); + + ht -= 4; + pu1_src1 += src_strd1 << 2; + pu1_src2 += src_strd2 << 2; + pu1_dst += dst_strd << 2; + } + while(ht > 0); + } + else // wd == 16 + { + __m128i y0_4_16x8b, y0_5_16x8b, y0_6_16x8b, y0_7_16x8b; + __m128i y1_4_16x8b, y1_5_16x8b, y1_6_16x8b, y1_7_16x8b; + + do + { + y0_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1); + y0_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1)); + y0_2_16x8b = _mm_loadu_si128( + (__m128i *)(pu1_src1 + (src_strd1 << 1))); + y0_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 3)); + y0_4_16x8b = _mm_loadu_si128( + (__m128i *)(pu1_src1 + (src_strd1 << 2))); + y0_5_16x8b = 
_mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 5)); + y0_6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 6)); + y0_7_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 7)); + + y1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2); + y1_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2)); + y1_2_16x8b = _mm_loadu_si128( + (__m128i *)(pu1_src2 + (src_strd2 << 1))); + y1_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2 * 3)); + y1_4_16x8b = _mm_loadu_si128( + (__m128i *)(pu1_src2 + (src_strd2 << 2))); + y1_5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2 * 5)); + y1_6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2 * 6)); + y1_7_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2 * 7)); + + y0_0_16x8b = _mm_avg_epu8(y0_0_16x8b, y1_0_16x8b); + y0_1_16x8b = _mm_avg_epu8(y0_1_16x8b, y1_1_16x8b); + y0_2_16x8b = _mm_avg_epu8(y0_2_16x8b, y1_2_16x8b); + y0_3_16x8b = _mm_avg_epu8(y0_3_16x8b, y1_3_16x8b); + y0_4_16x8b = _mm_avg_epu8(y0_4_16x8b, y1_4_16x8b); + y0_5_16x8b = _mm_avg_epu8(y0_5_16x8b, y1_5_16x8b); + y0_6_16x8b = _mm_avg_epu8(y0_6_16x8b, y1_6_16x8b); + y0_7_16x8b = _mm_avg_epu8(y0_7_16x8b, y1_7_16x8b); + + _mm_storeu_si128((__m128i *)pu1_dst, y0_0_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y0_1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + (dst_strd << 1)), y0_2_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 3), y0_3_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + (dst_strd << 2)), y0_4_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 5), y0_5_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 6), y0_6_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 7), y0_7_16x8b); + + ht -= 8; + pu1_src1 += src_strd1 << 3; + pu1_src2 += src_strd2 << 3; + pu1_dst += dst_strd << 3; + } + while(ht > 0); + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : 
ih264_default_weighted_pred_chroma_sse42 */ +/* */ +/* Description : This function performs the default weighted prediction */ +/* as described in sec 8.4.2.3.1 titled "Default weighted */ +/* sample prediction process" for chroma. The function gets */ +/* two ht x wd blocks, calculates their rounded-average and */ +/* stores it in the destination block. (ht,wd) can be */ +/* (2,2), (4,2) , (2,4), (4,4), (8,4), (4,8) or (8,8). */ +/* */ +/* Inputs : pu1_src1 - Pointer to source 1 */ +/* pu1_src2 - Pointer to source 2 */ +/* pu1_dst - Pointer to destination */ +/* src_strd1 - stride for source 1 */ +/* src_strd1 - stride for source 2 */ +/* dst_strd - stride for destination */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 04 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_default_weighted_pred_chroma_sse42(UWORD8 *pu1_src1, + UWORD8 *pu1_src2, + UWORD8 *pu1_dst, + WORD32 src_strd1, + WORD32 src_strd2, + WORD32 dst_strd, + WORD32 ht, + WORD32 wd) +{ + __m128i uv0_0_16x8b, uv0_1_16x8b; + __m128i uv1_0_16x8b, uv1_1_16x8b; + + if(wd == 2) + { + __m128i mask_full_16x8b, mask_ll4B_16x8b; + + mask_full_16x8b = _mm_set1_epi8(0xff); + mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12); + // mask for first four bytes + + do + { + uv0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1); + uv0_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1)); + + uv1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2); + uv1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2)); + + uv0_0_16x8b = _mm_avg_epu8(uv0_0_16x8b, uv1_0_16x8b); + uv0_1_16x8b = _mm_avg_epu8(uv0_1_16x8b, uv1_1_16x8b); + + _mm_maskmoveu_si128(uv0_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst); + _mm_maskmoveu_si128(uv0_1_16x8b, mask_ll4B_16x8b, + (char*)(pu1_dst + dst_strd)); + + ht -= 2; + 
pu1_src1 += src_strd1 << 1; + pu1_src2 += src_strd2 << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + else if(wd == 4) + { + do + { + uv0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1); + uv0_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1)); + + uv1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2); + uv1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2)); + + uv0_0_16x8b = _mm_avg_epu8(uv0_0_16x8b, uv1_0_16x8b); + uv0_1_16x8b = _mm_avg_epu8(uv0_1_16x8b, uv1_1_16x8b); + + _mm_storel_epi64((__m128i *)pu1_dst, uv0_0_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), uv0_1_16x8b); + + ht -= 2; + pu1_src1 += src_strd1 << 1; + pu1_src2 += src_strd2 << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + else // wd == 8 + { + __m128i uv0_2_16x8b, uv0_3_16x8b; + __m128i uv1_2_16x8b, uv1_3_16x8b; + + do + { + uv0_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1); + uv0_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1)); + uv0_2_16x8b = _mm_loadu_si128( + (__m128i *)(pu1_src1 + (src_strd1 << 1))); + uv0_3_16x8b = _mm_loadu_si128( + (__m128i *)(pu1_src1 + src_strd1 * 3)); + + uv1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2); + uv1_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2)); + uv1_2_16x8b = _mm_loadu_si128( + (__m128i *)(pu1_src2 + (src_strd2 << 1))); + uv1_3_16x8b = _mm_loadu_si128( + (__m128i *)(pu1_src2 + src_strd2 * 3)); + + uv0_0_16x8b = _mm_avg_epu8(uv0_0_16x8b, uv1_0_16x8b); + uv0_1_16x8b = _mm_avg_epu8(uv0_1_16x8b, uv1_1_16x8b); + uv0_2_16x8b = _mm_avg_epu8(uv0_2_16x8b, uv1_2_16x8b); + uv0_3_16x8b = _mm_avg_epu8(uv0_3_16x8b, uv1_3_16x8b); + + _mm_storeu_si128((__m128i *)pu1_dst, uv0_0_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), uv0_1_16x8b); + _mm_storeu_si128( + (__m128i *)(pu1_dst + (dst_strd << 1)), uv0_2_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 3), uv0_3_16x8b); + + ht -= 4; + pu1_src1 += src_strd1 << 2; + pu1_src2 += src_strd2 << 2; + pu1_dst += dst_strd << 
2; + } + while(ht > 0); + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_weighted_pred_luma_sse42 */ +/* */ +/* Description : This function performs the weighted prediction as */ +/* described in sec 8.4.2.3.2 titled "Weighted sample */ +/* prediction process" for luma. The function gets one */ +/* ht x wd block, weights it, rounds it off, offsets it, */ +/* saturates it to unsigned 8-bit and stores it in the */ +/* destination block. (ht,wd) can be (4,4), (8,4), (4,8), */ +/* (8,8), (16,8), (8,16) or (16,16). */ +/* */ +/* Inputs : pu1_src - Pointer to source */ +/* pu1_dst - Pointer to destination */ +/* src_strd - stride for source */ +/* dst_strd - stride for destination */ +/* log_wd - number of bits to be rounded off */ +/* wt - weight value */ +/* ofst - offset value */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 04 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_weighted_pred_luma_sse42(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 log_wd, + WORD32 wt, + WORD32 ofst, + WORD32 ht, + WORD32 wd) +{ + __m128i y_0_16x8b, y_1_16x8b, y_2_16x8b, y_3_16x8b; + + __m128i wt_8x16b, round_8x16b, ofst_8x16b; + + WORD32 round_val; + + wt = (WORD16)(wt & 0xffff); + round_val = 1 << (log_wd - 1); + ofst = (WORD8)(ofst & 0xff); + + wt_8x16b = _mm_set1_epi16(wt); + round_8x16b = _mm_set1_epi16(round_val); + ofst_8x16b = _mm_set1_epi16(ofst); + + if(wd == 4) + { + __m128i y_0_8x16b, y_2_8x16b; + + __m128i mask_full_16x8b, mask_ll4B_16x8b; + + mask_full_16x8b = _mm_set1_epi8(0xff); + mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12); + // mask for first four bytes + + do + { + y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + y_1_16x8b = 
_mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); + y_2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + (src_strd << 1))); + y_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd * 3)); + + y_0_16x8b = _mm_unpacklo_epi32(y_0_16x8b, y_1_16x8b); + y_2_16x8b = _mm_unpacklo_epi32(y_2_16x8b, y_3_16x8b); + + y_0_8x16b = _mm_cvtepu8_epi16(y_0_16x8b); + y_2_8x16b = _mm_cvtepu8_epi16(y_2_16x8b); + + y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b); + y_2_8x16b = _mm_mullo_epi16(y_2_8x16b, wt_8x16b); + + y_0_8x16b = _mm_adds_epi16(round_8x16b, y_0_8x16b); + y_2_8x16b = _mm_adds_epi16(round_8x16b, y_2_8x16b); + + y_0_8x16b = _mm_srai_epi16(y_0_8x16b, log_wd); + y_2_8x16b = _mm_srai_epi16(y_2_8x16b, log_wd); + + y_0_8x16b = _mm_adds_epi16(ofst_8x16b, y_0_8x16b); + y_2_8x16b = _mm_adds_epi16(ofst_8x16b, y_2_8x16b); + + y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_2_8x16b); + y_1_16x8b = _mm_srli_si128(y_0_16x8b, 4); + y_2_16x8b = _mm_srli_si128(y_0_16x8b, 8); + y_3_16x8b = _mm_srli_si128(y_0_16x8b, 12); + + _mm_maskmoveu_si128(y_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst); + _mm_maskmoveu_si128(y_1_16x8b, mask_ll4B_16x8b, + (char*)(pu1_dst + dst_strd)); + _mm_maskmoveu_si128(y_2_16x8b, mask_ll4B_16x8b, + (char*)(pu1_dst + (dst_strd << 1))); + _mm_maskmoveu_si128(y_3_16x8b, mask_ll4B_16x8b, + (char*)(pu1_dst + dst_strd * 3)); + + ht -= 4; + pu1_src += src_strd << 2; + pu1_dst += dst_strd << 2; + } + while(ht > 0); + } + else if(wd == 8) + { + __m128i y_0_8x16b, y_1_8x16b, y_2_8x16b, y_3_8x16b; + + do + { + y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); + y_2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + (src_strd << 1))); + y_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd * 3)); + + y_0_8x16b = _mm_cvtepu8_epi16(y_0_16x8b); + y_1_8x16b = _mm_cvtepu8_epi16(y_1_16x8b); + y_2_8x16b = _mm_cvtepu8_epi16(y_2_16x8b); + y_3_8x16b = _mm_cvtepu8_epi16(y_3_16x8b); + + y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b); 
+ y_1_8x16b = _mm_mullo_epi16(y_1_8x16b, wt_8x16b); + y_2_8x16b = _mm_mullo_epi16(y_2_8x16b, wt_8x16b); + y_3_8x16b = _mm_mullo_epi16(y_3_8x16b, wt_8x16b); + + y_0_8x16b = _mm_adds_epi16(round_8x16b, y_0_8x16b); + y_1_8x16b = _mm_adds_epi16(round_8x16b, y_1_8x16b); + y_2_8x16b = _mm_adds_epi16(round_8x16b, y_2_8x16b); + y_3_8x16b = _mm_adds_epi16(round_8x16b, y_3_8x16b); + + y_0_8x16b = _mm_srai_epi16(y_0_8x16b, log_wd); + y_1_8x16b = _mm_srai_epi16(y_1_8x16b, log_wd); + y_2_8x16b = _mm_srai_epi16(y_2_8x16b, log_wd); + y_3_8x16b = _mm_srai_epi16(y_3_8x16b, log_wd); + + y_0_8x16b = _mm_adds_epi16(ofst_8x16b, y_0_8x16b); + y_1_8x16b = _mm_adds_epi16(ofst_8x16b, y_1_8x16b); + y_2_8x16b = _mm_adds_epi16(ofst_8x16b, y_2_8x16b); + y_3_8x16b = _mm_adds_epi16(ofst_8x16b, y_3_8x16b); + + y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_1_8x16b); + y_2_16x8b = _mm_packus_epi16(y_2_8x16b, y_3_8x16b); + y_1_16x8b = _mm_srli_si128(y_0_16x8b, 8); + y_3_16x8b = _mm_srli_si128(y_2_16x8b, 8); + + _mm_storel_epi64((__m128i *)pu1_dst, y_0_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y_1_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd << 1)), y_2_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd * 3), y_3_16x8b); + + ht -= 4; + pu1_src += src_strd << 2; + pu1_dst += dst_strd << 2; + } + while(ht > 0); + } + else // wd == 16 + { + __m128i y_0L_8x16b, y_1L_8x16b, y_2L_8x16b, y_3L_8x16b; + __m128i y_0H_8x16b, y_1H_8x16b, y_2H_8x16b, y_3H_8x16b; + + __m128i zero_16x8b; + zero_16x8b = _mm_set1_epi8(0); + + do + { + y_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + y_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); + y_2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + (src_strd << 1))); + y_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd * 3)); + + y_0L_8x16b = _mm_cvtepu8_epi16(y_0_16x8b); + y_0H_8x16b = _mm_unpackhi_epi8(y_0_16x8b, zero_16x8b); + y_1L_8x16b = _mm_cvtepu8_epi16(y_1_16x8b); + y_1H_8x16b = _mm_unpackhi_epi8(y_1_16x8b, 
zero_16x8b); + y_2L_8x16b = _mm_cvtepu8_epi16(y_2_16x8b); + y_2H_8x16b = _mm_unpackhi_epi8(y_2_16x8b, zero_16x8b); + y_3L_8x16b = _mm_cvtepu8_epi16(y_3_16x8b); + y_3H_8x16b = _mm_unpackhi_epi8(y_3_16x8b, zero_16x8b); + + y_0L_8x16b = _mm_mullo_epi16(y_0L_8x16b, wt_8x16b); + y_0H_8x16b = _mm_mullo_epi16(y_0H_8x16b, wt_8x16b); + y_1L_8x16b = _mm_mullo_epi16(y_1L_8x16b, wt_8x16b); + y_1H_8x16b = _mm_mullo_epi16(y_1H_8x16b, wt_8x16b); + y_2L_8x16b = _mm_mullo_epi16(y_2L_8x16b, wt_8x16b); + y_2H_8x16b = _mm_mullo_epi16(y_2H_8x16b, wt_8x16b); + y_3L_8x16b = _mm_mullo_epi16(y_3L_8x16b, wt_8x16b); + y_3H_8x16b = _mm_mullo_epi16(y_3H_8x16b, wt_8x16b); + + y_0L_8x16b = _mm_adds_epi16(round_8x16b, y_0L_8x16b); + y_0H_8x16b = _mm_adds_epi16(round_8x16b, y_0H_8x16b); + y_1L_8x16b = _mm_adds_epi16(round_8x16b, y_1L_8x16b); + y_1H_8x16b = _mm_adds_epi16(round_8x16b, y_1H_8x16b); + y_2L_8x16b = _mm_adds_epi16(round_8x16b, y_2L_8x16b); + y_2H_8x16b = _mm_adds_epi16(round_8x16b, y_2H_8x16b); + y_3L_8x16b = _mm_adds_epi16(round_8x16b, y_3L_8x16b); + y_3H_8x16b = _mm_adds_epi16(round_8x16b, y_3H_8x16b); + + y_0L_8x16b = _mm_srai_epi16(y_0L_8x16b, log_wd); + y_0H_8x16b = _mm_srai_epi16(y_0H_8x16b, log_wd); + y_1L_8x16b = _mm_srai_epi16(y_1L_8x16b, log_wd); + y_1H_8x16b = _mm_srai_epi16(y_1H_8x16b, log_wd); + y_2L_8x16b = _mm_srai_epi16(y_2L_8x16b, log_wd); + y_2H_8x16b = _mm_srai_epi16(y_2H_8x16b, log_wd); + y_3L_8x16b = _mm_srai_epi16(y_3L_8x16b, log_wd); + y_3H_8x16b = _mm_srai_epi16(y_3H_8x16b, log_wd); + + y_0L_8x16b = _mm_adds_epi16(ofst_8x16b, y_0L_8x16b); + y_0H_8x16b = _mm_adds_epi16(ofst_8x16b, y_0H_8x16b); + y_1L_8x16b = _mm_adds_epi16(ofst_8x16b, y_1L_8x16b); + y_1H_8x16b = _mm_adds_epi16(ofst_8x16b, y_1H_8x16b); + y_2L_8x16b = _mm_adds_epi16(ofst_8x16b, y_2L_8x16b); + y_2H_8x16b = _mm_adds_epi16(ofst_8x16b, y_2H_8x16b); + y_3L_8x16b = _mm_adds_epi16(ofst_8x16b, y_3L_8x16b); + y_3H_8x16b = _mm_adds_epi16(ofst_8x16b, y_3H_8x16b); + + y_0_16x8b = _mm_packus_epi16(y_0L_8x16b, 
y_0H_8x16b); + y_1_16x8b = _mm_packus_epi16(y_1L_8x16b, y_1H_8x16b); + y_2_16x8b = _mm_packus_epi16(y_2L_8x16b, y_2H_8x16b); + y_3_16x8b = _mm_packus_epi16(y_3L_8x16b, y_3H_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, y_0_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y_1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + (dst_strd << 1)), y_2_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 3), y_3_16x8b); + + ht -= 4; + pu1_src += src_strd << 2; + pu1_dst += dst_strd << 2; + } + while(ht > 0); + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_weighted_pred_chroma_sse42 */ +/* */ +/* Description : This function performs the weighted prediction as */ +/* described in sec 8.4.2.3.2 titled "Weighted sample */ +/* prediction process" for chroma. The function gets one */ +/* ht x wd block, weights it, rounds it off, offsets it, */ +/* saturates it to unsigned 8-bit and stores it in the */ +/* destination block. (ht,wd) can be (2,2), (4,2), (2,4), */ +/* (4,4), (8,4), (4,8) or (8,8). 
*/ +/* */ +/* Inputs : pu1_src - Pointer to source */ +/* pu1_dst - Pointer to destination */ +/* src_strd - stride for source */ +/* dst_strd - stride for destination */ +/* log_wd - number of bits to be rounded off */ +/* wt - weight values for u and v */ +/* ofst - offset values for u and v */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 04 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_weighted_pred_chroma_sse42(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 log_wd, + WORD32 wt, + WORD32 ofst, + WORD32 ht, + WORD32 wd) +{ + __m128i y_0_16x8b, y_1_16x8b; + + __m128i wt_8x16b, round_8x16b, ofst_8x16b; + + WORD32 ofst_u, ofst_v; + WORD32 round_val; + + ofst_u = (WORD8)(ofst & 0xff); + ofst_v = (WORD8)(ofst >> 8); + round_val = 1 << (log_wd - 1); + ofst = (ofst_u & 0xffff) | (ofst_v << 16); + + wt_8x16b = _mm_set1_epi32(wt); + round_8x16b = _mm_set1_epi16(round_val); + ofst_8x16b = _mm_set1_epi32(ofst); + + if(wd == 2) + { + __m128i y_0_8x16b; + + __m128i mask_full_16x8b, mask_ll4B_16x8b; + + mask_full_16x8b = _mm_set1_epi8(0xff); + mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12); + // mask for first four bytes + + do + { + y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); + + y_0_16x8b = _mm_unpacklo_epi32(y_0_16x8b, y_1_16x8b); + + y_0_8x16b = _mm_cvtepu8_epi16(y_0_16x8b); + + y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b); + + y_0_8x16b = _mm_adds_epi16(round_8x16b, y_0_8x16b); + + y_0_8x16b = _mm_srai_epi16(y_0_8x16b, log_wd); + + y_0_8x16b = _mm_adds_epi16(ofst_8x16b, y_0_8x16b); + + y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_0_8x16b); + y_1_16x8b = _mm_srli_si128(y_0_16x8b, 4); + + _mm_maskmoveu_si128(y_0_16x8b, mask_ll4B_16x8b, 
(char*)pu1_dst); + _mm_maskmoveu_si128(y_1_16x8b, mask_ll4B_16x8b, + (char*)(pu1_dst + dst_strd)); + + ht -= 2; + pu1_src += src_strd << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + else if(wd == 4) + { + __m128i y_0_8x16b, y_1_8x16b; + + do + { + y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); + + y_0_8x16b = _mm_cvtepu8_epi16(y_0_16x8b); + y_1_8x16b = _mm_cvtepu8_epi16(y_1_16x8b); + + y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b); + y_1_8x16b = _mm_mullo_epi16(y_1_8x16b, wt_8x16b); + + y_0_8x16b = _mm_adds_epi16(round_8x16b, y_0_8x16b); + y_1_8x16b = _mm_adds_epi16(round_8x16b, y_1_8x16b); + + y_0_8x16b = _mm_srai_epi16(y_0_8x16b, log_wd); + y_1_8x16b = _mm_srai_epi16(y_1_8x16b, log_wd); + + y_0_8x16b = _mm_adds_epi16(ofst_8x16b, y_0_8x16b); + y_1_8x16b = _mm_adds_epi16(ofst_8x16b, y_1_8x16b); + + y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_1_8x16b); + y_1_16x8b = _mm_srli_si128(y_0_16x8b, 8); + + _mm_storel_epi64((__m128i *)pu1_dst, y_0_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y_1_16x8b); + + ht -= 2; + pu1_src += src_strd << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + else // wd == 16 + { + __m128i y_2_16x8b, y_3_16x8b; + __m128i y_0L_8x16b, y_1L_8x16b, y_2L_8x16b, y_3L_8x16b; + __m128i y_0H_8x16b, y_1H_8x16b, y_2H_8x16b, y_3H_8x16b; + + __m128i zero_16x8b; + zero_16x8b = _mm_set1_epi8(0); + + do + { + y_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + y_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); + y_2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + (src_strd << 1))); + y_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd * 3)); + + y_0L_8x16b = _mm_cvtepu8_epi16(y_0_16x8b); + y_0H_8x16b = _mm_unpackhi_epi8(y_0_16x8b, zero_16x8b); + y_1L_8x16b = _mm_cvtepu8_epi16(y_1_16x8b); + y_1H_8x16b = _mm_unpackhi_epi8(y_1_16x8b, zero_16x8b); + y_2L_8x16b = _mm_cvtepu8_epi16(y_2_16x8b); + y_2H_8x16b = _mm_unpackhi_epi8(y_2_16x8b, zero_16x8b); 
+ y_3L_8x16b = _mm_cvtepu8_epi16(y_3_16x8b); + y_3H_8x16b = _mm_unpackhi_epi8(y_3_16x8b, zero_16x8b); + + y_0L_8x16b = _mm_mullo_epi16(y_0L_8x16b, wt_8x16b); + y_0H_8x16b = _mm_mullo_epi16(y_0H_8x16b, wt_8x16b); + y_1L_8x16b = _mm_mullo_epi16(y_1L_8x16b, wt_8x16b); + y_1H_8x16b = _mm_mullo_epi16(y_1H_8x16b, wt_8x16b); + y_2L_8x16b = _mm_mullo_epi16(y_2L_8x16b, wt_8x16b); + y_2H_8x16b = _mm_mullo_epi16(y_2H_8x16b, wt_8x16b); + y_3L_8x16b = _mm_mullo_epi16(y_3L_8x16b, wt_8x16b); + y_3H_8x16b = _mm_mullo_epi16(y_3H_8x16b, wt_8x16b); + + y_0L_8x16b = _mm_adds_epi16(round_8x16b, y_0L_8x16b); + y_0H_8x16b = _mm_adds_epi16(round_8x16b, y_0H_8x16b); + y_1L_8x16b = _mm_adds_epi16(round_8x16b, y_1L_8x16b); + y_1H_8x16b = _mm_adds_epi16(round_8x16b, y_1H_8x16b); + y_2L_8x16b = _mm_adds_epi16(round_8x16b, y_2L_8x16b); + y_2H_8x16b = _mm_adds_epi16(round_8x16b, y_2H_8x16b); + y_3L_8x16b = _mm_adds_epi16(round_8x16b, y_3L_8x16b); + y_3H_8x16b = _mm_adds_epi16(round_8x16b, y_3H_8x16b); + + y_0L_8x16b = _mm_srai_epi16(y_0L_8x16b, log_wd); + y_0H_8x16b = _mm_srai_epi16(y_0H_8x16b, log_wd); + y_1L_8x16b = _mm_srai_epi16(y_1L_8x16b, log_wd); + y_1H_8x16b = _mm_srai_epi16(y_1H_8x16b, log_wd); + y_2L_8x16b = _mm_srai_epi16(y_2L_8x16b, log_wd); + y_2H_8x16b = _mm_srai_epi16(y_2H_8x16b, log_wd); + y_3L_8x16b = _mm_srai_epi16(y_3L_8x16b, log_wd); + y_3H_8x16b = _mm_srai_epi16(y_3H_8x16b, log_wd); + + y_0L_8x16b = _mm_adds_epi16(ofst_8x16b, y_0L_8x16b); + y_0H_8x16b = _mm_adds_epi16(ofst_8x16b, y_0H_8x16b); + y_1L_8x16b = _mm_adds_epi16(ofst_8x16b, y_1L_8x16b); + y_1H_8x16b = _mm_adds_epi16(ofst_8x16b, y_1H_8x16b); + y_2L_8x16b = _mm_adds_epi16(ofst_8x16b, y_2L_8x16b); + y_2H_8x16b = _mm_adds_epi16(ofst_8x16b, y_2H_8x16b); + y_3L_8x16b = _mm_adds_epi16(ofst_8x16b, y_3L_8x16b); + y_3H_8x16b = _mm_adds_epi16(ofst_8x16b, y_3H_8x16b); + + y_0_16x8b = _mm_packus_epi16(y_0L_8x16b, y_0H_8x16b); + y_1_16x8b = _mm_packus_epi16(y_1L_8x16b, y_1H_8x16b); + y_2_16x8b = _mm_packus_epi16(y_2L_8x16b, 
y_2H_8x16b); + y_3_16x8b = _mm_packus_epi16(y_3L_8x16b, y_3H_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, y_0_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y_1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + (dst_strd << 1)), y_2_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 3), y_3_16x8b); + + ht -= 4; + pu1_src += src_strd << 2; + pu1_dst += dst_strd << 2; + } + while(ht > 0); + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264_weighted_bi_pred_luma_sse42 */ +/* */ +/* Description : This function performs the weighted biprediction as */ +/* described in sec 8.4.2.3.2 titled "Weighted sample */ +/* prediction process" for luma. The function gets two */ +/* ht x wd blocks, weights them, adds them, rounds off the */ +/* sum, offsets it, saturates it to unsigned 8-bit and */ +/* stores it in the destination block. (ht,wd) can be */ +/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). 
*/ +/* */ +/* Inputs : pu1_src1 - Pointer to source 1 */ +/* pu1_src2 - Pointer to source 2 */ +/* pu1_dst - Pointer to destination */ +/* src_strd1 - stride for source 1 */ +/* src_strd2 - stride for source 2 */ +/* dst_strd2 - stride for destination */ +/* log_wd - number of bits to be rounded off */ +/* wt1 - weight value for source 1 */ +/* wt2 - weight value for source 2 */ +/* ofst1 - offset value for source 1 */ +/* ofst2 - offset value for source 2 */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 04 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_weighted_bi_pred_luma_sse42(UWORD8 *pu1_src1, + UWORD8 *pu1_src2, + UWORD8 *pu1_dst, + WORD32 src_strd1, + WORD32 src_strd2, + WORD32 dst_strd, + WORD32 log_wd, + WORD32 wt1, + WORD32 wt2, + WORD32 ofst1, + WORD32 ofst2, + WORD32 ht, + WORD32 wd) +{ + __m128i y1_0_16x8b, y1_1_16x8b; + __m128i y2_0_16x8b, y2_1_16x8b; + + __m128i wt1_8x16b, wt2_8x16b; + __m128i ofst_8x16b, round_8x16b; + + WORD32 ofst; + WORD32 round_val, shft; + + wt1 = (WORD16)(wt1 & 0xffff); + wt2 = (WORD16)(wt2 & 0xffff); + round_val = 1 << log_wd; + shft = log_wd + 1; + ofst1 = (WORD8)(ofst1 & 0xff); + ofst2 = (WORD8)(ofst2 & 0xff); + ofst = (ofst1 + ofst2 + 1) >> 1; + + wt1_8x16b = _mm_set1_epi16(wt1); + wt2_8x16b = _mm_set1_epi16(wt2); + round_8x16b = _mm_set1_epi16(round_val); + ofst_8x16b = _mm_set1_epi16(ofst); + + if(wd == 4) + { + __m128i y1_2_16x8b, y1_3_16x8b; + __m128i y2_2_16x8b, y2_3_16x8b; + + __m128i y1_0_8x16b, y1_2_8x16b; + __m128i y2_0_8x16b, y2_2_8x16b; + + __m128i mask_ll4B_16x8b; + + mask_ll4B_16x8b = _mm_set1_epi8(0xff); + mask_ll4B_16x8b = _mm_srli_si128(mask_ll4B_16x8b, 12); + // mask for first four bytes + + do + { + y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1); + y1_1_16x8b = 
_mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1)); + y1_2_16x8b = _mm_loadl_epi64( + (__m128i *)(pu1_src1 + (src_strd1 << 1))); + y1_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1 * 3)); + + y2_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2); + y2_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2)); + y2_2_16x8b = _mm_loadl_epi64( + (__m128i *)(pu1_src2 + (src_strd2 << 1))); + y2_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2 * 3)); + + y1_0_16x8b = _mm_unpacklo_epi32(y1_0_16x8b, y1_1_16x8b); + y1_2_16x8b = _mm_unpacklo_epi32(y1_2_16x8b, y1_3_16x8b); + y2_0_16x8b = _mm_unpacklo_epi32(y2_0_16x8b, y2_1_16x8b); + y2_2_16x8b = _mm_unpacklo_epi32(y2_2_16x8b, y2_3_16x8b); + + y1_0_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b); + y1_2_8x16b = _mm_cvtepu8_epi16(y1_2_16x8b); + y2_0_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b); + y2_2_8x16b = _mm_cvtepu8_epi16(y2_2_16x8b); + + y1_0_8x16b = _mm_mullo_epi16(y1_0_8x16b, wt1_8x16b); + y2_0_8x16b = _mm_mullo_epi16(y2_0_8x16b, wt2_8x16b); + y1_2_8x16b = _mm_mullo_epi16(y1_2_8x16b, wt1_8x16b); + y2_2_8x16b = _mm_mullo_epi16(y2_2_8x16b, wt2_8x16b); + + y1_0_8x16b = _mm_adds_epi16(y1_0_8x16b, y2_0_8x16b); + y1_2_8x16b = _mm_adds_epi16(y1_2_8x16b, y2_2_8x16b); + + y1_0_8x16b = _mm_adds_epi16(round_8x16b, y1_0_8x16b); + y1_2_8x16b = _mm_adds_epi16(round_8x16b, y1_2_8x16b); + + y1_0_8x16b = _mm_srai_epi16(y1_0_8x16b, shft); + y1_2_8x16b = _mm_srai_epi16(y1_2_8x16b, shft); + + y1_0_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0_8x16b); + y1_2_8x16b = _mm_adds_epi16(ofst_8x16b, y1_2_8x16b); + + y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_2_8x16b); + y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 4); + y1_2_16x8b = _mm_srli_si128(y1_0_16x8b, 8); + y1_3_16x8b = _mm_srli_si128(y1_0_16x8b, 12); + + _mm_maskmoveu_si128(y1_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst); + _mm_maskmoveu_si128(y1_1_16x8b, mask_ll4B_16x8b, + (char*)(pu1_dst + dst_strd)); + _mm_maskmoveu_si128(y1_2_16x8b, mask_ll4B_16x8b, + (char*)(pu1_dst + (dst_strd << 1))); + 
_mm_maskmoveu_si128(y1_3_16x8b, mask_ll4B_16x8b, + (char*)(pu1_dst + dst_strd * 3)); + + ht -= 4; + pu1_src1 += src_strd1 << 2; + pu1_src2 += src_strd2 << 2; + pu1_dst += dst_strd << 2; + } + while(ht > 0); + } + else if(wd == 8) + { + __m128i y1_2_16x8b, y1_3_16x8b; + __m128i y2_2_16x8b, y2_3_16x8b; + + __m128i y1_0_8x16b, y1_1_8x16b, y1_2_8x16b, y1_3_8x16b; + __m128i y2_0_8x16b, y2_1_8x16b, y2_2_8x16b, y2_3_8x16b; + + do + { + y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1); + y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1)); + y1_2_16x8b = _mm_loadl_epi64( + (__m128i *)(pu1_src1 + (src_strd1 << 1))); + y1_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1 * 3)); + + y2_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2); + y2_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2)); + y2_2_16x8b = _mm_loadl_epi64( + (__m128i *)(pu1_src2 + (src_strd2 << 1))); + y2_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2 * 3)); + + y1_0_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b); + y1_1_8x16b = _mm_cvtepu8_epi16(y1_1_16x8b); + y1_2_8x16b = _mm_cvtepu8_epi16(y1_2_16x8b); + y1_3_8x16b = _mm_cvtepu8_epi16(y1_3_16x8b); + + y2_0_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b); + y2_1_8x16b = _mm_cvtepu8_epi16(y2_1_16x8b); + y2_2_8x16b = _mm_cvtepu8_epi16(y2_2_16x8b); + y2_3_8x16b = _mm_cvtepu8_epi16(y2_3_16x8b); + + y1_0_8x16b = _mm_mullo_epi16(y1_0_8x16b, wt1_8x16b); + y2_0_8x16b = _mm_mullo_epi16(y2_0_8x16b, wt2_8x16b); + y1_1_8x16b = _mm_mullo_epi16(y1_1_8x16b, wt1_8x16b); + y2_1_8x16b = _mm_mullo_epi16(y2_1_8x16b, wt2_8x16b); + + y1_2_8x16b = _mm_mullo_epi16(y1_2_8x16b, wt1_8x16b); + y2_2_8x16b = _mm_mullo_epi16(y2_2_8x16b, wt2_8x16b); + y1_3_8x16b = _mm_mullo_epi16(y1_3_8x16b, wt1_8x16b); + y2_3_8x16b = _mm_mullo_epi16(y2_3_8x16b, wt2_8x16b); + + y1_0_8x16b = _mm_adds_epi16(y1_0_8x16b, y2_0_8x16b); + y1_1_8x16b = _mm_adds_epi16(y1_1_8x16b, y2_1_8x16b); + y1_2_8x16b = _mm_adds_epi16(y1_2_8x16b, y2_2_8x16b); + y1_3_8x16b = _mm_adds_epi16(y1_3_8x16b, 
y2_3_8x16b); + + y1_0_8x16b = _mm_adds_epi16(round_8x16b, y1_0_8x16b); + y1_1_8x16b = _mm_adds_epi16(round_8x16b, y1_1_8x16b); + y1_2_8x16b = _mm_adds_epi16(round_8x16b, y1_2_8x16b); + y1_3_8x16b = _mm_adds_epi16(round_8x16b, y1_3_8x16b); + + y1_0_8x16b = _mm_srai_epi16(y1_0_8x16b, shft); + y1_1_8x16b = _mm_srai_epi16(y1_1_8x16b, shft); + y1_2_8x16b = _mm_srai_epi16(y1_2_8x16b, shft); + y1_3_8x16b = _mm_srai_epi16(y1_3_8x16b, shft); + + y1_0_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0_8x16b); + y1_1_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1_8x16b); + y1_2_8x16b = _mm_adds_epi16(ofst_8x16b, y1_2_8x16b); + y1_3_8x16b = _mm_adds_epi16(ofst_8x16b, y1_3_8x16b); + + y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_1_8x16b); + y1_2_16x8b = _mm_packus_epi16(y1_2_8x16b, y1_3_8x16b); + y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 8); + y1_3_16x8b = _mm_srli_si128(y1_2_16x8b, 8); + + _mm_storel_epi64((__m128i *)pu1_dst, y1_0_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y1_1_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd << 1)), y1_2_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd * 3), y1_3_16x8b); + + ht -= 4; + pu1_src1 += src_strd1 << 2; + pu1_src2 += src_strd2 << 2; + pu1_dst += dst_strd << 2; + } + while(ht > 0); + } + else // wd == 16 + { + __m128i y1_0L_8x16b, y1_0H_8x16b, y1_1L_8x16b, y1_1H_8x16b; + __m128i y2_0L_8x16b, y2_0H_8x16b, y2_1L_8x16b, y2_1H_8x16b; + + __m128i zero_16x8b; + zero_16x8b = _mm_set1_epi8(0); + + do + { + y1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1); + y1_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1)); + y2_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2); + y2_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2)); + + y1_0L_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b); + y1_0H_8x16b = _mm_unpackhi_epi8(y1_0_16x8b, zero_16x8b); + y1_1L_8x16b = _mm_cvtepu8_epi16(y1_1_16x8b); + y1_1H_8x16b = _mm_unpackhi_epi8(y1_1_16x8b, zero_16x8b); + + y2_0L_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b); + y2_0H_8x16b = 
_mm_unpackhi_epi8(y2_0_16x8b, zero_16x8b); + y2_1L_8x16b = _mm_cvtepu8_epi16(y2_1_16x8b); + y2_1H_8x16b = _mm_unpackhi_epi8(y2_1_16x8b, zero_16x8b); + + y1_0L_8x16b = _mm_mullo_epi16(y1_0L_8x16b, wt1_8x16b); + y1_0H_8x16b = _mm_mullo_epi16(y1_0H_8x16b, wt1_8x16b); + y1_1L_8x16b = _mm_mullo_epi16(y1_1L_8x16b, wt1_8x16b); + y1_1H_8x16b = _mm_mullo_epi16(y1_1H_8x16b, wt1_8x16b); + + y2_0L_8x16b = _mm_mullo_epi16(y2_0L_8x16b, wt2_8x16b); + y2_0H_8x16b = _mm_mullo_epi16(y2_0H_8x16b, wt2_8x16b); + y2_1L_8x16b = _mm_mullo_epi16(y2_1L_8x16b, wt2_8x16b); + y2_1H_8x16b = _mm_mullo_epi16(y2_1H_8x16b, wt2_8x16b); + + y1_0L_8x16b = _mm_adds_epi16(y1_0L_8x16b, y2_0L_8x16b); + y1_0H_8x16b = _mm_adds_epi16(y1_0H_8x16b, y2_0H_8x16b); + y1_1L_8x16b = _mm_adds_epi16(y1_1L_8x16b, y2_1L_8x16b); + y1_1H_8x16b = _mm_adds_epi16(y1_1H_8x16b, y2_1H_8x16b); + + y1_0L_8x16b = _mm_adds_epi16(round_8x16b, y1_0L_8x16b); + y1_0H_8x16b = _mm_adds_epi16(round_8x16b, y1_0H_8x16b); + y1_1L_8x16b = _mm_adds_epi16(round_8x16b, y1_1L_8x16b); + y1_1H_8x16b = _mm_adds_epi16(round_8x16b, y1_1H_8x16b); + + y1_0L_8x16b = _mm_srai_epi16(y1_0L_8x16b, shft); + y1_0H_8x16b = _mm_srai_epi16(y1_0H_8x16b, shft); + y1_1L_8x16b = _mm_srai_epi16(y1_1L_8x16b, shft); + y1_1H_8x16b = _mm_srai_epi16(y1_1H_8x16b, shft); + + y1_0L_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0L_8x16b); + y1_0H_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0H_8x16b); + y1_1L_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1L_8x16b); + y1_1H_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1H_8x16b); + + y1_0_16x8b = _mm_packus_epi16(y1_0L_8x16b, y1_0H_8x16b); + y1_1_16x8b = _mm_packus_epi16(y1_1L_8x16b, y1_1H_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, y1_0_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y1_1_16x8b); + + ht -= 2; + pu1_src1 += src_strd1 << 1; + pu1_src2 += src_strd2 << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : 
ih264_weighted_bi_pred_chroma_sse42 */ +/* */ +/* Description : This function performs the weighted biprediction as */ +/* described in sec 8.4.2.3.2 titled "Weighted sample */ +/* prediction process" for chroma. The function gets two */ +/* ht x wd blocks, weights them, adds them, rounds off the */ +/* sum, offsets it, saturates it to unsigned 8-bit and */ +/* stores it in the destination block. (ht,wd) can be */ +/* (2,2), (4,2), (2,4), (4,4), (8,4), (4,8) or (8,8). */ +/* */ +/* Inputs : pu1_src1 - Pointer to source 1 */ +/* pu1_src2 - Pointer to source 2 */ +/* pu1_dst - Pointer to destination */ +/* src_strd1 - stride for source 1 */ +/* src_strd2 - stride for source 2 */ +/* dst_strd2 - stride for destination */ +/* log_wd - number of bits to be rounded off */ +/* wt1 - weight values for u and v in source 1 */ +/* wt2 - weight values for u and v in source 2 */ +/* ofst1 - offset value for u and v in source 1 */ +/* ofst2 - offset value for u and v in source 2 */ +/* ht - height of the block */ +/* wd - width of the block */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 04 02 2015 Kaushik Initial Version */ +/* Senthoor */ +/* */ +/*****************************************************************************/ +void ih264_weighted_bi_pred_chroma_sse42(UWORD8 *pu1_src1, + UWORD8 *pu1_src2, + UWORD8 *pu1_dst, + WORD32 src_strd1, + WORD32 src_strd2, + WORD32 dst_strd, + WORD32 log_wd, + WORD32 wt1, + WORD32 wt2, + WORD32 ofst1, + WORD32 ofst2, + WORD32 ht, + WORD32 wd) +{ + __m128i y1_0_16x8b, y1_1_16x8b; + __m128i y2_0_16x8b, y2_1_16x8b; + + __m128i wt1_8x16b, wt2_8x16b; + __m128i ofst_8x16b, round_8x16b; + + WORD32 ofst1_u, ofst2_u, ofst_u; + WORD32 ofst1_v, ofst2_v, ofst_v; + WORD32 round_val, shft, ofst_val; + + round_val = 1 << log_wd; + shft = log_wd + 1; + + ofst1_u = (WORD8)(ofst1 & 0xff); + ofst1_v = (WORD8)(ofst1 >> 8); + ofst2_u = (WORD8)(ofst2 & 0xff); + ofst2_v = (WORD8)(ofst2 >> 8); + + 
wt1_8x16b = _mm_set1_epi32(wt1); + wt2_8x16b = _mm_set1_epi32(wt2); + + ofst_u = (ofst1_u + ofst2_u + 1) >> 1; + ofst_v = (ofst1_v + ofst2_v + 1) >> 1; + ofst_val = (ofst_u & 0xffff) | (ofst_v << 16); + + round_8x16b = _mm_set1_epi16(round_val); + ofst_8x16b = _mm_set1_epi32(ofst_val); + + if(wd == 2) + { + __m128i y1_0_8x16b, y2_0_8x16b; + + __m128i mask_full_16x8b, mask_ll4B_16x8b; + + mask_full_16x8b = _mm_set1_epi8(0xff); + mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12); + + do + { + y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1); + y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1)); + + y2_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2); + y2_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2)); + + y1_0_16x8b = _mm_unpacklo_epi32(y1_0_16x8b, y1_1_16x8b); + y2_0_16x8b = _mm_unpacklo_epi32(y2_0_16x8b, y2_1_16x8b); + + y1_0_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b); + y2_0_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b); + + y1_0_8x16b = _mm_mullo_epi16(y1_0_8x16b, wt1_8x16b); + y2_0_8x16b = _mm_mullo_epi16(y2_0_8x16b, wt2_8x16b); + + y1_0_8x16b = _mm_adds_epi16(y1_0_8x16b, y2_0_8x16b); + y1_0_8x16b = _mm_adds_epi16(round_8x16b, y1_0_8x16b); + + y1_0_8x16b = _mm_srai_epi16(y1_0_8x16b, shft); + y1_0_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0_8x16b); + + y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_0_8x16b); + y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 4); + + _mm_maskmoveu_si128(y1_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst); + _mm_maskmoveu_si128(y1_1_16x8b, mask_ll4B_16x8b, + (char*)(pu1_dst + dst_strd)); + + ht -= 2; + pu1_src1 += src_strd1 << 1; + pu1_src2 += src_strd2 << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + else if(wd == 4) + { + __m128i y1_0_8x16b, y1_1_8x16b; + __m128i y2_0_8x16b, y2_1_8x16b; + + do + { + y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1); + y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1)); + + y2_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2); + y2_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + 
src_strd2)); + + y1_0_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b); + y1_1_8x16b = _mm_cvtepu8_epi16(y1_1_16x8b); + + y2_0_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b); + y2_1_8x16b = _mm_cvtepu8_epi16(y2_1_16x8b); + + y1_0_8x16b = _mm_mullo_epi16(y1_0_8x16b, wt1_8x16b); + y2_0_8x16b = _mm_mullo_epi16(y2_0_8x16b, wt2_8x16b); + y1_1_8x16b = _mm_mullo_epi16(y1_1_8x16b, wt1_8x16b); + y2_1_8x16b = _mm_mullo_epi16(y2_1_8x16b, wt2_8x16b); + + y1_0_8x16b = _mm_adds_epi16(y1_0_8x16b, y2_0_8x16b); + y1_1_8x16b = _mm_adds_epi16(y1_1_8x16b, y2_1_8x16b); + + y1_0_8x16b = _mm_adds_epi16(round_8x16b, y1_0_8x16b); + y1_1_8x16b = _mm_adds_epi16(round_8x16b, y1_1_8x16b); + + y1_0_8x16b = _mm_srai_epi16(y1_0_8x16b, shft); + y1_1_8x16b = _mm_srai_epi16(y1_1_8x16b, shft); + + y1_0_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0_8x16b); + y1_1_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1_8x16b); + + y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_1_8x16b); + y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 8); + + _mm_storel_epi64((__m128i *)pu1_dst, y1_0_16x8b); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y1_1_16x8b); + + ht -= 2; + pu1_src1 += src_strd1 << 1; + pu1_src2 += src_strd2 << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } + else // wd == 8 + { + __m128i y1_0L_8x16b, y1_0H_8x16b, y1_1L_8x16b, y1_1H_8x16b; + __m128i y2_0L_8x16b, y2_0H_8x16b, y2_1L_8x16b, y2_1H_8x16b; + + __m128i zero_16x8b; + zero_16x8b = _mm_set1_epi8(0); + + do + { + y1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1); + y1_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1)); + y2_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2); + y2_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2)); + + y1_0L_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b); + y1_0H_8x16b = _mm_unpackhi_epi8(y1_0_16x8b, zero_16x8b); + y1_1L_8x16b = _mm_cvtepu8_epi16(y1_1_16x8b); + y1_1H_8x16b = _mm_unpackhi_epi8(y1_1_16x8b, zero_16x8b); + + y2_0L_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b); + y2_0H_8x16b = _mm_unpackhi_epi8(y2_0_16x8b, zero_16x8b); + 
y2_1L_8x16b = _mm_cvtepu8_epi16(y2_1_16x8b); + y2_1H_8x16b = _mm_unpackhi_epi8(y2_1_16x8b, zero_16x8b); + + y1_0L_8x16b = _mm_mullo_epi16(y1_0L_8x16b, wt1_8x16b); + y1_0H_8x16b = _mm_mullo_epi16(y1_0H_8x16b, wt1_8x16b); + y1_1L_8x16b = _mm_mullo_epi16(y1_1L_8x16b, wt1_8x16b); + y1_1H_8x16b = _mm_mullo_epi16(y1_1H_8x16b, wt1_8x16b); + + y2_0L_8x16b = _mm_mullo_epi16(y2_0L_8x16b, wt2_8x16b); + y2_0H_8x16b = _mm_mullo_epi16(y2_0H_8x16b, wt2_8x16b); + y2_1L_8x16b = _mm_mullo_epi16(y2_1L_8x16b, wt2_8x16b); + y2_1H_8x16b = _mm_mullo_epi16(y2_1H_8x16b, wt2_8x16b); + + y1_0L_8x16b = _mm_adds_epi16(y1_0L_8x16b, y2_0L_8x16b); + y1_0H_8x16b = _mm_adds_epi16(y1_0H_8x16b, y2_0H_8x16b); + y1_1L_8x16b = _mm_adds_epi16(y1_1L_8x16b, y2_1L_8x16b); + y1_1H_8x16b = _mm_adds_epi16(y1_1H_8x16b, y2_1H_8x16b); + + y1_0L_8x16b = _mm_adds_epi16(round_8x16b, y1_0L_8x16b); + y1_0H_8x16b = _mm_adds_epi16(round_8x16b, y1_0H_8x16b); + y1_1L_8x16b = _mm_adds_epi16(round_8x16b, y1_1L_8x16b); + y1_1H_8x16b = _mm_adds_epi16(round_8x16b, y1_1H_8x16b); + + y1_0L_8x16b = _mm_srai_epi16(y1_0L_8x16b, shft); + y1_0H_8x16b = _mm_srai_epi16(y1_0H_8x16b, shft); + y1_1L_8x16b = _mm_srai_epi16(y1_1L_8x16b, shft); + y1_1H_8x16b = _mm_srai_epi16(y1_1H_8x16b, shft); + + y1_0L_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0L_8x16b); + y1_0H_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0H_8x16b); + y1_1L_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1L_8x16b); + y1_1H_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1H_8x16b); + + y1_0_16x8b = _mm_packus_epi16(y1_0L_8x16b, y1_0H_8x16b); + y1_1_16x8b = _mm_packus_epi16(y1_1L_8x16b, y1_1H_8x16b); + + _mm_storeu_si128((__m128i *)pu1_dst, y1_0_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y1_1_16x8b); + + ht -= 2; + pu1_src1 += src_strd1 << 1; + pu1_src2 += src_strd2 << 1; + pu1_dst += dst_strd << 1; + } + while(ht > 0); + } +} diff --git a/decoder.arm.mk b/decoder.arm.mk new file mode 100755 index 0000000..80093e4 --- /dev/null +++ b/decoder.arm.mk @@ -0,0 +1,44 @@ +libavcd_inc_dir_arm 
+= $(LOCAL_PATH)/decoder/arm +libavcd_inc_dir_arm += $(LOCAL_PATH)/common/arm + +libavcd_srcs_c_arm += decoder/arm/ih264d_function_selector.c +libavcd_cflags_arm += -DDISABLE_NEONINTR -DARM -DARMGCC + +#LOCAL_ARM_MODE := arm + +ifeq ($(ARCH_ARM_HAVE_NEON),true) +libavcd_srcs_c_arm += decoder/arm/ih264d_function_selector_a9q.c + +libavcd_srcs_asm_arm += common/arm/ih264_intra_pred_chroma_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_intra_pred_luma_16x16_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_intra_pred_luma_4x4_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_intra_pred_luma_8x8_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_inter_pred_chroma_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_inter_pred_filters_luma_horz_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_inter_pred_filters_luma_vert_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_inter_pred_luma_copy_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_default_weighted_pred_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_weighted_pred_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_weighted_bi_pred_a9q.s +libavcd_srcs_asm_arm += common/arm/ih264_deblk_chroma_a9.s +libavcd_srcs_asm_arm += common/arm/ih264_deblk_luma_a9.s +libavcd_srcs_asm_arm += common/arm/ih264_padding_neon.s +libavcd_srcs_asm_arm += common/arm/ih264_iquant_itrans_recon_a9.s +libavcd_srcs_asm_arm += common/arm/ih264_iquant_itrans_recon_dc_a9.s +libavcd_srcs_asm_arm += common/arm/ih264_ihadamard_scaling_a9.s +libavcd_srcs_asm_arm += 
common/arm/ih264_arm_memory_barrier.s + +libavcd_cflags_arm += -DDEFAULT_ARCH=D_ARCH_ARM_A9Q +else +libavcd_cflags_arm += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON +endif + +LOCAL_SRC_FILES_arm += $(libavcd_srcs_c_arm) $(libavcd_srcs_asm_arm) +LOCAL_C_INCLUDES_arm += $(libavcd_inc_dir_arm) +LOCAL_CFLAGS_arm += $(libavcd_cflags_arm) diff --git a/decoder.arm64.mk b/decoder.arm64.mk new file mode 100755 index 0000000..7a06163 --- /dev/null +++ b/decoder.arm64.mk @@ -0,0 +1,46 @@ +libavcd_cflags_arm64 += -DARMV8 +libavcd_cflags_arm64 += -DDISABLE_NEONINTR -DARM -DARMGCC + +libavcd_inc_dir_arm64 += $(LOCAL_PATH)/decoder/arm +libavcd_inc_dir_arm64 += $(LOCAL_PATH)/common/armv8 + +libavcd_srcs_c_arm64 += decoder/arm/ih264d_function_selector.c + +ifeq ($(ARCH_ARM_HAVE_NEON),true) +libavcd_srcs_c_arm64 += decoder/arm/ih264d_function_selector_av8.c + +libavcd_srcs_asm_arm64 += common/armv8/ih264_intra_pred_chroma_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_16x16_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_4x4_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_chroma_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_filters_luma_horz_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_filters_luma_vert_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_copy_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_default_weighted_pred_av8.s +libavcd_srcs_asm_arm64 += 
common/armv8/ih264_weighted_pred_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_weighted_bi_pred_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_deblk_chroma_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_deblk_luma_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_padding_neon_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_iquant_itrans_recon_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_iquant_itrans_recon_dc_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_ihadamard_scaling_av8.s +libavcd_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_8x8_av8.s + +libavcd_cflags_arm64 += -DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC +else +libavcd_cflags_arm64 += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON +endif + + + + +LOCAL_SRC_FILES_arm64 += $(libavcd_srcs_c_arm64) $(libavcd_srcs_asm_arm64) +LOCAL_C_INCLUDES_arm64 += $(libavcd_inc_dir_arm64) +LOCAL_CFLAGS_arm64 += $(libavcd_cflags_arm64) diff --git a/decoder.mips.mk b/decoder.mips.mk new file mode 100755 index 0000000..3d00395 --- /dev/null +++ b/decoder.mips.mk @@ -0,0 +1,6 @@ +libavcd_inc_dir_mips += $(LOCAL_PATH)/common/mips + +libavcd_srcs_c_mips += decoder/mips/ih264d_function_selector.c + +LOCAL_C_INCLUDES_mips += $(libavcd_inc_dir_mips) +LOCAL_SRC_FILES_mips += $(libavcd_srcs_c_mips) diff --git a/decoder.mips64.mk b/decoder.mips64.mk new file mode 100755 index 0000000..ffcb882 --- /dev/null +++ b/decoder.mips64.mk @@ -0,0 +1,6 @@ +libavcd_inc_dir_mips64 += $(LOCAL_PATH)/common/mips + +libavcd_srcs_c_mips64 += decoder/mips/ih264d_function_selector.c + +LOCAL_C_INCLUDES_mips64 += $(libavcd_inc_dir_mips64) +LOCAL_SRC_FILES_mips64 += $(libavcd_srcs_c_mips64) diff --git a/decoder.mk b/decoder.mk new file mode 100755 index 0000000..d4fa0be --- /dev/null +++ b/decoder.mk @@ -0,0 +1,76 @@ +LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +libavcd_source_dir := $(LOCAL_PATH) + +## Arch-common settings +LOCAL_MODULE := libavcdec +#LOCAL_32_BIT_ONLY := true + +LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+ +LOCAL_CFLAGS += -D_LIB -DMULTICORE -fPIC -UAPPLY_CONCEALMENT -UINSERT_LOGO -DTHREAD_QUAD_CORE +LOCAL_CFLAGS += -O3 -DANDROID + +LOCAL_C_INCLUDES := $(LOCAL_PATH)/decoder $(LOCAL_PATH)/common + +libavcd_srcs_c += common/ih264_buf_mgr.c +libavcd_srcs_c += common/ih264_disp_mgr.c +libavcd_srcs_c += common/ih264_inter_pred_filters.c +libavcd_srcs_c += common/ih264_luma_intra_pred_filters.c +libavcd_srcs_c += common/ih264_chroma_intra_pred_filters.c +libavcd_srcs_c += common/ih264_padding.c +libavcd_srcs_c += common/ih264_mem_fns.c +libavcd_srcs_c += common/ih264_deblk_edge_filters.c +libavcd_srcs_c += common/ih264_iquant_itrans_recon.c +libavcd_srcs_c += common/ih264_ihadamard_scaling.c +libavcd_srcs_c += common/ih264_weighted_pred.c + +libavcd_srcs_c += common/ithread.c + +libavcd_srcs_c += decoder/ih264d_cabac.c +libavcd_srcs_c += decoder/ih264d_parse_mb_header.c +libavcd_srcs_c += decoder/ih264d_parse_cabac.c +libavcd_srcs_c += decoder/ih264d_process_intra_mb.c +libavcd_srcs_c += decoder/ih264d_inter_pred.c +libavcd_srcs_c += decoder/ih264d_parse_bslice.c +libavcd_srcs_c += decoder/ih264d_parse_pslice.c +libavcd_srcs_c += decoder/ih264d_parse_islice.c +libavcd_srcs_c += decoder/ih264d_cabac_init_tables.c +libavcd_srcs_c += decoder/ih264d_debug.c +libavcd_srcs_c += decoder/ih264d_bitstrm.c +libavcd_srcs_c += decoder/ih264d_compute_bs.c +libavcd_srcs_c += decoder/ih264d_deblocking.c +libavcd_srcs_c += decoder/ih264d_parse_headers.c +libavcd_srcs_c += decoder/ih264d_mb_utils.c +libavcd_srcs_c += decoder/ih264d_mvpred.c +libavcd_srcs_c += decoder/ih264d_utils.c +libavcd_srcs_c += decoder/ih264d_process_bslice.c +libavcd_srcs_c += decoder/ih264d_process_pslice.c +libavcd_srcs_c += decoder/ih264d_parse_slice.c +libavcd_srcs_c += decoder/ih264d_quant_scaling.c +libavcd_srcs_c += decoder/ih264d_parse_cavlc.c +libavcd_srcs_c += decoder/ih264d_dpb_mgr.c +libavcd_srcs_c += decoder/ih264d_nal.c +libavcd_srcs_c += decoder/ih264d_sei.c +libavcd_srcs_c += 
decoder/ih264d_tables.c +libavcd_srcs_c += decoder/ih264d_vui.c +libavcd_srcs_c += decoder/ih264d_format_conv.c +libavcd_srcs_c += decoder/ih264d_thread_parse_decode.c +libavcd_srcs_c += decoder/ih264d_api.c +libavcd_srcs_c += decoder/ih264d_thread_compute_bs.c +libavcd_srcs_c += decoder/ih264d_function_selector_generic.c + + +LOCAL_SRC_FILES := $(libavcd_srcs_c) $(libavcd_srcs_asm) + + +# Load the arch-specific settings +include $(LOCAL_PATH)/decoder.arm.mk +include $(LOCAL_PATH)/decoder.arm64.mk +include $(LOCAL_PATH)/decoder.x86.mk +include $(LOCAL_PATH)/decoder.x86_64.mk +include $(LOCAL_PATH)/decoder.mips.mk +include $(LOCAL_PATH)/decoder.mips64.mk + +include $(BUILD_STATIC_LIBRARY) diff --git a/decoder.x86.mk b/decoder.x86.mk new file mode 100755 index 0000000..309bc23 --- /dev/null +++ b/decoder.x86.mk @@ -0,0 +1,26 @@ +libavcd_cflags_x86 += -DX86 -DDISABLE_AVX2 -m32 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42 + +libavcd_inc_dir_x86 += $(LOCAL_PATH)/decoder/x86 +libavcd_inc_dir_x86 += $(LOCAL_PATH)/common/x86 + +libavcd_srcs_c_x86 += decoder/x86/ih264d_function_selector.c +libavcd_srcs_c_x86 += decoder/x86/ih264d_function_selector_sse42.c +libavcd_srcs_c_x86 += decoder/x86/ih264d_function_selector_ssse3.c + +libavcd_srcs_c_x86 += common/x86/ih264_inter_pred_filters_ssse3.c +libavcd_srcs_c_x86 += common/x86/ih264_deblk_luma_ssse3.c +libavcd_srcs_c_x86 += common/x86/ih264_deblk_chroma_ssse3.c +libavcd_srcs_c_x86 += common/x86/ih264_padding_ssse3.c +libavcd_srcs_c_x86 += common/x86/ih264_mem_fns_ssse3.c +libavcd_srcs_c_x86 += common/x86/ih264_iquant_itrans_recon_dc_ssse3.c +libavcd_srcs_c_x86 += common/x86/ih264_iquant_itrans_recon_ssse3.c +libavcd_srcs_c_x86 += common/x86/ih264_luma_intra_pred_filters_ssse3.c +libavcd_srcs_c_x86 += common/x86/ih264_chroma_intra_pred_filters_ssse3.c +libavcd_srcs_c_x86 += common/x86/ih264_iquant_itrans_recon_sse42.c +libavcd_srcs_c_x86 += common/x86/ih264_weighted_pred_sse42.c +libavcd_srcs_c_x86 += 
common/x86/ih264_ihadamard_scaling_sse42.c + +LOCAL_SRC_FILES_x86 += $(libavcd_srcs_c_x86) $(libavcd_srcs_asm_x86) +LOCAL_C_INCLUDES_x86 += $(libavcd_inc_dir_x86) +LOCAL_CFLAGS_x86 += $(libavcd_cflags_x86) + diff --git a/decoder.x86_64.mk b/decoder.x86_64.mk new file mode 100755 index 0000000..1b018f7 --- /dev/null +++ b/decoder.x86_64.mk @@ -0,0 +1,30 @@ +libavcd_cflags_x86_64 += -DX86 -DDISABLE_AVX2 -m64 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42 +libavcd_cflags_x86_64 += -UAPPLY_CONCEALMENT -ULOGO_EN -DTHREAD_QUAD_CORE + +libavcd_inc_dir_x86_64 += $(LOCAL_PATH)/decoder/x86 +libavcd_inc_dir_x86_64 += $(LOCAL_PATH)/common/x86 + +libavcd_srcs_c_x86_64 += decoder/x86/ih264d_function_selector.c +libavcd_srcs_c_x86_64 += decoder/x86/ih264d_function_selector_sse42.c +libavcd_srcs_c_x86_64 += decoder/x86/ih264d_function_selector_ssse3.c + +libavcd_srcs_c_x86_64 += common/x86/ih264_inter_pred_filters_ssse3.c +libavcd_srcs_c_x86_64 += common/x86/ih264_deblk_luma_ssse3.c +libavcd_srcs_c_x86_64 += common/x86/ih264_deblk_chroma_ssse3.c +libavcd_srcs_c_x86_64 += common/x86/ih264_padding_ssse3.c +libavcd_srcs_c_x86_64 += common/x86/ih264_mem_fns_ssse3.c +libavcd_srcs_c_x86_64 += common/x86/ih264_iquant_itrans_recon_dc_ssse3.c +libavcd_srcs_c_x86_64 += common/x86/ih264_iquant_itrans_recon_ssse3.c +libavcd_srcs_c_x86_64 += common/x86/ih264_luma_intra_pred_filters_ssse3.c +libavcd_srcs_c_x86_64 += common/x86/ih264_chroma_intra_pred_filters_ssse3.c +libavcd_srcs_c_x86_64 += common/x86/ih264_iquant_itrans_recon_sse42.c +libavcd_srcs_c_x86_64 += common/x86/ih264_weighted_pred_sse42.c +libavcd_srcs_c_x86_64 += common/x86/ih264_ihadamard_scaling_sse42.c + + +LOCAL_SRC_FILES_x86_64 += $(libavcd_srcs_c_x86_64) $(libavcd_srcs_asm_x86_64) +LOCAL_C_INCLUDES_x86_64 += $(libavcd_inc_dir_x86_64) +LOCAL_CFLAGS_x86_64 += $(libavcd_cflags_x86_64) + + + diff --git a/decoder/arm/ih264d_function_selector.c b/decoder/arm/ih264d_function_selector.c new file mode 100755 index 0000000..1aa0c43 
--- /dev/null +++ b/decoder/arm/ih264d_function_selector.c @@ -0,0 +1,101 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264d_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in the H.264 decoder +* +* @author +* Naveen +* +* @par List of Functions: +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "ih264_typedefs.h" +#include "iv.h" +#include "ivd.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264_error.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" + +#include "ih264d_structs.h" +#include "ih264d_function_selector.h" + +void ih264d_init_function_ptr(dec_struct_t *ps_codec) +{ + + IVD_ARCH_T e_proc_arch =
ps_codec->e_processor_arch; + ih264d_init_function_ptr_generic(ps_codec); + switch(e_proc_arch) + { + case ARCH_ARM_NONEON: + ih264d_init_function_ptr_generic(ps_codec); + break; +#ifndef ARMV8 + case ARCH_ARM_A5: + case ARCH_ARM_A7: + case ARCH_ARM_A9: + case ARCH_ARM_A15: + case ARCH_ARM_A9Q: + default: + ih264d_init_function_ptr_a9q(ps_codec); + break; +#else /* ARMV8 */ + case ARCH_ARMV8_GENERIC: + default: + ih264d_init_function_ptr_av8(ps_codec); + break; +#endif /* ARMV8 */ + } +} + +void ih264d_init_arch(dec_struct_t *ps_codec) +{ +#ifdef DEFAULT_ARCH +#if DEFAULT_ARCH == D_ARCH_ARM_NONEON + ps_codec->e_processor_arch = ARCH_ARM_NONEON; +#elif DEFAULT_ARCH == D_ARCH_ARMV8_GENERIC + ps_codec->e_processor_arch = ARCH_ARMV8_GENERIC; +#elif DEFAULT_ARCH == D_ARCH_ARM_NEONINTR + ps_codec->e_processor_arch = ARCH_ARM_NEONINTR; +#else + ps_codec->e_processor_arch = ARCH_ARM_A9Q; +#endif +#else + ps_codec->e_processor_arch = ARCH_ARM_A9Q; +#endif + +} diff --git a/decoder/arm/ih264d_function_selector_a9q.c b/decoder/arm/ih264d_function_selector_a9q.c new file mode 100755 index 0000000..0cf8581 --- /dev/null +++ b/decoder/arm/ih264d_function_selector_a9q.c @@ -0,0 +1,200 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264d_function_selector_a9q.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* Ittiam +* +* @par List of Functions: +* - ih264d_init_function_ptr_a9q +* +* @remarks +* None +* +******************************************************************************* +*/ + + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv.h" +#include "ivd.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264_error.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" + +#include "ih264d_structs.h" + + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context based on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264d_init_function_ptr_a9q(dec_struct_t *ps_codec) +{ + + /* Init function pointers for intra pred leaf level functions luma + * Intra 16x16 */ + ps_codec->apf_intra_pred_luma_16x16[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q; + ps_codec->apf_intra_pred_luma_16x16[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q; +
ps_codec->apf_intra_pred_luma_16x16[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q; + ps_codec->apf_intra_pred_luma_16x16[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 4x4 */ + ps_codec->apf_intra_pred_luma_4x4[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q; + ps_codec->apf_intra_pred_luma_4x4[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q; + ps_codec->apf_intra_pred_luma_4x4[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q; + ps_codec->apf_intra_pred_luma_4x4[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q; + ps_codec->apf_intra_pred_luma_4x4[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q; + ps_codec->apf_intra_pred_luma_4x4[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q; + ps_codec->apf_intra_pred_luma_4x4[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q; + ps_codec->apf_intra_pred_luma_4x4[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q; + ps_codec->apf_intra_pred_luma_4x4[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 8x8 */ + ps_codec->apf_intra_pred_luma_8x8[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q; + ps_codec->apf_intra_pred_luma_8x8[1] = ih264_intra_pred_luma_8x8_mode_horz_a9q; + ps_codec->apf_intra_pred_luma_8x8[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q; + ps_codec->apf_intra_pred_luma_8x8[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q; + ps_codec->apf_intra_pred_luma_8x8[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q; + ps_codec->apf_intra_pred_luma_8x8[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q; + ps_codec->apf_intra_pred_luma_8x8[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q; + ps_codec->apf_intra_pred_luma_8x8[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q; + ps_codec->apf_intra_pred_luma_8x8[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q; + + /* ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q does not handle all availibilities */ + ps_codec->pf_intra_pred_ref_filtering = 
ih264_intra_pred_luma_8x8_mode_ref_filtering; + + /* Init function pointers for intra pred leaf level functions chroma + * Intra 8x8 */ + ps_codec->apf_intra_pred_chroma[0] = ih264_intra_pred_chroma_8x8_mode_vert_a9q; + ps_codec->apf_intra_pred_chroma[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q; + /* ih264_intra_pred_chroma_8x8_mode_dc_a9q does not support interlaced clips, hence using C */ + ps_codec->apf_intra_pred_chroma[2] = ih264_intra_pred_chroma_8x8_mode_dc; + ps_codec->apf_intra_pred_chroma[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q; + + + ps_codec->pf_default_weighted_pred_luma = ih264_default_weighted_pred_luma_a9q; + ps_codec->pf_default_weighted_pred_chroma = ih264_default_weighted_pred_chroma_a9q; + ps_codec->pf_weighted_pred_luma = ih264_weighted_pred_luma_a9q; + ps_codec->pf_weighted_pred_chroma = ih264_weighted_pred_chroma_a9q; + ps_codec->pf_weighted_bi_pred_luma = ih264_weighted_bi_pred_luma_a9q; + ps_codec->pf_weighted_bi_pred_chroma = ih264_weighted_bi_pred_chroma_a9q; + + /* Padding Functions */ + ps_codec->pf_pad_top = ih264_pad_top_a9q; + ps_codec->pf_pad_bottom = ih264_pad_bottom; + + ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q; + ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q; + ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q; + ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q; + + ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4_a9; + ps_codec->pf_iquant_itrans_recon_luma_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_a9; + ps_codec->pf_iquant_itrans_recon_luma_8x8 = ih264_iquant_itrans_recon_8x8_a9; + ps_codec->pf_iquant_itrans_recon_luma_8x8_dc = ih264_iquant_itrans_recon_8x8_dc_a9; + ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9; + + + ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_a9; + ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_a9; + + /* Init fn ptr luma deblocking */ 
+ ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9; + ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9; + ps_codec->pf_deblk_luma_vert_bs4_mbaff = ih264_deblk_luma_vert_bs4_mbaff_a9; + ps_codec->pf_deblk_luma_vert_bslt4_mbaff = ih264_deblk_luma_vert_bslt4_mbaff_a9; + + ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9; + ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9; + + /* Init fn ptr chroma deblocking */ + ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9; + ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9; + ps_codec->pf_deblk_chroma_vert_bs4_mbaff = ih264_deblk_chroma_vert_bs4_mbaff_a9; + ps_codec->pf_deblk_chroma_vert_bslt4_mbaff = ih264_deblk_chroma_vert_bslt4_mbaff_a9; + + ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9; + ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9; + + + /* Inter pred leaf level functions */ + ps_codec->apf_inter_pred_luma[0] = ih264_inter_pred_luma_copy_a9q; + + ps_codec->apf_inter_pred_luma[1] = ih264_inter_pred_luma_horz_qpel_a9q; + ps_codec->apf_inter_pred_luma[2] = ih264_inter_pred_luma_horz_a9q; + ps_codec->apf_inter_pred_luma[3] = ih264_inter_pred_luma_horz_qpel_a9q; + ps_codec->apf_inter_pred_luma[4] = ih264_inter_pred_luma_vert_qpel_a9q; + + ps_codec->apf_inter_pred_luma[5] = ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q; + + ps_codec->apf_inter_pred_luma[6] = ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q; + + ps_codec->apf_inter_pred_luma[7] = ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q; + + ps_codec->apf_inter_pred_luma[8] = ih264_inter_pred_luma_vert_a9q; + ps_codec->apf_inter_pred_luma[9] = ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q; + ps_codec->apf_inter_pred_luma[10] = ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q; + ps_codec->apf_inter_pred_luma[11] = ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q; + ps_codec->apf_inter_pred_luma[12] = 
ih264_inter_pred_luma_vert_qpel_a9q; + ps_codec->apf_inter_pred_luma[13] = ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q; + ps_codec->apf_inter_pred_luma[14] = ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q; + ps_codec->apf_inter_pred_luma[15] = ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q; + + ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q; + + + return; +} diff --git a/decoder/arm/ih264d_function_selector_av8.c b/decoder/arm/ih264d_function_selector_av8.c new file mode 100755 index 0000000..5715ee0 --- /dev/null +++ b/decoder/arm/ih264d_function_selector_av8.c @@ -0,0 +1,191 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264d_function_selector_av8.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* Ittiam +* +* @par List of Functions: +* - ih264d_init_function_ptr_av8 +* +* @remarks +* None +* +******************************************************************************* +*/ + + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv.h" +#include "ivd.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264_error.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" + +#include "ih264d_structs.h" +#include "ih264d_function_selector.h" + + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context based on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264d_init_function_ptr_av8(dec_struct_t *ps_codec) +{ + /* Init function pointers for intra pred leaf level functions luma + * Intra 16x16 */ + ps_codec->apf_intra_pred_luma_16x16[0] = ih264_intra_pred_luma_16x16_mode_vert_av8; + ps_codec->apf_intra_pred_luma_16x16[1] = ih264_intra_pred_luma_16x16_mode_horz_av8; + ps_codec->apf_intra_pred_luma_16x16[2] = ih264_intra_pred_luma_16x16_mode_dc_av8; +
ps_codec->apf_intra_pred_luma_16x16[3] = ih264_intra_pred_luma_16x16_mode_plane_av8; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 4x4 */ + ps_codec->apf_intra_pred_luma_4x4[0] = ih264_intra_pred_luma_4x4_mode_vert_av8; + ps_codec->apf_intra_pred_luma_4x4[1] = ih264_intra_pred_luma_4x4_mode_horz_av8; + ps_codec->apf_intra_pred_luma_4x4[2] = ih264_intra_pred_luma_4x4_mode_dc_av8; + ps_codec->apf_intra_pred_luma_4x4[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_av8; + ps_codec->apf_intra_pred_luma_4x4[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_av8; + ps_codec->apf_intra_pred_luma_4x4[5] = ih264_intra_pred_luma_4x4_mode_vert_r_av8; + ps_codec->apf_intra_pred_luma_4x4[6] = ih264_intra_pred_luma_4x4_mode_horz_d_av8; + ps_codec->apf_intra_pred_luma_4x4[7] = ih264_intra_pred_luma_4x4_mode_vert_l_av8; + ps_codec->apf_intra_pred_luma_4x4[8] = ih264_intra_pred_luma_4x4_mode_horz_u_av8; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 8x8 */ + ps_codec->apf_intra_pred_luma_8x8[0] = ih264_intra_pred_luma_8x8_mode_vert_av8; + ps_codec->apf_intra_pred_luma_8x8[1] = ih264_intra_pred_luma_8x8_mode_horz_av8; + ps_codec->apf_intra_pred_luma_8x8[2] = ih264_intra_pred_luma_8x8_mode_dc_av8; + ps_codec->apf_intra_pred_luma_8x8[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_av8; + ps_codec->apf_intra_pred_luma_8x8[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_av8; + ps_codec->apf_intra_pred_luma_8x8[5] = ih264_intra_pred_luma_8x8_mode_vert_r_av8; + ps_codec->apf_intra_pred_luma_8x8[6] = ih264_intra_pred_luma_8x8_mode_horz_d_av8; + ps_codec->apf_intra_pred_luma_8x8[7] = ih264_intra_pred_luma_8x8_mode_vert_l_av8; + ps_codec->apf_intra_pred_luma_8x8[8] = ih264_intra_pred_luma_8x8_mode_horz_u_av8; + + ps_codec->pf_intra_pred_ref_filtering = ih264_intra_pred_luma_8x8_mode_ref_filtering; + + /* Init function pointers for intra pred leaf level functions chroma + * Intra 8x8 */ + ps_codec->apf_intra_pred_chroma[0] = 
ih264_intra_pred_chroma_8x8_mode_vert_av8; + ps_codec->apf_intra_pred_chroma[1] = ih264_intra_pred_chroma_8x8_mode_horz_av8; + /* ih264_intra_pred_chroma_8x8_mode_dc_av8 does not support interlaced clips, hence using C */ + ps_codec->apf_intra_pred_chroma[2] = ih264_intra_pred_chroma_8x8_mode_dc; + ps_codec->apf_intra_pred_chroma[3] = ih264_intra_pred_chroma_8x8_mode_plane_av8; + + ps_codec->pf_default_weighted_pred_luma = ih264_default_weighted_pred_luma_av8; + ps_codec->pf_default_weighted_pred_chroma = ih264_default_weighted_pred_chroma_av8; + ps_codec->pf_weighted_pred_luma = ih264_weighted_pred_luma_av8; + ps_codec->pf_weighted_pred_chroma = ih264_weighted_pred_chroma_av8; + ps_codec->pf_weighted_bi_pred_luma = ih264_weighted_bi_pred_luma_av8; + ps_codec->pf_weighted_bi_pred_chroma = ih264_weighted_bi_pred_chroma_av8; + + /* Padding Functions */ + ps_codec->pf_pad_top = ih264_pad_top_av8; + ps_codec->pf_pad_bottom = ih264_pad_bottom; + ps_codec->pf_pad_left_luma = ih264_pad_left_luma_av8; + ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_av8; + ps_codec->pf_pad_right_luma = ih264_pad_right_luma_av8; + ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_av8; + + + ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4_av8; + ps_codec->pf_iquant_itrans_recon_luma_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_av8; + ps_codec->pf_iquant_itrans_recon_luma_8x8 = ih264_iquant_itrans_recon_8x8_av8; + ps_codec->pf_iquant_itrans_recon_luma_8x8_dc = ih264_iquant_itrans_recon_8x8_dc_av8; + ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_av8; + ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_av8; + ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_av8; + + + /* Init fn ptr luma deblocking */ + ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_av8; + ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_av8; + 
ps_codec->pf_deblk_luma_vert_bs4_mbaff = ih264_deblk_luma_vert_bs4_mbaff; + ps_codec->pf_deblk_luma_vert_bslt4_mbaff = ih264_deblk_luma_vert_bslt4_mbaff; + + ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_av8; + ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_av8; + + /* Init fn ptr chroma deblocking */ + ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_av8; + ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_av8; + ps_codec->pf_deblk_chroma_vert_bs4_mbaff = ih264_deblk_chroma_vert_bs4_mbaff; + ps_codec->pf_deblk_chroma_vert_bslt4_mbaff = ih264_deblk_chroma_vert_bslt4_mbaff; + + ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_av8; + ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_av8; + + /* Inter pred leaf level functions */ + ps_codec->apf_inter_pred_luma[0] = ih264_inter_pred_luma_copy_av8; + ps_codec->apf_inter_pred_luma[1] = ih264_inter_pred_luma_horz_qpel_av8; + ps_codec->apf_inter_pred_luma[2] = ih264_inter_pred_luma_horz_av8; + ps_codec->apf_inter_pred_luma[3] = ih264_inter_pred_luma_horz_qpel_av8; + ps_codec->apf_inter_pred_luma[4] = ih264_inter_pred_luma_vert_qpel_av8; + ps_codec->apf_inter_pred_luma[5] = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8; + ps_codec->apf_inter_pred_luma[6] = ih264_inter_pred_luma_horz_hpel_vert_qpel_av8; + ps_codec->apf_inter_pred_luma[7] = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8; + ps_codec->apf_inter_pred_luma[8] = ih264_inter_pred_luma_vert_av8; + ps_codec->apf_inter_pred_luma[9] = ih264_inter_pred_luma_horz_qpel_vert_hpel_av8; + ps_codec->apf_inter_pred_luma[10] = ih264_inter_pred_luma_horz_hpel_vert_hpel_av8; + ps_codec->apf_inter_pred_luma[11] = ih264_inter_pred_luma_horz_qpel_vert_hpel_av8; + ps_codec->apf_inter_pred_luma[12] = ih264_inter_pred_luma_vert_qpel_av8; + ps_codec->apf_inter_pred_luma[13] = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8; + ps_codec->apf_inter_pred_luma[14] = 
ih264_inter_pred_luma_horz_hpel_vert_qpel_av8; + ps_codec->apf_inter_pred_luma[15] = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8; + + ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_av8; + + + return; +} diff --git a/decoder/ih264d.h b/decoder/ih264d.h new file mode 100755 index 0000000..f89e576 --- /dev/null +++ b/decoder/ih264d.h @@ -0,0 +1,482 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ih264d.h */ +/* */ +/* Description : This file contains all the necessary structure and */ +/* enumeration definitions needed for the Application */ +/* Program Interface(API) of the Ittiam H264 ASP */ +/* Decoder on Cortex A8 - Neon platform */ +/* */ +/* List of Functions : ih264d_api_function */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 26 08 2010 100239(RCY) Draft */ +/* */ +/*****************************************************************************/ + +#ifndef _IH264D_H_ +#define _IH264D_H_ +#ifdef __cplusplus +extern "C" { +#endif + +#include "iv.h" +#include "ivd.h" + + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Function Macros */ +/*****************************************************************************/ +#define IS_IVD_CONCEALMENT_APPLIED(x) (x & (1 << IVD_APPLIEDCONCEALMENT)) +#define IS_IVD_INSUFFICIENTDATA_ERROR(x) (x & (1 << IVD_INSUFFICIENTDATA)) +#define IS_IVD_CORRUPTEDDATA_ERROR(x) (x & (1 << IVD_CORRUPTEDDATA)) +#define IS_IVD_CORRUPTEDHEADER_ERROR(x) (x & (1 << IVD_CORRUPTEDHEADER)) +#define IS_IVD_UNSUPPORTEDINPUT_ERROR(x) (x & (1 << IVD_UNSUPPORTEDINPUT)) +#define IS_IVD_UNSUPPORTEDPARAM_ERROR(x) (x & (1 << IVD_UNSUPPORTEDPARAM)) +#define IS_IVD_FATAL_ERROR(x) (x & (1 << IVD_FATALERROR)) +#define IS_IVD_INVALID_BITSTREAM_ERROR(x) (x & (1 << IVD_INVALID_BITSTREAM)) +#define IS_IVD_INCOMPLETE_BITSTREAM_ERROR(x) (x & (1 << IVD_INCOMPLETE_BITSTREAM)) + + +/*****************************************************************************/ +/* API Function Prototype */ 
+/*****************************************************************************/ +IV_API_CALL_STATUS_T ih264d_api_function(iv_obj_t *ps_handle, void *pv_api_ip,void *pv_api_op); + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ +/* Codec Error codes for H264 ASP Decoder */ + +typedef enum { + + IH264D_VID_HDR_DEC_NUM_FRM_BUF_NOT_SUFFICIENT = IVD_DUMMY_ELEMENT_FOR_CODEC_EXTENSIONS + 1, + +}IH264D_ERROR_CODES_T; + +/*****************************************************************************/ +/* Extended Structures */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Get Number of Memory Records */ +/*****************************************************************************/ + + +typedef struct { + iv_num_mem_rec_ip_t s_ivd_num_mem_rec_ip_t; +}ih264d_num_mem_rec_ip_t; + + +typedef struct{ + iv_num_mem_rec_op_t s_ivd_num_mem_rec_op_t; +}ih264d_num_mem_rec_op_t; + + +/*****************************************************************************/ +/* Fill Memory Records */ +/*****************************************************************************/ + + +typedef struct { + iv_fill_mem_rec_ip_t s_ivd_fill_mem_rec_ip_t; + WORD32 i4_level; + UWORD32 u4_num_reorder_frames; + UWORD32 u4_num_ref_frames; + UWORD32 u4_share_disp_buf; + + /* format in which codec has to give out frame data for display */ + IV_COLOR_FORMAT_T e_output_format; + + /* Number of extra display buffers that will be allocated to handle display pipeline depth */ + UWORD32 u4_num_extra_disp_buf; + +}ih264d_fill_mem_rec_ip_t; + + +typedef struct{ + iv_fill_mem_rec_op_t s_ivd_fill_mem_rec_op_t; + +}ih264d_fill_mem_rec_op_t; + +/*****************************************************************************/ +/* Retrieve Memory Records */ 
+/*****************************************************************************/ + + +typedef struct { + iv_retrieve_mem_rec_ip_t s_ivd_retrieve_mem_rec_ip_t; +}ih264d_retrieve_mem_rec_ip_t; + + +typedef struct{ + iv_retrieve_mem_rec_op_t s_ivd_retrieve_mem_rec_op_t; +}ih264d_retrieve_mem_rec_op_t; + + +/*****************************************************************************/ +/* Initialize decoder */ +/*****************************************************************************/ + + +typedef struct { + ivd_init_ip_t s_ivd_init_ip_t; + WORD32 i4_level; + UWORD32 u4_num_reorder_frames; + UWORD32 u4_num_ref_frames; + UWORD32 u4_share_disp_buf; + /* Number of extra display buffers that will be allocated to handle display pipeline depth */ + UWORD32 u4_num_extra_disp_buf; + +}ih264d_init_ip_t; + + +typedef struct{ + ivd_init_op_t s_ivd_init_op_t; +}ih264d_init_op_t; + + +/*****************************************************************************/ +/* Video Decode */ +/*****************************************************************************/ + + +typedef struct { + ivd_video_decode_ip_t s_ivd_video_decode_ip_t; +}ih264d_video_decode_ip_t; + + +typedef struct{ + ivd_video_decode_op_t s_ivd_video_decode_op_t; +}ih264d_video_decode_op_t; + + +/*****************************************************************************/ +/* Get Display Frame */ +/*****************************************************************************/ + + +typedef struct +{ + ivd_get_display_frame_ip_t s_ivd_get_display_frame_ip_t; +}ih264d_get_display_frame_ip_t; + + +typedef struct +{ + ivd_get_display_frame_op_t s_ivd_get_display_frame_op_t; +}ih264d_get_display_frame_op_t; + +/*****************************************************************************/ +/* Set Display Frame */ +/*****************************************************************************/ + + +typedef struct +{ + ivd_set_display_frame_ip_t s_ivd_set_display_frame_ip_t; +}ih264d_set_display_frame_ip_t; + + 
+typedef struct +{ + ivd_set_display_frame_op_t s_ivd_set_display_frame_op_t; +}ih264d_set_display_frame_op_t; + +/*****************************************************************************/ +/* Release Display Buffers */ +/*****************************************************************************/ + + +typedef struct +{ + ivd_rel_display_frame_ip_t s_ivd_rel_display_frame_ip_t; +}ih264d_rel_display_frame_ip_t; + + +typedef struct +{ + ivd_rel_display_frame_op_t s_ivd_rel_display_frame_op_t; +}ih264d_rel_display_frame_op_t; + + +typedef enum { + /** Set number of cores/threads to be used */ + IH264D_CMD_CTL_SET_NUM_CORES = IVD_CMD_CTL_CODEC_SUBCMD_START, + + /** Set processor details */ + IH264D_CMD_CTL_SET_PROCESSOR = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x001, + + /** Get display buffer dimensions */ + IH264D_CMD_CTL_GET_BUFFER_DIMENSIONS = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x100, + + /** Get VUI parameters */ + IH264D_CMD_CTL_GET_VUI_PARAMS = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x101, + + /** Enable/disable GPU, supported on select platforms */ + IH264D_CMD_CTL_GPU_ENABLE_DISABLE = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x200, + + /** Set degrade level */ + IH264D_CMD_CTL_DEGRADE = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x300 +}IH264D_CMD_CTL_SUB_CMDS; +/*****************************************************************************/ +/* Video control Flush */ +/*****************************************************************************/ + + +typedef struct{ + ivd_ctl_flush_ip_t s_ivd_ctl_flush_ip_t; +}ih264d_ctl_flush_ip_t; + + +typedef struct{ + ivd_ctl_flush_op_t s_ivd_ctl_flush_op_t; +}ih264d_ctl_flush_op_t; + +/*****************************************************************************/ +/* Video control reset */ +/*****************************************************************************/ + + +typedef struct{ + ivd_ctl_reset_ip_t s_ivd_ctl_reset_ip_t; +}ih264d_ctl_reset_ip_t; + + +typedef struct{ + ivd_ctl_reset_op_t s_ivd_ctl_reset_op_t; +}ih264d_ctl_reset_op_t; + 
+ +/*****************************************************************************/ +/* Video control Set Params */ +/*****************************************************************************/ + + +typedef struct { + ivd_ctl_set_config_ip_t s_ivd_ctl_set_config_ip_t; +}ih264d_ctl_set_config_ip_t; + + +typedef struct{ + ivd_ctl_set_config_op_t s_ivd_ctl_set_config_op_t; +}ih264d_ctl_set_config_op_t; + +/*****************************************************************************/ +/* Video control:Get Buf Info */ +/*****************************************************************************/ + + +typedef struct{ + ivd_ctl_getbufinfo_ip_t s_ivd_ctl_getbufinfo_ip_t; +}ih264d_ctl_getbufinfo_ip_t; + + + +typedef struct{ + ivd_ctl_getbufinfo_op_t s_ivd_ctl_getbufinfo_op_t; +}ih264d_ctl_getbufinfo_op_t; + + +/*****************************************************************************/ +/* Video control:Getstatus Call */ +/*****************************************************************************/ + + +typedef struct{ + ivd_ctl_getstatus_ip_t s_ivd_ctl_getstatus_ip_t; +}ih264d_ctl_getstatus_ip_t; + + + +typedef struct{ + ivd_ctl_getstatus_op_t s_ivd_ctl_getstatus_op_t; +}ih264d_ctl_getstatus_op_t; + + +/*****************************************************************************/ +/* Video control:Get Version Info */ +/*****************************************************************************/ + + +typedef struct{ + ivd_ctl_getversioninfo_ip_t s_ivd_ctl_getversioninfo_ip_t; +}ih264d_ctl_getversioninfo_ip_t; + + + +typedef struct{ + ivd_ctl_getversioninfo_op_t s_ivd_ctl_getversioninfo_op_t; +}ih264d_ctl_getversioninfo_op_t; + +typedef struct{ + + /** + * u4_size + */ + UWORD32 u4_size; + + /** + * cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + + /** + * sub_cmd + */ + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** + * Pictures that are degraded + * 0 : No degrade + * 1 : Only on non-reference frames + * 2 : Use interval specified by i4_nondegrade_interval + 
* 3 : All non-key frames + * 4 : All frames + */ + WORD32 i4_degrade_pics; + + /** + * Interval for pictures which are completely decoded without any degradation + */ + WORD32 i4_nondegrade_interval; + + /** + * bit position (lsb is zero): Type of degradation + * 1 : Disable deblocking + * 2 : Faster inter prediction filters + * 3 : Fastest inter prediction filters + */ + WORD32 i4_degrade_type; + +}ih264d_ctl_degrade_ip_t; + +typedef struct +{ + /** + * u4_size + */ + UWORD32 u4_size; + + /** + * error_code + */ + UWORD32 u4_error_code; +}ih264d_ctl_degrade_op_t; + +typedef struct{ + UWORD32 u4_size; + IVD_API_COMMAND_TYPE_T e_cmd; + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + UWORD32 u4_disable_deblk_level; +}ih264d_ctl_disable_deblock_ip_t; + +typedef struct{ + UWORD32 u4_size; + UWORD32 u4_error_code; +}ih264d_ctl_disable_deblock_op_t; + + +typedef struct{ + UWORD32 u4_size; + IVD_API_COMMAND_TYPE_T e_cmd; + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + UWORD32 u4_num_cores; +}ih264d_ctl_set_num_cores_ip_t; + +typedef struct{ + UWORD32 u4_size; + UWORD32 u4_error_code; +}ih264d_ctl_set_num_cores_op_t; + +typedef struct +{ + /** + * u4_size + */ + UWORD32 u4_size; + /** + * cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + /** + * sub cmd + */ + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + /** + * Processor type + */ + UWORD32 u4_arch; + /** + * SOC type + */ + UWORD32 u4_soc; + + /** + * num_cores + */ + UWORD32 u4_num_cores; + +}ih264d_ctl_set_processor_ip_t; + +typedef struct +{ + /** + * u4_size + */ + UWORD32 u4_size; + /** + * error_code + */ + UWORD32 u4_error_code; +}ih264d_ctl_set_processor_op_t; + +typedef struct{ + UWORD32 u4_size; + IVD_API_COMMAND_TYPE_T e_cmd; + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; +}ih264d_ctl_get_frame_dimensions_ip_t; + + +typedef struct{ + UWORD32 u4_size; + UWORD32 u4_error_code; + UWORD32 u4_x_offset[3]; + UWORD32 u4_y_offset[3]; + UWORD32 u4_disp_wd[3]; + UWORD32 u4_disp_ht[3]; + UWORD32 u4_buffer_wd[3]; + UWORD32 
u4_buffer_ht[3]; +}ih264d_ctl_get_frame_dimensions_op_t; + +#ifdef __cplusplus +} /* closing brace for extern "C" */ +#endif +#endif /* _IH264D_H_ */ diff --git a/decoder/ih264d_api.c b/decoder/ih264d_api.c new file mode 100755 index 0000000..67ef5bb --- /dev/null +++ b/decoder/ih264d_api.c @@ -0,0 +1,4680 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* */ +/* File Name : ih264d_api.c */ +/* */ +/* Description : Has all API related functions */ +/* */ +/* */ +/* List of Functions : api_check_struct_sanity */ +/* ih264d_set_processor */ +/* ih264d_get_num_rec */ +/* ih264d_init_decoder */ +/* ih264d_init_video_decoder */ +/* ih264d_fill_num_mem_rec */ +/* ih264d_clr */ +/* ih264d_init */ +/* ih264d_map_error */ +/* ih264d_video_decode */ +/* ih264d_get_version */ +/* ih264d_get_display_frame */ +/* ih264d_set_display_frame */ +/* ih264d_set_flush_mode */ +/* ih264d_get_status */ +/* ih264d_get_buf_info */ +/* ih264d_set_params */ +/* ih264d_set_default_params */ +/* ih264d_reset */ +/* ih264d_ctl */ +/* ih264d_rel_display_frame */ +/* ih264d_set_degrade */ +/* ih264d_get_frame_dimensions */ +/* ih264d_set_num_cores */ +/* ih264d_fill_output_struct_from_context */ +/* ih264d_api_function */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 14 10 2008 100356(SKV) Draft */ +/* */ +/*****************************************************************************/ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_tables.h" +#include "iv.h" +#include "ivd.h" +#include "ih264d.h" +#include "ih264d_defs.h" + +#include <string.h> +#include <limits.h> +#include <stddef.h> + +#include "ih264d_inter_pred.h" + +#include "ih264d_structs.h" +#include "ih264d_nal.h" +#include "ih264d_error_handler.h" + +#include "ih264d_defs.h" + +#include "ithread.h" +#include "ih264d_parse_slice.h" +#include "ih264d_function_selector.h" +#include "ih264_error.h" +#include "ih264_disp_mgr.h" +#include "ih264_buf_mgr.h" +#include "ih264d_deblocking.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_parse_cabac.h" +#include "ih264d_utils.h" +#include "ih264d_format_conv.h" +#include 
"ih264d_parse_headers.h" +#include <assert.h> + + +/*********************/ +/* Codec Versioning */ +/*********************/ +//Move this to where it is used. VERSION builds the version string in place; version_string must have room for MAXVERSION_STRLEN bytes and every append is bounded by the space left +#define CODEC_NAME "H264VDEC" +#define CODEC_RELEASE_TYPE "production" +#define CODEC_RELEASE_VER "04.00" +#define CODEC_VENDOR "ITTIAM" +#define MAXVERSION_STRLEN 511 +#define VERSION(version_string, codec_name, codec_release_type, codec_release_ver, codec_vendor) \ + strncpy(version_string,"@(#)Id:", MAXVERSION_STRLEN); \ + strncat(version_string,codec_name, MAXVERSION_STRLEN - strlen(version_string) - 1); \ + strncat(version_string,"_", MAXVERSION_STRLEN - strlen(version_string) - 1); \ + strncat(version_string,codec_release_type, MAXVERSION_STRLEN - strlen(version_string) - 1); \ + strncat(version_string," Ver:", MAXVERSION_STRLEN - strlen(version_string) - 1); \ + strncat(version_string,codec_release_ver, MAXVERSION_STRLEN - strlen(version_string) - 1); \ + strncat(version_string," Released by ", MAXVERSION_STRLEN - strlen(version_string) - 1); \ + strncat(version_string,codec_vendor, MAXVERSION_STRLEN - strlen(version_string) - 1); \ + strncat(version_string," Build: ", MAXVERSION_STRLEN - strlen(version_string) - 1); \ + strncat(version_string,__DATE__, MAXVERSION_STRLEN - strlen(version_string) - 1); \ + strncat(version_string," @ ", MAXVERSION_STRLEN - strlen(version_string) - 1); \ + strncat(version_string,__TIME__, MAXVERSION_STRLEN - strlen(version_string) - 1); + +#define MAX_NAL_UNIT_SIZE MAX((H264_MAX_FRAME_HEIGHT * H264_MAX_FRAME_HEIGHT),MIN_NALUNIT_SIZE) +#define MIN_NALUNIT_SIZE 200000 +#define FMT_CONV_NUM_ROWS 4 + +#define MIN_IN_BUFS 1 +#define MIN_OUT_BUFS_420 3 +#define MIN_OUT_BUFS_422ILE 1 +#define MIN_OUT_BUFS_RGB565 1 +#define MIN_OUT_BUFS_420SP 2 +#define MIN_IN_BUF_SIZE (2*1024*1024) // Currently, i4_size set to 500kb, CHECK LATER + +#define NUM_FRAMES_LIMIT_ENABLED 0 + +#if NUM_FRAMES_LIMIT_ENABLED +#define NUM_FRAMES_LIMIT 10000 +#else +#define NUM_FRAMES_LIMIT 0x7FFFFFFF +#endif + + +UWORD32 ih264d_get_extra_mem_external(UWORD32 width, UWORD32 height); +WORD32 ih264d_get_frame_dimensions(iv_obj_t *dec_hdl, + void *pv_api_ip, + void *pv_api_op); +WORD32 ih264d_set_num_cores(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op); + +WORD32 ih264d_deblock_display(dec_struct_t *ps_dec); + +void 
ih264d_signal_decode_thread(dec_struct_t *ps_dec); + +void ih264d_signal_bs_deblk_thread(dec_struct_t *ps_dec); +void ih264d_decode_picture_thread(dec_struct_t *ps_dec); + +WORD32 ih264d_set_degrade(iv_obj_t *ps_codec_obj, + void *pv_api_ip, + void *pv_api_op); + +void ih264d_fill_output_struct_from_context(dec_struct_t *ps_dec, + ivd_video_decode_op_t *ps_dec_op); + +static IV_API_CALL_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle, + void *pv_api_ip, + void *pv_api_op) +{ + IVD_API_COMMAND_TYPE_T e_cmd; + UWORD32 *pu4_api_ip; + UWORD32 *pu4_api_op; + UWORD32 i, j; + + if(NULL == pv_api_op) + return (IV_FAIL); + + if(NULL == pv_api_ip) + return (IV_FAIL); + + pu4_api_ip = (UWORD32 *)pv_api_ip; + pu4_api_op = (UWORD32 *)pv_api_op; + e_cmd = *(pu4_api_ip + 1); + + /* error checks on handle */ + switch((WORD32)e_cmd) + { + case IV_CMD_GET_NUM_MEM_REC: + case IV_CMD_FILL_NUM_MEM_REC: + break; + case IV_CMD_INIT: + if(ps_handle == NULL) + { + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_HANDLE_NULL; + return IV_FAIL; + } + + if(ps_handle->u4_size != sizeof(iv_obj_t)) + { + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_HANDLE_STRUCT_SIZE_INCORRECT; + H264_DEC_DEBUG_PRINT( + "Sizes do not match. 
Expected: %d, Got: %d", + sizeof(iv_obj_t), ps_handle->u4_size); + return IV_FAIL; + } + break; + case IVD_CMD_REL_DISPLAY_FRAME: + case IVD_CMD_SET_DISPLAY_FRAME: + case IVD_CMD_GET_DISPLAY_FRAME: + case IVD_CMD_VIDEO_DECODE: + case IV_CMD_RETRIEVE_MEMREC: + case IVD_CMD_VIDEO_CTL: + if(ps_handle == NULL) + { + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_HANDLE_NULL; + return IV_FAIL; + } + + if(ps_handle->u4_size != sizeof(iv_obj_t)) + { + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_HANDLE_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_handle->pv_fxns != ih264d_api_function) + { + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_INVALID_HANDLE_NULL; + return IV_FAIL; + } + + if(ps_handle->pv_codec_handle == NULL) + { + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_INVALID_HANDLE_NULL; + return IV_FAIL; + } + break; + default: + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_INVALID_API_CMD; + return IV_FAIL; + } + + switch((WORD32)e_cmd) + { + case IV_CMD_GET_NUM_MEM_REC: + { + ih264d_num_mem_rec_ip_t *ps_ip = + (ih264d_num_mem_rec_ip_t *)pv_api_ip; + ih264d_num_mem_rec_op_t *ps_op = + (ih264d_num_mem_rec_op_t *)pv_api_op; + ps_op->s_ivd_num_mem_rec_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_num_mem_rec_ip_t.u4_size + != sizeof(ih264d_num_mem_rec_ip_t)) + { + ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(ps_op->s_ivd_num_mem_rec_op_t.u4_size + != sizeof(ih264d_num_mem_rec_op_t)) + { + ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + } + break; + case IV_CMD_FILL_NUM_MEM_REC: + { + ih264d_fill_mem_rec_ip_t *ps_ip = + 
(ih264d_fill_mem_rec_ip_t *)pv_api_ip; + ih264d_fill_mem_rec_op_t *ps_op = + (ih264d_fill_mem_rec_op_t *)pv_api_op; + iv_mem_rec_t *ps_mem_rec; + WORD32 max_wd = ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd; + WORD32 max_ht = ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht; + + max_wd = ((max_wd + 15) >> 4) << 4; + max_ht = ((max_ht + 15) >> 4) << 4; + + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code = 0; + + if((ps_ip->s_ivd_fill_mem_rec_ip_t.u4_size + > sizeof(ih264d_fill_mem_rec_ip_t)) + || (ps_ip->s_ivd_fill_mem_rec_ip_t.u4_size + < sizeof(iv_fill_mem_rec_ip_t))) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if((ps_op->s_ivd_fill_mem_rec_op_t.u4_size + != sizeof(ih264d_fill_mem_rec_op_t)) + && (ps_op->s_ivd_fill_mem_rec_op_t.u4_size + != sizeof(iv_fill_mem_rec_op_t))) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(max_wd < H264_MIN_FRAME_WIDTH) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= + IVD_REQUESTED_WIDTH_NOT_SUPPPORTED; + return (IV_FAIL); + } + + if(max_wd > H264_MAX_FRAME_WIDTH) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= + IVD_REQUESTED_WIDTH_NOT_SUPPPORTED; + return (IV_FAIL); + } + + if(max_ht < H264_MIN_FRAME_HEIGHT) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= + IVD_REQUESTED_HEIGHT_NOT_SUPPPORTED; + return (IV_FAIL); + } + + if((max_ht * max_wd) + > (H264_MAX_FRAME_HEIGHT * H264_MAX_FRAME_WIDTH)) + + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + 
ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= + IVD_REQUESTED_HEIGHT_NOT_SUPPPORTED; + return (IV_FAIL); + } + + if(NULL == ps_ip->s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= + IVD_NUM_REC_NOT_SUFFICIENT; + return (IV_FAIL); + } + + /* check memrecords sizes are correct */ + ps_mem_rec = ps_ip->s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location; + for(i = 0; i < MEM_REC_CNT; i++) + { + if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t)) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= + IVD_MEM_REC_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + } + break; + + case IV_CMD_INIT: + { + ih264d_init_ip_t *ps_ip = (ih264d_init_ip_t *)pv_api_ip; + ih264d_init_op_t *ps_op = (ih264d_init_op_t *)pv_api_op; + iv_mem_rec_t *ps_mem_rec; + WORD32 max_wd = ps_ip->s_ivd_init_ip_t.u4_frm_max_wd; + WORD32 max_ht = ps_ip->s_ivd_init_ip_t.u4_frm_max_ht; + + max_wd = ((max_wd + 15) >> 4) << 4; + max_ht = ((max_ht + 15) >> 4) << 4; + + ps_op->s_ivd_init_op_t.u4_error_code = 0; + + if((ps_ip->s_ivd_init_ip_t.u4_size > sizeof(ih264d_init_ip_t)) + || (ps_ip->s_ivd_init_ip_t.u4_size + < sizeof(ivd_init_ip_t))) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + H264_DEC_DEBUG_PRINT("\n"); + return (IV_FAIL); + } + + if((ps_op->s_ivd_init_op_t.u4_size != sizeof(ih264d_init_op_t)) + && (ps_op->s_ivd_init_op_t.u4_size + != sizeof(ivd_init_op_t))) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + H264_DEC_DEBUG_PRINT("\n"); + return (IV_FAIL); + } + + if(ps_ip->s_ivd_init_ip_t.u4_num_mem_rec != MEM_REC_CNT) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << 
IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_NOT_SUFFICIENT; + H264_DEC_DEBUG_PRINT("\n"); + return (IV_FAIL); + } + + if(max_wd < H264_MIN_FRAME_WIDTH) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_WIDTH_NOT_SUPPPORTED; + H264_DEC_DEBUG_PRINT("\n"); + return (IV_FAIL); + } + + if(max_wd > H264_MAX_FRAME_WIDTH) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_WIDTH_NOT_SUPPPORTED; + H264_DEC_DEBUG_PRINT("\n"); + return (IV_FAIL); + } + + if(max_ht < H264_MIN_FRAME_HEIGHT) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_HEIGHT_NOT_SUPPPORTED; + H264_DEC_DEBUG_PRINT("\n"); + return (IV_FAIL); + } + + if((max_ht * max_wd) + > (H264_MAX_FRAME_HEIGHT * H264_MAX_FRAME_WIDTH)) + + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_HEIGHT_NOT_SUPPPORTED; + H264_DEC_DEBUG_PRINT("\n"); + return (IV_FAIL); + } + + if(NULL == ps_ip->s_ivd_init_ip_t.pv_mem_rec_location) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_NUM_REC_NOT_SUFFICIENT; + H264_DEC_DEBUG_PRINT("\n"); + return (IV_FAIL); + } + + if((ps_ip->s_ivd_init_ip_t.e_output_format != IV_YUV_420P) + && (ps_ip->s_ivd_init_ip_t.e_output_format + != IV_YUV_422ILE) + && (ps_ip->s_ivd_init_ip_t.e_output_format + != IV_RGB_565) + && (ps_ip->s_ivd_init_ip_t.e_output_format + != IV_YUV_420SP_UV) + && (ps_ip->s_ivd_init_ip_t.e_output_format + != IV_YUV_420SP_VU)) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED; + H264_DEC_DEBUG_PRINT("\n"); + return (IV_FAIL); + } + + /* verify number of mem 
records */ + if(ps_ip->s_ivd_init_ip_t.u4_num_mem_rec < MEM_REC_CNT) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_MEM_REC_NOT_SUFFICIENT; + H264_DEC_DEBUG_PRINT("\n"); + return IV_FAIL; + } + + ps_mem_rec = ps_ip->s_ivd_init_ip_t.pv_mem_rec_location; + /* check memrecords sizes are correct */ + for(i = 0; i < ps_ip->s_ivd_init_ip_t.u4_num_mem_rec; i++) + { + if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t)) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_MEM_REC_STRUCT_SIZE_INCORRECT; + H264_DEC_DEBUG_PRINT("i: %d\n", i); + return IV_FAIL; + } + /* check memrecords pointers are not NULL */ + + if(ps_mem_rec[i].pv_base == NULL) + { + + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_MEM_REC_BASE_NULL; + H264_DEC_DEBUG_PRINT("i: %d\n", i); + return IV_FAIL; + + } + + } + + /* verify memtabs for overlapping regions */ + { + void *start[MEM_REC_CNT]; + void *end[MEM_REC_CNT]; + + start[0] = (void *)(ps_mem_rec[0].pv_base); + end[0] = (void *)((UWORD8 *)ps_mem_rec[0].pv_base + + ps_mem_rec[0].u4_mem_size - 1); + for(i = 1; i < MEM_REC_CNT; i++) + { + /* This array is populated to check memtab overlapp */ + start[i] = (void *)(ps_mem_rec[i].pv_base); + end[i] = (void *)((UWORD8 *)ps_mem_rec[i].pv_base + + ps_mem_rec[i].u4_mem_size - 1); + + for(j = 0; j < i; j++) + { + if((start[i] >= start[j]) && (start[i] <= end[j])) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_MEM_REC_OVERLAP_ERR; + H264_DEC_DEBUG_PRINT("i: %d, j: %d\n", i, j); + return IV_FAIL; + } + + if((end[i] >= start[j]) && (end[i] <= end[j])) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + 
IVD_INIT_DEC_MEM_REC_OVERLAP_ERR; + H264_DEC_DEBUG_PRINT("i: %d, j: %d\n", i, j); + return IV_FAIL; + } + + if((start[i] < start[j]) && (end[i] > end[j])) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_MEM_REC_OVERLAP_ERR; + H264_DEC_DEBUG_PRINT("i: %d, j: %d\n", i, j); + return IV_FAIL; + } + } + + } + } + + { + iv_mem_rec_t mem_rec_ittiam_api[MEM_REC_CNT]; + ih264d_fill_mem_rec_ip_t s_fill_mem_rec_ip; + ih264d_fill_mem_rec_op_t s_fill_mem_rec_op; + IV_API_CALL_STATUS_T e_status; + + UWORD32 i; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.e_cmd = + IV_CMD_FILL_NUM_MEM_REC; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location = + mem_rec_ittiam_api; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd = + max_wd; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht = + max_ht; + + if(ps_ip->s_ivd_init_ip_t.u4_size + > offsetof(ih264d_init_ip_t, i4_level)) + { + s_fill_mem_rec_ip.i4_level = ps_ip->i4_level; + } + else + { + s_fill_mem_rec_ip.i4_level = H264_LEVEL_3_1; + } + + if(ps_ip->s_ivd_init_ip_t.u4_size + > offsetof(ih264d_init_ip_t, u4_num_ref_frames)) + { + s_fill_mem_rec_ip.u4_num_ref_frames = + ps_ip->u4_num_ref_frames; + } + else + { + s_fill_mem_rec_ip.u4_num_ref_frames = + (H264_MAX_REF_PICS + 1); + } + + if(ps_ip->s_ivd_init_ip_t.u4_size + > offsetof(ih264d_init_ip_t, + u4_num_reorder_frames)) + { + s_fill_mem_rec_ip.u4_num_reorder_frames = + ps_ip->u4_num_reorder_frames; + } + else + { + s_fill_mem_rec_ip.u4_num_reorder_frames = (H264_MAX_REF_PICS + + 1); + } + + if(ps_ip->s_ivd_init_ip_t.u4_size + > offsetof(ih264d_init_ip_t, + u4_num_extra_disp_buf)) + { + s_fill_mem_rec_ip.u4_num_extra_disp_buf = + ps_ip->u4_num_extra_disp_buf; + } + else + { + s_fill_mem_rec_ip.u4_num_extra_disp_buf = 0; + } + + if(ps_ip->s_ivd_init_ip_t.u4_size + > offsetof(ih264d_init_ip_t, u4_share_disp_buf)) + { +#ifndef LOGO_EN + s_fill_mem_rec_ip.u4_share_disp_buf = + 
ps_ip->u4_share_disp_buf; +#else + s_fill_mem_rec_ip.u4_share_disp_buf = 0; +#endif + } + else + { + s_fill_mem_rec_ip.u4_share_disp_buf = 0; + } + + s_fill_mem_rec_ip.e_output_format = + ps_ip->s_ivd_init_ip_t.e_output_format; + + if((s_fill_mem_rec_ip.e_output_format != IV_YUV_420P) + && (s_fill_mem_rec_ip.e_output_format + != IV_YUV_420SP_UV) + && (s_fill_mem_rec_ip.e_output_format + != IV_YUV_420SP_VU)) + { + s_fill_mem_rec_ip.u4_share_disp_buf = 0; + } + + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_size = + sizeof(ih264d_fill_mem_rec_ip_t); + s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_size = + sizeof(ih264d_fill_mem_rec_op_t); + + for(i = 0; i < MEM_REC_CNT; i++) + mem_rec_ittiam_api[i].u4_size = sizeof(iv_mem_rec_t); + + e_status = ih264d_api_function(NULL, + (void *)&s_fill_mem_rec_ip, + (void *)&s_fill_mem_rec_op); + if(IV_FAIL == e_status) + { + ps_op->s_ivd_init_op_t.u4_error_code = + s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_error_code; + H264_DEC_DEBUG_PRINT("Fail\n"); + return (IV_FAIL); + } + + for(i = 0; i < MEM_REC_CNT; i++) + { + if(ps_mem_rec[i].u4_mem_size + < mem_rec_ittiam_api[i].u4_mem_size) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_MEM_REC_INSUFFICIENT_SIZE; + H264_DEC_DEBUG_PRINT("i: %d \n", i); + return IV_FAIL; + } + if(ps_mem_rec[i].u4_mem_alignment + != mem_rec_ittiam_api[i].u4_mem_alignment) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_MEM_REC_ALIGNMENT_ERR; + H264_DEC_DEBUG_PRINT("i: %d \n", i); + return IV_FAIL; + } + if(ps_mem_rec[i].e_mem_type + != mem_rec_ittiam_api[i].e_mem_type) + { + UWORD32 check = IV_SUCCESS; + UWORD32 diff = mem_rec_ittiam_api[i].e_mem_type + - ps_mem_rec[i].e_mem_type; + + if((ps_mem_rec[i].e_mem_type + <= IV_EXTERNAL_CACHEABLE_SCRATCH_MEM) + && (mem_rec_ittiam_api[i].e_mem_type + >= IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM)) + 
{ + check = IV_FAIL; + } + if(3 != MOD(mem_rec_ittiam_api[i].e_mem_type, 4)) + { + /* + * It is not IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM or IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM + */ + if((diff < 1) || (diff > 3)) + { + // Difference between 1 and 3 is okay for all cases other than the two filtered + // with the MOD condition above + check = IV_FAIL; + } + } + else + { + if(diff == 1) + { + /* + * This particular case is when codec asked for External Persistent, but got + * Internal Scratch. + */ + check = IV_FAIL; + } + if((diff != 2) && (diff != 3)) + { + check = IV_FAIL; + } + } + if(check == IV_FAIL) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= + IVD_INIT_DEC_MEM_REC_INCORRECT_TYPE; + H264_DEC_DEBUG_PRINT("i: %d \n", i); + return IV_FAIL; + } + } + } + } + + } + break; + + case IVD_CMD_GET_DISPLAY_FRAME: + { + ih264d_get_display_frame_ip_t *ps_ip = + (ih264d_get_display_frame_ip_t *)pv_api_ip; + ih264d_get_display_frame_op_t *ps_op = + (ih264d_get_display_frame_op_t *)pv_api_op; + + ps_op->s_ivd_get_display_frame_op_t.u4_error_code = 0; + + if((ps_ip->s_ivd_get_display_frame_ip_t.u4_size + != sizeof(ih264d_get_display_frame_ip_t)) + && (ps_ip->s_ivd_get_display_frame_ip_t.u4_size + != sizeof(ivd_get_display_frame_ip_t))) + { + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if((ps_op->s_ivd_get_display_frame_op_t.u4_size + != sizeof(ih264d_get_display_frame_op_t)) + && (ps_op->s_ivd_get_display_frame_op_t.u4_size + != sizeof(ivd_get_display_frame_op_t))) + { + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + } + break; + + case IVD_CMD_REL_DISPLAY_FRAME: + { + ih264d_rel_display_frame_ip_t 
*ps_ip = + (ih264d_rel_display_frame_ip_t *)pv_api_ip; + ih264d_rel_display_frame_op_t *ps_op = + (ih264d_rel_display_frame_op_t *)pv_api_op; + + ps_op->s_ivd_rel_display_frame_op_t.u4_error_code = 0; + + if((ps_ip->s_ivd_rel_display_frame_ip_t.u4_size + != sizeof(ih264d_rel_display_frame_ip_t)) + && (ps_ip->s_ivd_rel_display_frame_ip_t.u4_size + != sizeof(ivd_rel_display_frame_ip_t))) + { + ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if((ps_op->s_ivd_rel_display_frame_op_t.u4_size + != sizeof(ih264d_rel_display_frame_op_t)) + && (ps_op->s_ivd_rel_display_frame_op_t.u4_size + != sizeof(ivd_rel_display_frame_op_t))) + { + ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + } + break; + + case IVD_CMD_SET_DISPLAY_FRAME: + { + ih264d_set_display_frame_ip_t *ps_ip = + (ih264d_set_display_frame_ip_t *)pv_api_ip; + ih264d_set_display_frame_op_t *ps_op = + (ih264d_set_display_frame_op_t *)pv_api_op; + UWORD32 j; + + ps_op->s_ivd_set_display_frame_op_t.u4_error_code = 0; + + if((ps_ip->s_ivd_set_display_frame_ip_t.u4_size + != sizeof(ih264d_set_display_frame_ip_t)) + && (ps_ip->s_ivd_set_display_frame_ip_t.u4_size + != sizeof(ivd_set_display_frame_ip_t))) + { + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if((ps_op->s_ivd_set_display_frame_op_t.u4_size + != sizeof(ih264d_set_display_frame_op_t)) + && (ps_op->s_ivd_set_display_frame_op_t.u4_size + != sizeof(ivd_set_display_frame_op_t))) + { + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + 
ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(ps_ip->s_ivd_set_display_frame_ip_t.num_disp_bufs == 0) + { + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= + IVD_DISP_FRM_ZERO_OP_BUFS; + return IV_FAIL; + } + + for(j = 0; j < ps_ip->s_ivd_set_display_frame_ip_t.num_disp_bufs; + j++) + { + if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_num_bufs + == 0) + { + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= + IVD_DISP_FRM_ZERO_OP_BUFS; + return IV_FAIL; + } + + for(i = 0; + i + < ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_num_bufs; + i++) + { + if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].pu1_bufs[i] + == NULL) + { + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= + IVD_DISP_FRM_OP_BUF_NULL; + return IV_FAIL; + } + + if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_min_out_buf_size[i] + == 0) + { + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= + IVD_DISP_FRM_ZERO_OP_BUF_SIZE; + return IV_FAIL; + } + } + } + } + break; + + case IVD_CMD_VIDEO_DECODE: + { + ih264d_video_decode_ip_t *ps_ip = + (ih264d_video_decode_ip_t *)pv_api_ip; + ih264d_video_decode_op_t *ps_op = + (ih264d_video_decode_op_t *)pv_api_op; + + H264_DEC_DEBUG_PRINT("The input bytes is: %d", + ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes); + ps_op->s_ivd_video_decode_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_video_decode_ip_t.u4_size + != sizeof(ih264d_video_decode_ip_t)&& + ps_ip->s_ivd_video_decode_ip_t.u4_size != offsetof(ivd_video_decode_ip_t, s_out_buffer)) + { + ps_op->s_ivd_video_decode_op_t.u4_error_code |= 1 + << 
IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_video_decode_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(ps_op->s_ivd_video_decode_op_t.u4_size + != sizeof(ih264d_video_decode_op_t)&& + ps_op->s_ivd_video_decode_op_t.u4_size != offsetof(ivd_video_decode_op_t, u4_output_present)) + { + ps_op->s_ivd_video_decode_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_video_decode_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + } + break; + + case IV_CMD_RETRIEVE_MEMREC: + { + ih264d_retrieve_mem_rec_ip_t *ps_ip = + (ih264d_retrieve_mem_rec_ip_t *)pv_api_ip; + ih264d_retrieve_mem_rec_op_t *ps_op = + (ih264d_retrieve_mem_rec_op_t *)pv_api_op; + iv_mem_rec_t *ps_mem_rec; + + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_retrieve_mem_rec_ip_t.u4_size + != sizeof(ih264d_retrieve_mem_rec_ip_t)) + { + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if(ps_op->s_ivd_retrieve_mem_rec_op_t.u4_size + != sizeof(ih264d_retrieve_mem_rec_op_t)) + { + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + ps_mem_rec = ps_ip->s_ivd_retrieve_mem_rec_ip_t.pv_mem_rec_location; + /* check memrecords sizes are correct */ + for(i = 0; i < MEM_REC_CNT; i++) + { + if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t)) + { + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= + IVD_MEM_REC_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + } + break; + + case IVD_CMD_VIDEO_CTL: + { + UWORD32 *pu4_ptr_cmd; + UWORD32 sub_command; + + pu4_ptr_cmd = (UWORD32 *)pv_api_ip; + pu4_ptr_cmd += 2; + sub_command = *pu4_ptr_cmd; + + 
switch(sub_command) + { + case IVD_CMD_CTL_SETPARAMS: + { + ih264d_ctl_set_config_ip_t *ps_ip; + ih264d_ctl_set_config_op_t *ps_op; + ps_ip = (ih264d_ctl_set_config_ip_t *)pv_api_ip; + ps_op = (ih264d_ctl_set_config_op_t *)pv_api_op; + + if(ps_ip->s_ivd_ctl_set_config_ip_t.u4_size + != sizeof(ih264d_ctl_set_config_ip_t)) + { + ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + //no break; is needed here + case IVD_CMD_CTL_SETDEFAULT: + { + ih264d_ctl_set_config_op_t *ps_op; + ps_op = (ih264d_ctl_set_config_op_t *)pv_api_op; + if(ps_op->s_ivd_ctl_set_config_op_t.u4_size + != sizeof(ih264d_ctl_set_config_op_t)) + { + ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + + case IVD_CMD_CTL_GETPARAMS: + { + ih264d_ctl_getstatus_ip_t *ps_ip; + ih264d_ctl_getstatus_op_t *ps_op; + + ps_ip = (ih264d_ctl_getstatus_ip_t *)pv_api_ip; + ps_op = (ih264d_ctl_getstatus_op_t *)pv_api_op; + if(ps_ip->s_ivd_ctl_getstatus_ip_t.u4_size + != sizeof(ih264d_ctl_getstatus_ip_t)) + { + ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + if(ps_op->s_ivd_ctl_getstatus_op_t.u4_size + != sizeof(ih264d_ctl_getstatus_op_t)) + { + ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + + case IVD_CMD_CTL_GETBUFINFO: + { + ih264d_ctl_getbufinfo_ip_t *ps_ip; + ih264d_ctl_getbufinfo_op_t *ps_op; + ps_ip = (ih264d_ctl_getbufinfo_ip_t *)pv_api_ip; + ps_op = (ih264d_ctl_getbufinfo_op_t *)pv_api_op; + + 
if(ps_ip->s_ivd_ctl_getbufinfo_ip_t.u4_size + != sizeof(ih264d_ctl_getbufinfo_ip_t)) + { + ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + if(ps_op->s_ivd_ctl_getbufinfo_op_t.u4_size + != sizeof(ih264d_ctl_getbufinfo_op_t)) + { + ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + + case IVD_CMD_CTL_GETVERSION: + { + ih264d_ctl_getversioninfo_ip_t *ps_ip; + ih264d_ctl_getversioninfo_op_t *ps_op; + ps_ip = (ih264d_ctl_getversioninfo_ip_t *)pv_api_ip; + ps_op = (ih264d_ctl_getversioninfo_op_t *)pv_api_op; + if(ps_ip->s_ivd_ctl_getversioninfo_ip_t.u4_size + != sizeof(ih264d_ctl_getversioninfo_ip_t)) + { + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + if(ps_op->s_ivd_ctl_getversioninfo_op_t.u4_size + != sizeof(ih264d_ctl_getversioninfo_op_t)) + { + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + + case IVD_CMD_CTL_FLUSH: + { + ih264d_ctl_flush_ip_t *ps_ip; + ih264d_ctl_flush_op_t *ps_op; + ps_ip = (ih264d_ctl_flush_ip_t *)pv_api_ip; + ps_op = (ih264d_ctl_flush_op_t *)pv_api_op; + if(ps_ip->s_ivd_ctl_flush_ip_t.u4_size + != sizeof(ih264d_ctl_flush_ip_t)) + { + ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + if(ps_op->s_ivd_ctl_flush_op_t.u4_size + != sizeof(ih264d_ctl_flush_op_t)) + { + ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= 1 + << 
IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + + case IVD_CMD_CTL_RESET: + { + ih264d_ctl_reset_ip_t *ps_ip; + ih264d_ctl_reset_op_t *ps_op; + ps_ip = (ih264d_ctl_reset_ip_t *)pv_api_ip; + ps_op = (ih264d_ctl_reset_op_t *)pv_api_op; + if(ps_ip->s_ivd_ctl_reset_ip_t.u4_size + != sizeof(ih264d_ctl_reset_ip_t)) + { + ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + if(ps_op->s_ivd_ctl_reset_op_t.u4_size + != sizeof(ih264d_ctl_reset_op_t)) + { + ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= 1 + << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + + case IH264D_CMD_CTL_DEGRADE: + { + ih264d_ctl_degrade_ip_t *ps_ip; + ih264d_ctl_degrade_op_t *ps_op; + + ps_ip = (ih264d_ctl_degrade_ip_t *)pv_api_ip; + ps_op = (ih264d_ctl_degrade_op_t *)pv_api_op; + + if(ps_ip->u4_size != sizeof(ih264d_ctl_degrade_ip_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->u4_size != sizeof(ih264d_ctl_degrade_op_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if((ps_ip->i4_degrade_pics < 0) + || (ps_ip->i4_degrade_pics > 4) + || (ps_ip->i4_nondegrade_interval < 0) + || (ps_ip->i4_degrade_type < 0) + || (ps_ip->i4_degrade_type > 15)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + return IV_FAIL; + } + + break; + } + + case IH264D_CMD_CTL_GET_BUFFER_DIMENSIONS: + { + ih264d_ctl_get_frame_dimensions_ip_t *ps_ip; + ih264d_ctl_get_frame_dimensions_op_t *ps_op; + + ps_ip = (ih264d_ctl_get_frame_dimensions_ip_t *)pv_api_ip; + ps_op = (ih264d_ctl_get_frame_dimensions_op_t 
*)pv_api_op; + + if(ps_ip->u4_size + != sizeof(ih264d_ctl_get_frame_dimensions_ip_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->u4_size + != sizeof(ih264d_ctl_get_frame_dimensions_op_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + break; + } + + case IH264D_CMD_CTL_SET_NUM_CORES: + { + ih264d_ctl_set_num_cores_ip_t *ps_ip; + ih264d_ctl_set_num_cores_op_t *ps_op; + + ps_ip = (ih264d_ctl_set_num_cores_ip_t *)pv_api_ip; + ps_op = (ih264d_ctl_set_num_cores_op_t *)pv_api_op; + + if(ps_ip->u4_size != sizeof(ih264d_ctl_set_num_cores_ip_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->u4_size != sizeof(ih264d_ctl_set_num_cores_op_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if((ps_ip->u4_num_cores != 1) && (ps_ip->u4_num_cores != 2) + && (ps_ip->u4_num_cores != 3) + && (ps_ip->u4_num_cores != 4)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + return IV_FAIL; + } + break; + } + case IH264D_CMD_CTL_SET_PROCESSOR: + { + ih264d_ctl_set_processor_ip_t *ps_ip; + ih264d_ctl_set_processor_op_t *ps_op; + + ps_ip = (ih264d_ctl_set_processor_ip_t *)pv_api_ip; + ps_op = (ih264d_ctl_set_processor_op_t *)pv_api_op; + + if(ps_ip->u4_size != sizeof(ih264d_ctl_set_processor_ip_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->u4_size != sizeof(ih264d_ctl_set_processor_op_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + break; + } + default: + 
*(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_UNSUPPORTED_API_CMD; + return IV_FAIL; + break; + } + } + break; + } + + return IV_SUCCESS; +} + + +/** + ******************************************************************************* + * + * @brief + * Sets Processor type + * + * @par Description: + * Copies u4_arch and u4_soc from the input structure into the + * e_processor_arch and e_processor_soc fields of the decoder context, then + * calls ih264d_init_function_ptr() on that context + * + * @param[in] dec_hdl + * Pointer to codec object at API level; its pv_codec_handle member is used + * as the decoder context + * + * @param[in] pv_api_ip + * Pointer to input argument structure, read as ih264d_ctl_set_processor_ip_t + * + * @param[out] pv_api_op + * Pointer to output argument structure, written as + * ih264d_ctl_set_processor_op_t; its u4_error_code is set to 0 + * + * @returns IV_SUCCESS; this function has no failure path + * + * @remarks + * None of the three arguments is validated inside this function + * + ******************************************************************************* + */ + +WORD32 ih264d_set_processor(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op) +{ + ih264d_ctl_set_processor_ip_t *ps_ip; + ih264d_ctl_set_processor_op_t *ps_op; + dec_struct_t *ps_codec = (dec_struct_t *)dec_hdl->pv_codec_handle; + + ps_ip = (ih264d_ctl_set_processor_ip_t *)pv_api_ip; + ps_op = (ih264d_ctl_set_processor_op_t *)pv_api_op; + + ps_codec->e_processor_arch = (IVD_ARCH_T)ps_ip->u4_arch; + ps_codec->e_processor_soc = (IVD_SOC_T)ps_ip->u4_soc; + + ih264d_init_function_ptr(ps_codec); /* called only after arch/soc are stored; presumably re-selects function pointers for the new processor settings - TODO confirm in ih264d_init_function_ptr */ + + ps_op->u4_error_code = 0; + return IV_SUCCESS; +} +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_get_num_rec */ +/* */ +/* Description : returns number of mem records required */ +/* */ +/* Inputs : pv_api_ip input api structure (only passed to UNUSED()) */ +/* : pv_api_op output api structure */ +/* Outputs : u4_num_mem_rec of pv_api_op is set to MEM_REC_CNT */ +/* */ +/* Returns : IV_SUCCESS */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_get_num_rec(void *pv_api_ip, void *pv_api_op) +{ + iv_num_mem_rec_ip_t *ps_mem_q_ip; + iv_num_mem_rec_op_t *ps_mem_q_op; +
ps_mem_q_ip = (iv_num_mem_rec_ip_t *)pv_api_ip; + ps_mem_q_op = (iv_num_mem_rec_op_t *)pv_api_op; + UNUSED(ps_mem_q_ip); /* the input structure is not consulted */ + ps_mem_q_op->u4_num_mem_rec = MEM_REC_CNT; /* the count reported is the compile-time constant */ + + return IV_SUCCESS; + +} + + +/************************************************************************** + * \if Function name : ih264d_init_decoder \endif + * + * + * \brief + * Initializes the decoder: resets fields of the decoder context to their + * starting values, assigns the deblocking, CAVLC and Bs function pointers, + * calls ih264d_init_ref_bufs(), ih264d_init_arch() and + * ih264d_init_function_ptr(), and finally sets init_done and process_called + * to 1. + * + * \param ps_dec_params : Pointer to the decoder context; it is used as a + * dec_struct_t pointer. The members ps_cur_slice, ps_pps, ps_sps, + * ps_dpb_mgr, ps_sei, ps_dec_err_status and ps_bitstrm are dereferenced + * here, so they must already point to valid memory before the call. + * + * \return + * None (the function is declared void) + * + ************************************************************************** + */ +void ih264d_init_decoder(void * ps_dec_params) +{ + dec_struct_t * ps_dec = (dec_struct_t *)ps_dec_params; + dec_slice_params_t *ps_cur_slice; + pocstruct_t *ps_prev_poc, *ps_cur_poc; + + + + /* Set pic_parameter_set_id to -1 -- NOTE(review): no statement in this function does this; the comment looks stale */ + + + + ps_cur_slice = ps_dec->ps_cur_slice; + ps_dec->init_done = 0; /* set to 1 again as the last step of this function */ + + ps_dec->u4_num_cores = 1; + + ps_dec->u2_pic_ht = ps_dec->u2_pic_wd = 0; + + ps_dec->u1_separate_parse = DEFAULT_SEPARATE_PARSE; + ps_dec->u4_app_disable_deblk_frm = 0; + ps_dec->i4_degrade_type = 0; + ps_dec->i4_degrade_pics = 0; + + ps_dec->i4_app_skip_mode = IVD_SKIP_NONE; + ps_dec->i4_dec_skip_mode = IVD_SKIP_NONE; + + /* Zero MAX_NUM_PIC_PARAMS picture parameter entries and MAX_NUM_SEQ_PARAMS sequence parameter entries; assumes ps_pps / ps_sps were sized for that many entries - TODO confirm at the allocation site */ memset(ps_dec->ps_pps, 0, + ((sizeof(dec_pic_params_t)) * MAX_NUM_PIC_PARAMS)); + memset(ps_dec->ps_sps, 0, + ((sizeof(dec_seq_params_t)) * MAX_NUM_SEQ_PARAMS)); + + /* Deblocking function pointers: index 0 gets the non-MBAFF routine, index 1 the MBAFF routine */ + + ps_dec->p_DeblockPicture[0] =
ih264d_deblock_picture_non_mbaff; + ps_dec->p_DeblockPicture[1] = ih264d_deblock_picture_mbaff; + + ps_dec->s_cab_dec_env.pv_codec_handle = ps_dec; + + ps_dec->u4_num_fld_in_frm = 0; + + ps_dec->ps_dpb_mgr->pv_codec_handle = ps_dec; + + /* Initialize the sei validity u4_flag with zero indicating sei is not valid */ + ps_dec->ps_sei->u1_is_valid = 0; + + /* decParams Initializations */ + ps_dec->ps_cur_pps = NULL; + ps_dec->ps_cur_sps = NULL; + ps_dec->u1_init_dec_flag = 0; + ps_dec->u1_first_nal_in_pic = 1; + ps_dec->u1_first_pb_nal_in_pic = 1; + ps_dec->u1_last_pic_not_decoded = 0; + ps_dec->u4_app_disp_width = 0; + ps_dec->i4_header_decoded = 0; + ps_dec->u4_total_frames_decoded = 0; + + ps_dec->i4_error_code = 0; + ps_dec->i4_content_type = -1; /* NOTE(review): the same field is assigned -1 a second time further down */ + ps_dec->ps_cur_slice->u1_mbaff_frame_flag = 0; + + ps_dec->ps_dec_err_status->u1_err_flag = ACCEPT_ALL_PICS; //REJECT_PB_PICS; + ps_dec->ps_dec_err_status->u1_cur_pic_type = PIC_TYPE_UNKNOWN; + ps_dec->ps_dec_err_status->u4_frm_sei_sync = SYNC_FRM_DEFAULT; + ps_dec->ps_dec_err_status->u4_cur_frm = INIT_FRAME; + ps_dec->ps_dec_err_status->u1_pic_aud_i = PIC_TYPE_UNKNOWN; + + ps_dec->u1_pr_sl_type = 0xFF; + ps_dec->u2_mbx = 0xffff; + ps_dec->u2_mby = 0; + ps_dec->u2_total_mbs_coded = 0; + ps_cur_slice->u1_end_of_frame_signal = 0; + + /* POC initializations: the listed fields are zeroed in both the previous-picture and the current-picture POC structures */ + ps_prev_poc = &ps_dec->s_prev_pic_poc; + ps_cur_poc = &ps_dec->s_cur_pic_poc; + ps_prev_poc->i4_pic_order_cnt_lsb = ps_cur_poc->i4_pic_order_cnt_lsb = 0; + ps_prev_poc->i4_pic_order_cnt_msb = ps_cur_poc->i4_pic_order_cnt_msb = 0; + ps_prev_poc->i4_delta_pic_order_cnt_bottom = + ps_cur_poc->i4_delta_pic_order_cnt_bottom = 0; + ps_prev_poc->i4_delta_pic_order_cnt[0] = + ps_cur_poc->i4_delta_pic_order_cnt[0] = 0; + ps_prev_poc->i4_delta_pic_order_cnt[1] = + ps_cur_poc->i4_delta_pic_order_cnt[1] = 0; + ps_prev_poc->u1_mmco_equalto5 = ps_cur_poc->u1_mmco_equalto5 = 0; + ps_prev_poc->i4_top_field_order_count = ps_cur_poc->i4_top_field_order_count = + 0; +
ps_prev_poc->i4_bottom_field_order_count = + ps_cur_poc->i4_bottom_field_order_count = 0; + ps_prev_poc->u1_bot_field = ps_cur_poc->u1_bot_field = 0; + ps_prev_poc->u1_mmco_equalto5 = ps_cur_poc->u1_mmco_equalto5 = 0; /* NOTE(review): repeats the identical assignment made a few statements earlier */ + ps_prev_poc->i4_prev_frame_num_ofst = ps_cur_poc->i4_prev_frame_num_ofst = 0; + ps_cur_slice->u1_mmco_equalto5 = 0; + ps_cur_slice->u2_frame_num = 0; + + ps_dec->i4_max_poc = 0; + ps_dec->i4_prev_max_display_seq = 0; + ps_dec->u1_recon_mb_grp = 4; + + /* Field PIC initializations */ + ps_dec->u1_second_field = 0; + ps_dec->s_prev_seq_params.u1_eoseq_pending = 0; + + /* Set the cropping parameters as zero */ + ps_dec->u2_crop_offset_y = 0; + ps_dec->u2_crop_offset_uv = 0; + + /* The Initial Frame Rate Info is not Present */ + ps_dec->i4_vui_frame_rate = -1; + ps_dec->i4_pic_type = -1; + ps_dec->i4_frametype = -1; + ps_dec->i4_content_type = -1; /* NOTE(review): already set to -1 earlier in this function */ + + ps_dec->u1_res_changed = 0; + + + ps_dec->u1_frame_decoded_flag = 0; + + /* Set the default frame seek mask mode */ + ps_dec->u4_skip_frm_mask = SKIP_NONE; + + /********************************************************/ + /* Initialize CAVLC residual decoding function pointers */ + /********************************************************/ + ps_dec->pf_cavlc_4x4res_block[0] = ih264d_cavlc_4x4res_block_totalcoeff_1; + ps_dec->pf_cavlc_4x4res_block[1] = + ih264d_cavlc_4x4res_block_totalcoeff_2to10; + ps_dec->pf_cavlc_4x4res_block[2] = + ih264d_cavlc_4x4res_block_totalcoeff_11to16; + + ps_dec->pf_cavlc_parse4x4coeff[0] = ih264d_cavlc_parse4x4coeff_n0to7; + ps_dec->pf_cavlc_parse4x4coeff[1] = ih264d_cavlc_parse4x4coeff_n8; + + ps_dec->pf_cavlc_parse_8x8block[0] = + ih264d_cavlc_parse_8x8block_none_available; + ps_dec->pf_cavlc_parse_8x8block[1] = + ih264d_cavlc_parse_8x8block_left_available; + ps_dec->pf_cavlc_parse_8x8block[2] = + ih264d_cavlc_parse_8x8block_top_available; + ps_dec->pf_cavlc_parse_8x8block[3] = + ih264d_cavlc_parse_8x8block_both_available; +
/***************************************************************************/ + /* Initialize Bs calculation function pointers for P and B, 16x16/non16x16 */ + /***************************************************************************/ + ps_dec->pf_fill_bs1[0][0] = ih264d_fill_bs1_16x16mb_pslice; + ps_dec->pf_fill_bs1[0][1] = ih264d_fill_bs1_non16x16mb_pslice; + + ps_dec->pf_fill_bs1[1][0] = ih264d_fill_bs1_16x16mb_bslice; + ps_dec->pf_fill_bs1[1][1] = ih264d_fill_bs1_non16x16mb_bslice; + + ps_dec->pf_fill_bs_xtra_left_edge[0] = + ih264d_fill_bs_xtra_left_edge_cur_frm; + ps_dec->pf_fill_bs_xtra_left_edge[1] = + ih264d_fill_bs_xtra_left_edge_cur_fld; + + /* Initialize Reference Pic Buffers */ + ih264d_init_ref_bufs(ps_dec->ps_dpb_mgr); + +#if VERT_SCALE_UP_AND_422 + ps_dec->u1_vert_up_scale_flag = 1; +#else + ps_dec->u1_vert_up_scale_flag = 0; +#endif + + ps_dec->u2_prv_frame_num = 0; + ps_dec->u1_top_bottom_decoded = 0; + ps_dec->u1_dangling_field = 0; + + ps_dec->s_cab_dec_env.cabac_table = gau4_ih264d_cabac_table; + + /* Point the "left" context pointers at storage inside the decoder context itself */ ps_dec->pu1_left_mv_ctxt_inc = ps_dec->u1_left_mv_ctxt_inc_arr[0]; + ps_dec->pi1_left_ref_idx_ctxt_inc = + &ps_dec->i1_left_ref_idx_ctx_inc_arr[0][0]; + ps_dec->pu1_left_yuv_dc_csbp = &ps_dec->u1_yuv_dc_csbp_topmb; + + /* !
*/ + /* Initializing flush frame u4_flag */ + ps_dec->u1_flushfrm = 0; + + /* Store the decoder context as pv_codec_handle in the CABAC environment, bitstream, current slice and DPB manager. NOTE(review): the CABAC environment and DPB manager handles were already assigned earlier in this function */ { + ps_dec->s_cab_dec_env.pv_codec_handle = (void*)ps_dec; + ps_dec->ps_bitstrm->pv_codec_handle = (void*)ps_dec; + ps_dec->ps_cur_slice->pv_codec_handle = (void*)ps_dec; + ps_dec->ps_dpb_mgr->pv_codec_handle = (void*)ps_dec; + } + + /* Clear MAX_DISP_BUFS_NEW entries of each display-buffer bookkeeping array */ memset(ps_dec->disp_bufs, 0, (MAX_DISP_BUFS_NEW) * sizeof(disp_buf_t)); + memset(ps_dec->u4_disp_buf_mapping, 0, + (MAX_DISP_BUFS_NEW) * sizeof(UWORD32)); + memset(ps_dec->u4_disp_buf_to_be_freed, 0, + (MAX_DISP_BUFS_NEW) * sizeof(UWORD32)); + + ih264d_init_arch(ps_dec); /* presumably fills in the processor architecture before the pointers are chosen - TODO confirm in ih264d_init_arch */ + ih264d_init_function_ptr(ps_dec); + + ps_dec->init_done = 1; /* initialization finished */ + ps_dec->process_called = 1; +} + +/************************************************************************** + * \if Function name : ih264d_init_video_decoder \endif + * + * \brief + * Wrapper for the decoder init + * + * \param p_NALBufAPI: Pointer to NAL Buffer API. + * \param ih264d_dec_mem_manager :Pointer to the function that will be called by decoder + * for memory allocation and freeing.
+ * + * \return + * pointer to the decparams + * + ************************************************************************** + */ + +WORD32 ih264d_init_video_decoder(iv_obj_t *dec_hdl, + ih264d_init_ip_t *ps_init_ip, + ih264d_init_op_t *ps_init_op) +{ + dec_struct_t * ps_dec; + iv_mem_rec_t *memtab; + UWORD8 *pu1_extra_mem_base,*pu1_mem_base; + + memtab = ps_init_ip->s_ivd_init_ip_t.pv_mem_rec_location; + + dec_hdl->pv_codec_handle = memtab[MEM_REC_CODEC].pv_base; + ps_dec = dec_hdl->pv_codec_handle; + + memset(ps_dec, 0, sizeof(dec_struct_t)); + + if(ps_init_ip->s_ivd_init_ip_t.u4_size + > offsetof(ih264d_init_ip_t, i4_level)) + { + ps_dec->u4_level_at_init = ps_init_ip->i4_level; + } + else + { + ps_dec->u4_level_at_init = H264_LEVEL_3_1; + } + + if(ps_init_ip->s_ivd_init_ip_t.u4_size + > offsetof(ih264d_init_ip_t, u4_num_ref_frames)) + { + ps_dec->u4_num_ref_frames_at_init = ps_init_ip->u4_num_ref_frames; + } + else + { + ps_dec->u4_num_ref_frames_at_init = H264_MAX_REF_PICS; + } + + if(ps_init_ip->s_ivd_init_ip_t.u4_size + > offsetof(ih264d_init_ip_t, u4_num_reorder_frames)) + { + ps_dec->u4_num_reorder_frames_at_init = + ps_init_ip->u4_num_reorder_frames; + } + else + { + ps_dec->u4_num_reorder_frames_at_init = H264_MAX_REF_PICS; + } + + if(ps_init_ip->s_ivd_init_ip_t.u4_size + > offsetof(ih264d_init_ip_t, u4_num_extra_disp_buf)) + { + ps_dec->u4_num_extra_disp_bufs_at_init = + ps_init_ip->u4_num_extra_disp_buf; + } + else + { + ps_dec->u4_num_extra_disp_bufs_at_init = 0; + } + + if(ps_init_ip->s_ivd_init_ip_t.u4_size + > offsetof(ih264d_init_ip_t, u4_share_disp_buf)) + { +#ifndef LOGO_EN + ps_dec->u4_share_disp_buf = ps_init_ip->u4_share_disp_buf; +#else + ps_dec->u4_share_disp_buf = 0; +#endif + } + else + { + ps_dec->u4_share_disp_buf = 0; + } + + if((ps_init_ip->s_ivd_init_ip_t.e_output_format != IV_YUV_420P) + && (ps_init_ip->s_ivd_init_ip_t.e_output_format + != IV_YUV_420SP_UV) + && (ps_init_ip->s_ivd_init_ip_t.e_output_format + != IV_YUV_420SP_VU)) + { 
+ ps_dec->u4_share_disp_buf = 0; + } + + if((ps_dec->u4_level_at_init < MIN_LEVEL_SUPPORTED) + || (ps_dec->u4_level_at_init > MAX_LEVEL_SUPPORTED)) + { + ps_init_op->s_ivd_init_op_t.u4_error_code |= ERROR_LEVEL_UNSUPPORTED; + return (IV_FAIL); + } + + if(ps_dec->u4_num_ref_frames_at_init > H264_MAX_REF_PICS) + { + ps_init_op->s_ivd_init_op_t.u4_error_code |= ERROR_NUM_REF; + ps_dec->u4_num_ref_frames_at_init = H264_MAX_REF_PICS; + } + + if(ps_dec->u4_num_reorder_frames_at_init > H264_MAX_REF_PICS) + { + ps_init_op->s_ivd_init_op_t.u4_error_code |= ERROR_NUM_REF; + ps_dec->u4_num_reorder_frames_at_init = H264_MAX_REF_PICS; + } + + if(ps_dec->u4_num_extra_disp_bufs_at_init > H264_MAX_REF_PICS) + { + ps_init_op->s_ivd_init_op_t.u4_error_code |= ERROR_NUM_REF; + ps_dec->u4_num_extra_disp_bufs_at_init = 0; + } + + if(0 == ps_dec->u4_share_disp_buf) + ps_dec->u4_num_extra_disp_bufs_at_init = 0; + + ps_dec->u4_num_disp_bufs_requested = 1; + + ps_dec->u4_width_at_init = ps_init_ip->s_ivd_init_ip_t.u4_frm_max_wd; + ps_dec->u4_height_at_init = ps_init_ip->s_ivd_init_ip_t.u4_frm_max_ht; + + ps_dec->u4_width_at_init = ALIGN16(ps_dec->u4_width_at_init); + ps_dec->u4_height_at_init = ALIGN16(ps_dec->u4_height_at_init); + + ps_dec->pv_dec_thread_handle = memtab[MEM_REC_THREAD_HANDLE].pv_base; + + pu1_mem_base = memtab[MEM_REC_THREAD_HANDLE].pv_base; + ps_dec->pv_bs_deblk_thread_handle = pu1_mem_base + + ithread_get_handle_size(); + + ps_dec->u4_extra_mem_used = 0; + + pu1_extra_mem_base = memtab[MEM_REC_EXTRA_MEM].pv_base; + + ps_dec->ps_dec_err_status = (dec_err_status_t *)(pu1_extra_mem_base + ps_dec->u4_extra_mem_used); + ps_dec->u4_extra_mem_used += (((sizeof(dec_err_status_t) + 127) >> 7) << 7); + + ps_dec->ps_mem_tab = memtab[MEM_REC_BACKUP].pv_base; + + memcpy(ps_dec->ps_mem_tab, memtab, sizeof(iv_mem_rec_t) * MEM_REC_CNT); + + ps_dec->ps_pps = memtab[MEM_REC_PPS].pv_base; + + ps_dec->ps_sps = memtab[MEM_REC_SPS].pv_base; + + ps_dec->ps_sei = (sei *)(pu1_extra_mem_base + 
ps_dec->u4_extra_mem_used); + ps_dec->u4_extra_mem_used += sizeof(sei); + + ps_dec->ps_dpb_mgr = memtab[MEM_REC_DPB_MGR].pv_base; + + ps_dec->ps_dpb_cmds = (dpb_commands_t *)(pu1_extra_mem_base + ps_dec->u4_extra_mem_used); + ps_dec->u4_extra_mem_used += sizeof(dpb_commands_t); + + ps_dec->ps_bitstrm = (dec_bit_stream_t *)(pu1_extra_mem_base + ps_dec->u4_extra_mem_used); + ps_dec->u4_extra_mem_used += sizeof(dec_bit_stream_t); + + ps_dec->ps_cur_slice =(dec_slice_params_t *) (pu1_extra_mem_base + ps_dec->u4_extra_mem_used); + ps_dec->u4_extra_mem_used += sizeof(dec_slice_params_t); + + ps_dec->pv_scratch_sps_pps = (void *)(pu1_extra_mem_base + ps_dec->u4_extra_mem_used); + + + ps_dec->u4_extra_mem_used += MAX(sizeof(dec_seq_params_t), + sizeof(dec_pic_params_t)); + ps_dec->ps_pred_pkd = memtab[MEM_REC_PRED_INFO_PKD].pv_base; + + + ps_dec->ps_dpb_mgr->pv_codec_handle = ps_dec; + + ps_dec->pv_dec_out = (void *)ps_init_op; + ps_dec->pv_dec_in = (void *)ps_init_ip; + + ps_dec->u1_chroma_format = + (UWORD8)(ps_init_ip->s_ivd_init_ip_t.e_output_format); + + + + ih264d_init_decoder(ps_dec); + + return (IV_SUCCESS); + +} + + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_fill_num_mem_rec */ +/* */ +/* Description : fills memory records */ +/* */ +/* Inputs : pv_api_ip input api structure */ +/* : pv_api_op output api structure */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op) +{ + + ih264d_fill_mem_rec_ip_t *ps_mem_q_ip; + ih264d_fill_mem_rec_op_t *ps_mem_q_op; + WORD32 level; + UWORD32 num_reorder_frames; + UWORD32 num_ref_frames; + UWORD32 num_extra_disp_bufs; + UWORD32 u4_dpb_size_num_frames; + 
iv_mem_rec_t *memTab; + + UWORD32 chroma_format, u4_share_disp_buf; + UWORD32 u4_total_num_mbs; + UWORD32 luma_width, luma_width_in_mbs; + UWORD32 luma_height, luma_height_in_mbs; + UWORD32 max_dpb_size; + + ps_mem_q_ip = (ih264d_fill_mem_rec_ip_t *)pv_api_ip; + ps_mem_q_op = (ih264d_fill_mem_rec_op_t *)pv_api_op; + + if(ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_size + > offsetof(ih264d_fill_mem_rec_ip_t, i4_level)) + { + level = ps_mem_q_ip->i4_level; + } + else + { + level = H264_LEVEL_3_1; + } + + if(ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_size + > offsetof(ih264d_fill_mem_rec_ip_t, u4_num_reorder_frames)) + { + num_reorder_frames = ps_mem_q_ip->u4_num_reorder_frames; + } + else + { + num_reorder_frames = H264_MAX_REF_PICS; + } + + if(ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_size + > offsetof(ih264d_fill_mem_rec_ip_t, u4_num_ref_frames)) + { + num_ref_frames = ps_mem_q_ip->u4_num_ref_frames; + } + else + { + num_ref_frames = H264_MAX_REF_PICS; + } + + if(ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_size + > offsetof(ih264d_fill_mem_rec_ip_t, u4_num_extra_disp_buf)) + { + num_extra_disp_bufs = ps_mem_q_ip->u4_num_extra_disp_buf; + } + else + { + num_extra_disp_bufs = 0; + } + + if(ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_size + > offsetof(ih264d_fill_mem_rec_ip_t, u4_share_disp_buf)) + { +#ifndef LOGO_EN + u4_share_disp_buf = ps_mem_q_ip->u4_share_disp_buf; +#else + u4_share_disp_buf = 0; +#endif + } + else + { + u4_share_disp_buf = 0; + } + + if(ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_size + > offsetof(ih264d_fill_mem_rec_ip_t, e_output_format)) + { + chroma_format = ps_mem_q_ip->e_output_format; + } + else + { + chroma_format = -1; + } + + if((chroma_format != IV_YUV_420P) && (chroma_format != IV_YUV_420SP_UV) + && (chroma_format != IV_YUV_420SP_VU)) + { + u4_share_disp_buf = 0; + } + if(0 == u4_share_disp_buf) + num_extra_disp_bufs = 0; + + { + + luma_height = ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht; + luma_width = 
ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd; + + luma_height = ((luma_height + 15) >> 4) << 4; + luma_width = ((luma_width + 15) >> 4) << 4; + luma_width_in_mbs = luma_width >> 4; + luma_height_in_mbs = luma_height >> 4; + u4_total_num_mbs = (luma_height * luma_width) >> 8; + } + /* + * If level is lesser than 31 and the resolution required is higher, + * then make the level at least 31. + */ + if(u4_total_num_mbs > MAX_MBS_LEVEL_30 && level < H264_LEVEL_3_1) + { + level = H264_LEVEL_3_1; + } + + if((level < MIN_LEVEL_SUPPORTED) || (level > MAX_LEVEL_SUPPORTED)) + { + ps_mem_q_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= + ERROR_LEVEL_UNSUPPORTED; + return (IV_FAIL); + } + + if(num_ref_frames > H264_MAX_REF_PICS) + { + ps_mem_q_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= ERROR_NUM_REF; + num_ref_frames = H264_MAX_REF_PICS; + } + + if(num_reorder_frames > H264_MAX_REF_PICS) + { + ps_mem_q_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= ERROR_NUM_REF; + num_reorder_frames = H264_MAX_REF_PICS; + } + memTab = ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location; + + memTab[MEM_REC_IV_OBJ].u4_mem_size = sizeof(iv_obj_t); + memTab[MEM_REC_IV_OBJ].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_IV_OBJ].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + H264_DEC_DEBUG_PRINT("MEM_REC_IV_OBJ MEM Size = %d\n", + memTab[MEM_REC_IV_OBJ].u4_mem_size); + + memTab[MEM_REC_CODEC].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_CODEC].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_CODEC].u4_mem_size = sizeof(dec_struct_t); + + { + UWORD32 mvinfo_size, mv_info_size_pad; + UWORD32 MVbank, MVbank_pad; + UWORD32 Ysize; + UWORD32 UVsize; + UWORD32 one_frm_size; + + UWORD32 extra_mem = 0; + + UWORD32 pad_len_h, pad_len_v; + + /* + * For low_delay, use num_buf as 2 - + * num_buf = (num_buf_ref) + 1; + * where num_buf_ref is 1. 
+ */ + UWORD32 num_buf; + + { + UWORD32 num_bufs_app, num_bufs_level; + + num_bufs_app = num_ref_frames + num_reorder_frames + 1; + + if(num_bufs_app <= 1) + num_bufs_app = 2; + + num_bufs_level = ih264d_get_dpb_size_new(level, (luma_width >> 4), + (luma_height >> 4)); + + max_dpb_size = num_bufs_level; + + num_bufs_level = num_bufs_level * 2 + 1; + + num_buf = MIN(num_bufs_level, num_bufs_app); + + num_buf += num_extra_disp_bufs; + + } + + mvinfo_size = ((luma_width * (luma_height)) >> 4); + + mv_info_size_pad = ((luma_width * (PAD_MV_BANK_ROW)) >> 4); + + Ysize = ALIGN32((luma_width + (PAD_LEN_Y_H << 1))) + * (luma_height + (PAD_LEN_Y_V << 2)); + + + UVsize = Ysize >> 2; + if(u4_share_disp_buf == 1) + { + /* In case of buffers getting shared between application and library + there is no need of reference memtabs. Instead of setting the i4_size + to zero, it is reduced to a small i4_size to ensure that changes + in the code are minimal */ + + if((chroma_format == IV_YUV_420P) + || (chroma_format == IV_YUV_420SP_UV) + || (chroma_format == IV_YUV_420SP_VU)) + { + Ysize = 64; + } + if(chroma_format == IV_YUV_420SP_UV) + { + UVsize = 64; + } + } + + one_frm_size = (((Ysize + 127) >> 7) << 7) + + ((((UVsize << 1) + 127) >> 7) << 7); + + //Note that for ARM RVDS WS the sizeof(mv_pred_t) is 16 + + /*Add memory for colocated MB*/ + MVbank = sizeof(mv_pred_t) * mvinfo_size; + MVbank_pad = sizeof(mv_pred_t) * mv_info_size_pad; + + MVbank = (((MVbank + 127) >> 7) << 7); + + MVbank_pad = (((MVbank_pad + 127) >> 7) << 7); + + memTab[MEM_REC_MVBANK].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_MVBANK].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_MVBANK].u4_mem_size = (MVbank + MVbank_pad) + * (MIN(max_dpb_size, num_ref_frames) + 1); + + + memTab[MEM_REC_REF_PIC].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_REF_PIC].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_REF_PIC].u4_mem_size = one_frm_size * 
num_buf; + + } + + memTab[MEM_REC_DEBLK_MB_INFO].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_DEBLK_MB_INFO].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_DEBLK_MB_INFO].u4_mem_size = (((((u4_total_num_mbs + + MAX_MBS_IN_ROW) * sizeof(deblk_mb_t)) + 127) >> 7) << 7); + + memTab[MEM_REC_NEIGHBOR_INFO].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_NEIGHBOR_INFO].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_NEIGHBOR_INFO].u4_mem_size = sizeof(mb_neigbour_params_t) + * ((luma_width + 16) >> 4) * 2 * 2; + { + WORD32 size; + WORD32 num_entries; + + num_entries = MIN(MAX_FRAMES, num_ref_frames); + num_entries = 2 * ((2 * num_entries) + 1); + + size = num_entries * sizeof(void *); + size += PAD_MAP_IDX_POC * sizeof(void *); + size *= u4_total_num_mbs; + size += sizeof(dec_slice_struct_t) * u4_total_num_mbs; + memTab[MEM_REC_SLICE_HDR].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_SLICE_HDR].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_SLICE_HDR].u4_mem_size = size; + } + { + + UWORD32 u4_num_entries; + + u4_num_entries = u4_total_num_mbs; + + memTab[MEM_REC_MB_INFO].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_MB_INFO].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_MB_INFO].u4_mem_size = sizeof(dec_mb_info_t) + * u4_num_entries; + + memTab[MEM_REC_PRED_INFO].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_PRED_INFO].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + + memTab[MEM_REC_PRED_INFO].u4_mem_size = sizeof(pred_info_t) * 2*32; + + memTab[MEM_REC_COEFF_DATA].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_COEFF_DATA].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_COEFF_DATA].u4_mem_size = MB_LUM_SIZE * sizeof(WORD16); + memTab[MEM_REC_COEFF_DATA].u4_mem_size += u4_num_entries + * (MAX(16 * sizeof(tu_sblk4x4_coeff_data_t),4 * sizeof(tu_blk8x8_coeff_data_t)) + + 8 * 
sizeof(tu_sblk4x4_coeff_data_t)); + memTab[MEM_REC_COEFF_DATA].u4_mem_size += u4_num_entries * 32; //32 bytes for each mb to store u1_prev_intra4x4_pred_mode and u1_rem_intra4x4_pred_mode data + + } + + memTab[MEM_REC_SPS].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_SPS].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_SPS].u4_mem_size = ((sizeof(dec_seq_params_t)) + * MAX_NUM_SEQ_PARAMS); + + memTab[MEM_REC_PPS].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_PPS].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_PPS].u4_mem_size = (sizeof(dec_pic_params_t)) + * MAX_NUM_PIC_PARAMS; + + { + UWORD32 u4_mem_size; + + u4_mem_size = 0; + u4_mem_size += (((sizeof(dec_err_status_t) + 127) >> 7) << 7); + u4_mem_size += sizeof(sei); + u4_mem_size += sizeof(dpb_commands_t); + u4_mem_size += sizeof(dec_bit_stream_t); + u4_mem_size += sizeof(dec_slice_params_t); + u4_mem_size += MAX(sizeof(dec_seq_params_t), sizeof(dec_pic_params_t)); + + memTab[MEM_REC_EXTRA_MEM].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_EXTRA_MEM].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_EXTRA_MEM].u4_mem_size = u4_mem_size; + } + + { + + UWORD32 u4_mem_size; + + u4_mem_size = 0; + u4_mem_size += ((TOTAL_LIST_ENTRIES + PAD_MAP_IDX_POC) * sizeof(void *)); + u4_mem_size = ALIGN64(u4_mem_size); + u4_mem_size += (sizeof(bin_ctxt_model_t) * NUM_CABAC_CTXTS); + u4_mem_size = ALIGN64(u4_mem_size); + u4_mem_size += sizeof(ctxt_inc_mb_info_t); + u4_mem_size = ALIGN64(u4_mem_size); + u4_mem_size += sizeof(UWORD32) * (MAX_REF_BUFS * MAX_REF_BUFS); + u4_mem_size = ALIGN64(u4_mem_size); + + u4_mem_size += MAX_REF_BUF_SIZE; + u4_mem_size = ALIGN64(u4_mem_size); + u4_mem_size += ((sizeof(WORD16)) * PRED_BUFFER_WIDTH + * PRED_BUFFER_HEIGHT); + u4_mem_size = ALIGN64(u4_mem_size); + u4_mem_size += sizeof(UWORD8) * (MB_LUM_SIZE); + u4_mem_size = ALIGN64(u4_mem_size); + u4_mem_size += sizeof(parse_pmbarams_t) * 
luma_width_in_mbs; //Max recon mb group*/ + u4_mem_size = ALIGN64(u4_mem_size); + u4_mem_size += (sizeof(parse_part_params_t) * luma_width_in_mbs) << 4; //Max recon mb group*/ + u4_mem_size = ALIGN64(u4_mem_size); + + u4_mem_size += 2 * MAX_REF_BUFS * sizeof(struct pic_buffer_t); + u4_mem_size = ALIGN64(u4_mem_size); + u4_mem_size += 2 * MAX_REF_BUFS * sizeof(struct pic_buffer_t); + u4_mem_size = ALIGN64(u4_mem_size); + u4_mem_size += (sizeof(UWORD32) * 3 * (MAX_REF_BUFS * MAX_REF_BUFS)) << 3; + u4_mem_size = ALIGN64(u4_mem_size); + + u4_mem_size += sizeof(UWORD32) * 2 * 3 * (MAX_REF_BUFS * MAX_REF_BUFS); + u4_mem_size = ALIGN64(u4_mem_size); + + memTab[MEM_REC_INTERNAL_SCRATCH].u4_mem_alignment = + (128 * 8) / CHAR_BIT; + memTab[MEM_REC_INTERNAL_SCRATCH].e_mem_type = + IV_EXTERNAL_CACHEABLE_SCRATCH_MEM; + memTab[MEM_REC_INTERNAL_SCRATCH].u4_mem_size = u4_mem_size; + } + + { + + UWORD32 u4_mem_used; + UWORD32 u4_numRows = MB_SIZE << 1; + UWORD32 u4_blk_wd = ((luma_width_in_mbs << 4) >> 1) + 8; + + u4_mem_used = 0; + u4_mem_used += ((luma_width_in_mbs * sizeof(deblkmb_neighbour_t)) << 1); + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += (sizeof(neighbouradd_t) << 2); + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += ((sizeof(ctxt_inc_mb_info_t)) + * (((luma_width_in_mbs + 1) << 1) + 1)); + u4_mem_used = ALIGN64(u4_mem_used); + + u4_mem_used += (sizeof(mv_pred_t) * luma_width_in_mbs * 16); + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += (sizeof(mv_pred_t) * luma_width_in_mbs * 16); + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += (sizeof(mv_pred_t) * luma_width_in_mbs * 4 + * MV_SCRATCH_BUFS); + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd; + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd; + u4_mem_used = ALIGN64(u4_mem_used); + u4_numRows = BLK8x8SIZE << 1; + + u4_blk_wd = ((luma_width_in_mbs << 3) >> 1) + 8; + + u4_mem_used += sizeof(UWORD8) * 
u4_numRows * u4_blk_wd; + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd; + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd; + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd; + u4_mem_used += 32; + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += sizeof(UWORD8) * (luma_width + 16) * 2; + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += sizeof(UWORD8) * ((luma_width >> 1) + 16) * 2 + * YUV420SP_FACTOR; + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += sizeof(UWORD8) * ((luma_width >> 1) + 16) * 2; + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += sizeof(mb_neigbour_params_t) * (luma_width_in_mbs + 1) + * luma_height_in_mbs; + u4_mem_used += luma_width; + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += luma_width >> 1; + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += luma_width >> 1; + u4_mem_used = ALIGN64(u4_mem_used); + + u4_mem_used += ((MB_SIZE + 4) << 1) * PAD_LEN_Y_H; + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += ((BLK8x8SIZE + 2) << 1) * PAD_LEN_UV_H; + u4_mem_used = ALIGN64(u4_mem_used); + u4_mem_used += ((BLK8x8SIZE + 2) << 1) * PAD_LEN_UV_H; + u4_mem_used = ALIGN64(u4_mem_used); + memTab[MEM_REC_INTERNAL_PERSIST].u4_mem_alignment = + (128 * 8) / CHAR_BIT; + memTab[MEM_REC_INTERNAL_PERSIST].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_INTERNAL_PERSIST].u4_mem_size = u4_mem_used; + } + + memTab[MEM_REC_BITSBUF].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_BITSBUF].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_BITSBUF].u4_mem_size = MAX(256000, (luma_width * luma_height)); + + { + + UWORD32 u4_thread_struct_size = ithread_get_handle_size(); + + memTab[MEM_REC_THREAD_HANDLE].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_THREAD_HANDLE].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + 
memTab[MEM_REC_THREAD_HANDLE].u4_mem_size = u4_thread_struct_size * 2; + + } + + memTab[MEM_REC_PARSE_MAP].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_PARSE_MAP].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_PARSE_MAP].u4_mem_size = u4_total_num_mbs; + + memTab[MEM_REC_PROC_MAP].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_PROC_MAP].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_PROC_MAP].u4_mem_size = u4_total_num_mbs; + + memTab[MEM_REC_SLICE_NUM_MAP].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_SLICE_NUM_MAP].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_SLICE_NUM_MAP].u4_mem_size = u4_total_num_mbs + * sizeof(UWORD16); + + memTab[MEM_REC_DPB_MGR].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_DPB_MGR].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_DPB_MGR].u4_mem_size = sizeof(dpb_manager_t); + + memTab[MEM_REC_BACKUP].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_BACKUP].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_BACKUP].u4_mem_size = sizeof(iv_mem_rec_t) * MEM_REC_CNT; + + { + + UWORD32 u4_mem_size; + + u4_mem_size = sizeof(disp_mgr_t); + u4_mem_size += sizeof(buf_mgr_t) + ithread_get_mutex_lock_size(); + u4_mem_size += sizeof(struct pic_buffer_t) * (H264_MAX_REF_PICS * 2); + + memTab[MEM_REC_PIC_BUF_MGR].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_PIC_BUF_MGR].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_PIC_BUF_MGR].u4_mem_size = u4_mem_size; + } + + { + UWORD32 u4_mem_size; + + u4_mem_size = sizeof(buf_mgr_t) + ithread_get_mutex_lock_size(); + u4_mem_size += sizeof(col_mv_buf_t) * (H264_MAX_REF_PICS * 2); + u4_mem_size = ALIGN128(u4_mem_size); + u4_mem_size += ((luma_width * luma_height) >> 4) + * (MIN(max_dpb_size, num_ref_frames) + 1); + memTab[MEM_REC_MV_BUF_MGR].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_MV_BUF_MGR].e_mem_type 
= + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + memTab[MEM_REC_MV_BUF_MGR].u4_mem_size = u4_mem_size; + } + + memTab[MEM_REC_PRED_INFO_PKD].u4_mem_alignment = (128 * 8) / CHAR_BIT; + memTab[MEM_REC_PRED_INFO_PKD].e_mem_type = + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + + { + UWORD32 u4_num_entries; + u4_num_entries = u4_total_num_mbs; + + if(1 == num_ref_frames) + u4_num_entries *= 16; + else + u4_num_entries *= 16 * 2; + + memTab[MEM_REC_PRED_INFO_PKD].u4_mem_size = sizeof(pred_info_pkd_t) + * u4_num_entries; + } + + ps_mem_q_op->s_ivd_fill_mem_rec_op_t.u4_num_mem_rec_filled = MEM_REC_CNT; + + + return IV_SUCCESS; +} +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_clr */ +/* */ +/* Description : returns memory records to app */ +/* */ +/* Inputs :iv_obj_t decoder handle */ +/* :pv_api_ip pointer to input structure */ +/* :pv_api_op pointer to output structure */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_clr(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op) +{ + + dec_struct_t * ps_dec; + iv_retrieve_mem_rec_ip_t *dec_clr_ip; + iv_retrieve_mem_rec_op_t *dec_clr_op; + + dec_clr_ip = (iv_retrieve_mem_rec_ip_t *)pv_api_ip; + dec_clr_op = (iv_retrieve_mem_rec_op_t *)pv_api_op; + ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle); + + if(ps_dec->init_done != 1) + { + //return a proper Error Code + return IV_FAIL; + } + + ih264_buf_mgr_free((buf_mgr_t *)ps_dec->pv_pic_buf_mgr); + ih264_buf_mgr_free((buf_mgr_t *)ps_dec->pv_mv_buf_mgr); + + memcpy(dec_clr_ip->pv_mem_rec_location, ps_dec->ps_mem_tab, + MEM_REC_CNT * (sizeof(iv_mem_rec_t))); + dec_clr_op->u4_num_mem_rec_filled = MEM_REC_CNT; + + H264_DEC_DEBUG_PRINT("The clear non-conceal num mem recs: 
%d\n", + dec_clr_op->u4_num_mem_rec_filled); + + return IV_SUCCESS; + +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_init */ +/* */ +/* Description : initializes decoder */ +/* */ +/* Inputs :iv_obj_t decoder handle */ +/* :pv_api_ip pointer to input structure */ +/* :pv_api_op pointer to output structure */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_init(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op) +{ + ih264d_init_ip_t *ps_init_ip; + ih264d_init_op_t *ps_init_op; + ps_init_ip = (ih264d_init_ip_t *)pv_api_ip; + ps_init_op = (ih264d_init_op_t *)pv_api_op; + WORD32 init_status = IV_SUCCESS; + + init_status = ih264d_init_video_decoder(dec_hdl, ps_init_ip, ps_init_op); + + if(IV_SUCCESS != init_status) + { + return init_status; + } + + return init_status; +} +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_map_error */ +/* */ +/* Description : Maps error codes to IVD error groups */ +/* */ +/* Inputs : */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +UWORD32 ih264d_map_error(UWORD32 i4_err_status) +{ + UWORD32 temp = 0; + + switch(i4_err_status) + { + case ERROR_MEM_ALLOC_ISRAM_T: + case ERROR_MEM_ALLOC_SDRAM_T: + case ERROR_BUF_MGR: + case ERROR_MB_GROUP_ASSGN_T: + case ERROR_FRAME_LIMIT_OVER: + case ERROR_ACTUAL_RESOLUTION_GREATER_THAN_INIT: + case 
ERROR_PROFILE_NOT_SUPPORTED: + case ERROR_INIT_NOT_DONE: + temp = 1 << IVD_FATALERROR; + H264_DEC_DEBUG_PRINT("\nFatal Error\n"); + break; + + case ERROR_DBP_MANAGER_T: + case ERROR_GAPS_IN_FRM_NUM: + case ERROR_UNKNOWN_NAL: + case ERROR_INV_MB_SLC_GRP_T: + case ERROR_MULTIPLE_SLC_GRP_T: + case ERROR_UNKNOWN_LEVEL: + case ERROR_UNAVAIL_PICBUF_T: + case ERROR_UNAVAIL_MVBUF_T: + case ERROR_UNAVAIL_DISPBUF_T: + case ERROR_NUM_REF: + case ERROR_REFIDX_ORDER_T: + case ERROR_PIC0_NOT_FOUND_T: + case ERROR_MB_TYPE: + case ERROR_SUB_MB_TYPE: + case ERROR_CBP: + case ERROR_REF_IDX: + case ERROR_NUM_MV: + case ERROR_CHROMA_PRED_MODE: + case ERROR_INTRAPRED: + case ERROR_NEXT_MB_ADDRESS_T: + case ERROR_MB_ADDRESS_T: + case ERROR_PIC1_NOT_FOUND_T: + case ERROR_CAVLC_NUM_COEFF_T: + case ERROR_CAVLC_SCAN_POS_T: + case ERROR_PRED_WEIGHT_TABLE_T: + case ERROR_CORRUPTED_SLICE: + temp = 1 << IVD_CORRUPTEDDATA; + break; + + case ERROR_NOT_SUPP_RESOLUTION: + case ERROR_FEATURE_UNAVAIL: + case ERROR_ACTUAL_LEVEL_GREATER_THAN_INIT: + temp = 1 << IVD_UNSUPPORTEDINPUT; + break; + + case ERROR_INVALID_PIC_PARAM: + case ERROR_INVALID_SEQ_PARAM: + case ERROR_EGC_EXCEED_32_1_T: + case ERROR_EGC_EXCEED_32_2_T: + case ERROR_INV_RANGE_TEV_T: + case ERROR_INV_SLC_TYPE_T: + case ERROR_INV_POC_TYPE_T: + case ERROR_INV_RANGE_QP_T: + case ERROR_INV_SPS_PPS_T: + case ERROR_INV_SLICE_HDR_T: + temp = 1 << IVD_CORRUPTEDHEADER; + break; + + case ERROR_EOB_FLUSHBITS_T: + case ERROR_EOB_GETBITS_T: + case ERROR_EOB_GETBIT_T: + case ERROR_EOB_BYPASS_T: + case ERROR_EOB_DECISION_T: + case ERROR_EOB_TERMINATE_T: + case ERROR_EOB_READCOEFF4X4CAB_T: + temp = 1 << IVD_INSUFFICIENTDATA; + break; + case ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED: + case ERROR_DISP_WIDTH_RESET_TO_PIC_WIDTH: + temp = 1 << IVD_UNSUPPORTEDPARAM | 1 << IVD_FATALERROR; + break; + + case ERROR_DANGLING_FIELD_IN_PIC: + temp = 1 << IVD_APPLIEDCONCEALMENT; + break; + + } + + return temp; + +} + 
+/*****************************************************************************/ +/* */ +/* Function Name : ih264d_video_decode */ +/* */ +/* Description : handle video decode API command */ +/* */ +/* Inputs :iv_obj_t decoder handle */ +/* :pv_api_ip pointer to input structure */ +/* :pv_api_op pointer to output structure */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ + +WORD32 ih264d_video_decode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op) +{ + /* ! */ + + dec_struct_t * ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle); + + WORD32 i4_err_status = 0; + UWORD8 *pu1_buf = NULL; + WORD32 buflen; + UWORD32 u4_max_ofst, u4_length_of_start_code = 0; + + UWORD32 bytes_consumed = 0; + UWORD32 cur_slice_is_nonref = 0; + UWORD32 u4_next_is_aud; + UWORD32 u4_first_start_code_found = 0; + WORD32 ret; + WORD32 header_data_left = 0,frame_data_left = 0; + UWORD8 *pu1_bitstrm_buf; + ithread_set_name((void*)"Parse_thread"); + + + ivd_video_decode_ip_t *ps_dec_ip; + ivd_video_decode_op_t *ps_dec_op; + ps_dec_ip = (ivd_video_decode_ip_t *)pv_api_ip; + ps_dec_op = (ivd_video_decode_op_t *)pv_api_op; + ps_dec->pv_dec_out = ps_dec_op; + ps_dec->process_called = 1; + ps_dec->u2_mb_skip_error = 0; + if(ps_dec->init_done != 1) + { + return IV_FAIL; + } + + /*Data memory barries instruction,so that bitstream write by the application is complete*/ + DATA_SYNC(); + + if(0 == ps_dec->u1_flushfrm) + { + if(ps_dec_ip->pv_stream_buffer == NULL) + { + ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_dec_op->u4_error_code |= IVD_DEC_FRM_BS_BUF_NULL; + return IV_FAIL; + } + if(ps_dec_ip->u4_num_Bytes <= 0) + { + ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_dec_op->u4_error_code |= IVD_DEC_NUMBYTES_INV; + return IV_FAIL; 
+ + } + } + ps_dec->u1_pic_decode_done = 0; + + ps_dec_op->u4_num_bytes_consumed = 0; + + ps_dec->ps_out_buffer = NULL; + + if(ps_dec_ip->u4_size + >= offsetof(ivd_video_decode_ip_t, s_out_buffer)) + ps_dec->ps_out_buffer = &ps_dec_ip->s_out_buffer; + + if(ps_dec_op->u4_size + >= offsetof(ivd_video_decode_op_t, u4_disp_buf_id) + && ps_dec->ps_out_buffer != NULL) + ps_dec->u4_fmt_conv_in_process = 1; + else + ps_dec->u4_fmt_conv_in_process = 0; + + ps_dec->u4_fmt_conv_cur_row = 0; + + ps_dec->u4_output_present = 0; + ps_dec->s_disp_op.u4_error_code = 1; + ps_dec->u4_fmt_conv_num_rows = FMT_CONV_NUM_ROWS; + ps_dec->u4_stop_threads = 0; + if(ps_dec->u4_fmt_conv_in_process && 0 == ps_dec->u4_share_disp_buf + && ps_dec->i4_decode_header == 0) + { + UWORD32 i; + if(ps_dec->ps_out_buffer->u4_num_bufs == 0) + { + ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_dec_op->u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUFS; + return IV_FAIL; + } + + for(i = 0; i < ps_dec->ps_out_buffer->u4_num_bufs; i++) + { + if(ps_dec->ps_out_buffer->pu1_bufs[i] == NULL) + { + ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_dec_op->u4_error_code |= IVD_DISP_FRM_OP_BUF_NULL; + return IV_FAIL; + } + + if(ps_dec->ps_out_buffer->u4_min_out_buf_size[i] == 0) + { + ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_dec_op->u4_error_code |= + IVD_DISP_FRM_ZERO_OP_BUF_SIZE; + return IV_FAIL; + } + } + } + + if(ps_dec->u4_total_frames_decoded >= NUM_FRAMES_LIMIT) + { + ps_dec_op->u4_error_code = ERROR_FRAME_LIMIT_OVER; + return IV_FAIL; + } + + /* ! */ + ps_dec->u4_ts = ps_dec_ip->u4_ts; + + ps_dec_op->u4_error_code = 0; + ps_dec_op->e_pic_type = -1; + ps_dec_op->u4_output_present = 0; + ps_dec_op->u4_frame_decoded_flag = 0; + + ps_dec->i4_frametype = -1; + ps_dec->i4_content_type = -1; + /* + * For field pictures, set the bottom and top picture decoded u4_flag correctly. 
+ */ + { + if((TOP_FIELD_ONLY | BOT_FIELD_ONLY) == ps_dec->u1_top_bottom_decoded) + { + ps_dec->u1_top_bottom_decoded = 0; + } + } + ps_dec->u4_slice_start_code_found = 0; + + /* In case the deocder is not in flush mode(in shared mode), + then decoder has to pick up a buffer to write current frame. + Check if a frame is available in such cases */ + + if(ps_dec->u1_init_dec_flag == 1 && ps_dec->u4_share_disp_buf == 1 + && ps_dec->u1_flushfrm == 0) + { + UWORD32 i; + + WORD32 disp_avail = 0, free_id; + + /* Check if at least one buffer is available with the codec */ + /* If not then return to application with error */ + for(i = 0; i < ps_dec->u1_pic_bufs; i++) + { + if(0 == ps_dec->u4_disp_buf_mapping[i] + || 1 == ps_dec->u4_disp_buf_to_be_freed[i]) + { + disp_avail = 1; + break; + } + + } + + if(0 == disp_avail) + { + /* If something is queued for display wait for that buffer to be returned */ + + ps_dec_op->u4_error_code = IVD_DEC_REF_BUF_NULL; + ps_dec_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM); + return (IV_FAIL); + } + + while(1) + { + pic_buffer_t *ps_pic_buf; + ps_pic_buf = (pic_buffer_t *)ih264_buf_mgr_get_next_free( + (buf_mgr_t *)ps_dec->pv_pic_buf_mgr, &free_id); + + if(ps_pic_buf == NULL) + { + UWORD32 i, display_queued = 0; + + /* check if any buffer was given for display which is not returned yet */ + for(i = 0; i < (MAX_DISP_BUFS_NEW); i++) + { + if(0 != ps_dec->u4_disp_buf_mapping[i]) + { + display_queued = 1; + break; + } + } + /* If some buffer is queued for display, then codec has to singal an error and wait + for that buffer to be returned. 
+ If nothing is queued for display then codec has ownership of all display buffers + and it can reuse any of the existing buffers and continue decoding */ + + if(1 == display_queued) + { + /* If something is queued for display wait for that buffer to be returned */ + ps_dec_op->u4_error_code = IVD_DEC_REF_BUF_NULL; + ps_dec_op->u4_error_code |= (1 + << IVD_UNSUPPORTEDPARAM); + return (IV_FAIL); + } + } + else + { + /* If the buffer is with display, then mark it as in use and then look for a buffer again */ + if(1 == ps_dec->u4_disp_buf_mapping[free_id]) + { + ih264_buf_mgr_set_status( + (buf_mgr_t *)ps_dec->pv_pic_buf_mgr, + free_id, + BUF_MGR_IO); + } + else + { + /** + * Found a free buffer for present call. Release it now. + * Will be again obtained later. + */ + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, + free_id, + BUF_MGR_IO); + break; + } + } + } + + } + + if(ps_dec->u4_fmt_conv_in_process && ps_dec->u1_flushfrm && + ps_dec->u1_init_dec_flag) + { + + ih264d_get_next_display_field(ps_dec, ps_dec->ps_out_buffer, + &(ps_dec->s_disp_op)); + if(0 == ps_dec->s_disp_op.u4_error_code) + { + ps_dec->u4_fmt_conv_cur_row = 0; + ps_dec->u4_fmt_conv_num_rows = ps_dec->s_disp_frame_info.u4_y_ht; + ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), + ps_dec->u4_fmt_conv_cur_row, + ps_dec->u4_fmt_conv_num_rows); + ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows; + ps_dec->u4_output_present = 1; + + } + ih264d_release_display_field(ps_dec, &(ps_dec->s_disp_op)); + + ps_dec_op->u4_pic_wd = (UWORD32)ps_dec->u2_disp_width; + ps_dec_op->u4_pic_ht = (UWORD32)ps_dec->u2_disp_height; + + ps_dec_op->u4_new_seq = 0; + + ps_dec_op->u4_output_present = ps_dec->u4_output_present; + ps_dec_op->u4_progressive_frame_flag = + ps_dec->s_disp_op.u4_progressive_frame_flag; + ps_dec_op->e_output_format = + ps_dec->s_disp_op.e_output_format; + ps_dec_op->s_disp_frm_buf = ps_dec->s_disp_op.s_disp_frm_buf; + ps_dec_op->e4_fld_type = ps_dec->s_disp_op.e4_fld_type; + 
ps_dec_op->u4_ts = ps_dec->s_disp_op.u4_ts; + ps_dec_op->u4_disp_buf_id = ps_dec->s_disp_op.u4_disp_buf_id; + + /*In the case of flush ,since no frame is decoded set pic type as invalid*/ + ps_dec_op->u4_is_ref_flag = -1; + ps_dec_op->e_pic_type = IV_NA_FRAME; + ps_dec_op->u4_frame_decoded_flag = 0; + + if(0 == ps_dec->s_disp_op.u4_error_code) + { + return (IV_SUCCESS); + } + else + return (IV_FAIL); + + } + if(ps_dec->u1_res_changed == 1) + { + /*if resolution has changed and all buffers have been flushed, reset decoder*/ + ih264d_init_decoder(ps_dec); + } + + ps_dec->u4_prev_nal_skipped = 0; + + ps_dec->u4_start_frame_decode = 0; + ps_dec->u2_cur_mb_addr = 0; + ps_dec->cur_dec_mb_num = 0; + ps_dec->u4_first_slice_in_pic = 1; + + ps_dec->u4_dec_thread_created = 0; + ps_dec->u4_bs_deblk_thread_created = 0; + ps_dec->u4_cur_bs_mb_num = 0; + + ps_dec->as_fmt_conv_part[0].u4_flag = 1; + ps_dec->as_fmt_conv_part[1].u4_flag = 1; + ps_dec->as_fmt_conv_part[1].u4_start_y = 0; + ps_dec->as_fmt_conv_part[1].u4_num_rows_y = 0; + + DEBUG_THREADS_PRINTF(" Starting process call\n"); + + ps_dec->u4_pic_buf_got = 0; + ps_dec->u2_skip_deblock = 0; + + do + { + + pu1_buf = (UWORD8*)ps_dec_ip->pv_stream_buffer + + ps_dec_op->u4_num_bytes_consumed; + + u4_max_ofst = ps_dec_ip->u4_num_Bytes + - ps_dec_op->u4_num_bytes_consumed; + pu1_bitstrm_buf = ps_dec->ps_mem_tab[MEM_REC_BITSBUF].pv_base; + + u4_next_is_aud = 0; + + buflen = ih264d_find_start_code(pu1_buf, 0, u4_max_ofst, + &u4_length_of_start_code, + &u4_next_is_aud); + + if(buflen == -1) + buflen = 0; + + bytes_consumed = buflen + u4_length_of_start_code; + ps_dec_op->u4_num_bytes_consumed += bytes_consumed; + + if(buflen >= MAX_NAL_UNIT_SIZE) + { + + ih264d_fill_output_struct_from_context(ps_dec, ps_dec_op); + H264_DEC_DEBUG_PRINT( + "\nNal Size exceeded %d, Processing Stopped..\n", + MAX_NAL_UNIT_SIZE); + ps_dec->i4_error_code = 1 << IVD_CORRUPTEDDATA; + + ps_dec_op->e_pic_type = -1; + /*signal the decode thread*/ + 
ps_dec->as_fmt_conv_part[1].u4_flag = 0; + ih264d_signal_decode_thread(ps_dec); + /*signal end of frame decode for curren frame*/ + + if(ps_dec->u4_pic_buf_got == 0) + { + if(ps_dec->i4_header_decoded == 3) + { + ps_dec->u2_total_mbs_coded = + ps_dec->ps_cur_sps->u2_max_mb_addr + 1; + ps_dec->ps_cur_slice->u1_end_of_frame_signal = 1; + } + + /* close deblock thread if it is not closed yet*/ + if(ps_dec->u4_num_cores == 3) + { + ih264d_signal_bs_deblk_thread(ps_dec); + } + return IV_FAIL; + } + else + { + ps_dec->u1_pic_decode_done = 1; + continue; + } + } + + { + UWORD8 u1_firstbyte, u1_nal_ref_idc; + + if(ps_dec->i4_app_skip_mode == IVD_SKIP_B) + { + u1_firstbyte = *(pu1_buf + u4_length_of_start_code); + u1_nal_ref_idc = (UWORD8)(NAL_REF_IDC(u1_firstbyte)); + if(u1_nal_ref_idc == 0) + { + /*skip non reference frames*/ + cur_slice_is_nonref = 1; + continue; + } + else + { + if(1 == cur_slice_is_nonref) + { + /*We have encountered a referenced frame,return to app*/ + ps_dec_op->u4_num_bytes_consumed -= + bytes_consumed; + ps_dec_op->e_pic_type = IV_B_FRAME; + ps_dec_op->u4_error_code = + IVD_DEC_FRM_SKIPPED; + ps_dec_op->u4_error_code |= (1 + << IVD_UNSUPPORTEDPARAM); + ps_dec_op->u4_frame_decoded_flag = 0; + ps_dec_op->u4_size = + sizeof(ivd_video_decode_op_t); + /*signal the decode thread*/ + ps_dec->as_fmt_conv_part[1].u4_flag = 0; + ih264d_signal_decode_thread(ps_dec); + /* close deblock thread if it is not closed yet*/ + if(ps_dec->u4_num_cores == 3) + { + ih264d_signal_bs_deblk_thread(ps_dec); + } + + return (IV_FAIL); + } + } + + } + + } + + + if(buflen) + { + memcpy(pu1_bitstrm_buf, pu1_buf + u4_length_of_start_code, + buflen); + u4_first_start_code_found = 1; + + } + else + { + /*start code not found*/ + + if(u4_first_start_code_found == 0) + { + /*no start codes found in current process call*/ + + ps_dec->i4_error_code = ERROR_START_CODE_NOT_FOUND; + ps_dec_op->u4_error_code |= 1 << IVD_INSUFFICIENTDATA; + + if(ps_dec->u4_pic_buf_got == 0) + { + + 
ih264d_fill_output_struct_from_context(ps_dec, + ps_dec_op); + + ps_dec_op->u4_error_code = ps_dec->i4_error_code; + ps_dec_op->u4_frame_decoded_flag = 0; + + return (IV_FAIL); + } + else + { + ps_dec->u1_pic_decode_done = 1; + continue; + } + } + else + { + /* a start code has already been found earlier in the same process call*/ + continue; + } + + } + + ps_dec->u4_return_to_app = 0; + ret = ih264d_parse_nal_unit(dec_hdl, ps_dec_op, + pu1_bitstrm_buf, buflen); + if(ret != OK) + { + UWORD32 error = ih264d_map_error(ret); + ps_dec_op->u4_error_code = error | ret; + + if((ret == IVD_RES_CHANGED)||(ret == IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED)) + { + /*dont consume the SPS*/ + ps_dec_op->u4_num_bytes_consumed -= bytes_consumed; + } + return IV_FAIL; + } + + if(ps_dec->u4_return_to_app) + { + /*We have encountered a referenced frame,return to app*/ + ps_dec_op->u4_num_bytes_consumed -= bytes_consumed; + ps_dec_op->u4_error_code = IVD_DEC_FRM_SKIPPED; + ps_dec_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM); + ps_dec_op->u4_frame_decoded_flag = 0; + ps_dec_op->u4_size = sizeof(ivd_video_decode_op_t); + /*signal the decode thread*/ + ps_dec->as_fmt_conv_part[1].u4_flag = 0; + ih264d_signal_decode_thread(ps_dec); + /* close deblock thread if it is not closed yet*/ + if(ps_dec->u4_num_cores == 3) + { + ih264d_signal_bs_deblk_thread(ps_dec); + } + return (IV_FAIL); + + } + + + + header_data_left = ((ps_dec->i4_decode_header == 1) + && (ps_dec->i4_header_decoded != 3) + && (ps_dec_op->u4_num_bytes_consumed + < ps_dec_ip->u4_num_Bytes)); + frame_data_left = (((ps_dec->i4_decode_header == 0) + && ((ps_dec->u1_pic_decode_done == 0) + || (u4_next_is_aud == 1))) + && (ps_dec_op->u4_num_bytes_consumed + < ps_dec_ip->u4_num_Bytes)); + } + while(( header_data_left == 1)||(frame_data_left == 1)); + + if((ps_dec->u2_total_mbs_coded + != (ps_dec->u2_frm_wd_in_mbs * ps_dec->u2_frm_ht_in_mbs)) + && (ps_dec_op->u4_num_bytes_consumed + >= ps_dec_ip->u4_num_Bytes)) + { + 
if(ps_dec->ps_parse_cur_slice != NULL) + { + ps_dec->ps_parse_cur_slice->u2_error_flag = 1; + + ps_dec->u2_skip_deblock = 1; + } + } + if(ps_dec->u1_separate_parse) + { + + /* If Format conversion is not complete, + complete it here */ + if(ps_dec->u4_num_cores == 2) + { + ps_dec->u4_fmt_conv_num_rows = ps_dec->s_disp_frame_info.u4_y_ht + - ps_dec->u4_fmt_conv_cur_row; + if(ps_dec->u4_output_present && ps_dec->u4_fmt_conv_in_process + && ps_dec->u4_fmt_conv_num_rows) + { + ps_dec->u4_fmt_conv_num_rows = MIN( + ps_dec->u4_fmt_conv_num_rows, + (ps_dec->s_disp_frame_info.u4_y_ht + - ps_dec->u4_fmt_conv_cur_row)); + if(ps_dec->u4_fmt_conv_num_rows > 64) + { + UWORD32 num_rows_first_part = (ps_dec->u4_fmt_conv_num_rows + / 2); + + /* Align it to even number */ + num_rows_first_part = (num_rows_first_part >> 1) << 1; + + /* Schedule last half of the remaining rows to be processed in second thread */ + ps_dec->as_fmt_conv_part[1].u4_start_y = + ps_dec->u4_fmt_conv_cur_row + + num_rows_first_part; + ps_dec->as_fmt_conv_part[1].u4_num_rows_y = + (ps_dec->u4_fmt_conv_num_rows + - num_rows_first_part); + ps_dec->u4_fmt_conv_num_rows = num_rows_first_part; + DATA_SYNC(); + ps_dec->as_fmt_conv_part[1].u4_flag = 2; + + } + else + { + ps_dec->as_fmt_conv_part[1].u4_flag = 0; + } + + ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), + ps_dec->u4_fmt_conv_cur_row, + ps_dec->u4_fmt_conv_num_rows); + ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows; + + } + else + { + ps_dec->as_fmt_conv_part[1].u4_flag = 0; + } + } + else + { + ps_dec->as_fmt_conv_part[1].u4_flag = 0; + } + + /*signal the decode thread*/ + ih264d_signal_decode_thread(ps_dec); + /* close deblock thread if it is not closed yet*/ + if(ps_dec->u4_num_cores == 3) + { + ih264d_signal_bs_deblk_thread(ps_dec); + } + } + /* Decode thread would have completed format conversion for ps_dec->as_fmt_conv_part[1].u4_num_rows_y rows */ + + ps_dec->u4_fmt_conv_cur_row += ps_dec->as_fmt_conv_part[1].u4_num_rows_y; + + 
if((ps_dec_op->u4_error_code & 0xff) + != ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED) + { + ps_dec_op->u4_pic_wd = (UWORD32)ps_dec->u2_disp_width; + ps_dec_op->u4_pic_ht = (UWORD32)ps_dec->u2_disp_height; + } + +//Report if header (sps and pps) has not been decoded yet + if(ps_dec->i4_header_decoded != 3) + { + ps_dec_op->u4_error_code |= (1 << IVD_INSUFFICIENTDATA); + + } + + if(ps_dec->i4_decode_header == 1 && ps_dec->i4_header_decoded != 3) + { + ps_dec_op->u4_error_code |= (1 << IVD_INSUFFICIENTDATA); + + } + if(ps_dec->u4_prev_nal_skipped) + { + /*We have encountered a referenced frame,return to app*/ + ps_dec_op->u4_error_code = IVD_DEC_FRM_SKIPPED; + ps_dec_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM); + ps_dec_op->u4_frame_decoded_flag = 0; + ps_dec_op->u4_size = sizeof(ivd_video_decode_op_t); + /* close deblock thread if it is not closed yet*/ + if(ps_dec->u4_num_cores == 3) + { + ih264d_signal_bs_deblk_thread(ps_dec); + } + return (IV_FAIL); + + } + + if((ps_dec->u4_slice_start_code_found == 1) + && (ERROR_DANGLING_FIELD_IN_PIC != i4_err_status)) + { + /* + * For field pictures, set the bottom and top picture decoded u4_flag correctly. 
+ */ + + if(ps_dec->u4_pic_buf_got == 0) + { + ih264d_fill_output_struct_from_context(ps_dec, ps_dec_op); + + ps_dec_op->u4_error_code = ps_dec->i4_error_code; + ps_dec_op->u4_frame_decoded_flag = 0; + /* close deblock thread if it is not closed yet*/ + if(ps_dec->u4_num_cores == 3) + { + ih264d_signal_bs_deblk_thread(ps_dec); + } + return (IV_FAIL); + } + + if(ps_dec->ps_cur_slice->u1_field_pic_flag) + { + if(1 == ps_dec->ps_cur_slice->u1_bottom_field_flag) + { + ps_dec->u1_top_bottom_decoded |= BOT_FIELD_ONLY; + } + else + { + ps_dec->u1_top_bottom_decoded |= TOP_FIELD_ONLY; + } + } + + /* Calling Function to deblock Picture and Display */ + ret = ih264d_deblock_display(ps_dec); + if(ret != 0) + return IV_FAIL; + + /*set to complete ,as we dont support partial frame decode*/ + if(ps_dec->i4_header_decoded == 3) + { + ps_dec->u2_total_mbs_coded = ps_dec->ps_cur_sps->u2_max_mb_addr + 1; + } + + /*Update the i4_frametype at the end of picture*/ + if(ps_dec->ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL) + { + ps_dec->i4_frametype = IV_IDR_FRAME; + } + else if(ps_dec->i4_pic_type == B_SLICE) + { + ps_dec->i4_frametype = IV_B_FRAME; + } + else if(ps_dec->i4_pic_type == P_SLICE) + { + ps_dec->i4_frametype = IV_P_FRAME; + } + else if(ps_dec->i4_pic_type == I_SLICE) + { + ps_dec->i4_frametype = IV_I_FRAME; + } + else + { + H264_DEC_DEBUG_PRINT("Shouldn't come here\n"); + } + + //Update the content type + ps_dec->i4_content_type = ps_dec->ps_cur_slice->u1_field_pic_flag; + + ps_dec->u4_total_frames_decoded = ps_dec->u4_total_frames_decoded + 2; + ps_dec->u4_total_frames_decoded = ps_dec->u4_total_frames_decoded + - ps_dec->ps_cur_slice->u1_field_pic_flag; + + } + + /* close deblock thread if it is not closed yet*/ + if(ps_dec->u4_num_cores == 3) + { + ih264d_signal_bs_deblk_thread(ps_dec); + } + + if(ps_dec->u4_fmt_conv_in_process) + { + /* In case the decoder is configured to run in low delay mode, + * then get display buffer and then format convert. 
+         * Note in this mode, format conversion does not run paralelly in a thread and adds to the codec cycles
+         */
+
+        if((0 == ps_dec->u4_num_reorder_frames_at_init)
+                        && ps_dec->u1_init_dec_flag)
+        {
+
+            ih264d_get_next_display_field(ps_dec, ps_dec->ps_out_buffer,
+                                          &(ps_dec->s_disp_op));
+            if(0 == ps_dec->s_disp_op.u4_error_code)
+            {
+                ps_dec->u4_fmt_conv_cur_row = 0;
+                ps_dec->u4_output_present = 1;
+            }
+        }
+
+        ih264d_fill_output_struct_from_context(ps_dec, ps_dec_op);
+
+        /* If Format conversion is not complete,
+         complete it here */
+        ps_dec->u4_fmt_conv_num_rows = ps_dec->s_disp_frame_info.u4_y_ht
+                        - ps_dec->u4_fmt_conv_cur_row;
+        DEBUG_PERF_PRINTF("ps_dec->u4_fmt_conv_num_rows = %d\n",ps_dec->u4_fmt_conv_num_rows);
+        if(ps_dec->u4_output_present && ps_dec->u4_fmt_conv_num_rows)
+        {
+            ps_dec->u4_fmt_conv_num_rows = MIN(
+                            ps_dec->u4_fmt_conv_num_rows,
+                            (ps_dec->s_disp_frame_info.u4_y_ht
+                                            - ps_dec->u4_fmt_conv_cur_row));
+            ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
+                                  ps_dec->u4_fmt_conv_cur_row,
+                                  ps_dec->u4_fmt_conv_num_rows);
+            ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
+        }
+
+        ih264d_release_display_field(ps_dec, &(ps_dec->s_disp_op));
+    }
+
+    if(ps_dec->i4_decode_header == 1 && (ps_dec->i4_header_decoded & 1) == 1)
+    {
+        ps_dec_op->u4_progressive_frame_flag = 1;
+        if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid)))
+        {
+            if((0 == ps_dec->ps_sps->u1_frame_mbs_only_flag)
+                            && (0 == ps_dec->ps_sps->u1_mb_aff_flag))
+                ps_dec_op->u4_progressive_frame_flag = 0;
+
+        }
+    }
+
+    /*Data memory barrier instruction,so that yuv write by the library is complete*/
+    DATA_SYNC();
+
+    H264_DEC_DEBUG_PRINT("The num bytes consumed: %d\n",
+                         ps_dec_op->u4_num_bytes_consumed);
+    return IV_SUCCESS;
+}
+
+/*****************************************************************************/
+/* Function Name : ih264d_get_version                                        */
+/*                                                                           */
+/* Description   : Builds the codec version string with VERSION() and copies */
+/*                 it, NUL-terminated, into the caller's version buffer.     */
+/*                                                                           */
+/* Inputs        : dec_hdl   - decoder handle (unused)                       */
+/*                 pv_api_ip - ivd_ctl_getversioninfo_ip_t; pv_version_buffer*/
+/*                             and u4_version_buffer_size are read           */
+/*                 pv_api_op - ivd_ctl_getversioninfo_op_t; u4_error_code is */
+/*                             written                                       */
+/* Returns       : IV_SUCCESS when the string plus its terminator fits in    */
+/*                 the buffer; IV_FAIL with IH264D_VERS_BUF_INSUFFICIENT     */
+/*                 otherwise. Nothing is written to the buffer on failure.   */
+/*****************************************************************************/
+WORD32 ih264d_get_version(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+    char version_string[MAXVERSION_STRLEN + 1];
+    UWORD32 u4_version_len;
+
+    ivd_ctl_getversioninfo_ip_t *ps_ip;
+    ivd_ctl_getversioninfo_op_t *ps_op;
+
+    ps_ip = (ivd_ctl_getversioninfo_ip_t *)pv_api_ip;
+    ps_op = (ivd_ctl_getversioninfo_op_t *)pv_api_op;
+    UNUSED(dec_hdl);
+    ps_op->u4_error_code = IV_SUCCESS;
+
+    VERSION(version_string, CODEC_NAME, CODEC_RELEASE_TYPE, CODEC_RELEASE_VER,
+            CODEC_VENDOR);
+
+    /* A size that is zero, or negative when viewed as signed, is rejected */
+    if((WORD32)ps_ip->u4_version_buffer_size <= 0)
+    {
+        ps_op->u4_error_code = IH264D_VERS_BUF_INSUFFICIENT;
+        return (IV_FAIL);
+    }
+
+    u4_version_len = (UWORD32)strnlen(version_string, MAXVERSION_STRLEN);
+
+    if(ps_ip->u4_version_buffer_size < (u4_version_len + 1))
+    {
+        ps_op->u4_error_code = IH264D_VERS_BUF_INSUFFICIENT;
+        return (IV_FAIL);
+    }
+
+    /* Copy only the characters present and terminate explicitly.         */
+    /* strncpy(dst, src, MAXVERSION_STRLEN) would write MAXVERSION_STRLEN  */
+    /* bytes regardless of the size validated above, overrunning a buffer  */
+    /* that is only (length + 1) bytes long.                               */
+    memcpy(ps_ip->pv_version_buffer, version_string, u4_version_len);
+    ((char *)ps_ip->pv_version_buffer)[u4_version_len] = '\0';
+    ps_op->u4_error_code = IV_SUCCESS;
+
+    return (IV_SUCCESS);
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_get_display_frame */
+/* */
+/* Description : */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure */
+/* :pv_api_op pointer to output structure */
+/* Outputs : */
+/* Returns : void */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+WORD32 ih264d_get_display_frame(iv_obj_t *dec_hdl,
+                                void *pv_api_ip,
+                                void *pv_api_op)
+{
+
+    ivd_get_display_frame_ip_t *dec_disp_ip;
+    ivd_get_display_frame_op_t *dec_disp_op;
+
+    WORD32 u4_api_ret;
+    dec_struct_t * ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+
+    dec_disp_ip = (ivd_get_display_frame_ip_t *)pv_api_ip;
+    dec_disp_op = (ivd_get_display_frame_op_t *)pv_api_op;
+
+    if(ps_dec->u4_fmt_conv_in_process)
+    {
+        return IV_FAIL;
+    }
+
+    {
+
+        if(ps_dec->process_called != 1)
+        {
+            //Return Proper Error Code
+        }
+
+        if(0 == ps_dec->u4_share_disp_buf)
+        {
+            UWORD32 i;
+            if(dec_disp_ip->s_out_buffer.u4_num_bufs == 0)
+            {
+                dec_disp_op->u4_error_code
|= 1 << IVD_UNSUPPORTEDPARAM; + dec_disp_op->u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUFS; + return IV_FAIL; + } + + for(i = 0; i < dec_disp_ip->s_out_buffer.u4_num_bufs; i++) + { + if(dec_disp_ip->s_out_buffer.pu1_bufs[i] == NULL) + { + dec_disp_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + dec_disp_op->u4_error_code |= IVD_DISP_FRM_OP_BUF_NULL; + return IV_FAIL; + } + + if(dec_disp_ip->s_out_buffer.u4_min_out_buf_size[i] == 0) + { + dec_disp_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + dec_disp_op->u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUF_SIZE; + return IV_FAIL; + } + } + } + + u4_api_ret = ih264d_get_next_display_field(ps_dec, + &(dec_disp_ip->s_out_buffer), + &(ps_dec->s_disp_op)); + *dec_disp_op = (ps_dec->s_disp_op); + if(0 == dec_disp_op->u4_error_code) + { + ps_dec->u4_fmt_conv_cur_row = 0; + ps_dec->u4_fmt_conv_num_rows = ps_dec->s_disp_frame_info.u4_y_ht; + ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), + ps_dec->u4_fmt_conv_cur_row, + ps_dec->u4_fmt_conv_num_rows); + ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows; + + } + ih264d_release_display_field(ps_dec, dec_disp_op); + return u4_api_ret; + } + +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_set_display_frame */ +/* */ +/* Description : */ +/* */ +/* Inputs :iv_obj_t decoder handle */ +/* :pv_api_ip pointer to input structure */ +/* :pv_api_op pointer to output structure */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_set_display_frame(iv_obj_t *dec_hdl, + void *pv_api_ip, + void *pv_api_op) +{ + + ivd_set_display_frame_ip_t *dec_disp_ip; + ivd_set_display_frame_op_t *dec_disp_op; + + UWORD32 i, num_mvbank_req; + dec_struct_t * ps_dec = (dec_struct_t 
*)(dec_hdl->pv_codec_handle); + + dec_disp_ip = (ivd_set_display_frame_ip_t *)pv_api_ip; + dec_disp_op = (ivd_set_display_frame_op_t *)pv_api_op; + dec_disp_op->u4_error_code = 0; + if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid))) + { + UWORD32 level, width_mbs, height_mbs; + + level = ps_dec->u4_level_at_init; + width_mbs = ps_dec->u2_frm_wd_in_mbs; + height_mbs = ps_dec->u2_frm_ht_in_mbs; + + if((ps_dec->ps_sps->u1_vui_parameters_present_flag == 1) + && (ps_dec->ps_sps->s_vui.u4_num_reorder_frames != 64)) + { + num_mvbank_req = ps_dec->ps_sps->s_vui.u4_num_reorder_frames + 2; + } + else + { + /*if VUI is not present assume maximum possible refrence frames for the level, + * as max reorder frames*/ + num_mvbank_req = ih264d_get_dpb_size_new(level, width_mbs, + height_mbs); + } + + num_mvbank_req += ps_dec->ps_sps->u1_num_ref_frames + 1; + } + else + { + UWORD32 num_bufs_app, num_bufs_level; + UWORD32 num_ref_frames, num_reorder_frames, luma_width; + UWORD32 luma_height, level; + + num_ref_frames = ps_dec->u4_num_ref_frames_at_init; + num_reorder_frames = ps_dec->u4_num_reorder_frames_at_init; + level = ps_dec->u4_level_at_init; + luma_width = ps_dec->u4_width_at_init; + luma_height = ps_dec->u4_height_at_init; + + num_bufs_app = num_ref_frames + num_reorder_frames + 1; + + if(num_bufs_app <= 1) + num_bufs_app = 2; + + num_bufs_level = ih264d_get_dpb_size_new(level, (luma_width >> 4), + (luma_height >> 4)); + + num_bufs_level = num_bufs_level * 2 + 1; + + num_mvbank_req = MIN(num_bufs_level, num_bufs_app); + + num_mvbank_req += ps_dec->u4_num_extra_disp_bufs_at_init; + + } + + ps_dec->u4_num_disp_bufs = 0; + if(ps_dec->u4_share_disp_buf) + { + UWORD32 u4_num_bufs = dec_disp_ip->num_disp_bufs; + if(u4_num_bufs > MAX_DISP_BUFS_NEW) + u4_num_bufs = MAX_DISP_BUFS_NEW; + + u4_num_bufs = MIN(u4_num_bufs, MAX_DISP_BUFS_NEW); + u4_num_bufs = MIN(u4_num_bufs, num_mvbank_req); + + ps_dec->u4_num_disp_bufs = u4_num_bufs; + for(i = 0; i < u4_num_bufs; i++) 
+ { + ps_dec->disp_bufs[i].u4_num_bufs = + dec_disp_ip->s_disp_buffer[i].u4_num_bufs; + + ps_dec->disp_bufs[i].buf[0] = + dec_disp_ip->s_disp_buffer[i].pu1_bufs[0]; + ps_dec->disp_bufs[i].buf[1] = + dec_disp_ip->s_disp_buffer[i].pu1_bufs[1]; + ps_dec->disp_bufs[i].buf[2] = + dec_disp_ip->s_disp_buffer[i].pu1_bufs[2]; + + ps_dec->disp_bufs[i].u4_bufsize[0] = + dec_disp_ip->s_disp_buffer[i].u4_min_out_buf_size[0]; + ps_dec->disp_bufs[i].u4_bufsize[1] = + dec_disp_ip->s_disp_buffer[i].u4_min_out_buf_size[1]; + ps_dec->disp_bufs[i].u4_bufsize[2] = + dec_disp_ip->s_disp_buffer[i].u4_min_out_buf_size[2]; + + } + } + return IV_SUCCESS; + +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_set_flush_mode */ +/* */ +/* Description : */ +/* */ +/* Inputs :iv_obj_t decoder handle */ +/* :pv_api_ip pointer to input structure */ +/* :pv_api_op pointer to output structure */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_set_flush_mode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op) +{ + + dec_struct_t * ps_dec; + ivd_ctl_flush_op_t *ps_ctl_op = (ivd_ctl_flush_op_t*)pv_api_op; + ps_ctl_op->u4_error_code = 0; + + ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle); + UNUSED(pv_api_ip); + /* ! 
*/ + /* Signal flush frame control call */ + ps_dec->u1_flushfrm = 1; + + ih264d_release_pics_in_dpb((void *)ps_dec, + ps_dec->u1_pic_bufs); + ih264d_release_display_bufs(ps_dec); + + ps_ctl_op->u4_error_code = + ((ivd_ctl_flush_op_t*)ps_dec->pv_dec_out)->u4_error_code; //verify the value + + return IV_SUCCESS; + +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_get_status */ +/* */ +/* Description : */ +/* */ +/* Inputs :iv_obj_t decoder handle */ +/* :pv_api_ip pointer to input structure */ +/* :pv_api_op pointer to output structure */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ + +WORD32 ih264d_get_status(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op) +{ + + UWORD32 i; + dec_struct_t * ps_dec; + UWORD32 pic_wd, pic_ht; + ivd_ctl_getstatus_op_t *ps_ctl_op = (ivd_ctl_getstatus_op_t*)pv_api_op; + UNUSED(pv_api_ip); + ps_ctl_op->u4_error_code = 0; + + ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle); + + pic_wd = ps_dec->u4_width_at_init; + pic_ht = ps_dec->u4_height_at_init; + + if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid))) + { + ps_ctl_op->u4_pic_ht = ps_dec->u2_disp_height; + ps_ctl_op->u4_pic_wd = ps_dec->u2_disp_width; + + if(0 == ps_dec->u4_share_disp_buf) + { + pic_wd = ps_dec->u2_disp_width; + pic_ht = ps_dec->u2_disp_height; + + } + else + { + pic_wd = ps_dec->u2_frm_wd_y; + pic_ht = ps_dec->u2_frm_ht_y; + } + } + else + { + ps_ctl_op->u4_pic_ht = pic_wd; + ps_ctl_op->u4_pic_wd = pic_ht; + + if(1 == ps_dec->u4_share_disp_buf) + { + pic_wd += (PAD_LEN_Y_H << 1); + pic_ht += (PAD_LEN_Y_V << 2); + + } + + } + + if(ps_dec->u4_app_disp_width > pic_wd) + pic_wd = 
ps_dec->u4_app_disp_width; + if(0 == ps_dec->u4_share_disp_buf) + ps_ctl_op->u4_num_disp_bufs = 1; + else + { + if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid))) + { + UWORD32 level, width_mbs, height_mbs; + + level = ps_dec->u4_level_at_init; + width_mbs = ps_dec->u2_frm_wd_in_mbs; + height_mbs = ps_dec->u2_frm_ht_in_mbs; + + if((ps_dec->ps_sps->u1_vui_parameters_present_flag == 1) + && (ps_dec->ps_sps->s_vui.u4_num_reorder_frames + != 64)) + { + ps_ctl_op->u4_num_disp_bufs = + ps_dec->ps_sps->s_vui.u4_num_reorder_frames + 2; + } + else + { + /*if VUI is not present assume maximum possible refrence frames for the level, + * as max reorder frames*/ + ps_ctl_op->u4_num_disp_bufs = ih264d_get_dpb_size_new( + level, width_mbs, height_mbs); + } + + ps_ctl_op->u4_num_disp_bufs += + ps_dec->ps_sps->u1_num_ref_frames + 1; + } + else + { + ps_ctl_op->u4_num_disp_bufs = ih264d_get_dpb_size_new( + ps_dec->u4_level_at_init, + (ps_dec->u4_width_at_init >> 4), + (ps_dec->u4_height_at_init >> 4)); + + ps_ctl_op->u4_num_disp_bufs += + ps_ctl_op->u4_num_disp_bufs; + + ps_ctl_op->u4_num_disp_bufs = + MIN(ps_ctl_op->u4_num_disp_bufs, + (ps_dec->u4_num_ref_frames_at_init + + ps_dec->u4_num_reorder_frames_at_init)); + + } + + ps_ctl_op->u4_num_disp_bufs = MAX( + ps_ctl_op->u4_num_disp_bufs, 6); + ps_ctl_op->u4_num_disp_bufs = MIN( + ps_ctl_op->u4_num_disp_bufs, 32); + } + + ps_ctl_op->u4_error_code = ps_dec->i4_error_code; + + ps_ctl_op->u4_frame_rate = 0; //make it proper + ps_ctl_op->u4_bit_rate = 0; //make it proper + ps_ctl_op->e_content_type = ps_dec->i4_content_type; + ps_ctl_op->e_output_chroma_format = ps_dec->u1_chroma_format; + ps_ctl_op->u4_min_num_in_bufs = MIN_IN_BUFS; + + if(ps_dec->u1_chroma_format == IV_YUV_420P) + { + ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_420; + } + else if(ps_dec->u1_chroma_format == IV_YUV_422ILE) + { + ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_422ILE; + } + else if(ps_dec->u1_chroma_format == IV_RGB_565) + { + 
ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_RGB565; + } + else if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV) + || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU)) + { + ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_420SP; + } + + else + { + //Invalid chroma format; Error code may be updated, verify in testing if needed + ps_ctl_op->u4_error_code = ERROR_FEATURE_UNAVAIL; + return IV_FAIL; + } + + for(i = 0; i < ps_ctl_op->u4_min_num_in_bufs; i++) + { + ps_ctl_op->u4_min_in_buf_size[i] = MIN_IN_BUF_SIZE; + } + + /*!*/ + if(ps_dec->u1_chroma_format == IV_YUV_420P) + { + ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht); + ps_ctl_op->u4_min_out_buf_size[1] = (pic_wd * pic_ht) + >> 2; + ps_ctl_op->u4_min_out_buf_size[2] = (pic_wd * pic_ht) + >> 2; + } + else if(ps_dec->u1_chroma_format == IV_YUV_422ILE) + { + ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht) + * 2; + ps_ctl_op->u4_min_out_buf_size[1] = + ps_ctl_op->u4_min_out_buf_size[2] = 0; + } + else if(ps_dec->u1_chroma_format == IV_RGB_565) + { + ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht) + * 2; + ps_ctl_op->u4_min_out_buf_size[1] = + ps_ctl_op->u4_min_out_buf_size[2] = 0; + } + else if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV) + || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU)) + { + ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht); + ps_ctl_op->u4_min_out_buf_size[1] = (pic_wd * pic_ht) + >> 1; + ps_ctl_op->u4_min_out_buf_size[2] = 0; + } + + ps_dec->u4_num_disp_bufs_requested = ps_ctl_op->u4_num_disp_bufs; + return IV_SUCCESS; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_get_buf_info */ +/* */ +/* Description : */ +/* */ +/* Inputs :iv_obj_t decoder handle */ +/* :pv_api_ip pointer to input structure */ +/* :pv_api_op pointer to output structure */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* 
DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+/*
+ * ih264d_get_buf_info: fills ivd_ctl_getbufinfo_op_t with the minimum
+ * input/output buffer counts and sizes and the number of display buffers,
+ * and saves that number in ps_dec->u4_num_disp_bufs_requested.
+ * pv_api_ip is unused.
+ *
+ * Returns IV_SUCCESS, or IV_FAIL with u4_error_code set to
+ * ERROR_FEATURE_UNAVAIL when u1_chroma_format is not one of the handled
+ * formats.
+ */
+WORD32 ih264d_get_buf_info(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+
+    dec_struct_t * ps_dec;
+    UWORD8 i = 0; // Default for 420P format
+    UWORD16 pic_wd, pic_ht;
+    ivd_ctl_getbufinfo_op_t *ps_ctl_op =
+                    (ivd_ctl_getbufinfo_op_t*)pv_api_op;
+    UNUSED(pv_api_ip);
+    ps_ctl_op->u4_error_code = 0;
+
+    ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+
+    ps_ctl_op->u4_min_num_in_bufs = MIN_IN_BUFS;
+    if(ps_dec->u1_chroma_format == IV_YUV_420P)
+        ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_420;
+    else if(ps_dec->u1_chroma_format == IV_YUV_422ILE)
+        ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_422ILE;
+    else if(ps_dec->u1_chroma_format == IV_RGB_565)
+        ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_RGB565;
+    else if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
+                    || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU))
+        ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_420SP;
+
+    else
+    {
+        /* Invalid chroma format: report it instead of failing with a  */
+        /* zero error code, as ih264d_get_status does for this case    */
+        ps_ctl_op->u4_error_code = ERROR_FEATURE_UNAVAIL;
+        return IV_FAIL;
+    }
+
+    ps_ctl_op->u4_num_disp_bufs = 1;
+
+    for(i = 0; i < ps_ctl_op->u4_min_num_in_bufs; i++)
+    {
+        ps_ctl_op->u4_min_in_buf_size[i] = MIN_IN_BUF_SIZE;
+    }
+
+    pic_wd = ps_dec->u4_width_at_init;
+    pic_ht = ps_dec->u4_height_at_init;
+
+    if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid)))
+    {
+
+        if(0 == ps_dec->u4_share_disp_buf)
+        {
+            pic_wd = ps_dec->u2_disp_width;
+            pic_ht = ps_dec->u2_disp_height;
+
+        }
+        else
+        {
+            pic_wd = ps_dec->u2_frm_wd_y;
+            pic_ht = ps_dec->u2_frm_ht_y;
+        }
+    }
+    else
+    {
+        if(1 == ps_dec->u4_share_disp_buf)
+        {
+            pic_wd += (PAD_LEN_Y_H << 1);
+            pic_ht += (PAD_LEN_Y_V << 2);
+
+        }
+    }
+
+    /* The signed cast makes a stored width with the top bit set compare */
+    /* as negative, so such a value never replaces pic_wd                */
+    if((WORD32)ps_dec->u4_app_disp_width > pic_wd)
+        pic_wd = ps_dec->u4_app_disp_width;
+
+    if(0 == ps_dec->u4_share_disp_buf)
+        ps_ctl_op->u4_num_disp_bufs = 1;
+    else
+    {
+        if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid)))
+        {
+            UWORD32 level, width_mbs, height_mbs;
+
+            level = ps_dec->u4_level_at_init;
+            width_mbs = ps_dec->u2_frm_wd_in_mbs;
+            height_mbs = ps_dec->u2_frm_ht_in_mbs;
+
+            if((ps_dec->ps_sps->u1_vui_parameters_present_flag == 1)
+                            && (ps_dec->ps_sps->s_vui.u4_num_reorder_frames
+                                            != 64))
+            {
+                ps_ctl_op->u4_num_disp_bufs =
+                                ps_dec->ps_sps->s_vui.u4_num_reorder_frames + 2;
+            }
+            else
+            {
+                /*if VUI is not present assume maximum possible refrence frames for the level,
+                 * as max reorder frames*/
+                ps_ctl_op->u4_num_disp_bufs = ih264d_get_dpb_size_new(
+                                level, width_mbs, height_mbs);
+            }
+
+            ps_ctl_op->u4_num_disp_bufs +=
+                            ps_dec->ps_sps->u1_num_ref_frames + 1;
+
+        }
+        else
+        {
+            ps_ctl_op->u4_num_disp_bufs = ih264d_get_dpb_size_new(
+                            ps_dec->u4_level_at_init,
+                            (ps_dec->u4_width_at_init >> 4),
+                            (ps_dec->u4_height_at_init >> 4));
+
+            ps_ctl_op->u4_num_disp_bufs +=
+                            ps_ctl_op->u4_num_disp_bufs;
+
+            ps_ctl_op->u4_num_disp_bufs =
+                            MIN(ps_ctl_op->u4_num_disp_bufs,
+                                (ps_dec->u4_num_ref_frames_at_init
+                                                + ps_dec->u4_num_reorder_frames_at_init));
+
+        }
+
+        /* Clamp the shared-mode buffer count to the range [6, 32] */
+        ps_ctl_op->u4_num_disp_bufs = MAX(
+                        ps_ctl_op->u4_num_disp_bufs, 6);
+        ps_ctl_op->u4_num_disp_bufs = MIN(
+                        ps_ctl_op->u4_num_disp_bufs, 32);
+    }
+
+    /* Output buffer sizes per chroma format, from pic_wd * pic_ht */
+    if(ps_dec->u1_chroma_format == IV_YUV_420P)
+    {
+        ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht);
+        ps_ctl_op->u4_min_out_buf_size[1] = (pic_wd * pic_ht)
+                        >> 2;
+        ps_ctl_op->u4_min_out_buf_size[2] = (pic_wd * pic_ht)
+                        >> 2;
+    }
+    else if(ps_dec->u1_chroma_format == IV_YUV_422ILE)
+    {
+        ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht)
+                        * 2;
+        ps_ctl_op->u4_min_out_buf_size[1] =
+                        ps_ctl_op->u4_min_out_buf_size[2] = 0;
+    }
+    else if(ps_dec->u1_chroma_format == IV_RGB_565)
+    {
+        ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht)
+                        * 2;
+        ps_ctl_op->u4_min_out_buf_size[1] =
+                        ps_ctl_op->u4_min_out_buf_size[2] = 0;
+    }
+    else if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
+                    || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU))
+    {
+        ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht);
+        ps_ctl_op->u4_min_out_buf_size[1] = (pic_wd * pic_ht)
+                        >> 1;
+        ps_ctl_op->u4_min_out_buf_size[2] = 0;
+    }
+    ps_dec->u4_num_disp_bufs_requested = ps_ctl_op->u4_num_disp_bufs;
+
+    return IV_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_set_params */
+/* */
+/* Description : */
+/* */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure */
+/* :pv_api_op pointer to output structure */
+/* Outputs : */
+/* Returns : void */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+WORD32 ih264d_set_params(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+
+    dec_struct_t * ps_dec;
+    WORD32 ret = IV_SUCCESS;
+
+    ivd_ctl_set_config_ip_t *ps_ctl_ip =
+                    (ivd_ctl_set_config_ip_t *)pv_api_ip;
+    ivd_ctl_set_config_op_t *ps_ctl_op =
+                    (ivd_ctl_set_config_op_t *)pv_api_op;
+
+    ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+
+    ps_dec->u4_skip_frm_mask = 0;
+
+    ps_ctl_op->u4_error_code = 0;
+
+    ps_dec->i4_app_skip_mode = ps_ctl_ip->e_frm_skip_mode;
+
+    /*Is it really supported test it when you so the corner testing using test app*/
+
+    if(ps_ctl_ip->e_frm_skip_mode != IVD_SKIP_NONE)
+    {
+
+        if(ps_ctl_ip->e_frm_skip_mode == IVD_SKIP_P)
+            ps_dec->u4_skip_frm_mask |= 1 << P_SLC_BIT;
+        else if(ps_ctl_ip->e_frm_skip_mode == IVD_SKIP_B)
+            ps_dec->u4_skip_frm_mask |= 1 << B_SLC_BIT;
+        else if(ps_ctl_ip->e_frm_skip_mode == IVD_SKIP_PB)
+        {
+            ps_dec->u4_skip_frm_mask |= 1 << B_SLC_BIT;
+            ps_dec->u4_skip_frm_mask |= 1 << P_SLC_BIT;
+        }
+        else if(ps_ctl_ip->e_frm_skip_mode == IVD_SKIP_I)
+            ps_dec->u4_skip_frm_mask |= 1 << I_SLC_BIT;
+        else
+        {
+            //dynamic parameter not
supported + //Put an appropriate error code to return the error.. + //when you do the error code tests and after that remove this comment + ps_ctl_op->u4_error_code = (1 << IVD_UNSUPPORTEDPARAM); + ret = IV_FAIL; + } + } + + if((0 != ps_dec->u4_app_disp_width) + && (ps_ctl_ip->u4_disp_wd + != ps_dec->u4_app_disp_width)) + { + ps_ctl_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM); + ps_ctl_op->u4_error_code |= ERROR_DISP_WIDTH_INVALID; + ret = IV_FAIL; + } + else + { + if((ps_ctl_ip->u4_disp_wd >= ps_dec->u2_pic_wd)/* && (ps_ctl_ip->u4_disp_wd <= ps_dec->u4_width_at_init) */) + { + ps_dec->u4_app_disp_width = ps_ctl_ip->u4_disp_wd; + } + else if((0 == ps_dec->i4_header_decoded) /*&& (ps_ctl_ip->u4_disp_wd <= ps_dec->u4_width_at_init)*/) + { + ps_dec->u4_app_disp_width = ps_ctl_ip->u4_disp_wd; + } + else if(ps_ctl_ip->u4_disp_wd == 0) + { + ps_dec->u4_app_disp_width = 0; + } + else + { + /* + * Set the display width to zero. This will ensure that the wrong value we had stored (0xFFFFFFFF) + * does not propogate. 
+ */ + ps_dec->u4_app_disp_width = 0; + ps_ctl_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM); + ps_ctl_op->u4_error_code |= ERROR_DISP_WIDTH_INVALID; + ret = IV_FAIL; + } + } + if(ps_ctl_ip->e_vid_dec_mode == IVD_DECODE_FRAME) + ps_dec->i4_decode_header = 0; + else if(ps_ctl_ip->e_vid_dec_mode == IVD_DECODE_HEADER) + ps_dec->i4_decode_header = 1; + else + { + ps_ctl_op->u4_error_code = (1 << IVD_UNSUPPORTEDPARAM); + ps_dec->i4_decode_header = 1; + ret = IV_FAIL; + } + + return ret; + +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_set_default_params */ +/* */ +/* Description : */ +/* */ +/* Inputs :iv_obj_t decoder handle */ +/* :pv_api_ip pointer to input structure */ +/* :pv_api_op pointer to output structure */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 08 08 2011 100421 Copied from set_params */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_set_default_params(iv_obj_t *dec_hdl, + void *pv_api_ip, + void *pv_api_op) +{ + + dec_struct_t * ps_dec; + WORD32 ret = IV_SUCCESS; + + ivd_ctl_set_config_op_t *ps_ctl_op = + (ivd_ctl_set_config_op_t *)pv_api_op; + ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle); + UNUSED(pv_api_ip); + + + { + ps_dec->u4_app_disp_width = 0; + ps_dec->u4_skip_frm_mask = 0; + ps_dec->i4_decode_header = 1; + + ps_ctl_op->u4_error_code = 0; + } + + + return ret; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_reset */ +/* */ +/* Description : */ +/* */ +/* Inputs :iv_obj_t decoder handle */ +/* :pv_api_ip pointer to input structure */ +/* :pv_api_op pointer to output structure */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* 
Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_reset(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op) +{ + dec_struct_t * ps_dec; + ivd_ctl_reset_op_t *ps_ctl_op = (ivd_ctl_reset_op_t *)pv_api_op; + UNUSED(pv_api_ip); + ps_ctl_op->u4_error_code = 0; + + ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle); +//CHECK + if(ps_dec != NULL) + { + + ih264d_init_decoder(ps_dec); + + /* + memset(ps_dec->disp_bufs, 0, (MAX_DISP_BUFS_NEW) * sizeof(disp_buf_t)); + memset(ps_dec->u4_disp_buf_mapping, 0, (MAX_DISP_BUFS_NEW) * sizeof(UWORD32)); + memset(ps_dec->u4_disp_buf_to_be_freed, 0, (MAX_DISP_BUFS_NEW) * sizeof(UWORD32)); + */ + } + else + { + H264_DEC_DEBUG_PRINT( + "\nReset called without Initializing the decoder\n"); + ps_ctl_op->u4_error_code = ERROR_INIT_NOT_DONE; + } + + return IV_SUCCESS; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_ctl */ +/* */ +/* Description : */ +/* */ +/* Inputs :iv_obj_t decoder handle */ +/* :pv_api_ip pointer to input structure */ +/* :pv_api_op pointer to output structure */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_ctl(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op) +{ + ivd_ctl_set_config_ip_t *ps_ctl_ip; + ivd_ctl_set_config_op_t *ps_ctl_op; + WORD32 ret = IV_SUCCESS; + UWORD32 subcommand; + dec_struct_t *ps_dec = dec_hdl->pv_codec_handle; + + if(ps_dec->init_done != 1) + { + //Return proper Error Code + return IV_FAIL; + } + ps_ctl_ip = (ivd_ctl_set_config_ip_t*)pv_api_ip; + ps_ctl_op = 
(ivd_ctl_set_config_op_t*)pv_api_op; + ps_ctl_op->u4_error_code = 0; + subcommand = ps_ctl_ip->e_sub_cmd; + + switch(subcommand) + { + case IVD_CMD_CTL_GETPARAMS: + ret = ih264d_get_status(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + case IVD_CMD_CTL_SETPARAMS: + ret = ih264d_set_params(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + case IVD_CMD_CTL_RESET: + ret = ih264d_reset(dec_hdl, (void *)pv_api_ip, (void *)pv_api_op); + break; + case IVD_CMD_CTL_SETDEFAULT: + ret = ih264d_set_default_params(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + case IVD_CMD_CTL_FLUSH: + ret = ih264d_set_flush_mode(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + case IVD_CMD_CTL_GETBUFINFO: + ret = ih264d_get_buf_info(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + case IVD_CMD_CTL_GETVERSION: + ret = ih264d_get_version(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + case IH264D_CMD_CTL_DEGRADE: + ret = ih264d_set_degrade(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IH264D_CMD_CTL_SET_NUM_CORES: + ret = ih264d_set_num_cores(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + case IH264D_CMD_CTL_GET_BUFFER_DIMENSIONS: + ret = ih264d_get_frame_dimensions(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + case IH264D_CMD_CTL_SET_PROCESSOR: + ret = ih264d_set_processor(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + default: + H264_DEC_DEBUG_PRINT("\ndo nothing\n") + ; + break; + } + + return ret; +} +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_rel_display_frame */ +/* */ +/* Description : */ +/* */ +/* Inputs :iv_obj_t decoder handle */ +/* :pv_api_ip pointer to input structure */ +/* :pv_api_op pointer to output structure */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes 
made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_rel_display_frame(iv_obj_t *dec_hdl, + void *pv_api_ip, + void *pv_api_op) +{ + + ivd_rel_display_frame_ip_t *ps_rel_ip; + ivd_rel_display_frame_op_t *ps_rel_op; + UWORD32 buf_released = 0; + + UWORD32 u4_ts = -1; + dec_struct_t *ps_dec = dec_hdl->pv_codec_handle; + + ps_rel_ip = (ivd_rel_display_frame_ip_t *)pv_api_ip; + ps_rel_op = (ivd_rel_display_frame_op_t *)pv_api_op; + ps_rel_op->u4_error_code = 0; + u4_ts = ps_rel_ip->u4_disp_buf_id; + + if(0 == ps_dec->u4_share_disp_buf) + { + ps_dec->u4_disp_buf_mapping[u4_ts] = 0; + ps_dec->u4_disp_buf_to_be_freed[u4_ts] = 0; + return IV_SUCCESS; + } + + if(ps_dec->pv_pic_buf_mgr != NULL) + { + if(1 == ps_dec->u4_disp_buf_mapping[u4_ts]) + { + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, + ps_rel_ip->u4_disp_buf_id, + BUF_MGR_IO); + ps_dec->u4_disp_buf_mapping[u4_ts] = 0; + buf_released = 1; + } + } + + if((1 == ps_dec->u4_share_disp_buf) && (0 == buf_released)) + ps_dec->u4_disp_buf_to_be_freed[u4_ts] = 1; + + return IV_SUCCESS; +} + +/** + ******************************************************************************* + * + * @brief + * Sets degrade params + * + * @par Description: + * Sets degrade params. 
+ * Refer to ih264d_ctl_degrade_ip_t definition for details + * + * @param[in] ps_codec_obj + * Pointer to codec object at API level + * + * @param[in] pv_api_ip + * Pointer to input argument structure + * + * @param[out] pv_api_op + * Pointer to output argument structure + * + * @returns Status + * + * @remarks + * + * + ******************************************************************************* + */ + +WORD32 ih264d_set_degrade(iv_obj_t *ps_codec_obj, + void *pv_api_ip, + void *pv_api_op) +{ + ih264d_ctl_degrade_ip_t *ps_ip; + ih264d_ctl_degrade_op_t *ps_op; + dec_struct_t *ps_codec = (dec_struct_t *)ps_codec_obj->pv_codec_handle; + + ps_ip = (ih264d_ctl_degrade_ip_t *)pv_api_ip; + ps_op = (ih264d_ctl_degrade_op_t *)pv_api_op; + + ps_codec->i4_degrade_type = ps_ip->i4_degrade_type; + ps_codec->i4_nondegrade_interval = ps_ip->i4_nondegrade_interval; + ps_codec->i4_degrade_pics = ps_ip->i4_degrade_pics; + + ps_op->u4_error_code = 0; + ps_codec->i4_degrade_pic_cnt = 0; + + return IV_SUCCESS; +} + +WORD32 ih264d_get_frame_dimensions(iv_obj_t *dec_hdl, + void *pv_api_ip, + void *pv_api_op) +{ + ih264d_ctl_get_frame_dimensions_ip_t *ps_ip; + ih264d_ctl_get_frame_dimensions_op_t *ps_op; + dec_struct_t *ps_dec = dec_hdl->pv_codec_handle; + UWORD32 disp_wd, disp_ht, buffer_wd, buffer_ht, x_offset, y_offset; + + ps_ip = (ih264d_ctl_get_frame_dimensions_ip_t *)pv_api_ip; + + ps_op = (ih264d_ctl_get_frame_dimensions_op_t *)pv_api_op; + UNUSED(ps_ip); + if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid))) + { + disp_wd = ps_dec->u2_disp_width; + disp_ht = ps_dec->u2_disp_height; + + if(0 == ps_dec->u4_share_disp_buf) + { + buffer_wd = disp_wd; + buffer_ht = disp_ht; + } + else + { + buffer_wd = ps_dec->u2_frm_wd_y; + buffer_ht = ps_dec->u2_frm_ht_y; + } + } + else + { + + disp_wd = ps_dec->u4_width_at_init; + disp_ht = ps_dec->u4_height_at_init; + + if(0 == ps_dec->u4_share_disp_buf) + { + buffer_wd = disp_wd; + buffer_ht = disp_ht; + } + else + { + 
buffer_wd = ALIGN16(disp_wd) + (PAD_LEN_Y_H << 1); + buffer_ht = ALIGN16(disp_ht) + (PAD_LEN_Y_V << 2); + + } + } + if(ps_dec->u4_app_disp_width > buffer_wd) + buffer_wd = ps_dec->u4_app_disp_width; + + if(0 == ps_dec->u4_share_disp_buf) + { + x_offset = 0; + y_offset = 0; + } + else + { + y_offset = (PAD_LEN_Y_V << 1); + x_offset = PAD_LEN_Y_H; + + if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid)) + && (0 != ps_dec->u2_crop_offset_y)) + { + y_offset += ps_dec->u2_crop_offset_y / ps_dec->u2_frm_wd_y; + x_offset += ps_dec->u2_crop_offset_y % ps_dec->u2_frm_wd_y; + } + } + + ps_op->u4_disp_wd[0] = disp_wd; + ps_op->u4_disp_ht[0] = disp_ht; + ps_op->u4_buffer_wd[0] = buffer_wd; + ps_op->u4_buffer_ht[0] = buffer_ht; + ps_op->u4_x_offset[0] = x_offset; + ps_op->u4_y_offset[0] = y_offset; + + ps_op->u4_disp_wd[1] = ps_op->u4_disp_wd[2] = ((ps_op->u4_disp_wd[0] + 1) + >> 1); + ps_op->u4_disp_ht[1] = ps_op->u4_disp_ht[2] = ((ps_op->u4_disp_ht[0] + 1) + >> 1); + ps_op->u4_buffer_wd[1] = ps_op->u4_buffer_wd[2] = (ps_op->u4_buffer_wd[0] + >> 1); + ps_op->u4_buffer_ht[1] = ps_op->u4_buffer_ht[2] = (ps_op->u4_buffer_ht[0] + >> 1); + ps_op->u4_x_offset[1] = ps_op->u4_x_offset[2] = + (ps_op->u4_x_offset[0] >> 1); + ps_op->u4_y_offset[1] = ps_op->u4_y_offset[2] = + (ps_op->u4_y_offset[0] >> 1); + + if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV) + || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU)) + { + ps_op->u4_disp_wd[2] = 0; + ps_op->u4_disp_ht[2] = 0; + ps_op->u4_buffer_wd[2] = 0; + ps_op->u4_buffer_ht[2] = 0; + ps_op->u4_x_offset[2] = 0; + ps_op->u4_y_offset[2] = 0; + + ps_op->u4_disp_wd[1] <<= 1; + ps_op->u4_buffer_wd[1] <<= 1; + ps_op->u4_x_offset[1] <<= 1; + } + + return IV_SUCCESS; + +} + +WORD32 ih264d_set_num_cores(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op) +{ + ih264d_ctl_set_num_cores_ip_t *ps_ip; + ih264d_ctl_set_num_cores_op_t *ps_op; + dec_struct_t *ps_dec = dec_hdl->pv_codec_handle; + + ps_ip = (ih264d_ctl_set_num_cores_ip_t 
*)pv_api_ip; + ps_op = (ih264d_ctl_set_num_cores_op_t *)pv_api_op; + ps_op->u4_error_code = 0; + ps_dec->u4_num_cores = ps_ip->u4_num_cores; + if(ps_dec->u4_num_cores == 1) + { + ps_dec->u1_separate_parse = 0; + ps_dec->pi4_ctxt_save_register_dec = ps_dec->pi4_ctxt_save_register; + } + else + { + ps_dec->u1_separate_parse = 1; + } + + /*using only upto three threads currently*/ + if(ps_dec->u4_num_cores > 3) + ps_dec->u4_num_cores = 3; + + return IV_SUCCESS; +} + +void ih264d_fill_output_struct_from_context(dec_struct_t *ps_dec, + ivd_video_decode_op_t *ps_dec_op) +{ + if((ps_dec_op->u4_error_code & 0xff) + != ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED) + { + ps_dec_op->u4_pic_wd = (UWORD32)ps_dec->u2_disp_width; + ps_dec_op->u4_pic_ht = (UWORD32)ps_dec->u2_disp_height; + } + ps_dec_op->e_pic_type = ps_dec->i4_frametype; + + ps_dec_op->u4_new_seq = 0; + ps_dec_op->u4_output_present = ps_dec->u4_output_present; + ps_dec_op->u4_progressive_frame_flag = + ps_dec->s_disp_op.u4_progressive_frame_flag; + + ps_dec_op->u4_is_ref_flag = 1; + if(ps_dec_op->u4_frame_decoded_flag) + { + if(ps_dec->ps_cur_slice->u1_nal_ref_idc == 0) + ps_dec_op->u4_is_ref_flag = 0; + } + + ps_dec_op->e_output_format = ps_dec->s_disp_op.e_output_format; + ps_dec_op->s_disp_frm_buf = ps_dec->s_disp_op.s_disp_frm_buf; + ps_dec_op->e4_fld_type = ps_dec->s_disp_op.e4_fld_type; + ps_dec_op->u4_ts = ps_dec->s_disp_op.u4_ts; + ps_dec_op->u4_disp_buf_id = ps_dec->s_disp_op.u4_disp_buf_id; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_api_function */ +/* */ +/* Description : */ +/* */ +/* Inputs :iv_obj_t decoder handle */ +/* :pv_api_ip pointer to input structure */ +/* :pv_api_op pointer to output structure */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ 
+/*****************************************************************************/ +IV_API_CALL_STATUS_T ih264d_api_function(iv_obj_t *dec_hdl, + void *pv_api_ip, + void *pv_api_op) +{ + UWORD32 command; + UWORD32 *pu2_ptr_cmd; + UWORD32 u4_api_ret; + IV_API_CALL_STATUS_T e_status; + e_status = api_check_struct_sanity(dec_hdl, pv_api_ip, pv_api_op); + + if(e_status != IV_SUCCESS) + { + UWORD32 *ptr_err; + + ptr_err = (UWORD32 *)pv_api_op; + UNUSED(ptr_err); + H264_DEC_DEBUG_PRINT("error code = %d\n", *(ptr_err + 1)); + return IV_FAIL; + } + + pu2_ptr_cmd = (UWORD32 *)pv_api_ip; + pu2_ptr_cmd++; + + command = *pu2_ptr_cmd; +// H264_DEC_DEBUG_PRINT("inside lib = %d\n",command); + switch(command) + { + + case IV_CMD_GET_NUM_MEM_REC: + u4_api_ret = ih264d_get_num_rec((void *)pv_api_ip, + (void *)pv_api_op); + + break; + case IV_CMD_FILL_NUM_MEM_REC: + + u4_api_ret = ih264d_fill_num_mem_rec((void *)pv_api_ip, + (void *)pv_api_op); + break; + case IV_CMD_INIT: + u4_api_ret = ih264d_init(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IVD_CMD_VIDEO_DECODE: + u4_api_ret = ih264d_video_decode(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IVD_CMD_GET_DISPLAY_FRAME: + u4_api_ret = ih264d_get_display_frame(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + + break; + + case IVD_CMD_SET_DISPLAY_FRAME: + u4_api_ret = ih264d_set_display_frame(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + + break; + + case IVD_CMD_REL_DISPLAY_FRAME: + u4_api_ret = ih264d_rel_display_frame(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IV_CMD_RETRIEVE_MEMREC: + u4_api_ret = ih264d_clr(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IVD_CMD_VIDEO_CTL: + u4_api_ret = ih264d_ctl(dec_hdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + default: + u4_api_ret = IV_FAIL; + break; + } + + return u4_api_ret; +} diff --git a/decoder/ih264d_bitstrm.c b/decoder/ih264d_bitstrm.c new file mode 100755 index 
0000000..fd41bc6 --- /dev/null +++ b/decoder/ih264d_bitstrm.c @@ -0,0 +1,181 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*! + ************************************************************************** + * \file ih264d_bitstrm.c + * + * \brief + * Bitstream parsing routines + * + * \date + * 20/11/2002 + * + * \author AI + ************************************************************************** + */ + +#include <stdlib.h> +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_bitstrm.h" +#include "ih264d_error_handler.h" + +#include "ih264d_debug.h" +#include "ih264d_tables.h" +#include "ih264d_structs.h" + +/*! + ************************************************************************** + * \if Function name : ih264d_get_bit_h264 \endif + * + * \brief + * Read one bit from the bitstream. + * + * This is a Bitstream processing function. It reads the + * bit currently pointed by the bit pointer in the + * buffer and advances the pointer by one. It returns + * the bit (0 or 1) in the form of an unsigned integer. + * + * \return + * Returns the next bit (0 or 1) in the bitstream. 
+ * + ************************************************************************** + */ +UWORD8 ih264d_get_bit_h264(dec_bit_stream_t *ps_stream) +{ + UWORD32 u4_code; + + GETBIT(u4_code, ps_stream->u4_ofst, ps_stream->pu4_buffer); + return (u4_code); +} + +/*! + ************************************************************************** + * \if Function name : ih264d_get_bits_h264 \endif + * + * \brief + * Read specified number of bits from the bitstream. + * + * This is a Bitstream processing function. It reads the + * number specified number of bits from the current bit + * position and advances the bit and byte pointers + * appropriately. + * + * \return + * An unsigned 32 bit integer with its least significant bits + * containing the bits in order of their occurence in the bitstream. + * + ************************************************************************** + */ + +UWORD32 ih264d_get_bits_h264(dec_bit_stream_t *ps_bitstrm, UWORD32 u4_num_bits) +{ + UWORD32 u4_code = 0; + if(u4_num_bits) + GETBITS(u4_code, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer, u4_num_bits); + return (u4_code); +} + +/*! + ************************************************************************** + * \if Function name : ih264d_next_bits_h264 \endif + * + * \brief + * Peek specified number of bits from the bitstream. + * + * This is a Bitstream processing function. It gets the + * specified number of bits from the buffer without + * altering the current pointers. It is equivalent to + * next_bits() function in the standard. + * + * \return + * An unsigned 32 bit integer with its least significant bits + * containing the bits in order of their occurence in the bitstream. 
+ ************************************************************************** + */ +UWORD32 ih264d_next_bits_h264(dec_bit_stream_t *ps_bitstrm, UWORD32 u4_num_bits) +{ + UWORD32 u4_word_off = (ps_bitstrm->u4_ofst >> 5); + UWORD32 u4_bit_off = ps_bitstrm->u4_ofst & 0x1F; + UWORD32 *pu4_bitstream = ps_bitstrm->pu4_buffer; + UWORD32 u4_bits = pu4_bitstream[u4_word_off++] << u4_bit_off; + + /*************************************************************************/ + /* Test if number of bits to be read exceeds the number of bits in the */ + /* current word. If yes, read from the next word of the buffer, The bits */ + /* from both the words are concatenated to get next 32 bits in 'u4_bits' */ + /*************************************************************************/ + if(u4_bit_off > (INT_IN_BITS - u4_num_bits)) + u4_bits |= (pu4_bitstream[u4_word_off] >> (INT_IN_BITS - u4_bit_off)); + + return ((u4_bits >> (INT_IN_BITS - u4_num_bits))); +} + +/*! + ************************************************************************** + * \if Function name : ih264d_flush_bits_h264 \endif + * + * \brief + * Flush specified number of bits from the bitstream. + * + * This function flushes the specified number of bits (marks + * as read) from the buffer. + * + * \return + * A 8 bit unsigned integer with value + * '1' on successful flush + * '0' on failure. + * + ************************************************************************** + */ +WORD32 ih264d_flush_bits_h264(dec_bit_stream_t *ps_bitstrm, WORD32 u4_num_bits) +{ + ps_bitstrm->u4_ofst += u4_num_bits; + + if(ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst) + { + return ERROR_EOB_FLUSHBITS_T; + } + return OK; +} + +/*! + ************************************************************************** + * \if Function name : ih264d_check_byte_aligned \endif + * + * \brief + * Checks whether the bit ps_bitstrm u4_ofst is at byte boundary. 
+ * + * \param ps_bitstrm : Pointer to bitstream + * + * \return + * Returns 1 if bit ps_bitstrm u4_ofst is at byte alligned position else zero. + ************************************************************************** + */ + +UWORD8 ih264d_check_byte_aligned(dec_bit_stream_t * ps_bitstrm) +{ + if(ps_bitstrm->u4_ofst & 0x07) + return (0); + else + return (1); +} diff --git a/decoder/ih264d_bitstrm.h b/decoder/ih264d_bitstrm.h new file mode 100755 index 0000000..49cd5e7 --- /dev/null +++ b/decoder/ih264d_bitstrm.h @@ -0,0 +1,195 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +#ifndef _IH264D_BITSTRM_H_ +#define _IH264D_BITSTRM_H_ +/*! 
+ ************************************************************************* + * \file ih264d_bitstrm.h + * + * \brief + * Contains all the declarations of bitstream reading routines + * + * \date + * 20/11/2002 + * + * \author AI + ************************************************************************* + */ + +/* Includes */ +#include <stdio.h> +#include <stdlib.h> +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" + +#define INT_IN_BYTES 4 +#define INT_IN_BITS 32 + +/* Based on level 1.2 of baseline profile */ +/* 396[MAX_FS] * 128 * 1.5 [ChromaFormatParameter] / sizeof(UWORD32) + i.e 396 * 128 * 1.5 / 4 = 19008 */ +/* Based on level 3 of main profile */ +/* 1620[MAX_FS] * 128 * 1.5 [ChromaFormatParameter] / sizeof(UWORD32) + i.e 1620 * 128 * 1.5 / 4= 77760 */ +#define SIZE_OF_BUFFER 77760 + +/* Structure for the ps_bitstrm */ +typedef struct +{ + UWORD32 u4_ofst; /* Offset in the buffer for the current bit */ + UWORD32 *pu4_buffer; /* Bitstream Buffer */ + UWORD32 u4_max_ofst; /* Position of the last bit read in the current buffer */ + void * pv_codec_handle; /* For Error Handling */ +} dec_bit_stream_t; + +/* To read the next bit */ +UWORD8 ih264d_get_bit_h264(dec_bit_stream_t *); + +/* To read the next specified number of bits */ +UWORD32 ih264d_get_bits_h264(dec_bit_stream_t *, UWORD32); + +/* To see the next specified number of bits */ +UWORD32 ih264d_next_bits_h264(dec_bit_stream_t *, UWORD32); + +/* To flush a specified number of bits*/ +WORD32 ih264d_flush_bits_h264(dec_bit_stream_t *, WORD32); + +/*! + ************************************************************************** + * \if Function name : MoreRbspData \endif + * + * \brief + * Determines whether there is more data in RBSP or not. + * + * \param ps_bitstrm : Pointer to bitstream + * + * \return + * Returns 1 if there is more data in RBSP before rbsp_trailing_bits(). + * Otherwise it returns FALSE. 
+ ************************************************************************** + */ + +#define MORE_RBSP_DATA(ps_bitstrm) \ + (ps_bitstrm->u4_ofst < ps_bitstrm->u4_max_ofst) +#define EXCEED_OFFSET(ps_bitstrm) \ + (ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst) + +void GoToByteBoundary(dec_bit_stream_t * ps_bitstrm); +UWORD8 ih264d_check_byte_aligned(dec_bit_stream_t * ps_bitstrm); + +/*****************************************************************************/ +/* Define a macro for inlining of GETBIT: */ +/*****************************************************************************/ +#define GETBIT(u4_code, u4_offset, pu4_bitstream) \ +{ \ + UWORD32 *pu4_buf = (pu4_bitstream); \ + UWORD32 u4_word_off = ((u4_offset) >> 5); \ + UWORD32 u4_bit_off = (u4_offset) & 0x1F; \ + u4_code = pu4_buf[u4_word_off] << u4_bit_off; \ + (u4_offset)++; \ + u4_code = (u4_code >> 31); \ +} + + + +/*****************************************************************************/ +/* Define a macro for inlining of GETBITS: u4_no_bits shall not exceed 32 */ +/*****************************************************************************/ +#define GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits) \ +{ \ + UWORD32 *pu4_buf = (pu4_bitstream); \ + UWORD32 u4_word_off = ((u4_offset) >> 5); \ + UWORD32 u4_bit_off = (u4_offset) & 0x1F; \ + u4_code = pu4_buf[u4_word_off++] << u4_bit_off; \ + \ + if(u4_bit_off) \ + u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off)); \ + u4_code = u4_code >> (INT_IN_BITS - u4_no_bits); \ + (u4_offset) += u4_no_bits; \ +} \ + \ + +/*****************************************************************************/ +/* Define a macro for inlining of NEXTBITS */ +/*****************************************************************************/ +#define NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits) \ +{ \ + UWORD32 *pu4_buf = (pu4_bitstream); \ + UWORD32 u4_word_off = ((u4_offset) >> 5); \ + UWORD32 u4_bit_off = (u4_offset) & 0x1F; \ + u4_word = 
pu4_buf[u4_word_off++] << u4_bit_off; \ + if(u4_bit_off) \ + u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off)); \ + u4_word = u4_word >> (INT_IN_BITS - u4_no_bits); \ +} +/*****************************************************************************/ +/* Define a macro for inlining of NEXTBITS_32 */ +/*****************************************************************************/ +#define NEXTBITS_32(u4_word, u4_offset, pu4_bitstream) \ +{ \ + UWORD32 *pu4_buf = (pu4_bitstream); \ + UWORD32 u4_word_off = ((u4_offset) >> 5); \ + UWORD32 u4_bit_off = (u4_offset) & 0x1F; \ + \ + u4_word = pu4_buf[u4_word_off++] << u4_bit_off; \ + if(u4_bit_off) \ + u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off)); \ +} + + +/*****************************************************************************/ +/* Define a macro for inlining of FIND_ONE_IN_STREAM_32 */ +/*****************************************************************************/ +#define FIND_ONE_IN_STREAM_32(u4_ldz, u4_offset, pu4_bitstream) \ +{ \ + UWORD32 u4_word; \ + NEXTBITS_32(u4_word, u4_offset, pu4_bitstream); \ + u4_ldz = CLZ(u4_word); \ + (u4_offset) += (u4_ldz + 1); \ +} + +/*****************************************************************************/ +/* Define a macro for inlining of FIND_ONE_IN_STREAM_LEN */ +/*****************************************************************************/ +#define FIND_ONE_IN_STREAM_LEN(u4_ldz, u4_offset, pu4_bitstream, u4_len) \ +{ \ + UWORD32 u4_word; \ + NEXTBITS_32(u4_word, u4_offset, pu4_bitstream); \ + u4_ldz = CLZ(u4_word); \ + if(u4_ldz < u4_len) \ + (u4_offset) += (u4_ldz + 1); \ + else \ + { \ + u4_ldz = u4_len; \ + (u4_offset) += u4_ldz; \ + } \ +} + +/*****************************************************************************/ +/* Define a macro for inlining of FLUSHBITS */ +/*****************************************************************************/ +#define FLUSHBITS(u4_offset, u4_no_bits) \ +{ \ + (u4_offset) += (u4_no_bits); \ 
+} + +#endif /* _BITSTREAM_H_ */ diff --git a/decoder/ih264d_cabac.c b/decoder/ih264d_cabac.c new file mode 100755 index 0000000..38028ae --- /dev/null +++ b/decoder/ih264d_cabac.c @@ -0,0 +1,779 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! + *************************************************************************** + * \file ih264d_cabac.c + * + * \brief + * This file contains the CABAC binary arithmetic decoding routines. + * + * \date + * 04/02/2003 + * + * \author NS + *************************************************************************** + */ +#include <string.h> +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_structs.h" +#include "ih264d_cabac.h" +#include "ih264d_bitstrm.h" +#include "ih264d_error_handler.h" +#include "ih264d_defs.h" +#include "ih264d_debug.h" +#include "ih264d_tables.h" +#include "ih264d_parse_cabac.h" +#include "ih264d_tables.h" /* NOTE(review): duplicate of the include two lines above */ + + + +/*! + ************************************************************************** + * \if Function name : ih264d_init_cabac_dec_envirnoment \endif + * + * \brief + * This function initializes the CABAC decoding environment.
This function + * implements 9.3.3.2.3.1 of ISO/IEC14496-10. + * + * \return + * OK on success; ERROR_EOB_FLUSHBITS_T when the bitstream offset has moved + * past u4_max_ofst after the initial 9-bit flush. + * + ************************************************************************** + */ +WORD32 ih264d_init_cabac_dec_envirnoment(decoding_envirnoment_t * ps_cab_env, + dec_bit_stream_t *ps_bitstrm) +{ + UWORD32 u4_code_int_val_ofst; + + /* range starts at HALF - 2 (0x1FE), pre-scaled by 2^23 as described in the design note below */ ps_cab_env->u4_code_int_range = (HALF - 2) << 23; + /* fetch 32 bits: the 9 bits the standard asks for plus 23 bits of read-ahead */ NEXTBITS(u4_code_int_val_ofst, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer, + 32); + /* only 9 bits are marked as consumed; the read-ahead bits are accounted for later during renormalisation */ FLUSHBITS(ps_bitstrm->u4_ofst, 9) + + if(ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst) + return ERROR_EOB_FLUSHBITS_T; + + ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst; + + /*brief description of the design adopted for CABAC*/ + /*according to the standard the u4_code_int_range needs to be initialized to 0x1FE and + 9 bits from the bit stream need to be read into the u4_code_int_val_ofst. As and when the + u4_code_int_range becomes less than 10 bits we need to renormalize and read from the bitstream* + + In the implemented design + initially + + range_new = range <<23 + valOffset_new = valOffset << 23 + 23 bits(read from the bit stream) + + Thus we have read 23 more bits ahead of time. + + It can be mathematically proved that even with the modified range and u4_ofst the operations + like comparison and subtraction needed for a bin decode are still valid (both in the regular case and the bypass case) + + As bins are decoded, we consume the bits that we have already read into the valOffset. The clz of Range + gives us the number of bits we consumed of the 23 bits that we have read ahead of time.
+ + when the number bits we have consumed exceeds 23 ,we renormalize..and we read from the bitstream again*/ + +RESET_BIN_COUNTS(ps_cab_env) + + return OK; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_init_cabac_contexts */ +/* */ +/* Description : This function initializes the cabac contexts */ +/* depending upon slice type and Init_Idc value. */ +/* Inputs : u1_slice_type - slice type (I_SLICE, P_SLICE and B_SLICE are handled); ps_dec - decoder state */ +/* Globals : gau1_ih264d_cabac_ctxt_init_table (read only) */ +/* Outputs : contents of ps_dec->p_cabac_ctxt_table_t, ps_dec->ps_def_ctxt_mb_info, the slice-type specific context pointers and ps_dec->p_significant_coeff_flag_t[0..5] */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 03 05 2005 100153) Draft */ +/* */ +/*****************************************************************************/ + +void ih264d_init_cabac_contexts(UWORD8 u1_slice_type, dec_struct_t * ps_dec) +{ + + bin_ctxt_model_t *p_cabac_ctxt_table_t = ps_dec->p_cabac_ctxt_table_t; + UWORD8 u1_qp_y = ps_dec->ps_cur_slice->u1_slice_qp; + UWORD8 u1_cabac_init_Idc = 0; + + /* cabac_init_idc is only taken from the slice header for non-I slices; I slices get their index further down */ if(I_SLICE != u1_slice_type) + { + u1_cabac_init_Idc = ps_dec->ps_cur_slice->u1_cabac_init_idc; + } + + { + /* MAKING ps_dec->p_ctxt_inc_mb_map a scratch buffer */ + /* 0th entry of CtxtIncMbMap will be always be containing default values + for CABAC context representing MB not available */ + /* NOTE(review): the default entry is the element just before p_ctxt_inc_mb_map, so the allocation presumably reserves one extra leading element - confirm against the allocator */ ctxt_inc_mb_info_t *p_DefCtxt = ps_dec->p_ctxt_inc_mb_map - 1; + UWORD8 *pu1_temp; + WORD8 i; + p_DefCtxt->u1_mb_type = CAB_SKIP; + + p_DefCtxt->u1_cbp = 0x0f; + p_DefCtxt->u1_intra_chroma_pred_mode = 0; + + p_DefCtxt->u1_yuv_dc_csbp = 0x7; + + p_DefCtxt->u1_transform8x8_ctxt = 0; + + /* clear the 4 reference indices byte by byte */ pu1_temp = (UWORD8*)p_DefCtxt->i1_ref_idx; + for(i = 0; i < 4; i++, pu1_temp++) + (*pu1_temp) = 0; + /* clear all 16 bytes of the 4x4 u1_mv array */ pu1_temp = (UWORD8*)p_DefCtxt->u1_mv; + for(i = 0; i < 16; i++, pu1_temp++) + (*pu1_temp) = 0; + ps_dec->ps_def_ctxt_mb_info = p_DefCtxt; + } + + if(u1_slice_type == I_SLICE) + { + /* I slices use the last of the NUM_CAB_INIT_IDC_PLUS_ONE init tables */ u1_cabac_init_Idc = 3; + ps_dec->p_mb_type_t = p_cabac_ctxt_table_t + MB_TYPE_I_SLICE;
+ } + else if(u1_slice_type == P_SLICE) + { + ps_dec->p_mb_type_t = p_cabac_ctxt_table_t + MB_TYPE_P_SLICE; + ps_dec->p_mb_skip_flag_t = p_cabac_ctxt_table_t + MB_SKIP_FLAG_P_SLICE; + ps_dec->p_sub_mb_type_t = p_cabac_ctxt_table_t + SUB_MB_TYPE_P_SLICE; + } + else if(u1_slice_type == B_SLICE) + { + ps_dec->p_mb_type_t = p_cabac_ctxt_table_t + MB_TYPE_B_SLICE; + ps_dec->p_mb_skip_flag_t = p_cabac_ctxt_table_t + MB_SKIP_FLAG_B_SLICE; + ps_dec->p_sub_mb_type_t = p_cabac_ctxt_table_t + SUB_MB_TYPE_B_SLICE; + } + { + /* significant_coeff_flag contexts differ between field and frame coded slices */ bin_ctxt_model_t *p_cabac_ctxt_table_t_tmp = p_cabac_ctxt_table_t; + if(ps_dec->ps_cur_slice->u1_field_pic_flag) + { + p_cabac_ctxt_table_t_tmp += SIGNIFICANT_COEFF_FLAG_FLD; + + } + else + { + p_cabac_ctxt_table_t_tmp += SIGNIFICANT_COEFF_FLAG_FRAME; + } + { + /* one base pointer per residual block category 0..5 */ bin_ctxt_model_t * * p_significant_coeff_flag_t = + ps_dec->p_significant_coeff_flag_t; + p_significant_coeff_flag_t[0] = p_cabac_ctxt_table_t_tmp + + SIG_COEFF_CTXT_CAT_0_OFFSET; + p_significant_coeff_flag_t[1] = p_cabac_ctxt_table_t_tmp + + SIG_COEFF_CTXT_CAT_1_OFFSET; + p_significant_coeff_flag_t[2] = p_cabac_ctxt_table_t_tmp + + SIG_COEFF_CTXT_CAT_2_OFFSET; + p_significant_coeff_flag_t[3] = p_cabac_ctxt_table_t_tmp + + SIG_COEFF_CTXT_CAT_3_OFFSET; + p_significant_coeff_flag_t[4] = p_cabac_ctxt_table_t_tmp + + SIG_COEFF_CTXT_CAT_4_OFFSET; + + p_significant_coeff_flag_t[5] = p_cabac_ctxt_table_t_tmp + + SIG_COEFF_CTXT_CAT_5_OFFSET; + + } + } + + /* load all NUM_CABAC_CTXTS context states for this (init index, QP) pair. NOTE(review): both indices are used unchecked here; presumably they are range-validated while parsing the slice header - confirm */ memcpy(p_cabac_ctxt_table_t, + gau1_ih264d_cabac_ctxt_init_table[u1_cabac_init_Idc][u1_qp_y], + NUM_CABAC_CTXTS * sizeof(bin_ctxt_model_t)); +} +/*! + ************************************************************************** + * \if Function name : ih264d_decode_bin \endif + * + * \brief + * This function implements decoding process of a decision as defined + * in 9.3.3.2.2. + * + * \return + * Returns symbol decoded. + * + * \note + * It is specified in 9.3.3.2.3.2 that, one of the input to this function + * is CtxIdx.
CtxIdx is used to identify state and MPS of that context + * (Refer Fig 9.11 - Flowchart for encoding a decision). To suffice that + * here we pass a pointer bin_ctxt_model_t which contains these values. + * + ************************************************************************** + */ + +UWORD32 ih264d_decode_bin(UWORD32 u4_ctx_inc, + bin_ctxt_model_t *ps_src_bin_ctxt, + dec_bit_stream_t *ps_bitstrm, + decoding_envirnoment_t *ps_cab_env) + +{ + + UWORD32 u4_qnt_int_range, u4_code_int_range, u4_code_int_val_ofst, + u4_int_range_lps; + + UWORD32 u4_symbol, u4_mps_state; + + bin_ctxt_model_t *ps_bin_ctxt; + + UWORD32 table_lookup; + const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table; + UWORD32 u4_clz; + + /* pick the context model addressed by the increment */ ps_bin_ctxt = ps_src_bin_ctxt + u4_ctx_inc; + + u4_code_int_range = ps_cab_env->u4_code_int_range; + u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst; + + u4_mps_state = (ps_bin_ctxt->u1_mps_state); + /* leading zeros of the scaled range = read-ahead bits already consumed */ u4_clz = CLZ(u4_code_int_range); + + /* normalise the range, then keep bits 30..29 as the 2-bit quantised range index */ u4_qnt_int_range = u4_code_int_range << u4_clz; + u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3; + + /* combined table entry: bits 0-7 rangeTabLPS, bits 8-14 next state if MPS, bits 15-21 next state if LPS */ table_lookup = pu4_table[(u4_mps_state << 2) + u4_qnt_int_range]; + u4_int_range_lps = table_lookup & 0xff; + + /* bring the LPS range to the same scale as the pre-shifted range */ u4_int_range_lps = u4_int_range_lps << (23 - u4_clz); + u4_code_int_range = u4_code_int_range - u4_int_range_lps; + + /* bit 6 of the stored state is the MPS value */ u4_symbol = ((u4_mps_state >> 6) & 0x1); + + /* assume the MPS path; CHECK_IF_LPS overrides symbol, range and next state when the offset falls in the LPS interval */ u4_mps_state = (table_lookup >> 8) & 0x7F; + + CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol, + u4_int_range_lps, u4_mps_state, table_lookup) + + /* range has dropped below 2^8: all read-ahead bits are used up, refill from the bitstream */ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8) + { + UWORD32 *pu4_buffer, u4_offset; + + pu4_buffer = ps_bitstrm->pu4_buffer; + u4_offset = ps_bitstrm->u4_ofst; + + RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset, + pu4_buffer) + + ps_bitstrm->u4_ofst = u4_offset; + } + + INC_BIN_COUNT(ps_cab_env) + + /* write the updated arithmetic decoder state and context state back */ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst; + ps_cab_env->u4_code_int_range = u4_code_int_range; + ps_bin_ctxt->u1_mps_state =
u4_mps_state; + + return (u4_symbol); +} + +/*! + ************************************************************************** + * \if Function name : ih264d_decode_terminate \endif + * + * \brief + * This function implements decoding process of a termination as defined + * 9.3.3.2.2.3 of ISO/IEC14496-10. + * + * \return + * 1 when the terminate bin is set (the bitstream offset is then advanced by + * the number of read-ahead bits already consumed), otherwise 0. + * + * \note + * This routine is called while decoding "end_of_slice_flag" and the + * bin indicating PCM mode in MBType. + * + ************************************************************************** + */ +UWORD8 ih264d_decode_terminate(decoding_envirnoment_t * ps_cab_env, + dec_bit_stream_t * ps_stream) +{ + UWORD32 u4_symbol; + UWORD32 u4_code_int_val_ofst, u4_code_int_range; + UWORD32 u4_clz; + + u4_code_int_range = ps_cab_env->u4_code_int_range; + u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst; + + u4_clz = CLZ(u4_code_int_range); + /* subtract 2 from the range, expressed at the current scale of the pre-shifted range */ u4_code_int_range -= (2 << (23 - u4_clz)); + + if(u4_code_int_val_ofst >= u4_code_int_range) + { + /* S=1 */ + u4_symbol = 1; + + { + + /*the u4_ofst needs to be updated before termination*/ + ps_stream->u4_ofst += u4_clz; + + } + + } + else + { + /* S=0 */ + u4_symbol = 0; + + /* same renormalisation rule as for a regular bin */ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8) + { + UWORD32 *pu4_buffer, u4_offset; + + pu4_buffer = ps_stream->pu4_buffer; + u4_offset = ps_stream->u4_ofst; + + RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset, + pu4_buffer) + ps_stream->u4_ofst = u4_offset; + } + } + + ps_cab_env->u4_code_int_range = u4_code_int_range; + ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst; + + INC_BIN_COUNT(ps_cab_env) + + return (u4_symbol); +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_decode_bins_tunary */ +/* */ +/* Description : This function decodes bins in the case of TUNARY */ +/* binarization technique.valid_length is assumed equal to 3 */ +/* and u1_max_bins <= 4 in this function.
*/ +/* Inputs : u1_max_bins - maximum number of bins to decode; u4_ctx_inc - 4-bit context increments packed lowest nibble first; ps_src_bin_ctxt - base of the context models; ps_bitstrm - bitstream used for renormalisation; ps_cab_env - CABAC range/offset state and lookup table */ +/* Globals : None */ +/* Processing : Decodes one context coded bin per nibble of u4_ctx_inc and */ +/* stops at the first 0 bin or after u1_max_bins bins */ +/* Outputs : Updated range/offset in ps_cab_env, updated context states and bitstream offset */ +/* Returns : Number of 1 bins decoded before the first 0 bin, at most u1_max_bins */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 20 11 2008 SH Draft */ +/* */ +/*****************************************************************************/ + +UWORD32 ih264d_decode_bins_tunary(UWORD8 u1_max_bins, + UWORD32 u4_ctx_inc, + bin_ctxt_model_t *ps_src_bin_ctxt, + dec_bit_stream_t *ps_bitstrm, + decoding_envirnoment_t *ps_cab_env) + +{ + UWORD32 u4_value; + UWORD32 u4_symbol; + UWORD8 u4_ctx_Inc; + bin_ctxt_model_t *ps_bin_ctxt; + UWORD32 u4_code_int_range, u4_code_int_val_ofst; + const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table; + + u4_value = 0; + + /*u1_max_bins has to be less than or equal to 4, u1_max_bins <= 4 for this function*/ + + /*here the valid length is assumed to be equal to 3 ,so the calling function is expected + to duplicate CtxInc if valid length is 2 and cmaxbin is greater than 2*/ + /* work on local copies of the decoder state; they are written back after the loop */ u4_code_int_range = ps_cab_env->u4_code_int_range; + u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst; + + do + { + /* lowest nibble is the context increment for this bin; shift the next one into place */ u4_ctx_Inc = u4_ctx_inc & 0xF; + u4_ctx_inc = u4_ctx_inc >> 4; + + ps_bin_ctxt = ps_src_bin_ctxt + u4_ctx_Inc; + + DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range, u4_code_int_val_ofst, + pu4_table, ps_bitstrm, u4_symbol) + + INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env); + + u4_value++; + } + /* bitwise & of two 0/1 values; evaluates both operands, result matches a logical AND */ while((u4_value < u1_max_bins) & (u4_symbol)); + + /* a terminating 0 bin was counted by the loop but is not part of the value */ u4_value = u4_value - 1 + u4_symbol; + + ps_cab_env->u4_code_int_range = u4_code_int_range; + ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst; + + return (u4_value); + +} + +/*****************************************************************************/ +/* */ +/* Function
Name : ih264d_decode_bins */ +/* */ +/* Description : This function decodes bins in the case of MSB_FIRST_FLC */ +/* binarization technique.valid_length is always equal max_bins */ +/* for MSB_FIRST_FLC. assumes u1_max_bins <= 4 */ +/* Inputs : u1_max_bins - number of bins to decode; u4_ctx_inc - 4-bit context increments packed lowest nibble first; ps_src_bin_ctxt - base of the context models; ps_bitstrm - bitstream used for renormalisation; ps_cab_env - CABAC range/offset state and lookup table */ +/* Globals : None */ +/* Processing : Decodes u1_max_bins context coded bins, one context per */ +/* nibble of u4_ctx_inc, and packs them first bin in the MSB */ +/* Outputs : Updated range/offset in ps_cab_env, updated context states and bitstream offset */ +/* Returns : The decoded bins packed into an integer, first decoded bin most significant */ +/* */ +/* Issues : The do/while decodes one bin even when u1_max_bins is 0 */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 20 11 2008 SH Draft */ +/* */ +/*****************************************************************************/ + +UWORD32 ih264d_decode_bins(UWORD8 u1_max_bins, + UWORD32 u4_ctx_inc, + bin_ctxt_model_t *ps_src_bin_ctxt, + dec_bit_stream_t *ps_bitstrm, + decoding_envirnoment_t *ps_cab_env) + +{ + UWORD32 u4_value; + UWORD32 u4_symbol, i; + UWORD32 u4_ctxt_inc; + bin_ctxt_model_t *ps_bin_ctxt; + UWORD32 u4_code_int_range, u4_code_int_val_ofst; + const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table; + + i = 0; + + u4_value = 0; + + /*u1_max_bins has to be less than or equal to 4, u1_max_bins <= 4 for this function*/ + /* work on local copies of the decoder state; they are written back after the loop */ u4_code_int_range = ps_cab_env->u4_code_int_range; + u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst; + + do + { + /* lowest nibble is the context increment for this bin; shift the next one into place */ u4_ctxt_inc = u4_ctx_inc & 0xf; + u4_ctx_inc = u4_ctx_inc >> 4; + + ps_bin_ctxt = ps_src_bin_ctxt + u4_ctxt_inc; + + DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range, u4_code_int_val_ofst, + pu4_table, ps_bitstrm, u4_symbol) + + INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env); + + /* append the new bin below the bins decoded so far */ u4_value = (u4_value << 1) | (u4_symbol); + + i++; + } + while(i < u1_max_bins); + + ps_cab_env->u4_code_int_range = u4_code_int_range; + ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst; + + return (u4_value); + +} +
+/*****************************************************************************/ +/* */ +/* Function Name : ih264d_decode_bins_unary */ +/* */ +/* Description : This function decodes bins in the case of UNARY */ +/* binarization technique. Here the valid length is taken as 5 */ +/* and cmax is always greater than 9 */ +/* Inputs : u1_max_bins - maximum number of bins to decode; u4_ctx_inc - five 4-bit context increments packed lowest nibble first; ps_src_bin_ctxt - base of the context models; ps_bitstrm - bitstream used for renormalisation; ps_cab_env - CABAC range/offset state and lookup table */ +/* Globals : None */ +/* Processing : The first four bins each use their own nibble of u4_ctx_inc; */ +/* every later bin reuses the fifth nibble */ +/* Outputs : Updated range/offset in ps_cab_env, updated context states and bitstream offset */ +/* Returns : Number of 1 bins decoded before the first 0 bin, at most u1_max_bins */ +/* */ +/* Issues : u1_max_bins is only checked after the first loop of up to four bins */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 20 11 2008 SH Draft */ +/* */ +/*****************************************************************************/ +UWORD32 ih264d_decode_bins_unary(UWORD8 u1_max_bins, + UWORD32 u4_ctx_inc, + bin_ctxt_model_t *ps_src_bin_ctxt, + dec_bit_stream_t *ps_bitstrm, + decoding_envirnoment_t *ps_cab_env) +{ + UWORD32 u4_value; + UWORD32 u4_symbol; + bin_ctxt_model_t *ps_bin_ctxt; + UWORD32 u4_ctx_Inc; + UWORD32 u4_code_int_range, u4_code_int_val_ofst; + const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table; + + /* in this function the valid length for u4_ctx_inc is always taken to be 5, so if + the valid length is less than 5 the caller needs to duplicate accordingly*/ + + /*u1_max_bins is always greater or equal to 9 we have the check for u1_max_bins only after the 2 loop*/ + u4_value = 0; + u4_code_int_range = ps_cab_env->u4_code_int_range; + u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst; + + do + { + /* bins 1 to 4: consume one nibble of u4_ctx_inc per bin */ u4_ctx_Inc = u4_ctx_inc & 0xf; + u4_ctx_inc = u4_ctx_inc >> 4; + + ps_bin_ctxt = ps_src_bin_ctxt + u4_ctx_Inc; + + DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range, u4_code_int_val_ofst, + pu4_table, ps_bitstrm, u4_symbol) + +
INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env); + + u4_value++; + + } + while(u4_symbol && u4_value < 4); + + if(u4_symbol && (u4_value < u1_max_bins)) + { + + /* every remaining bin shares the context named by the fifth nibble */ u4_ctx_Inc = u4_ctx_inc & 0xf; + + ps_bin_ctxt = ps_src_bin_ctxt + u4_ctx_Inc; + + do + { + + DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range, u4_code_int_val_ofst, + pu4_table, ps_bitstrm, u4_symbol) + + INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env); + + u4_value++; + + } + while(u4_symbol && (u4_value < u1_max_bins)); + + } + + ps_cab_env->u4_code_int_range = u4_code_int_range; + ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst; + + /* a terminating 0 bin was counted by the loops but is not part of the value */ u4_value = u4_value - 1 + u4_symbol; + + return (u4_value); + +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_decode_bypass_bins_unary */ +/* */ +/* Description : This function is used in the case of UNARY coding */ +/* of bypass (equiprobable) bins */ +/* */ +/* Inputs : ps_cab_env - CABAC range/offset state; ps_bitstrm - bitstream used for renormalisation */ +/* Globals : None */ +/* Processing : Halves the range for every bin, compares the offset against */ +/* it and renormalises whenever the range drops below 2^9 */ +/* Outputs : Updated range/offset in ps_cab_env and bitstream offset */ +/* Returns : Number of 1 bins decoded before the first 0 bin, at most 32 */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 13 10 2005 Ittiam Draft */ +/* */ +/*****************************************************************************/ + +UWORD32 ih264d_decode_bypass_bins_unary(decoding_envirnoment_t *ps_cab_env, + dec_bit_stream_t *ps_bitstrm) +{ + UWORD32 u4_value; + UWORD32 u4_bin; + UWORD32 u4_code_int_val_ofst, u4_code_int_range; + + UWORD32 u1_max_bins; + + u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst; + u4_code_int_range = ps_cab_env->u4_code_int_range; + + /* make sure the range can be halved once before the first bin */ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9) + { + UWORD32 *pu4_buffer, u4_offset; + + pu4_buffer =
ps_bitstrm->pu4_buffer; + u4_offset = ps_bitstrm->u4_ofst; + + RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset, + pu4_buffer) + ps_bitstrm->u4_ofst = u4_offset; + } + + /*as it is called only from mvd*/ + u1_max_bins = 32; + u4_value = 0; + + do + { + u4_value++; + + /* bypass bin: halve the range and test which half the offset lies in */ u4_code_int_range = u4_code_int_range >> 1; + if(u4_code_int_val_ofst >= u4_code_int_range) + { + /* S=1 */ + u4_bin = 1; + u4_code_int_val_ofst -= u4_code_int_range; + } + else + { + /* S=0 */ + u4_bin = 0; + } + + INC_BIN_COUNT(ps_cab_env);INC_BYPASS_BINS(ps_cab_env); + + /* refill before the next halving once the range is below 2^9 */ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9) + { + UWORD32 *pu4_buffer, u4_offset; + + pu4_buffer = ps_bitstrm->pu4_buffer; + u4_offset = ps_bitstrm->u4_ofst; + + RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset, + pu4_buffer) + + ps_bitstrm->u4_ofst = u4_offset; + } + + } + while(u4_bin && (u4_value < u1_max_bins)); + + ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst; + ps_cab_env->u4_code_int_range = u4_code_int_range; + /* a terminating 0 bin was counted by the loop but is not part of the value */ u4_value = (u4_value - 1 + u4_bin); + +return (u4_value); +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_decode_bypass_bins */ +/* */ +/* Description : This function is used in the case of FLC coding */ +/* of bypass (equiprobable) bins */ +/* */ +/* Inputs : ps_cab_env - CABAC range/offset state; u1_max_bins - number of bins to decode; ps_bitstrm - bitstream used for renormalisation */ +/* Globals : None */ +/* Processing : Halves the range for every bin, compares the offset against */ +/* it and renormalises whenever the range drops below 2^9 */ +/* Outputs : Updated range/offset in ps_cab_env and bitstream offset */ +/* Returns : The decoded bins packed into an integer, first decoded bin most significant */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 13 10 2005 Ittiam Draft */ +/* */ +/*****************************************************************************/ + +UWORD32 ih264d_decode_bypass_bins(decoding_envirnoment_t *ps_cab_env, + UWORD8
u1_max_bins, + dec_bit_stream_t *ps_bitstrm) +{ + UWORD32 u4_bins; + UWORD32 u4_bin; + UWORD32 u4_code_int_val_ofst, u4_code_int_range; + + u4_bins = 0; + u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst; + u4_code_int_range = ps_cab_env->u4_code_int_range; + + /* make sure the range can be halved once before the first bin */ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9) + { + UWORD32 *pu4_buffer, u4_offset; + + pu4_buffer = ps_bitstrm->pu4_buffer; + u4_offset = ps_bitstrm->u4_ofst; + + RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset, + pu4_buffer) + ps_bitstrm->u4_ofst = u4_offset; + } + + /* pre-tested loop: with u1_max_bins == 0 nothing is decoded and 0 is returned. The former do/while decremented the UWORD8 counter to 255 in that case and went on to decode 256 bins */ while(u1_max_bins) + { + + /* bypass bin: halve the range and test which half the offset lies in */ u4_code_int_range = u4_code_int_range >> 1; + + if(u4_code_int_val_ofst >= u4_code_int_range) + { + /* S=1 */ + u4_bin = 1; + u4_code_int_val_ofst -= u4_code_int_range; + } + else + { + /* S=0 */ + u4_bin = 0; + } + + INC_BIN_COUNT(ps_cab_env);INC_BYPASS_BINS(ps_cab_env); + + /* append the new bin below the bins decoded so far */ u4_bins = ((u4_bins << 1) | u4_bin); + u1_max_bins--; + + /* refill before the next halving once the range is below 2^9 */ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9) + { + UWORD32 *pu4_buffer, u4_offset; + + pu4_buffer = ps_bitstrm->pu4_buffer; + u4_offset = ps_bitstrm->u4_ofst; + + RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset, + pu4_buffer) + ps_bitstrm->u4_ofst = u4_offset; + } + + } + /* all requested bins have been decoded */ + + ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst; + ps_cab_env->u4_code_int_range = u4_code_int_range; + + return (u4_bins); +} + diff --git a/decoder/ih264d_cabac.h b/decoder/ih264d_cabac.h new file mode 100755 index 0000000..6ee3d52 --- /dev/null +++ b/decoder/ih264d_cabac.h @@ -0,0 +1,267 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! + *************************************************************************** + * \file ih264d_cabac.h + * + * \brief + * This file contains declarations of Binary decoding routines and tables. + * + * \date + * 04/02/2003 + * + * \author NS + *************************************************************************** + */ + +#ifndef _IH264D_CABAC_H_ +#define _IH264D_CABAC_H_ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_bitstrm.h" +#include "ih264d_defs.h" + +#define B_BITS 10 + +#define HALF (1 << (B_BITS-1)) +#define QUARTER (1 << (B_BITS-2)) + +#define CTXT_UNUSED {0,64} +#define NUM_MB_SKIP_CTXT 6 +#define NUM_MB_TYPE_CTXT 9 +#define NUM_SUBMB_TYPE_CTXT 7 +#define NUM_REF_IDX_CTXT 6 +#define NUM_MB_QP_DELTA 4 +#define NUM_PRED_MODE 6 +#define NUM_MB_FIELD 3 +#define NUM_CBP 12 +#define NUM_CTX_MVD 14 + +/* Residual block cabac context parameters */ +#define NUM_CTX_CAT 6 +#define NUM_LUMA_CTX_CAT 3 +#define NUM_CTX_CODED_BLOCK 4 +/* Luma CtxSigCoeff + CtxLastCoeff = 15 + 15 = 30 */ +#define NUM_LUMA_CTX_SIG_COEF 30 +/* Chroma DC CtxSigCoeff + CtxLastCoeff = 3 + 3 = 6 */ +#define NUM_CTX_CHROMA_DC_SIG_COEF 6 +/* Chroma AC CtxSigCoeff + CtxLastCoeff = 14 + 14 = 28 */ +#define NUM_CTX_CHROMA_AC_SIG_COEF 28 +#define NUM_CTX_ABS_LEVEL 10 + +#define LUMA_DC_CTXCAT 0 +#define LUMA_AC_CTXCAT 1 +#define 
LUMA_4X4_CTXCAT 2 +#define CHROMA_DC_CTXCAT 3 +#define CHROMA_AC_CTXCAT 4 +#define LUMA_8X8_CTXCAT 5 + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ +#define NUM_CABAC_CTXTS 460 +#define QP_RANGE 52 +#define NUM_CAB_INIT_IDC_PLUS_ONE 4 +#define LAST_COEFF_CTXT_MINUS_SIG_COEFF_CTXT 61 +#define LAST_COEFF_CTXT_MINUS_SIG_COEFF_CTXT_8X8 15 + +/*bits 0 to 5 :state + bit 6:mps*/ +typedef struct +{ + UWORD8 u1_mps_state; /* state number */ +} bin_ctxt_model_t; + +typedef struct + +{ + /* Neighbour availability Variables needed to get CtxtInc, for CABAC */ + UWORD8 u1_mb_type; /** macroblock type: I/P/B/SI/SP */ + UWORD8 u1_cbp; /** Coded Block Pattern */ + UWORD8 u1_intra_chroma_pred_mode; + + /*************************************************************************/ + /* Arrangnment of DC CSBP */ + /* bits: b7 b6 b5 b4 b3 b2 b1 b0 */ + /* CSBP: x x x x x Vdc Udc Ydc */ + /*************************************************************************/ + UWORD8 u1_yuv_dc_csbp; + WORD8 i1_ref_idx[4]; + UWORD8 u1_mv[4][4]; + UWORD8 u1_transform8x8_ctxt; +} ctxt_inc_mb_info_t; + +#define ONE_RIGHT_SHIFTED_BY_8 1<<8 +#define ONE_RIGHT_SHIFTED_BY_9 1<<9 +#define ONE_RIGHT_SHIFTED_BY_14 1<<14 +typedef struct +{ + UWORD32 u4_code_int_range; + UWORD32 u4_code_int_val_ofst; + const void *cabac_table; + void * pv_codec_handle; /* For Error Handling */ +} decoding_envirnoment_t; + +WORD32 ih264d_init_cabac_dec_envirnoment(decoding_envirnoment_t * ps_cab_env, + dec_bit_stream_t *ps_bitstrm); + +UWORD32 ih264d_decode_bin(UWORD32 u4_ctx_inc, + bin_ctxt_model_t *ps_bin_ctxt, + dec_bit_stream_t *ps_bitstrm, + decoding_envirnoment_t *ps_cab_env); +UWORD8 ih264d_decode_terminate(decoding_envirnoment_t * ps_cab_env, + dec_bit_stream_t * ps_bitstrm); + +UWORD32 ih264d_decode_bins_tunary(UWORD8 u1_max_bins, + UWORD32 u4_ctx_inc, + bin_ctxt_model_t 
*ps_src_bin_ctxt, + dec_bit_stream_t *ps_bitstrm, + decoding_envirnoment_t *ps_cab_env); + +UWORD32 ih264d_decode_bins(UWORD8 u1_max_bins, + UWORD32 u4_ctx_inc, + bin_ctxt_model_t *ps_src_bin_ctxt, + dec_bit_stream_t *ps_bitstrm, + decoding_envirnoment_t *ps_cab_env); +UWORD32 ih264d_decode_bins_unary(UWORD8 u1_max_bins, + UWORD32 u4_ctx_inc, + bin_ctxt_model_t *ps_src_bin_ctxt, + dec_bit_stream_t *ps_bitstrm, + decoding_envirnoment_t *ps_cab_env); + +UWORD32 ih264d_decode_bypass_bins_unary(decoding_envirnoment_t *ps_cab_env, + dec_bit_stream_t *ps_bitstrm); + +UWORD32 ih264d_decode_bypass_bins(decoding_envirnoment_t *ps_cab_env, + UWORD8 u1_max_bins, + dec_bit_stream_t *ps_bitstrm); + +/*****************************************************************************/ +/* Function Macros */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Defining a macro for renormalization*/ +/*****************************************************************************/ + +/*we renormalize every time the number of bits (which are read ahead of time) we have + consumed in the u4_ofst exceeds 23. + The macro shifts range and offset left by the number of leading zeros of the range, + refills the offset with that many bits taken from bit position (u4_offset_m + 23) + and advances u4_offset_m by the same amount. All four arguments must be plain + lvalues/variables because they are expanded several times. + NOTE(review): the expansion assumes the range has at least one leading zero and + is non-zero; with a shift count of 0 NEXTBITS would shift a 32-bit word by + INT_IN_BITS bits. The callers visible here only expand it after testing + range < ONE_RIGHT_SHIFTED_BY_8 or ONE_RIGHT_SHIFTED_BY_9.*/ + +#define RENORM_RANGE_OFFSET(u4_codeIntRange_m,u4_codeIntValOffset_m,u4_offset_m,pu4_buffer_m) \ + { \ + UWORD32 read_bits_m,u4_clz_m ; \ + u4_clz_m = CLZ(u4_codeIntRange_m); \ + NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m) \ + FLUSHBITS(u4_offset_m,(u4_clz_m)) \ + u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m; \ + u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m; \ + } + +/*****************************************************************************/ +/* Defining a macro for checking if the symbol is LPS: when the offset lies in the LPS interval the symbol is inverted, the offset is reduced by the MPS range, the range becomes the LPS range and the next state is taken from bits 15-21 of the table entry */ +/*****************************************************************************/ + +#define CHECK_IF_LPS(u4_codeIntRange_m,u4_codeIntValOffset_m,u4_symbol_m, \ + u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m) \ +{ \ +
if(u4_codeIntValOffset_m >= u4_codeIntRange_m) \ + { \ + u4_symbol_m = 1 - u4_symbol_m; \ + u4_codeIntValOffset_m -= u4_codeIntRange_m; \ + u4_codeIntRange_m = u4_codeIntRangeLPS_m; \ + u1_mps_state_m = (table_lookup_m >> 15) & 0x7F; \ + } \ +} + +/*! + ************************************************************************** + * \if Function name : DECODE_ONE_BIN_MACRO \endif + * + * \brief + * This macro implements decoding process of a decision as defined + * in 9.3.3.2.2. It is the inline counterpart of ih264d_decode_bin. + * + * \return + * Nothing (statement macro). The decoded symbol is stored in u4_symbol; + * u4_code_int_range and u4_code_int_val_ofst are updated in place, the new + * context state is written through p_binCtxt_arg and the bitstream offset + * is advanced when a renormalisation takes place. u4_code_int_range, + * u4_code_int_val_ofst and u4_symbol must therefore be modifiable lvalues. + * + * \note + * It is specified in 9.3.3.2.3.2 that, one of the input to this function + * is CtxIdx. CtxIdx is used to identify state and MPS of that context + * (Refer Fig 9.11 - Flowchart for encoding a decision). To suffice that + * here we pass a pointer bin_ctxt_model_t which contains these values. + * + ************************************************************************** + */ + +#define DECODE_ONE_BIN_MACRO(p_binCtxt_arg ,u4_code_int_range,u4_code_int_val_ofst, \ + pu4_table_arg, \ + p_DecBitStream_arg,u4_symbol) \ +{ \ + bin_ctxt_model_t *p_binCtxt_m = (bin_ctxt_model_t *) (p_binCtxt_arg); \ + dec_bit_stream_t *p_DecBitStream_m = (dec_bit_stream_t *) (p_DecBitStream_arg); \ + const UWORD32 *pu4_table_m = (const UWORD32 *) (pu4_table_arg); \ + \ + UWORD32 u4_quantCodeIntRange_m,u4_codeIntRangeLPS_m; \ + UWORD32 u1_mps_state_m; \ + UWORD32 table_lookup_m; \ + UWORD32 u4_clz_m; \ + \ + u1_mps_state_m = (p_binCtxt_m->u1_mps_state); \ + u4_clz_m = CLZ(u4_code_int_range); \ + u4_quantCodeIntRange_m = u4_code_int_range << u4_clz_m; \ + u4_quantCodeIntRange_m = (u4_quantCodeIntRange_m >> 29) & 0x3; \ + table_lookup_m = pu4_table_m[(u1_mps_state_m << 2)+u4_quantCodeIntRange_m]; \ + u4_codeIntRangeLPS_m = table_lookup_m & 0xff; \ + \ + u4_codeIntRangeLPS_m = u4_codeIntRangeLPS_m << (23 - u4_clz_m); \ + u4_code_int_range = u4_code_int_range - u4_codeIntRangeLPS_m; \ + u4_symbol = ((u1_mps_state_m>> 6) & 0x1); \ + /*if mps: next state comes from bits 8-14 of the table entry*/ \ + u1_mps_state_m =
(table_lookup_m >> 8) & 0x7F; \ + if(u4_code_int_val_ofst >= u4_code_int_range) \ + { \ + /* LPS path: invert the symbol, switch to the LPS interval and take the next state from bits 15-21 */ \ + u4_symbol = 1 - u4_symbol; \ + u4_code_int_val_ofst -= u4_code_int_range; \ + u4_code_int_range = u4_codeIntRangeLPS_m; \ + u1_mps_state_m = (table_lookup_m >> 15) & 0x7F; \ + } \ + if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8) \ + { \ + UWORD32 *pu4_buffer,u4_offset; \ + UWORD32 read_bits; /* u4_clz_m declared at the top of the macro is reused; the former inner redeclaration shadowed it */ \ + \ + pu4_buffer = p_DecBitStream_m->pu4_buffer; \ + u4_offset = p_DecBitStream_m->u4_ofst; \ + u4_clz_m = CLZ(u4_code_int_range); \ + NEXTBITS(read_bits,(u4_offset+23),pu4_buffer,u4_clz_m) \ + FLUSHBITS(u4_offset,(u4_clz_m)) \ + u4_code_int_range = u4_code_int_range << u4_clz_m; \ + u4_code_int_val_ofst= (u4_code_int_val_ofst << u4_clz_m) | read_bits; \ + \ + \ + p_DecBitStream_m->u4_ofst = u4_offset; \ + } \ + p_binCtxt_m->u1_mps_state = u1_mps_state_m; \ +} + +#endif /* _IH264D_CABAC_H_ */ diff --git a/decoder/ih264d_cabac_init_tables.c b/decoder/ih264d_cabac_init_tables.c new file mode 100755 index 0000000..2c3a55e --- /dev/null +++ b/decoder/ih264d_cabac_init_tables.c @@ -0,0 +1,9273 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ +#ifndef _CABAC_INIT_TABLES_H_ +#define _CABAC_INIT_TABLES_H_ + +/*****************************************************************************/ +/* */ +/* File Name : ih264d_cabac_init_tables.c */ +/* */ +/* Description : This file contains the initialized cabac context */ +/* structures for all possible values of Qp (0 - 51) */ +/* Cabac_init Idc (0 - 2) and I slice. The contexts */ +/* are initialized and stored as per tables 9-11 to */ +/* 9 -23 */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 10 01 2005 SH */ +/* */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_cabac.h" + +/*combined table :guc_RTAB,NextStateLPS,NextStateMPS + input(combined_state): + bits 0-5: state + bits 6:mps + output + bits 0-7:rangeTabLPS + bits 8-14 :combined_next_state_if_mps + bits 15 -21:combined_next_state_if_lps + + */ + +const UWORD32 gau4_ih264d_cabac_table[128][4] = + { + { 2097536, 2097584, 2097616, 2097648 }, + + { 640, 679, 709, 739 }, + + { 33664, 33694, 33723, 33752 }, + + { 66683, 66710, 66738, 66765 }, + + { 66932, 66958, 66985, 67011 }, + + { 132719, 132743, 132768, 132793 }, + + { 132969, 132992, 133016, 133039 }, + + { 165988, 166010, 166032, 166054 }, + + { 199007, 199028, 199049, 199070 }, + + { 232026, 232046, 232066, 232086 }, + + { 265045, 265064, 265083, 265102 }, + + { 298065, 298083, 298101, 298119 }, + + { 298317, 298334, 298351, 298368 }, + + { 364105, 364121, 364137, 364154 }, + + { 364357, 364373, 364388, 364404 }, + + { 397378, 397392, 397407, 397422 }, + + { 430398, 430412, 430426, 430440 }, + + { 430651, 430664, 
430678, 430691 }, + + { 496440, 496453, 496465, 496478 }, + + { 496693, 496705, 496717, 496729 }, + + { 529715, 529726, 529737, 529749 }, + + { 529968, 529979, 529989, 530000 }, + + { 595758, 595768, 595778, 595788 }, + + { 596011, 596021, 596031, 596040 }, + + { 629033, 629042, 629051, 629061 }, + + { 629287, 629296, 629304, 629313 }, + + { 695077, 695085, 695094, 695102 }, + + { 695331, 695339, 695347, 695355 }, + + { 728353, 728361, 728368, 728376 }, + + { 728608, 728615, 728622, 728629 }, + + { 761630, 761637, 761643, 761650 }, + + { 794653, 794659, 794665, 794672 }, + + { 794907, 794913, 794919, 794925 }, + + { 827930, 827935, 827941, 827947 }, + + { 860952, 860958, 860963, 860969 }, + + { 861207, 861212, 861217, 861223 }, + + { 894230, 894235, 894240, 894245 }, + + { 894485, 894490, 894494, 894499 }, + + { 927508, 927512, 927517, 927521 }, + + { 960531, 960535, 960539, 960543 }, + + { 960786, 960790, 960794, 960798 }, + + { 993809, 993813, 993817, 993820 }, + + { 994064, 994068, 994071, 994075 }, + + { 994319, 994323, 994326, 994329 }, + + { 1027342, 1027346, 1027349, 1027352 }, + + { 1060366, 1060369, 1060372, 1060375 }, + + { 1060621, 1060624, 1060627, 1060630 }, + + { 1093644, 1093647, 1093650, 1093653 }, + + { 1093900, 1093902, 1093905, 1093908 }, + + { 1094155, 1094158, 1094160, 1094163 }, + + { 1127179, 1127181, 1127183, 1127186 }, + + { 1127434, 1127436, 1127439, 1127441 }, + + { 1160458, 1160460, 1160462, 1160464 }, + + { 1160713, 1160715, 1160717, 1160719 }, + + { 1160969, 1160971, 1160972, 1160974 }, + + { 1193992, 1193994, 1193996, 1193998 }, + + { 1194248, 1194249, 1194251, 1194253 }, + + { 1194503, 1194505, 1194507, 1194508 }, + + { 1227527, 1227529, 1227530, 1227532 }, + + { 1227783, 1227784, 1227786, 1227787 }, + + { 1228038, 1228040, 1228041, 1228043 }, + + { 1261062, 1261063, 1261065, 1261066 }, + + { 1261062, 1261063, 1261064, 1261065 }, + + { 2080514, 2080514, 2080514, 2080514 }, + + { 16768, 16816, 16848, 16880 }, + + { 2114176, 2114215, 
2114245, 2114275 }, + + { 2147200, 2147230, 2147259, 2147288 }, + + { 2180219, 2180246, 2180274, 2180301 }, + + { 2180468, 2180494, 2180521, 2180547 }, + + { 2246255, 2246279, 2246304, 2246329 }, + + { 2246505, 2246528, 2246552, 2246575 }, + + { 2279524, 2279546, 2279568, 2279590 }, + + { 2312543, 2312564, 2312585, 2312606 }, + + { 2345562, 2345582, 2345602, 2345622 }, + + { 2378581, 2378600, 2378619, 2378638 }, + + { 2411601, 2411619, 2411637, 2411655 }, + + { 2411853, 2411870, 2411887, 2411904 }, + + { 2477641, 2477657, 2477673, 2477690 }, + + { 2477893, 2477909, 2477924, 2477940 }, + + { 2510914, 2510928, 2510943, 2510958 }, + + { 2543934, 2543948, 2543962, 2543976 }, + + { 2544187, 2544200, 2544214, 2544227 }, + + { 2609976, 2609989, 2610001, 2610014 }, + + { 2610229, 2610241, 2610253, 2610265 }, + + { 2643251, 2643262, 2643273, 2643285 }, + + { 2643504, 2643515, 2643525, 2643536 }, + + { 2709294, 2709304, 2709314, 2709324 }, + + { 2709547, 2709557, 2709567, 2709576 }, + + { 2742569, 2742578, 2742587, 2742597 }, + + { 2742823, 2742832, 2742840, 2742849 }, + + { 2808613, 2808621, 2808630, 2808638 }, + + { 2808867, 2808875, 2808883, 2808891 }, + + { 2841889, 2841897, 2841904, 2841912 }, + + { 2842144, 2842151, 2842158, 2842165 }, + + { 2875166, 2875173, 2875179, 2875186 }, + + { 2908189, 2908195, 2908201, 2908208 }, + + { 2908443, 2908449, 2908455, 2908461 }, + + { 2941466, 2941471, 2941477, 2941483 }, + + { 2974488, 2974494, 2974499, 2974505 }, + + { 2974743, 2974748, 2974753, 2974759 }, + + { 3007766, 3007771, 3007776, 3007781 }, + + { 3008021, 3008026, 3008030, 3008035 }, + + { 3041044, 3041048, 3041053, 3041057 }, + + { 3074067, 3074071, 3074075, 3074079 }, + + { 3074322, 3074326, 3074330, 3074334 }, + + { 3107345, 3107349, 3107353, 3107356 }, + + { 3107600, 3107604, 3107607, 3107611 }, + + { 3107855, 3107859, 3107862, 3107865 }, + + { 3140878, 3140882, 3140885, 3140888 }, + + { 3173902, 3173905, 3173908, 3173911 }, + + { 3174157, 3174160, 3174163, 3174166 }, 
+ + { 3207180, 3207183, 3207186, 3207189 }, + + { 3207436, 3207438, 3207441, 3207444 }, + + { 3207691, 3207694, 3207696, 3207699 }, + + { 3240715, 3240717, 3240719, 3240722 }, + + { 3240970, 3240972, 3240975, 3240977 }, + + { 3273994, 3273996, 3273998, 3274000 }, + + { 3274249, 3274251, 3274253, 3274255 }, + + { 3274505, 3274507, 3274508, 3274510 }, + + { 3307528, 3307530, 3307532, 3307534 }, + + { 3307784, 3307785, 3307787, 3307789 }, + + { 3308039, 3308041, 3308043, 3308044 }, + + { 3341063, 3341065, 3341066, 3341068 }, + + { 3341319, 3341320, 3341322, 3341323 }, + + { 3341574, 3341576, 3341577, 3341579 }, + + { 3374598, 3374599, 3374601, 3374602 }, + + { 3374598, 3374599, 3374600, 3374601 }, + + { 4194050, 4194050, 4194050, 4194050 }, + + }; + +/*****************************************************************************/ +/* Global Variable Initialization */ +/*****************************************************************************/ +const UWORD8 gau1_ih264d_cabac_ctxt_init_table[NUM_CAB_INIT_IDC_PLUS_ONE][QP_RANGE][NUM_CABAC_CTXTS] = + + { + + { + + { + + 62, + 9, 74, 62, 9, 74, 126, 104, 10, 9, 12, 30, 61, 62, + 54, 14, 118, 6, 78, 65, 1, 14, 73, 13, 64, 20, 62, + 67, 90, 104, 126, 104, 67, 78, 65, 1, 86, 95, 2, + 18, 69, 81, 96, 8, 67, 86, 88, 5, 76, 94, 9, 69, + 81, 88, 67, 74, 74, 80, 72, 5, 22, 0, 0, 0, 83, + 86, 97, 72, 22, 1, 18, 78, 96, 126, 98, 101, 67, + 82, 94, 83, 110, 91, 102, 93, 126, 92, 89, 96, + 108, 17, 65, 6, 93, 74, 92, 87, 126, 9, 3, 4, 69, + 15, 68, 69, 88, 85, 78, 75, 77, 9, 13, 68, 13, 21, + 81, 0, 70, 67, 6, 76, 28, 64, 2, 28, 38, 39, 34, + 27, 93, 73, 73, 17, 14, 100, 10, 10, 10, 2, 7, 7, + 0, 3, 1, 6, 69, 6, 24, 12, 68, 64, 2, 0, 13, 24, + 19, 11, 15, 3, 4, 4, 30, 19, 20, 78, 3, 69, 35, + 23, 19, 14, 17, 19, 12, 16, 24, 1, 17, 9, 9, 5, 0, + 12, 6, 10, 11, 8, 18, 27, 10, 82, 8, 78, 17, 32, + 84, 56, 62, 60, 59, 62, 62, 57, 57, 54, 44, 36, + 33, 43, 29, 70, 67, 4, 67, 33, 31, 28, 34, 32, 25, + 20, 22, 0, 4, 64, 94, 89, 108, 76, 
19, 18, 11, 64, + 4, 70, 75, 82, 102, 77, 39, 21, 15, 8, 4, 71, 83, + 87, 119, 5, 34, 27, 25, 20, 8, 5, 64, 74, 90, 70, + 34, 32, 21, 4, 5, 72, 81, 97, 5, 58, 49, 45, 36, + 23, 5, 70, 79, 85, 62, 106, 106, 87, 114, 110, 98, + 110, 106, 103, 107, 108, 112, 96, 95, 91, 93, 94, + 86, 67, 80, 85, 70, 3, 5, 2, 13, 13, 14, 9, 22, + 17, 12, 14, 11, 22, 16, 8, 22, 19, 13, 10, 14, 0, + 64, 69, 4, 70, 19, 32, 20, 10, 29, 25, 11, 23, 31, + 19, 25, 13, 6, 20, 52, 49, 52, 52, 54, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 34, 62, 62, 62, 62, 62, + 62, 54, 37, 36, 6, 82, 75, 97, 125, 62, 62, 62, + 57, 55, 53, 41, 44, 31, 32, 22, 19, 16, 65, 71, 3, + 0, 65, 39, 43, 40, 31, 40, 39, 23, 31, 34, 21, 6, + 10, 2, 86, 23, 12, 4, 79, 71, 69, 70, 66, 68, 73, + 69, 70, 67, 1, 70, 66, 65, 0, 62, 62, 62, 62, 62, + 60, 54, 36, 4, 66, 28, 21, 18, 15, 7, 3, 1, 66, + 76, 85, 81, 77, 81, 80, 73, 74, 83, 71, 67, 2, 66, + 66, 4, 4, 62, 62, 62, 62, 61, 57, 46, 29, 1 }, + + { + + 62, + 9, 74, 62, 9, 74, 125, 102, 11, 10, 12, 29, 60, + 62, 54, 14, 115, 6, 77, 64, 1, 14, 72, 12, 65, + 20, 62, 68, 91, 104, 124, 102, 67, 77, 64, 1, + 85, 93, 3, 18, 68, 80, 95, 8, 67, 85, 88, 5, 75, + 93, 9, 69, 80, 88, 66, 73, 73, 79, 71, 5, 22, 0, + 0, 0, 82, 86, 97, 71, 22, 1, 18, 77, 95, 124, + 96, 99, 65, 80, 92, 82, 108, 89, 100, 92, 125, + 91, 88, 95, 107, 18, 64, 7, 92, 73, 91, 86, 124, + 9, 3, 4, 69, 16, 68, 68, 87, 84, 77, 74, 76, 9, + 13, 67, 13, 21, 80, 0, 69, 67, 6, 75, 28, 64, 2, + 28, 37, 39, 34, 27, 92, 72, 72, 17, 14, 99, 10, + 10, 10, 3, 7, 7, 1, 4, 2, 6, 68, 6, 24, 12, 68, + 64, 2, 0, 13, 23, 19, 11, 15, 4, 5, 4, 29, 19, + 20, 77, 3, 69, 35, 23, 19, 14, 17, 19, 12, 16, + 24, 1, 17, 9, 9, 5, 0, 12, 6, 10, 11, 8, 18, 27, + 10, 81, 8, 77, 17, 31, 83, 55, 62, 59, 58, 61, + 62, 56, 56, 52, 43, 35, 32, 41, 28, 71, 67, 4, + 67, 32, 30, 27, 33, 31, 24, 19, 21, 0, 4, 64, + 93, 88, 107, 75, 20, 18, 11, 0, 5, 69, 74, 81, + 100, 76, 39, 21, 15, 8, 5, 70, 82, 86, 117, 5, + 35, 28, 25, 20, 9, 5, 64, 73, 89, 70, 35, 32, + 
21, 4, 6, 71, 80, 96, 5, 58, 49, 45, 36, 23, 5, + 69, 78, 84, 62, 105, 105, 86, 112, 108, 97, 108, + 104, 101, 105, 106, 110, 95, 94, 90, 92, 92, 85, + 67, 79, 84, 69, 3, 5, 2, 13, 13, 13, 8, 22, 17, + 13, 14, 11, 22, 16, 8, 22, 19, 13, 10, 14, 0, + 64, 68, 5, 70, 19, 32, 20, 10, 29, 25, 12, 23, + 30, 19, 25, 13, 6, 19, 52, 49, 52, 51, 53, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 33, 62, 62, 62, + 62, 62, 62, 53, 36, 35, 6, 81, 74, 95, 122, 62, + 62, 62, 56, 53, 52, 40, 42, 30, 31, 21, 18, 15, + 66, 71, 3, 0, 66, 38, 42, 39, 30, 39, 38, 22, + 30, 33, 20, 5, 9, 1, 86, 23, 12, 4, 78, 70, 68, + 69, 65, 67, 71, 68, 69, 66, 3, 68, 65, 0, 2, 62, + 62, 62, 62, 62, 58, 51, 34, 2, 65, 29, 22, 19, + 16, 8, 4, 2, 65, 75, 84, 80, 76, 80, 78, 71, 73, + 82, 70, 66, 3, 65, 65, 4, 4, 62, 62, 62, 62, 58, + 54, 43, 26, 64 }, + + { + + 62, + 9, 74, 62, 9, 74, 123, 101, 11, 10, 12, 28, 59, + 61, 54, 14, 113, 6, 76, 0, 1, 13, 72, 11, 66, + 19, 60, 70, 92, 105, 121, 101, 67, 76, 0, 1, 85, + 92, 3, 17, 68, 80, 94, 8, 67, 85, 88, 5, 75, 92, + 9, 69, 80, 88, 66, 73, 73, 79, 71, 5, 22, 0, 0, + 0, 81, 86, 97, 71, 21, 1, 18, 77, 95, 122, 94, + 97, 64, 78, 91, 81, 107, 88, 99, 91, 123, 91, + 88, 95, 106, 18, 64, 7, 91, 73, 90, 86, 123, 9, + 3, 4, 69, 16, 68, 68, 87, 84, 77, 74, 76, 9, 13, + 67, 13, 21, 80, 0, 69, 67, 6, 75, 27, 64, 2, 27, + 36, 38, 33, 26, 91, 72, 72, 16, 13, 99, 9, 10, + 10, 3, 7, 7, 2, 4, 2, 6, 68, 6, 23, 12, 69, 64, + 2, 64, 13, 22, 19, 11, 14, 4, 5, 4, 28, 19, 19, + 77, 3, 70, 34, 23, 19, 14, 17, 19, 12, 16, 24, + 1, 17, 9, 9, 5, 0, 12, 6, 10, 11, 8, 17, 26, 9, + 81, 8, 77, 16, 30, 83, 53, 62, 57, 56, 59, 60, + 54, 54, 50, 41, 33, 30, 39, 26, 72, 67, 4, 68, + 31, 29, 26, 32, 29, 23, 18, 20, 64, 3, 65, 93, + 88, 106, 75, 20, 18, 11, 0, 5, 69, 74, 81, 99, + 75, 39, 21, 15, 8, 5, 70, 81, 85, 115, 5, 35, + 28, 25, 20, 9, 5, 64, 73, 88, 70, 35, 32, 21, 4, + 6, 71, 80, 95, 5, 57, 48, 44, 35, 23, 5, 69, 78, + 84, 62, 104, 104, 85, 111, 107, 96, 107, 103, + 100, 104, 105, 108, 94, 
93, 90, 91, 91, 85, 68, + 79, 83, 69, 3, 4, 2, 12, 12, 12, 7, 21, 17, 13, + 14, 10, 21, 16, 8, 21, 18, 13, 10, 13, 0, 64, + 68, 5, 70, 18, 31, 19, 10, 28, 24, 12, 22, 29, + 19, 25, 12, 5, 17, 51, 48, 51, 50, 52, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 32, 62, 62, 62, 62, + 62, 62, 51, 35, 34, 6, 80, 74, 94, 120, 60, 60, + 62, 54, 51, 50, 38, 40, 29, 29, 20, 16, 14, 67, + 72, 2, 0, 67, 37, 41, 37, 28, 37, 36, 21, 28, + 31, 19, 4, 8, 0, 87, 22, 11, 3, 78, 70, 68, 68, + 65, 66, 70, 67, 68, 65, 4, 67, 64, 1, 3, 62, 62, + 62, 62, 60, 55, 48, 31, 0, 65, 29, 22, 19, 16, + 9, 4, 2, 65, 75, 84, 80, 75, 80, 77, 70, 73, 81, + 69, 65, 3, 65, 64, 4, 4, 62, 62, 62, 60, 55, 50, + 39, 23, 67 }, + + { + + 62, + 9, 74, 62, 9, 74, 121, 99, 12, 10, 11, 26, 57, + 60, 54, 14, 111, 6, 75, 1, 1, 12, 72, 10, 67, + 19, 58, 71, 93, 105, 118, 100, 67, 75, 1, 1, 84, + 91, 4, 17, 68, 79, 93, 7, 68, 85, 88, 5, 75, 92, + 9, 69, 80, 88, 65, 73, 73, 79, 70, 5, 22, 0, 0, + 0, 81, 86, 97, 70, 20, 1, 18, 77, 95, 120, 92, + 96, 1, 76, 90, 80, 105, 87, 98, 90, 121, 90, 88, + 94, 105, 18, 64, 7, 91, 73, 90, 85, 121, 9, 2, + 3, 70, 16, 68, 68, 86, 84, 76, 74, 75, 9, 13, + 67, 13, 20, 80, 0, 69, 67, 6, 75, 26, 64, 2, 26, + 35, 37, 32, 25, 91, 71, 72, 15, 13, 98, 9, 10, + 10, 3, 7, 7, 3, 4, 2, 6, 67, 6, 22, 12, 70, 64, + 2, 64, 12, 21, 19, 11, 13, 4, 5, 4, 26, 19, 18, + 77, 3, 70, 33, 23, 19, 14, 17, 19, 12, 16, 24, + 1, 16, 9, 9, 5, 0, 11, 5, 9, 10, 7, 16, 25, 9, + 81, 7, 77, 15, 28, 83, 52, 62, 55, 54, 57, 58, + 52, 52, 48, 39, 32, 29, 37, 24, 73, 67, 4, 68, + 30, 28, 25, 30, 28, 21, 17, 19, 65, 3, 65, 93, + 88, 106, 74, 20, 18, 11, 0, 5, 69, 74, 80, 98, + 75, 39, 21, 15, 8, 6, 69, 80, 84, 113, 5, 35, + 28, 25, 20, 10, 5, 64, 73, 88, 70, 35, 32, 20, + 4, 6, 71, 80, 94, 5, 57, 48, 43, 34, 23, 5, 69, + 77, 83, 62, 103, 103, 85, 110, 106, 95, 105, + 102, 99, 103, 103, 107, 94, 92, 90, 91, 89, 85, + 68, 79, 83, 69, 2, 4, 2, 11, 11, 11, 6, 21, 16, + 13, 13, 10, 21, 15, 8, 20, 18, 12, 10, 12, 0, + 65, 68, 5, 71, 
18, 31, 18, 10, 27, 24, 12, 21, + 28, 18, 24, 11, 5, 16, 50, 47, 51, 49, 51, 61, + 62, 62, 62, 62, 62, 62, 62, 62, 31, 62, 62, 62, + 62, 62, 62, 49, 34, 33, 6, 79, 74, 93, 118, 58, + 58, 62, 52, 49, 48, 37, 38, 27, 28, 19, 15, 12, + 68, 73, 2, 64, 68, 36, 39, 36, 26, 35, 34, 19, + 27, 29, 17, 3, 6, 65, 88, 21, 10, 2, 78, 69, 68, + 68, 64, 66, 69, 66, 67, 64, 5, 66, 0, 3, 4, 62, + 62, 62, 62, 58, 52, 45, 28, 65, 64, 30, 23, 20, + 16, 10, 5, 2, 64, 74, 84, 79, 75, 79, 76, 69, + 73, 81, 69, 65, 3, 64, 0, 4, 4, 62, 62, 62, 57, + 52, 46, 35, 19, 69 }, + + { + + 62, + 9, 74, 62, 9, 74, 120, 98, 12, 10, 11, 25, 56, + 58, 54, 14, 108, 5, 74, 1, 1, 11, 72, 9, 68, 18, + 56, 73, 94, 106, 115, 99, 67, 74, 1, 1, 84, 90, + 4, 16, 68, 79, 93, 7, 68, 84, 88, 5, 75, 91, 8, + 70, 80, 88, 65, 72, 73, 78, 70, 5, 22, 0, 0, 0, + 80, 87, 97, 70, 19, 1, 18, 77, 95, 119, 91, 94, + 2, 75, 89, 79, 104, 85, 97, 89, 119, 90, 87, 94, + 104, 18, 64, 7, 90, 73, 89, 85, 120, 8, 2, 3, + 70, 16, 68, 68, 86, 84, 76, 74, 75, 9, 12, 67, + 13, 20, 80, 0, 69, 67, 6, 75, 26, 65, 2, 26, 34, + 36, 31, 24, 90, 71, 72, 14, 12, 98, 8, 10, 9, 3, + 7, 7, 4, 5, 2, 5, 67, 5, 21, 11, 71, 64, 2, 65, + 12, 20, 18, 10, 13, 5, 5, 4, 25, 18, 17, 77, 3, + 71, 33, 23, 19, 14, 17, 19, 12, 16, 23, 1, 16, + 9, 9, 5, 64, 11, 5, 9, 10, 7, 16, 24, 8, 81, 7, + 77, 14, 27, 83, 50, 62, 53, 52, 55, 56, 50, 50, + 46, 37, 30, 27, 34, 22, 74, 67, 3, 69, 29, 27, + 24, 29, 26, 20, 16, 17, 65, 2, 66, 93, 88, 105, + 74, 20, 18, 11, 0, 5, 69, 74, 80, 97, 74, 39, + 21, 15, 8, 6, 69, 80, 84, 111, 5, 35, 28, 25, + 20, 10, 5, 64, 73, 87, 70, 35, 31, 20, 4, 6, 71, + 80, 94, 5, 56, 47, 42, 33, 23, 5, 69, 77, 83, + 62, 102, 102, 84, 108, 105, 94, 104, 100, 98, + 101, 102, 105, 93, 92, 89, 90, 88, 84, 69, 79, + 82, 69, 2, 3, 1, 10, 10, 10, 5, 20, 16, 13, 13, + 9, 20, 15, 8, 19, 17, 12, 9, 11, 64, 65, 68, 5, + 71, 17, 30, 17, 10, 26, 23, 12, 20, 27, 18, 24, + 10, 4, 14, 49, 47, 50, 48, 49, 60, 62, 62, 62, + 62, 62, 62, 62, 62, 29, 62, 62, 62, 62, 
62, 62, + 47, 33, 31, 6, 78, 73, 92, 116, 57, 56, 60, 51, + 47, 46, 35, 36, 26, 26, 17, 13, 11, 69, 74, 1, + 64, 69, 34, 38, 34, 25, 33, 32, 18, 25, 27, 16, + 2, 5, 66, 88, 20, 10, 1, 78, 69, 67, 67, 64, 65, + 68, 66, 66, 0, 6, 65, 1, 4, 5, 62, 62, 62, 61, + 55, 49, 42, 25, 68, 64, 30, 23, 20, 17, 10, 5, + 3, 64, 74, 83, 79, 74, 79, 75, 68, 73, 80, 68, + 64, 3, 64, 1, 4, 4, 62, 62, 61, 54, 49, 42, 31, + 16, 72 }, + + { + + 62, + 9, 74, 62, 9, 74, 118, 96, 12, 10, 10, 23, 54, + 57, 54, 14, 106, 5, 73, 2, 1, 11, 71, 8, 69, 18, + 54, 75, 95, 106, 112, 97, 67, 73, 2, 1, 84, 89, + 4, 16, 68, 79, 92, 7, 69, 84, 88, 5, 75, 90, 8, + 70, 80, 88, 64, 72, 72, 78, 69, 5, 22, 0, 0, 0, + 80, 87, 97, 69, 18, 1, 18, 76, 95, 117, 89, 93, + 4, 73, 87, 78, 103, 84, 96, 88, 117, 89, 87, 93, + 103, 18, 64, 7, 90, 73, 89, 84, 118, 8, 2, 3, + 70, 16, 68, 67, 85, 84, 76, 74, 74, 9, 12, 67, + 13, 20, 79, 0, 68, 67, 6, 75, 25, 65, 2, 25, 33, + 36, 30, 23, 89, 70, 72, 13, 12, 97, 8, 10, 9, 3, + 7, 7, 5, 5, 2, 5, 67, 5, 20, 11, 72, 64, 2, 65, + 11, 19, 18, 10, 12, 5, 5, 4, 24, 18, 16, 77, 3, + 71, 32, 23, 19, 14, 17, 19, 12, 16, 23, 1, 16, + 9, 9, 5, 64, 11, 5, 8, 10, 7, 15, 23, 8, 81, 6, + 77, 13, 26, 83, 49, 61, 52, 51, 53, 54, 48, 48, + 44, 35, 28, 25, 32, 21, 75, 67, 3, 69, 28, 26, + 23, 28, 25, 18, 15, 16, 66, 2, 66, 93, 88, 105, + 74, 20, 18, 11, 0, 5, 68, 73, 79, 96, 74, 39, + 21, 15, 8, 6, 68, 79, 83, 109, 5, 35, 28, 25, + 20, 10, 5, 64, 73, 86, 70, 36, 31, 19, 4, 6, 71, + 80, 93, 5, 56, 46, 41, 32, 23, 5, 69, 77, 82, + 62, 101, 101, 83, 107, 104, 93, 103, 99, 97, + 100, 100, 103, 92, 91, 89, 90, 87, 84, 69, 78, + 81, 69, 1, 3, 1, 10, 9, 9, 4, 19, 15, 13, 12, 9, + 20, 15, 8, 18, 16, 12, 9, 10, 64, 65, 68, 5, 71, + 16, 30, 17, 10, 25, 22, 12, 19, 26, 17, 23, 9, + 3, 12, 48, 46, 50, 47, 48, 58, 62, 62, 62, 62, + 62, 62, 62, 62, 28, 62, 62, 62, 62, 62, 61, 45, + 32, 30, 6, 77, 73, 91, 114, 55, 55, 58, 49, 45, + 44, 34, 34, 25, 24, 16, 11, 9, 70, 75, 1, 64, + 70, 33, 36, 32, 23, 32, 31, 
16, 24, 26, 14, 1, + 4, 67, 89, 20, 9, 0, 77, 68, 67, 67, 0, 64, 67, + 65, 65, 1, 8, 64, 2, 5, 7, 62, 62, 62, 58, 53, + 46, 39, 22, 70, 64, 31, 24, 21, 17, 11, 5, 3, 0, + 73, 83, 79, 73, 78, 74, 67, 72, 79, 68, 64, 3, + 0, 2, 4, 4, 62, 62, 58, 51, 46, 39, 27, 12, 75 }, + + { + + 62, + 9, 75, 62, 9, 75, 116, 95, 13, 10, 10, 22, 53, + 56, 54, 14, 104, 5, 73, 3, 1, 10, 71, 7, 70, 17, + 53, 76, 96, 107, 109, 96, 67, 73, 3, 1, 83, 88, + 5, 15, 67, 78, 91, 6, 69, 84, 88, 5, 74, 90, 8, + 70, 79, 88, 64, 72, 72, 78, 69, 5, 22, 0, 0, 0, + 79, 87, 97, 69, 18, 0, 18, 76, 94, 115, 87, 91, + 5, 71, 86, 77, 101, 83, 95, 88, 116, 89, 87, 93, + 103, 19, 64, 7, 89, 72, 88, 84, 117, 8, 1, 2, + 71, 16, 68, 67, 85, 84, 75, 74, 74, 9, 12, 66, + 13, 19, 79, 0, 68, 67, 6, 75, 24, 65, 2, 24, 32, + 35, 30, 23, 89, 70, 72, 13, 11, 97, 7, 10, 9, 3, + 7, 7, 5, 5, 2, 5, 66, 5, 19, 11, 72, 65, 2, 66, + 11, 18, 18, 10, 11, 5, 5, 4, 22, 18, 15, 77, 3, + 72, 31, 23, 18, 14, 17, 19, 12, 16, 23, 1, 15, + 9, 8, 5, 64, 10, 4, 8, 9, 6, 14, 22, 7, 81, 6, + 76, 12, 24, 83, 47, 59, 50, 49, 51, 52, 46, 46, + 42, 33, 27, 24, 30, 19, 76, 67, 3, 70, 27, 25, + 22, 26, 23, 17, 14, 15, 67, 1, 67, 93, 88, 104, + 73, 20, 18, 11, 1, 5, 68, 73, 79, 95, 73, 38, + 21, 15, 8, 7, 68, 78, 82, 107, 5, 36, 28, 25, + 20, 11, 5, 64, 72, 86, 70, 36, 31, 19, 4, 6, 70, + 79, 92, 5, 55, 46, 40, 32, 23, 5, 68, 76, 82, + 62, 101, 100, 83, 106, 103, 92, 101, 98, 96, 99, + 99, 102, 92, 90, 89, 89, 85, 84, 70, 78, 81, 69, + 1, 2, 1, 9, 8, 8, 3, 19, 15, 13, 12, 8, 19, 14, + 8, 18, 16, 11, 9, 10, 64, 66, 68, 5, 72, 16, 29, + 16, 9, 24, 22, 13, 19, 25, 17, 23, 9, 3, 11, 47, + 45, 49, 46, 47, 57, 62, 62, 62, 62, 62, 62, 62, + 61, 27, 62, 62, 62, 62, 62, 59, 43, 31, 29, 6, + 76, 73, 89, 111, 53, 53, 56, 47, 43, 42, 32, 32, + 23, 23, 15, 10, 8, 71, 76, 0, 65, 71, 32, 35, + 31, 21, 30, 29, 15, 22, 24, 13, 64, 2, 69, 90, + 19, 8, 64, 77, 68, 67, 66, 0, 64, 65, 64, 64, 2, + 9, 1, 3, 7, 8, 62, 62, 60, 56, 50, 44, 36, 20, + 72, 0, 31, 24, 21, 
17, 12, 6, 3, 0, 73, 83, 78, + 73, 78, 73, 66, 72, 79, 67, 0, 3, 0, 3, 4, 4, + 62, 62, 56, 48, 42, 35, 24, 9, 77 }, + + { + + 62, + 9, 75, 62, 9, 75, 114, 93, 13, 10, 9, 20, 51, + 54, 54, 14, 101, 4, 72, 3, 1, 9, 71, 6, 71, 17, + 51, 78, 97, 107, 106, 95, 67, 72, 3, 1, 83, 87, + 5, 15, 67, 78, 91, 6, 70, 83, 88, 5, 74, 89, 7, + 70, 79, 88, 0, 71, 72, 77, 68, 5, 22, 0, 0, 0, + 79, 87, 97, 68, 17, 0, 18, 76, 94, 114, 85, 90, + 7, 69, 85, 76, 100, 81, 94, 87, 114, 88, 86, 92, + 102, 19, 64, 7, 89, 72, 88, 83, 115, 7, 1, 2, + 71, 16, 68, 67, 84, 84, 75, 74, 73, 9, 11, 66, + 13, 19, 79, 0, 68, 67, 6, 75, 24, 65, 2, 24, 31, + 34, 29, 22, 88, 69, 72, 12, 11, 96, 7, 10, 8, 3, + 7, 7, 6, 6, 2, 5, 66, 5, 18, 11, 73, 65, 2, 66, + 10, 17, 17, 10, 11, 6, 5, 4, 21, 17, 14, 77, 3, + 72, 31, 23, 18, 14, 17, 19, 12, 16, 23, 1, 15, + 9, 8, 5, 64, 10, 4, 7, 9, 6, 14, 21, 7, 81, 5, + 76, 11, 23, 83, 46, 57, 48, 47, 49, 50, 44, 44, + 40, 31, 25, 22, 27, 17, 77, 67, 2, 70, 26, 24, + 21, 25, 22, 15, 13, 14, 67, 1, 67, 93, 88, 104, + 73, 20, 18, 11, 1, 5, 68, 73, 78, 94, 73, 38, + 21, 15, 8, 7, 67, 77, 82, 105, 5, 36, 28, 25, + 20, 11, 5, 64, 72, 85, 70, 36, 30, 18, 4, 6, 70, + 79, 92, 5, 55, 45, 39, 31, 23, 5, 68, 76, 81, + 62, 100, 99, 82, 104, 102, 91, 100, 96, 95, 97, + 97, 100, 91, 89, 88, 89, 84, 83, 70, 78, 80, 69, + 0, 2, 0, 8, 7, 7, 2, 18, 14, 13, 11, 8, 19, 14, + 8, 17, 15, 11, 8, 9, 64, 66, 68, 5, 72, 15, 29, + 15, 9, 23, 21, 13, 18, 24, 16, 22, 8, 2, 9, 46, + 45, 49, 45, 45, 55, 62, 62, 62, 62, 62, 62, 62, + 59, 25, 62, 62, 62, 62, 62, 56, 41, 30, 28, 6, + 75, 72, 88, 109, 52, 51, 54, 46, 41, 40, 31, 30, + 22, 21, 13, 8, 6, 72, 77, 0, 65, 72, 30, 33, 29, + 20, 28, 27, 13, 21, 22, 11, 65, 1, 70, 90, 18, + 8, 65, 77, 67, 66, 66, 1, 0, 64, 0, 0, 3, 10, 2, + 4, 8, 9, 62, 61, 58, 53, 48, 41, 33, 17, 74, 0, + 32, 25, 22, 18, 13, 6, 4, 1, 72, 82, 78, 72, 77, + 72, 65, 72, 78, 67, 0, 3, 1, 4, 4, 4, 62, 62, + 53, 45, 39, 31, 20, 5, 80 }, + + { + + 62, + 8, 75, 62, 8, 75, 113, 92, 13, 
10, 9, 19, 50, + 53, 54, 14, 99, 4, 71, 4, 1, 8, 71, 5, 73, 16, + 49, 80, 98, 108, 104, 94, 67, 71, 4, 1, 83, 86, + 5, 14, 67, 78, 90, 5, 70, 83, 89, 5, 74, 89, 7, + 71, 79, 88, 0, 71, 72, 77, 68, 5, 22, 0, 0, 0, + 78, 88, 97, 68, 16, 0, 18, 76, 94, 112, 84, 88, + 8, 68, 84, 75, 99, 80, 93, 86, 112, 88, 86, 92, + 101, 19, 64, 7, 88, 72, 87, 83, 114, 7, 0, 1, + 72, 16, 68, 67, 84, 84, 75, 74, 73, 8, 11, 66, + 13, 18, 79, 0, 68, 67, 5, 75, 23, 66, 2, 23, 29, + 33, 28, 21, 88, 69, 72, 11, 10, 96, 6, 9, 8, 3, + 7, 7, 7, 6, 2, 4, 66, 4, 17, 10, 74, 65, 2, 67, + 10, 16, 17, 9, 10, 6, 5, 4, 19, 17, 13, 77, 3, + 73, 30, 22, 18, 14, 17, 18, 11, 16, 22, 0, 14, + 9, 8, 4, 65, 9, 3, 7, 8, 5, 13, 20, 6, 81, 5, + 76, 10, 21, 83, 44, 55, 46, 45, 47, 47, 42, 42, + 38, 29, 23, 20, 25, 15, 78, 67, 2, 71, 25, 22, + 19, 23, 20, 14, 11, 12, 68, 0, 68, 93, 88, 103, + 73, 20, 18, 11, 1, 5, 68, 73, 78, 93, 72, 38, + 21, 15, 8, 7, 67, 77, 81, 104, 5, 36, 28, 25, + 19, 11, 5, 64, 72, 85, 70, 36, 30, 18, 4, 6, 70, + 79, 91, 5, 54, 44, 38, 30, 22, 5, 68, 76, 81, + 62, 99, 98, 82, 103, 101, 91, 99, 95, 94, 96, + 96, 99, 91, 89, 88, 88, 83, 83, 71, 78, 80, 69, + 0, 1, 0, 7, 6, 5, 1, 17, 14, 13, 11, 7, 18, 13, + 7, 16, 14, 10, 8, 8, 65, 67, 68, 5, 73, 14, 28, + 14, 9, 22, 20, 13, 17, 23, 16, 22, 7, 1, 7, 45, + 44, 48, 43, 44, 54, 62, 62, 62, 62, 62, 62, 62, + 56, 24, 62, 62, 62, 62, 61, 54, 39, 28, 26, 6, + 75, 72, 87, 107, 50, 49, 52, 44, 38, 38, 29, 28, + 20, 19, 12, 6, 5, 73, 78, 64, 66, 73, 29, 32, + 27, 18, 26, 25, 12, 19, 20, 10, 66, 64, 72, 91, + 17, 7, 66, 77, 67, 66, 65, 1, 0, 0, 0, 1, 4, 11, + 3, 5, 9, 10, 61, 59, 56, 51, 45, 38, 30, 14, 77, + 0, 32, 25, 22, 18, 13, 6, 4, 1, 72, 82, 78, 72, + 77, 71, 64, 72, 78, 66, 1, 3, 1, 4, 4, 3, 62, + 61, 51, 42, 36, 27, 16, 2, 83 }, + + { + + 62, + 8, 75, 62, 8, 75, 111, 91, 14, 10, 9, 18, 49, + 52, 54, 14, 97, 4, 70, 5, 1, 8, 70, 4, 74, 15, + 47, 81, 99, 109, 101, 92, 67, 70, 5, 1, 82, 85, + 6, 13, 67, 77, 89, 5, 70, 83, 89, 5, 74, 88, 7, + 71, 
79, 88, 0, 71, 71, 77, 68, 5, 22, 0, 0, 0, + 77, 88, 97, 68, 15, 0, 18, 75, 94, 110, 82, 86, + 9, 66, 82, 74, 97, 79, 91, 85, 110, 88, 86, 92, + 100, 19, 64, 7, 87, 72, 86, 82, 113, 7, 0, 1, + 72, 16, 68, 66, 83, 83, 74, 74, 73, 8, 11, 66, + 13, 18, 78, 0, 67, 67, 5, 74, 22, 66, 2, 22, 28, + 33, 27, 20, 87, 69, 71, 10, 9, 96, 5, 9, 8, 4, + 7, 7, 8, 6, 2, 4, 65, 4, 17, 10, 75, 65, 2, 68, + 10, 15, 17, 9, 9, 6, 5, 4, 18, 17, 13, 77, 3, + 74, 29, 22, 18, 14, 17, 18, 11, 16, 22, 0, 14, + 9, 8, 4, 65, 9, 3, 7, 8, 5, 12, 20, 6, 81, 5, + 76, 9, 20, 83, 42, 54, 45, 44, 45, 45, 41, 41, + 36, 27, 22, 19, 23, 14, 79, 67, 2, 72, 24, 21, + 18, 22, 19, 13, 10, 11, 69, 64, 69, 93, 87, 102, + 72, 21, 18, 11, 1, 6, 67, 72, 77, 92, 71, 38, + 21, 15, 8, 8, 67, 76, 80, 102, 5, 36, 28, 25, + 19, 12, 5, 64, 72, 84, 70, 37, 30, 18, 4, 7, 70, + 79, 90, 5, 54, 44, 38, 29, 22, 5, 68, 75, 80, + 62, 98, 97, 81, 102, 99, 90, 97, 94, 92, 95, 95, + 97, 90, 88, 88, 87, 81, 83, 72, 77, 79, 69, 0, + 0, 0, 7, 5, 4, 0, 17, 14, 13, 11, 7, 17, 13, 7, + 15, 14, 10, 8, 7, 65, 67, 67, 6, 73, 14, 27, 14, + 9, 22, 20, 13, 16, 22, 16, 22, 6, 1, 6, 45, 43, + 47, 42, 43, 53, 60, 60, 62, 62, 62, 62, 62, 54, + 23, 62, 62, 62, 62, 58, 52, 38, 27, 25, 6, 74, + 72, 86, 105, 48, 48, 50, 42, 36, 37, 28, 26, 19, + 18, 11, 5, 4, 74, 78, 64, 66, 74, 28, 31, 26, + 16, 25, 24, 11, 18, 19, 9, 67, 65, 73, 92, 17, + 6, 66, 76, 67, 66, 64, 2, 1, 1, 1, 2, 5, 13, 4, + 6, 11, 12, 60, 58, 54, 49, 42, 35, 27, 11, 79, + 1, 32, 25, 23, 18, 14, 7, 4, 2, 71, 82, 77, 71, + 77, 70, 1, 71, 77, 65, 2, 3, 2, 5, 4, 3, 62, 59, + 49, 40, 33, 24, 12, 64, 85 }, + + { + + 62, + 8, 75, 62, 8, 75, 109, 89, 14, 10, 8, 16, 47, + 50, 54, 14, 94, 3, 69, 5, 1, 7, 70, 3, 75, 15, + 45, 83, 100, 109, 98, 91, 67, 69, 5, 1, 82, 84, + 6, 13, 67, 77, 89, 5, 71, 82, 89, 5, 74, 87, 6, + 71, 79, 88, 1, 70, 71, 76, 67, 5, 22, 0, 0, 0, + 77, 88, 97, 67, 14, 0, 18, 75, 94, 109, 80, 85, + 11, 64, 81, 73, 96, 77, 90, 84, 108, 87, 85, 91, + 99, 19, 64, 7, 87, 72, 86, 
82, 111, 6, 0, 1, 72, + 16, 68, 66, 83, 83, 74, 74, 72, 8, 10, 66, 13, + 18, 78, 0, 67, 67, 5, 74, 22, 66, 2, 22, 27, 32, + 26, 19, 86, 68, 71, 9, 9, 95, 5, 9, 7, 4, 7, 7, + 9, 7, 2, 4, 65, 4, 16, 10, 76, 65, 2, 68, 9, 14, + 16, 9, 9, 7, 5, 4, 17, 16, 12, 77, 3, 74, 29, + 22, 18, 14, 17, 18, 11, 16, 22, 0, 14, 9, 8, 4, + 65, 9, 3, 6, 8, 5, 12, 19, 5, 81, 4, 76, 8, 19, + 83, 41, 52, 43, 42, 43, 43, 39, 39, 34, 25, 20, + 17, 20, 12, 80, 67, 1, 72, 23, 20, 17, 21, 17, + 11, 9, 10, 69, 64, 69, 93, 87, 102, 72, 21, 18, + 11, 1, 6, 67, 72, 77, 91, 71, 38, 21, 15, 8, 8, + 66, 75, 80, 100, 5, 36, 28, 25, 19, 12, 5, 64, + 72, 83, 70, 37, 29, 17, 4, 7, 70, 79, 90, 5, 53, + 43, 37, 28, 22, 5, 68, 75, 80, 62, 97, 96, 80, + 100, 98, 89, 96, 92, 91, 93, 93, 95, 89, 87, 87, + 87, 80, 82, 72, 77, 78, 69, 64, 0, 64, 6, 4, 3, + 64, 16, 13, 13, 10, 6, 17, 13, 7, 14, 13, 10, 7, + 6, 65, 67, 67, 6, 73, 13, 27, 13, 9, 21, 19, 13, + 15, 21, 15, 21, 5, 0, 4, 44, 43, 47, 41, 41, 51, + 58, 58, 62, 62, 62, 62, 62, 52, 21, 59, 62, 59, + 62, 56, 49, 36, 26, 24, 6, 73, 71, 85, 103, 47, + 46, 48, 41, 34, 35, 26, 24, 18, 16, 9, 3, 2, 75, + 79, 65, 66, 75, 26, 29, 24, 15, 23, 22, 9, 16, + 17, 7, 68, 66, 74, 92, 16, 6, 67, 76, 66, 65, + 64, 2, 2, 2, 2, 3, 6, 14, 5, 7, 12, 13, 60, 56, + 52, 46, 40, 32, 24, 8, 81, 1, 33, 26, 23, 19, + 15, 7, 5, 2, 71, 81, 77, 70, 76, 69, 2, 71, 76, + 65, 2, 3, 2, 6, 4, 3, 62, 57, 46, 37, 30, 20, 8, + 68, 88 }, + + { + + 62, + 8, 76, 62, 8, 76, 107, 88, 15, 10, 8, 15, 46, + 49, 54, 14, 92, 3, 69, 6, 1, 6, 70, 2, 76, 14, + 44, 84, 101, 110, 95, 90, 67, 69, 6, 1, 81, 83, + 7, 12, 66, 76, 88, 4, 71, 82, 89, 5, 73, 87, 6, + 71, 78, 88, 1, 70, 71, 76, 67, 5, 22, 0, 0, 0, + 76, 88, 97, 67, 14, 64, 18, 75, 93, 107, 78, 83, + 12, 1, 80, 72, 94, 76, 89, 84, 107, 87, 85, 91, + 99, 20, 64, 7, 86, 71, 85, 81, 110, 6, 64, 0, + 73, 16, 68, 66, 82, 83, 73, 74, 72, 8, 10, 65, + 13, 17, 78, 0, 67, 67, 5, 74, 21, 66, 2, 21, 26, + 31, 26, 19, 86, 68, 71, 9, 8, 95, 4, 9, 7, 4, 7, + 7, 9, 
7, 2, 4, 64, 4, 15, 10, 76, 66, 2, 69, 9, + 13, 16, 9, 8, 7, 5, 4, 15, 16, 11, 77, 3, 75, + 28, 22, 17, 14, 17, 18, 11, 16, 22, 0, 13, 9, 7, + 4, 65, 8, 2, 6, 7, 4, 11, 18, 5, 81, 4, 75, 7, + 17, 83, 39, 50, 41, 40, 41, 41, 37, 37, 32, 23, + 19, 16, 18, 10, 81, 67, 1, 73, 22, 19, 16, 19, + 16, 10, 8, 9, 70, 65, 70, 93, 87, 101, 71, 21, + 18, 11, 2, 6, 67, 72, 76, 90, 70, 37, 21, 15, 8, + 9, 66, 74, 79, 98, 5, 37, 28, 25, 19, 13, 5, 64, + 71, 83, 70, 37, 29, 17, 4, 7, 69, 78, 89, 5, 53, + 43, 36, 28, 22, 5, 67, 74, 79, 62, 97, 95, 80, + 99, 97, 88, 94, 91, 90, 92, 92, 94, 89, 86, 87, + 86, 78, 82, 73, 77, 78, 69, 64, 64, 64, 5, 3, 2, + 65, 16, 13, 13, 10, 6, 16, 12, 7, 14, 13, 9, 7, + 6, 65, 68, 67, 6, 74, 13, 26, 12, 8, 20, 19, 14, + 15, 20, 15, 21, 5, 0, 3, 43, 42, 46, 40, 40, 50, + 56, 56, 61, 60, 62, 62, 60, 49, 20, 57, 62, 56, + 62, 53, 47, 34, 25, 23, 6, 72, 71, 83, 100, 45, + 44, 46, 39, 32, 33, 25, 22, 16, 15, 8, 2, 1, 76, + 80, 65, 67, 76, 25, 28, 23, 13, 21, 20, 8, 15, + 15, 6, 70, 68, 76, 93, 15, 5, 68, 76, 66, 65, 0, + 3, 2, 4, 3, 4, 7, 15, 7, 8, 14, 14, 59, 55, 50, + 44, 37, 30, 21, 6, 83, 2, 33, 26, 24, 19, 16, 8, + 5, 3, 70, 81, 76, 70, 76, 68, 3, 71, 76, 64, 3, + 3, 3, 7, 4, 3, 62, 55, 44, 34, 26, 16, 5, 71, 90 }, + + { + + 62, + 8, 76, 62, 8, 76, 106, 86, 15, 10, 7, 13, 44, + 48, 54, 14, 90, 3, 68, 7, 1, 5, 70, 1, 77, 14, + 42, 86, 102, 110, 92, 89, 67, 68, 7, 1, 81, 82, + 7, 12, 66, 76, 87, 4, 72, 82, 89, 5, 73, 86, 6, + 72, 78, 88, 2, 70, 71, 76, 66, 5, 22, 0, 0, 0, + 76, 89, 97, 66, 13, 64, 18, 75, 93, 105, 77, 82, + 14, 2, 79, 71, 93, 75, 88, 83, 105, 86, 85, 90, + 98, 20, 64, 7, 86, 71, 85, 81, 108, 6, 64, 0, + 73, 16, 68, 66, 82, 83, 73, 74, 71, 8, 10, 65, + 13, 17, 78, 0, 67, 67, 5, 74, 20, 67, 2, 20, 25, + 30, 25, 18, 85, 67, 71, 8, 8, 94, 4, 9, 7, 4, 7, + 7, 10, 7, 2, 3, 64, 3, 14, 9, 77, 66, 2, 69, 8, + 12, 16, 8, 7, 7, 5, 4, 14, 16, 10, 77, 3, 75, + 27, 22, 17, 14, 17, 18, 11, 16, 21, 0, 13, 9, 7, + 4, 66, 8, 2, 5, 7, 4, 10, 17, 4, 81, 3, 
75, 6, + 16, 83, 38, 48, 39, 38, 39, 39, 35, 35, 30, 21, + 17, 14, 16, 8, 82, 67, 1, 73, 21, 18, 15, 18, + 14, 8, 7, 7, 71, 65, 70, 93, 87, 101, 71, 21, + 18, 11, 2, 6, 67, 72, 76, 89, 70, 37, 21, 15, 8, + 9, 65, 74, 78, 96, 5, 37, 28, 25, 19, 13, 5, 64, + 71, 82, 70, 37, 29, 16, 4, 7, 69, 78, 88, 5, 52, + 42, 35, 27, 22, 5, 67, 74, 79, 62, 96, 94, 79, + 98, 96, 87, 93, 90, 89, 91, 90, 92, 88, 86, 87, + 86, 77, 82, 73, 77, 77, 69, 65, 64, 64, 4, 2, 1, + 66, 15, 12, 13, 9, 5, 16, 12, 7, 13, 12, 9, 7, + 5, 66, 68, 67, 6, 74, 12, 26, 11, 8, 19, 18, 14, + 14, 19, 14, 20, 4, 64, 1, 42, 41, 46, 39, 39, + 48, 54, 54, 59, 57, 62, 62, 57, 47, 19, 54, 62, + 53, 58, 50, 44, 32, 24, 21, 6, 71, 71, 82, 98, + 43, 42, 44, 37, 30, 31, 23, 20, 15, 13, 7, 0, + 64, 77, 81, 66, 67, 77, 24, 26, 21, 11, 19, 18, + 6, 13, 13, 4, 71, 69, 77, 94, 14, 4, 69, 76, 65, + 65, 0, 3, 3, 5, 3, 5, 8, 16, 8, 9, 15, 15, 59, + 53, 48, 41, 35, 27, 18, 3, 86, 2, 34, 27, 24, + 19, 16, 8, 5, 3, 70, 81, 76, 69, 75, 67, 4, 71, + 75, 64, 3, 3, 3, 8, 4, 3, 61, 53, 41, 31, 23, + 12, 1, 75, 93 }, + + { + + 62, + 8, 76, 62, 8, 76, 104, 85, 15, 10, 7, 12, 43, + 46, 54, 14, 87, 2, 67, 7, 1, 5, 69, 0, 78, 13, + 40, 88, 103, 111, 89, 87, 67, 67, 7, 1, 81, 81, + 7, 11, 66, 76, 87, 4, 72, 81, 89, 5, 73, 85, 5, + 72, 78, 88, 2, 69, 70, 75, 66, 5, 22, 0, 0, 0, + 75, 89, 97, 66, 12, 64, 18, 74, 93, 104, 75, 80, + 15, 4, 77, 70, 92, 73, 87, 82, 103, 86, 84, 90, + 97, 20, 64, 7, 85, 71, 84, 80, 107, 5, 64, 0, + 73, 16, 68, 65, 81, 83, 73, 74, 71, 8, 9, 65, + 13, 17, 77, 0, 66, 67, 5, 74, 20, 67, 2, 20, 24, + 30, 24, 17, 84, 67, 71, 7, 7, 94, 3, 9, 6, 4, 7, + 7, 11, 8, 2, 3, 64, 3, 13, 9, 78, 66, 2, 70, 8, + 11, 15, 8, 7, 8, 5, 4, 13, 15, 9, 77, 3, 76, 27, + 22, 17, 14, 17, 18, 11, 16, 21, 0, 13, 9, 7, 4, + 66, 8, 2, 5, 7, 4, 10, 16, 4, 81, 3, 75, 5, 15, + 83, 36, 46, 38, 37, 37, 37, 33, 33, 28, 19, 15, + 12, 13, 7, 83, 67, 0, 74, 20, 17, 14, 17, 13, 7, + 6, 6, 71, 66, 71, 93, 87, 100, 71, 21, 18, 11, + 2, 6, 66, 71, 75, 88, 
69, 37, 21, 15, 8, 9, 65, + 73, 78, 94, 5, 37, 28, 25, 19, 13, 5, 64, 71, + 81, 70, 38, 28, 16, 4, 7, 69, 78, 88, 5, 52, 41, + 34, 26, 22, 5, 67, 74, 78, 62, 95, 93, 78, 96, + 95, 86, 92, 88, 88, 89, 89, 90, 87, 85, 86, 85, + 76, 81, 74, 76, 76, 69, 65, 65, 65, 4, 1, 0, 67, + 14, 12, 13, 9, 5, 15, 12, 7, 12, 11, 9, 6, 4, + 66, 68, 67, 6, 74, 11, 25, 11, 8, 18, 17, 14, + 13, 18, 14, 20, 3, 65, 64, 41, 41, 45, 38, 37, + 47, 52, 52, 57, 55, 62, 61, 54, 45, 17, 51, 62, + 50, 54, 48, 42, 30, 23, 20, 6, 70, 70, 81, 96, + 42, 41, 42, 36, 28, 29, 22, 18, 14, 11, 5, 65, + 65, 78, 82, 66, 67, 78, 22, 25, 19, 10, 18, 17, + 5, 12, 12, 3, 72, 70, 78, 94, 14, 4, 70, 75, 65, + 64, 1, 4, 4, 6, 4, 6, 9, 18, 9, 10, 16, 17, 58, + 51, 46, 39, 32, 24, 15, 0, 88, 2, 34, 27, 25, + 20, 17, 8, 6, 4, 69, 80, 76, 68, 75, 66, 5, 70, + 74, 0, 4, 3, 4, 9, 4, 3, 59, 51, 39, 28, 20, 9, + 66, 78, 96 }, + + { + + 61, + 8, 76, 61, 8, 76, 102, 83, 16, 10, 6, 10, 41, + 45, 54, 14, 85, 2, 66, 8, 1, 4, 69, 64, 79, 13, + 38, 89, 104, 111, 86, 86, 67, 66, 8, 1, 80, 80, + 8, 11, 66, 75, 86, 3, 73, 81, 89, 5, 73, 85, 5, + 72, 78, 88, 3, 69, 70, 75, 65, 5, 22, 0, 0, 0, + 75, 89, 97, 65, 11, 64, 18, 74, 93, 102, 73, 79, + 17, 6, 76, 69, 90, 72, 86, 81, 101, 85, 84, 89, + 96, 20, 64, 7, 85, 71, 84, 80, 105, 5, 65, 64, + 74, 16, 68, 65, 81, 83, 72, 74, 70, 8, 9, 65, + 13, 16, 77, 0, 66, 67, 5, 74, 19, 67, 2, 19, 23, + 29, 23, 16, 84, 66, 71, 6, 7, 93, 3, 9, 6, 4, 7, + 7, 12, 8, 2, 3, 0, 3, 12, 9, 79, 66, 2, 70, 7, + 10, 15, 8, 6, 8, 5, 4, 11, 15, 8, 77, 3, 76, 26, + 22, 17, 14, 17, 18, 11, 16, 21, 0, 12, 9, 7, 4, + 66, 7, 1, 4, 6, 3, 9, 15, 3, 81, 2, 75, 4, 13, + 83, 35, 44, 36, 35, 35, 35, 31, 31, 26, 17, 14, + 11, 11, 5, 84, 67, 0, 74, 19, 16, 13, 15, 11, 5, + 5, 5, 72, 66, 71, 93, 87, 100, 70, 21, 18, 11, + 2, 6, 66, 71, 75, 87, 69, 37, 21, 15, 8, 10, 64, + 72, 77, 92, 5, 37, 28, 25, 19, 14, 5, 64, 71, + 81, 70, 38, 28, 15, 4, 7, 69, 78, 87, 5, 51, 41, + 33, 25, 22, 5, 67, 73, 78, 62, 94, 92, 78, 95, + 94, 
85, 90, 87, 87, 88, 87, 89, 87, 84, 86, 85, + 74, 81, 74, 76, 76, 69, 66, 65, 65, 3, 0, 64, + 68, 14, 11, 13, 8, 4, 15, 11, 7, 11, 11, 8, 6, + 3, 66, 69, 67, 6, 75, 11, 25, 10, 8, 17, 17, 14, + 12, 17, 13, 19, 2, 65, 65, 40, 40, 45, 37, 36, + 45, 50, 50, 55, 52, 60, 59, 51, 42, 16, 48, 62, + 47, 50, 45, 39, 28, 22, 19, 6, 69, 70, 80, 94, + 40, 39, 40, 34, 26, 27, 20, 16, 12, 10, 4, 66, + 67, 79, 83, 67, 68, 79, 21, 23, 18, 8, 16, 15, + 3, 10, 10, 1, 73, 72, 80, 95, 13, 3, 71, 75, 64, + 64, 1, 4, 4, 7, 5, 7, 10, 19, 10, 11, 18, 18, + 58, 50, 44, 36, 30, 21, 12, 66, 90, 3, 35, 28, + 25, 20, 18, 9, 6, 4, 69, 80, 75, 68, 74, 65, 6, + 70, 74, 0, 4, 3, 4, 10, 4, 3, 58, 49, 36, 25, + 17, 5, 70, 82, 98 }, + + { + + 60, + 8, 76, 60, 8, 76, 100, 82, 16, 10, 6, 9, 40, 44, + 54, 14, 83, 2, 65, 9, 1, 3, 69, 65, 80, 12, 36, + 91, 105, 112, 83, 85, 67, 65, 9, 1, 80, 79, 8, + 10, 66, 75, 85, 3, 73, 81, 89, 5, 73, 84, 5, 72, + 78, 88, 3, 69, 70, 75, 65, 5, 22, 0, 0, 0, 74, + 89, 97, 65, 10, 64, 18, 74, 93, 100, 71, 77, 18, + 8, 75, 68, 89, 71, 85, 80, 99, 85, 84, 89, 95, + 20, 64, 7, 84, 71, 83, 79, 104, 5, 65, 64, 74, + 16, 68, 65, 80, 83, 72, 74, 70, 8, 9, 65, 13, + 16, 77, 0, 66, 67, 5, 74, 18, 67, 2, 18, 22, 28, + 22, 15, 83, 66, 71, 5, 6, 93, 2, 9, 6, 4, 7, 7, + 13, 8, 2, 3, 0, 3, 11, 9, 80, 66, 2, 71, 7, 9, + 15, 8, 5, 8, 5, 4, 10, 15, 7, 77, 3, 77, 25, 22, + 17, 14, 17, 18, 11, 16, 21, 0, 12, 9, 7, 4, 66, + 7, 1, 4, 6, 3, 8, 14, 3, 81, 2, 75, 3, 12, 83, + 33, 42, 34, 33, 33, 33, 29, 29, 24, 15, 12, 9, + 9, 3, 85, 67, 0, 75, 18, 15, 12, 14, 10, 4, 4, + 4, 73, 67, 72, 93, 87, 99, 70, 21, 18, 11, 2, 6, + 66, 71, 74, 86, 68, 37, 21, 15, 8, 10, 64, 71, + 76, 90, 5, 37, 28, 25, 19, 14, 5, 64, 71, 80, + 70, 38, 28, 15, 4, 7, 69, 78, 86, 5, 51, 40, 32, + 24, 22, 5, 67, 73, 77, 62, 93, 91, 77, 94, 93, + 84, 89, 86, 86, 87, 86, 87, 86, 83, 86, 84, 73, + 81, 75, 76, 75, 69, 66, 66, 65, 2, 64, 65, 69, + 13, 11, 13, 8, 4, 14, 11, 7, 10, 10, 8, 6, 2, + 66, 69, 67, 6, 75, 10, 24, 9, 8, 16, 
16, 14, 11, + 16, 13, 19, 1, 66, 67, 39, 39, 44, 36, 35, 44, + 48, 48, 53, 50, 57, 56, 48, 40, 15, 45, 59, 44, + 46, 42, 37, 26, 21, 18, 6, 68, 70, 79, 92, 38, + 37, 38, 32, 24, 25, 19, 14, 11, 8, 3, 68, 68, + 80, 84, 67, 68, 80, 20, 22, 16, 6, 14, 13, 2, 9, + 8, 0, 74, 73, 81, 96, 12, 2, 72, 75, 64, 64, 2, + 5, 5, 8, 6, 8, 11, 20, 11, 12, 19, 19, 57, 48, + 42, 34, 27, 18, 9, 69, 92, 3, 35, 28, 26, 20, + 19, 9, 6, 5, 68, 80, 75, 67, 74, 64, 7, 70, 73, + 1, 5, 3, 5, 11, 4, 3, 57, 47, 34, 22, 14, 1, 74, + 85, 101 }, + + { + + 58, + 7, 77, 58, 7, 77, 99, 81, 16, 10, 5, 7, 38, 42, + 53, 14, 81, 1, 65, 9, 0, 2, 69, 67, 82, 11, 34, + 93, 106, 113, 81, 84, 68, 65, 9, 0, 80, 78, 8, + 9, 66, 75, 85, 2, 74, 81, 90, 5, 73, 84, 4, 73, + 78, 88, 3, 69, 70, 75, 65, 4, 22, 0, 0, 0, 74, + 90, 97, 65, 9, 65, 18, 74, 93, 99, 70, 76, 19, + 9, 74, 67, 88, 70, 84, 80, 98, 85, 84, 89, 95, + 20, 64, 7, 84, 71, 83, 79, 103, 4, 66, 65, 75, + 16, 68, 65, 80, 83, 72, 74, 70, 7, 8, 65, 12, + 15, 77, 64, 66, 67, 4, 74, 17, 68, 1, 17, 20, + 27, 21, 14, 83, 66, 71, 4, 5, 93, 1, 8, 5, 4, 7, + 7, 13, 8, 2, 2, 0, 2, 10, 8, 81, 67, 1, 72, 6, + 8, 14, 7, 4, 8, 5, 4, 8, 14, 6, 77, 3, 78, 24, + 21, 16, 14, 17, 17, 10, 16, 20, 64, 11, 9, 6, 3, + 67, 6, 0, 3, 5, 2, 7, 13, 2, 81, 1, 75, 2, 10, + 83, 31, 40, 32, 31, 31, 30, 27, 27, 22, 13, 10, + 7, 6, 1, 87, 68, 64, 76, 17, 13, 10, 12, 8, 2, + 2, 2, 74, 68, 73, 93, 87, 99, 70, 21, 18, 11, 2, + 6, 66, 71, 74, 85, 68, 36, 21, 15, 8, 10, 64, + 71, 76, 89, 4, 37, 28, 24, 18, 14, 5, 64, 71, + 80, 70, 38, 27, 14, 3, 7, 69, 78, 86, 5, 50, 39, + 31, 23, 21, 5, 67, 73, 77, 62, 93, 90, 77, 93, + 92, 84, 88, 85, 85, 86, 85, 86, 86, 83, 86, 84, + 72, 81, 76, 76, 75, 69, 67, 67, 66, 1, 65, 67, + 71, 12, 10, 13, 7, 3, 13, 10, 6, 9, 9, 7, 5, 1, + 67, 70, 67, 6, 76, 9, 23, 8, 7, 15, 15, 14, 10, + 14, 12, 18, 0, 67, 69, 38, 38, 43, 34, 33, 42, + 46, 46, 50, 47, 54, 53, 45, 37, 13, 42, 55, 41, + 41, 39, 34, 24, 19, 16, 6, 68, 70, 78, 90, 36, + 35, 36, 30, 21, 23, 17, 11, 9, 
6, 1, 70, 70, 81, + 85, 68, 69, 82, 18, 20, 14, 4, 12, 11, 0, 7, 6, + 65, 76, 75, 83, 97, 11, 1, 73, 75, 64, 64, 2, 5, + 5, 9, 6, 9, 11, 21, 12, 13, 20, 20, 56, 46, 39, + 31, 24, 15, 5, 72, 95, 3, 35, 28, 26, 20, 19, 9, + 6, 5, 68, 80, 75, 67, 74, 0, 8, 70, 73, 1, 5, 3, + 5, 11, 4, 2, 55, 44, 31, 19, 10, 66, 78, 89, 104 }, + + { + + 57, + 7, 77, 57, 7, 77, 97, 79, 17, 11, 5, 6, 37, 41, + 53, 14, 78, 1, 64, 10, 0, 2, 68, 68, 83, 11, 33, + 94, 107, 113, 78, 82, 68, 64, 10, 0, 79, 76, 9, + 9, 65, 74, 84, 2, 74, 80, 90, 5, 72, 83, 4, 73, + 77, 88, 4, 68, 69, 74, 64, 4, 22, 0, 0, 0, 73, + 90, 97, 64, 9, 65, 18, 73, 92, 97, 68, 74, 21, + 11, 72, 66, 86, 68, 82, 79, 96, 84, 83, 88, 94, + 21, 0, 8, 83, 70, 82, 78, 101, 4, 66, 65, 75, + 17, 68, 64, 79, 82, 71, 73, 69, 7, 8, 64, 12, + 15, 76, 64, 65, 67, 4, 73, 17, 68, 1, 17, 19, + 27, 21, 14, 82, 65, 70, 4, 5, 92, 1, 8, 5, 5, 7, + 7, 14, 9, 3, 2, 1, 2, 10, 8, 81, 67, 1, 72, 6, + 7, 14, 7, 4, 9, 6, 4, 7, 14, 6, 76, 3, 78, 24, + 21, 16, 14, 17, 17, 10, 16, 20, 64, 11, 9, 6, 3, + 67, 6, 0, 3, 5, 2, 7, 13, 2, 80, 1, 74, 2, 9, + 82, 30, 39, 31, 30, 29, 28, 26, 26, 20, 12, 9, + 6, 4, 0, 88, 68, 64, 76, 16, 12, 9, 11, 7, 1, 1, + 1, 74, 68, 73, 92, 86, 98, 69, 22, 18, 11, 3, 7, + 65, 70, 73, 83, 67, 36, 21, 15, 8, 11, 0, 70, + 75, 87, 4, 38, 29, 24, 18, 15, 5, 64, 70, 79, + 70, 39, 27, 14, 3, 8, 68, 77, 85, 5, 50, 39, 31, + 23, 21, 5, 66, 72, 76, 62, 92, 89, 76, 91, 90, + 83, 86, 83, 83, 84, 83, 84, 85, 82, 85, 83, 70, + 80, 76, 75, 74, 68, 67, 67, 66, 1, 65, 68, 72, + 12, 10, 14, 7, 3, 13, 10, 6, 9, 9, 7, 5, 1, 67, + 70, 66, 7, 76, 9, 23, 8, 7, 15, 15, 15, 10, 13, + 12, 18, 0, 67, 70, 38, 38, 43, 33, 32, 41, 44, + 44, 48, 45, 52, 51, 43, 35, 12, 40, 52, 38, 37, + 37, 32, 23, 18, 15, 6, 67, 69, 76, 87, 35, 34, + 35, 29, 19, 22, 16, 9, 8, 5, 0, 71, 71, 82, 85, + 68, 69, 83, 17, 19, 13, 3, 11, 10, 64, 6, 5, 66, + 77, 76, 84, 97, 11, 1, 73, 74, 0, 0, 3, 6, 6, + 11, 7, 10, 12, 23, 14, 14, 22, 22, 56, 45, 37, + 29, 22, 13, 2, 74, 97, 4, 
36, 29, 27, 21, 20, + 10, 7, 6, 67, 79, 74, 66, 73, 2, 10, 69, 72, 2, + 6, 4, 6, 12, 4, 2, 54, 42, 29, 17, 7, 69, 81, + 92, 106 }, + + { + + 56, + 7, 77, 56, 7, 77, 95, 78, 17, 11, 5, 5, 36, 40, + 53, 14, 76, 1, 0, 11, 0, 1, 68, 69, 84, 10, 31, + 96, 108, 114, 75, 81, 68, 0, 11, 0, 79, 75, 9, + 8, 65, 74, 83, 2, 74, 80, 90, 5, 72, 82, 4, 73, + 77, 88, 4, 68, 69, 74, 64, 4, 22, 0, 0, 0, 72, + 90, 97, 64, 8, 65, 18, 73, 92, 95, 66, 72, 22, + 13, 71, 65, 85, 67, 81, 78, 94, 84, 83, 88, 93, + 21, 0, 8, 82, 70, 81, 78, 100, 4, 66, 65, 75, + 17, 68, 64, 79, 82, 71, 73, 69, 7, 8, 64, 12, + 15, 76, 64, 65, 67, 4, 73, 16, 68, 1, 16, 18, + 26, 20, 13, 81, 65, 70, 3, 4, 92, 0, 8, 5, 5, 7, + 7, 15, 9, 3, 2, 1, 2, 9, 8, 82, 67, 1, 73, 6, 6, + 14, 7, 3, 9, 6, 4, 6, 14, 5, 76, 3, 79, 23, 21, + 16, 14, 17, 17, 10, 16, 20, 64, 11, 9, 6, 3, 67, + 6, 0, 3, 5, 2, 6, 12, 1, 80, 1, 74, 1, 8, 82, + 28, 37, 29, 28, 27, 26, 24, 24, 18, 10, 7, 4, 2, + 65, 89, 68, 64, 77, 15, 11, 8, 10, 5, 0, 0, 0, + 75, 69, 74, 92, 86, 97, 69, 22, 18, 11, 3, 7, + 65, 70, 73, 82, 66, 36, 21, 15, 8, 11, 0, 69, + 74, 85, 4, 38, 29, 24, 18, 15, 5, 64, 70, 78, + 70, 39, 27, 14, 3, 8, 68, 77, 84, 5, 49, 38, 30, + 22, 21, 5, 66, 72, 76, 62, 91, 88, 75, 90, 89, + 82, 85, 82, 82, 83, 82, 82, 84, 81, 85, 82, 69, + 80, 77, 75, 73, 68, 67, 68, 66, 0, 66, 69, 73, + 11, 10, 14, 7, 2, 12, 10, 6, 8, 8, 7, 5, 0, 67, + 70, 66, 7, 76, 8, 22, 7, 7, 14, 14, 15, 9, 12, + 12, 18, 64, 68, 72, 37, 37, 42, 32, 31, 40, 42, + 42, 46, 43, 49, 48, 40, 33, 11, 37, 49, 35, 33, + 34, 30, 21, 17, 14, 6, 66, 69, 75, 85, 33, 32, + 33, 27, 17, 20, 14, 7, 7, 3, 64, 73, 72, 83, 86, + 69, 69, 84, 16, 18, 11, 1, 9, 8, 65, 4, 3, 67, + 78, 77, 85, 98, 10, 0, 74, 74, 0, 0, 4, 6, 7, + 12, 8, 11, 13, 24, 15, 15, 23, 23, 55, 43, 35, + 27, 19, 10, 64, 77, 99, 4, 36, 29, 27, 21, 21, + 10, 7, 6, 67, 79, 74, 65, 73, 3, 11, 69, 71, 3, + 7, 4, 6, 13, 4, 2, 53, 40, 27, 14, 4, 73, 85, + 95, 109 }, + + { + + 55, + 7, 77, 55, 7, 77, 93, 76, 18, 11, 4, 3, 34, 39, 
+ 53, 14, 74, 1, 1, 12, 0, 0, 68, 70, 85, 10, 29, + 97, 109, 114, 72, 80, 68, 1, 12, 0, 78, 74, 10, + 8, 65, 73, 82, 1, 75, 80, 90, 5, 72, 82, 4, 73, + 77, 88, 5, 68, 69, 74, 0, 4, 22, 0, 0, 0, 72, + 90, 97, 0, 7, 65, 18, 73, 92, 93, 64, 71, 24, + 15, 70, 64, 83, 66, 80, 77, 92, 83, 83, 87, 92, + 21, 0, 8, 82, 70, 81, 77, 98, 4, 67, 66, 76, 17, + 68, 64, 78, 82, 70, 73, 68, 7, 8, 64, 12, 14, + 76, 64, 65, 67, 4, 73, 15, 68, 1, 15, 17, 25, + 19, 12, 81, 64, 70, 2, 4, 91, 0, 8, 5, 5, 7, 7, + 16, 9, 3, 2, 2, 2, 8, 8, 83, 67, 1, 73, 5, 5, + 14, 7, 2, 9, 6, 4, 4, 14, 4, 76, 3, 79, 22, 21, + 16, 14, 17, 17, 10, 16, 20, 64, 10, 9, 6, 3, 67, + 5, 64, 2, 4, 1, 5, 11, 1, 80, 0, 74, 0, 6, 82, + 27, 35, 27, 26, 25, 24, 22, 22, 16, 8, 6, 3, 0, + 67, 90, 68, 64, 77, 14, 10, 7, 8, 4, 65, 64, 64, + 76, 69, 74, 92, 86, 97, 68, 22, 18, 11, 3, 7, + 65, 70, 72, 81, 66, 36, 21, 15, 8, 12, 1, 68, + 73, 83, 4, 38, 29, 24, 18, 16, 5, 64, 70, 78, + 70, 39, 27, 13, 3, 8, 68, 77, 83, 5, 49, 38, 29, + 21, 21, 5, 66, 71, 75, 62, 90, 87, 75, 89, 88, + 81, 83, 81, 81, 82, 80, 81, 84, 80, 85, 82, 67, + 80, 77, 75, 73, 68, 68, 68, 66, 64, 67, 70, 74, + 11, 9, 14, 6, 2, 12, 9, 6, 7, 8, 6, 5, 64, 67, + 71, 66, 7, 77, 8, 22, 6, 7, 13, 14, 15, 8, 11, + 11, 17, 65, 68, 73, 36, 36, 42, 31, 30, 38, 40, + 40, 44, 40, 47, 46, 37, 30, 10, 34, 46, 32, 29, + 31, 27, 19, 16, 13, 6, 65, 69, 74, 83, 31, 30, + 31, 25, 15, 18, 13, 5, 5, 2, 65, 74, 74, 84, 87, + 69, 70, 85, 15, 16, 10, 64, 7, 6, 67, 3, 1, 69, + 79, 79, 87, 99, 9, 64, 75, 74, 1, 0, 4, 7, 7, + 13, 9, 12, 14, 25, 16, 16, 25, 24, 55, 42, 33, + 24, 17, 7, 67, 80, 101, 5, 37, 30, 28, 21, 22, + 11, 7, 7, 66, 79, 73, 65, 72, 4, 12, 69, 71, 3, + 7, 4, 7, 14, 4, 2, 52, 38, 24, 11, 1, 77, 89, + 99, 111 }, + + { + + 53, + 7, 77, 53, 7, 77, 92, 75, 18, 11, 4, 2, 33, 37, + 53, 14, 71, 0, 2, 12, 0, 64, 68, 71, 86, 9, 27, + 99, 110, 115, 69, 79, 68, 2, 12, 0, 78, 73, 10, + 7, 65, 73, 82, 1, 75, 79, 90, 5, 72, 81, 3, 74, + 77, 88, 5, 67, 69, 73, 0, 4, 22, 0, 0, 0, 
71, + 91, 97, 0, 6, 65, 18, 73, 92, 92, 0, 69, 25, 16, + 69, 0, 82, 64, 79, 76, 90, 83, 82, 87, 91, 21, + 0, 8, 81, 70, 80, 77, 97, 3, 67, 66, 76, 17, 68, + 64, 78, 82, 70, 73, 68, 7, 7, 64, 12, 14, 76, + 64, 65, 67, 4, 73, 15, 69, 1, 15, 16, 24, 18, + 11, 80, 64, 70, 1, 3, 91, 64, 8, 4, 5, 7, 7, 17, + 10, 3, 1, 2, 1, 7, 7, 84, 67, 1, 74, 5, 4, 13, + 6, 2, 10, 6, 4, 3, 13, 3, 76, 3, 80, 22, 21, 16, + 14, 17, 17, 10, 16, 19, 64, 10, 9, 6, 3, 68, 5, + 64, 2, 4, 1, 5, 10, 0, 80, 0, 74, 64, 5, 82, 25, + 33, 25, 24, 23, 22, 20, 20, 14, 6, 4, 1, 66, 69, + 91, 68, 65, 78, 13, 9, 6, 7, 2, 66, 65, 66, 76, + 70, 75, 92, 86, 96, 68, 22, 18, 11, 3, 7, 65, + 70, 72, 80, 65, 36, 21, 15, 8, 12, 1, 68, 73, + 81, 4, 38, 29, 24, 18, 16, 5, 64, 70, 77, 70, + 39, 26, 13, 3, 8, 68, 77, 83, 5, 48, 37, 28, 20, + 21, 5, 66, 71, 75, 62, 89, 86, 74, 87, 87, 80, + 82, 79, 80, 80, 79, 79, 83, 80, 84, 81, 66, 79, + 78, 75, 72, 68, 68, 69, 67, 65, 68, 71, 75, 10, + 9, 14, 6, 1, 11, 9, 6, 6, 7, 6, 4, 65, 68, 71, + 66, 7, 77, 7, 21, 5, 7, 12, 13, 15, 7, 10, 11, + 17, 66, 69, 75, 35, 36, 41, 30, 28, 37, 38, 38, + 42, 38, 44, 43, 34, 28, 8, 31, 42, 29, 25, 29, + 25, 17, 15, 11, 6, 64, 68, 73, 81, 30, 28, 29, + 24, 13, 16, 11, 3, 4, 0, 67, 76, 75, 85, 88, 70, + 70, 86, 13, 15, 8, 65, 5, 4, 68, 1, 64, 70, 80, + 80, 88, 99, 8, 64, 76, 74, 1, 1, 5, 7, 8, 14, 9, + 13, 15, 26, 17, 17, 26, 25, 54, 40, 31, 22, 14, + 4, 70, 83, 104, 5, 37, 30, 28, 22, 22, 11, 8, 7, + 66, 78, 73, 64, 72, 5, 13, 69, 70, 4, 8, 4, 7, + 15, 4, 2, 50, 36, 22, 8, 65, 81, 93, 102, 114 }, + + { + + 52, + 7, 77, 52, 7, 77, 90, 73, 18, 11, 3, 0, 31, 36, + 53, 14, 69, 0, 3, 13, 0, 64, 67, 72, 87, 9, 25, + 101, 111, 115, 66, 77, 68, 3, 13, 0, 78, 72, 10, + 7, 65, 73, 81, 1, 76, 79, 90, 5, 72, 80, 3, 74, + 77, 88, 6, 67, 68, 73, 1, 4, 22, 0, 0, 0, 71, + 91, 97, 1, 5, 65, 18, 72, 92, 90, 2, 68, 27, 18, + 67, 1, 81, 0, 78, 75, 88, 82, 82, 86, 90, 21, 0, + 8, 81, 70, 80, 76, 95, 3, 67, 66, 76, 17, 68, 0, + 77, 82, 70, 73, 67, 7, 7, 64, 12, 
14, 75, 64, + 64, 67, 4, 73, 14, 69, 1, 14, 15, 24, 17, 10, + 79, 0, 70, 0, 3, 90, 64, 8, 4, 5, 7, 7, 18, 10, + 3, 1, 2, 1, 6, 7, 85, 67, 1, 74, 4, 3, 13, 6, 1, + 10, 6, 4, 2, 13, 2, 76, 3, 80, 21, 21, 16, 14, + 17, 17, 10, 16, 19, 64, 10, 9, 6, 3, 68, 5, 64, + 1, 4, 1, 4, 9, 0, 80, 64, 74, 65, 4, 82, 24, 31, + 24, 23, 21, 20, 18, 18, 12, 4, 2, 64, 68, 70, + 92, 68, 65, 78, 12, 8, 5, 6, 1, 68, 66, 67, 77, + 70, 75, 92, 86, 96, 68, 22, 18, 11, 3, 7, 64, + 69, 71, 79, 65, 36, 21, 15, 8, 12, 2, 67, 72, + 79, 4, 38, 29, 24, 18, 16, 5, 64, 70, 76, 70, + 40, 26, 12, 3, 8, 68, 77, 82, 5, 48, 36, 27, 19, + 21, 5, 66, 71, 74, 62, 88, 85, 73, 86, 86, 79, + 81, 78, 79, 79, 77, 77, 82, 79, 84, 81, 65, 79, + 78, 74, 71, 68, 69, 69, 67, 65, 69, 72, 76, 9, + 8, 14, 5, 1, 11, 9, 6, 5, 6, 6, 4, 66, 68, 71, + 66, 7, 77, 6, 21, 5, 7, 11, 12, 15, 6, 9, 10, + 16, 67, 70, 77, 34, 35, 41, 29, 27, 35, 36, 36, + 40, 35, 41, 41, 31, 26, 7, 28, 39, 26, 21, 26, + 22, 15, 14, 10, 6, 0, 68, 72, 79, 28, 27, 27, + 22, 11, 14, 10, 1, 3, 65, 68, 78, 77, 86, 89, + 70, 70, 87, 12, 13, 6, 67, 4, 3, 70, 0, 65, 72, + 81, 81, 89, 100, 8, 65, 77, 73, 2, 1, 5, 8, 9, + 15, 10, 14, 16, 28, 18, 18, 27, 27, 54, 38, 29, + 19, 12, 1, 73, 86, 106, 5, 38, 31, 29, 22, 23, + 11, 8, 8, 65, 78, 73, 0, 71, 6, 14, 68, 69, 4, + 8, 4, 8, 16, 4, 2, 49, 34, 19, 5, 68, 84, 97, + 106, 117 }, + + { + + 51, + 7, 78, 51, 7, 78, 88, 72, 19, 11, 3, 64, 30, 35, + 53, 14, 67, 0, 3, 14, 0, 65, 67, 73, 88, 8, 24, + 102, 112, 116, 0, 76, 68, 3, 14, 0, 77, 71, 11, + 6, 64, 72, 80, 0, 76, 79, 90, 5, 71, 80, 3, 74, + 76, 88, 6, 67, 68, 73, 1, 4, 22, 0, 0, 0, 70, + 91, 97, 1, 5, 66, 18, 72, 91, 88, 4, 66, 28, 20, + 66, 2, 79, 1, 77, 75, 87, 82, 82, 86, 90, 22, 0, + 8, 80, 69, 79, 76, 94, 3, 68, 67, 77, 17, 68, 0, + 77, 82, 69, 73, 67, 7, 7, 0, 12, 13, 75, 64, 64, + 67, 4, 73, 13, 69, 1, 13, 14, 23, 17, 10, 79, 0, + 70, 0, 2, 90, 65, 8, 4, 5, 7, 7, 18, 10, 3, 1, + 3, 1, 5, 7, 85, 68, 1, 75, 4, 2, 13, 6, 0, 10, + 6, 4, 0, 13, 1, 76, 3, 81, 
20, 21, 15, 14, 17, + 17, 10, 16, 19, 64, 9, 9, 5, 3, 68, 4, 65, 1, 3, + 0, 3, 8, 64, 80, 64, 73, 66, 2, 82, 22, 29, 22, + 21, 19, 18, 16, 16, 10, 2, 1, 65, 70, 72, 93, + 68, 65, 79, 11, 7, 4, 4, 64, 69, 67, 68, 78, 71, + 76, 92, 86, 95, 67, 22, 18, 11, 4, 7, 64, 69, + 71, 78, 64, 35, 21, 15, 8, 13, 2, 66, 71, 77, 4, + 39, 29, 24, 18, 17, 5, 64, 69, 76, 70, 40, 26, + 12, 3, 8, 67, 76, 81, 5, 47, 36, 26, 19, 21, 5, + 65, 70, 74, 62, 88, 84, 73, 85, 85, 78, 79, 77, + 78, 78, 76, 76, 82, 78, 84, 80, 0, 79, 79, 74, + 71, 68, 69, 70, 67, 66, 70, 73, 77, 9, 8, 14, 5, + 0, 10, 8, 6, 5, 6, 5, 4, 66, 68, 72, 66, 7, 78, + 6, 20, 4, 6, 10, 12, 16, 6, 8, 10, 16, 67, 70, + 78, 33, 34, 40, 28, 26, 34, 34, 34, 38, 33, 39, + 38, 28, 23, 6, 26, 36, 23, 17, 23, 20, 13, 13, + 9, 6, 1, 68, 70, 76, 26, 25, 25, 20, 9, 12, 8, + 64, 1, 66, 69, 79, 78, 87, 90, 71, 71, 88, 11, + 12, 5, 69, 2, 1, 71, 65, 67, 73, 83, 83, 91, + 101, 7, 66, 78, 73, 2, 1, 6, 8, 9, 17, 11, 15, + 17, 29, 20, 19, 29, 28, 53, 37, 27, 17, 9, 64, + 76, 88, 108, 6, 38, 31, 29, 22, 24, 12, 8, 8, + 65, 78, 72, 0, 71, 7, 15, 68, 69, 5, 9, 4, 8, + 17, 4, 2, 48, 32, 17, 2, 72, 88, 100, 109, 119 }, + + { + + 50, + 7, 78, 50, 7, 78, 86, 70, 19, 11, 2, 66, 28, 33, + 53, 14, 64, 64, 4, 14, 0, 66, 67, 74, 89, 8, 22, + 104, 113, 116, 3, 75, 68, 4, 14, 0, 77, 70, 11, + 6, 64, 72, 80, 0, 77, 78, 90, 5, 71, 79, 2, 74, + 76, 88, 7, 66, 68, 72, 2, 4, 22, 0, 0, 0, 70, + 91, 97, 2, 4, 66, 18, 72, 91, 87, 6, 65, 30, 22, + 65, 3, 78, 3, 76, 74, 85, 81, 81, 85, 89, 22, 0, + 8, 80, 69, 79, 75, 92, 2, 68, 67, 77, 17, 68, 0, + 76, 82, 69, 73, 66, 7, 6, 0, 12, 13, 75, 64, 64, + 67, 4, 73, 13, 69, 1, 13, 13, 22, 16, 9, 78, 1, + 70, 64, 2, 89, 65, 8, 3, 5, 7, 7, 19, 11, 3, 1, + 3, 1, 4, 7, 86, 68, 1, 75, 3, 1, 12, 6, 0, 11, + 6, 4, 64, 12, 0, 76, 3, 81, 20, 21, 15, 14, 17, + 17, 10, 16, 19, 64, 9, 9, 5, 3, 68, 4, 65, 0, 3, + 0, 3, 7, 64, 80, 65, 73, 67, 1, 82, 21, 27, 20, + 19, 17, 16, 14, 14, 8, 0, 64, 67, 73, 74, 94, + 68, 66, 79, 10, 6, 3, 3, 
65, 71, 68, 69, 78, 71, + 76, 92, 86, 95, 67, 22, 18, 11, 4, 7, 64, 69, + 70, 77, 64, 35, 21, 15, 8, 13, 3, 65, 71, 75, 4, + 39, 29, 24, 18, 17, 5, 64, 69, 75, 70, 40, 25, + 11, 3, 8, 67, 76, 81, 5, 47, 35, 25, 18, 21, 5, + 65, 70, 73, 62, 87, 83, 72, 83, 84, 77, 78, 75, + 77, 76, 74, 74, 81, 77, 83, 80, 1, 78, 79, 74, + 70, 68, 70, 70, 68, 67, 71, 74, 78, 8, 7, 14, 4, + 0, 10, 8, 6, 4, 5, 5, 3, 67, 68, 72, 66, 7, 78, + 5, 20, 3, 6, 9, 11, 16, 5, 7, 9, 15, 68, 71, 80, + 32, 34, 40, 27, 24, 32, 32, 32, 36, 30, 36, 36, + 25, 21, 4, 23, 32, 20, 13, 21, 17, 11, 12, 8, 6, + 2, 67, 69, 74, 25, 23, 23, 19, 7, 10, 7, 66, 0, + 68, 71, 81, 80, 88, 91, 71, 71, 89, 9, 10, 3, + 70, 0, 64, 73, 66, 69, 75, 84, 84, 92, 101, 6, + 66, 79, 73, 3, 2, 6, 9, 10, 18, 12, 16, 18, 30, + 21, 20, 30, 29, 53, 35, 25, 14, 7, 67, 79, 91, + 110, 6, 39, 32, 30, 23, 25, 12, 9, 9, 64, 77, + 72, 1, 70, 8, 16, 68, 68, 5, 9, 4, 9, 18, 4, 2, + 46, 30, 14, 64, 75, 92, 104, 113, 122 }, + + { + + 48, + 6, 78, 48, 6, 78, 85, 69, 19, 11, 2, 67, 27, 32, + 53, 14, 1, 64, 5, 15, 0, 67, 67, 75, 91, 7, 20, + 106, 114, 117, 5, 74, 68, 5, 15, 0, 77, 69, 11, + 5, 64, 72, 79, 64, 77, 78, 91, 5, 71, 79, 2, 75, + 76, 88, 7, 66, 68, 72, 2, 4, 22, 0, 0, 0, 69, + 92, 97, 2, 3, 66, 18, 72, 91, 85, 7, 0, 31, 23, + 64, 4, 77, 4, 75, 73, 83, 81, 81, 85, 88, 22, 0, + 8, 79, 69, 78, 75, 91, 2, 69, 68, 78, 17, 68, 0, + 76, 82, 69, 73, 66, 6, 6, 0, 12, 12, 75, 64, 64, + 67, 3, 73, 12, 70, 1, 12, 11, 21, 15, 8, 78, 1, + 70, 65, 1, 89, 66, 7, 3, 5, 7, 7, 20, 11, 3, 0, + 3, 0, 3, 6, 87, 68, 1, 76, 3, 0, 12, 5, 64, 11, + 6, 4, 66, 12, 64, 76, 3, 82, 19, 20, 15, 14, 17, + 16, 9, 16, 18, 65, 8, 9, 5, 2, 69, 3, 66, 0, 2, + 64, 2, 6, 65, 80, 65, 73, 68, 64, 82, 19, 25, + 18, 17, 15, 13, 12, 12, 6, 65, 66, 69, 75, 76, + 95, 68, 66, 80, 9, 4, 1, 1, 67, 72, 70, 71, 79, + 72, 77, 92, 86, 94, 67, 22, 18, 11, 4, 7, 64, + 69, 70, 76, 0, 35, 21, 15, 8, 13, 3, 65, 70, 74, + 4, 39, 29, 24, 17, 17, 5, 64, 69, 75, 70, 40, + 25, 11, 3, 8, 67, 76, 
80, 5, 46, 34, 24, 17, 20, + 5, 65, 70, 73, 62, 86, 82, 72, 82, 83, 77, 77, + 74, 76, 75, 73, 73, 81, 77, 83, 79, 2, 78, 80, + 74, 70, 68, 70, 71, 68, 68, 72, 76, 79, 7, 7, + 14, 4, 64, 9, 7, 5, 3, 4, 4, 3, 68, 69, 73, 66, + 7, 79, 4, 19, 2, 6, 8, 10, 16, 4, 6, 9, 15, 69, + 72, 82, 31, 33, 39, 25, 23, 31, 30, 30, 33, 28, + 33, 33, 22, 18, 3, 20, 29, 17, 9, 18, 15, 9, 10, + 6, 6, 2, 67, 68, 72, 23, 21, 21, 17, 4, 8, 5, + 68, 65, 70, 72, 83, 81, 89, 92, 72, 72, 90, 8, + 9, 1, 72, 65, 66, 74, 68, 71, 76, 85, 86, 94, + 102, 5, 67, 80, 73, 3, 2, 7, 9, 10, 19, 12, 17, + 19, 31, 22, 21, 31, 30, 52, 33, 23, 12, 4, 70, + 82, 94, 113, 6, 39, 32, 30, 23, 25, 12, 9, 9, + 64, 77, 72, 1, 70, 9, 17, 68, 68, 6, 10, 4, 9, + 18, 4, 1, 45, 28, 12, 67, 78, 96, 108, 116, 125 }, + + { + + 47, + 6, 78, 47, 6, 78, 83, 68, 20, 11, 2, 68, 26, 31, + 53, 14, 3, 64, 6, 16, 0, 67, 66, 76, 92, 6, 18, + 107, 115, 118, 8, 72, 68, 6, 16, 0, 76, 68, 12, + 4, 64, 71, 78, 64, 77, 78, 91, 5, 71, 78, 2, 75, + 76, 88, 7, 66, 67, 72, 2, 4, 22, 0, 0, 0, 68, + 92, 97, 2, 2, 66, 18, 71, 91, 83, 9, 2, 32, 25, + 1, 5, 75, 5, 73, 72, 81, 81, 81, 85, 87, 22, 0, + 8, 78, 69, 77, 74, 90, 2, 69, 68, 78, 17, 68, 1, + 75, 81, 68, 73, 66, 6, 6, 0, 12, 12, 74, 64, 0, + 67, 3, 72, 11, 70, 1, 11, 10, 21, 14, 7, 77, 1, + 69, 66, 0, 89, 67, 7, 3, 6, 7, 7, 21, 11, 3, 0, + 4, 0, 3, 6, 88, 68, 1, 77, 3, 64, 12, 5, 65, 11, + 6, 4, 67, 12, 64, 76, 3, 83, 18, 20, 15, 14, 17, + 16, 9, 16, 18, 65, 8, 9, 5, 2, 69, 3, 66, 0, 2, + 64, 1, 6, 65, 80, 65, 73, 69, 65, 82, 17, 24, + 17, 16, 13, 11, 11, 11, 4, 67, 67, 70, 77, 77, + 96, 68, 66, 81, 8, 3, 0, 0, 68, 73, 71, 72, 80, + 73, 78, 92, 85, 93, 66, 23, 18, 11, 4, 8, 0, 68, + 69, 75, 1, 35, 21, 15, 8, 14, 3, 64, 69, 72, 4, + 39, 29, 24, 17, 18, 5, 64, 69, 74, 70, 41, 25, + 11, 3, 9, 67, 76, 79, 5, 46, 34, 24, 16, 20, 5, + 65, 69, 72, 62, 85, 81, 71, 81, 81, 76, 75, 73, + 74, 74, 72, 71, 80, 76, 83, 78, 4, 78, 81, 73, + 69, 68, 70, 72, 68, 68, 73, 77, 80, 7, 7, 14, 4, + 64, 8, 7, 5, 2, 
4, 4, 3, 69, 69, 73, 65, 8, 79, + 4, 18, 2, 6, 8, 10, 16, 3, 5, 9, 15, 70, 72, 83, + 31, 32, 38, 24, 22, 30, 28, 28, 31, 26, 31, 30, + 20, 16, 2, 17, 26, 14, 5, 15, 13, 8, 9, 5, 6, 3, + 67, 67, 70, 21, 20, 19, 15, 2, 7, 4, 70, 66, 71, + 73, 84, 82, 90, 92, 72, 72, 91, 7, 8, 0, 74, 66, + 67, 75, 69, 72, 77, 86, 87, 95, 103, 5, 68, 80, + 72, 3, 2, 8, 10, 11, 20, 13, 18, 20, 33, 23, 22, + 33, 32, 51, 32, 21, 10, 1, 73, 85, 97, 115, 7, + 39, 32, 31, 23, 26, 13, 9, 10, 0, 77, 71, 2, 70, + 10, 19, 67, 67, 7, 11, 4, 10, 19, 4, 1, 44, 26, + 10, 69, 81, 99, 112, 119, 126 }, + + { + + 46, + 6, 78, 46, 6, 78, 81, 66, 20, 11, 1, 70, 24, 29, + 53, 14, 6, 65, 7, 16, 0, 68, 66, 77, 93, 6, 16, + 109, 116, 118, 11, 71, 68, 7, 16, 0, 76, 67, 12, + 4, 64, 71, 78, 64, 78, 77, 91, 5, 71, 77, 1, 75, + 76, 88, 8, 65, 67, 71, 3, 4, 22, 0, 0, 0, 68, + 92, 97, 3, 1, 66, 18, 71, 91, 82, 11, 3, 34, 27, + 2, 6, 74, 7, 72, 71, 79, 80, 80, 84, 86, 22, 0, + 8, 78, 69, 77, 74, 88, 1, 69, 68, 78, 17, 68, 1, + 75, 81, 68, 73, 65, 6, 5, 0, 12, 12, 74, 64, 0, + 67, 3, 72, 11, 70, 1, 11, 9, 20, 13, 6, 76, 2, + 69, 67, 0, 88, 67, 7, 2, 6, 7, 7, 22, 12, 3, 0, + 4, 0, 2, 6, 89, 68, 1, 77, 2, 65, 11, 5, 65, 12, + 6, 4, 68, 11, 65, 76, 3, 83, 18, 20, 15, 14, 17, + 16, 9, 16, 18, 65, 8, 9, 5, 2, 69, 3, 66, 64, 2, + 64, 1, 5, 66, 80, 66, 73, 70, 66, 82, 16, 22, + 15, 14, 11, 9, 9, 9, 2, 69, 69, 72, 80, 79, 97, + 68, 67, 81, 7, 2, 64, 64, 70, 75, 72, 73, 80, + 73, 78, 92, 85, 93, 66, 23, 18, 11, 4, 8, 0, 68, + 69, 74, 1, 35, 21, 15, 8, 14, 4, 0, 69, 70, 4, + 39, 29, 24, 17, 18, 5, 64, 69, 73, 70, 41, 24, + 10, 3, 9, 67, 76, 79, 5, 45, 33, 23, 15, 20, 5, + 65, 69, 72, 62, 84, 80, 70, 79, 80, 75, 74, 71, + 73, 72, 70, 69, 79, 75, 82, 78, 5, 77, 81, 73, + 68, 68, 71, 72, 69, 69, 74, 78, 81, 6, 6, 14, 3, + 65, 8, 7, 5, 1, 3, 4, 2, 70, 69, 73, 65, 8, 79, + 3, 18, 1, 6, 7, 9, 16, 2, 4, 8, 14, 71, 73, 85, + 30, 32, 38, 23, 20, 28, 26, 26, 29, 23, 28, 28, + 17, 14, 0, 14, 22, 11, 1, 13, 10, 6, 8, 4, 6, 4, + 66, 66, 
68, 20, 18, 17, 14, 0, 5, 2, 72, 67, 73, + 75, 86, 84, 91, 93, 73, 72, 92, 5, 6, 65, 75, + 68, 69, 77, 71, 74, 79, 87, 88, 96, 103, 4, 68, + 81, 72, 4, 3, 8, 10, 12, 21, 14, 19, 21, 34, 24, + 23, 34, 33, 51, 30, 19, 7, 64, 76, 88, 100, 117, + 7, 40, 33, 31, 24, 27, 13, 10, 10, 0, 76, 71, 3, + 69, 11, 20, 67, 66, 7, 11, 4, 10, 20, 4, 1, 42, + 24, 7, 72, 84, 103, 116, 123, 126 }, + + { + + 45, + 6, 79, 45, 6, 79, 79, 65, 21, 11, 1, 71, 23, 28, + 53, 14, 8, 65, 7, 17, 0, 69, 66, 78, 94, 5, 15, + 110, 117, 119, 14, 70, 68, 7, 17, 0, 75, 66, 13, + 3, 0, 70, 77, 65, 78, 77, 91, 5, 70, 77, 1, 75, + 75, 88, 8, 65, 67, 71, 3, 4, 22, 0, 0, 0, 67, + 92, 97, 3, 1, 67, 18, 71, 90, 80, 13, 5, 35, 29, + 3, 7, 72, 8, 71, 71, 78, 80, 80, 84, 86, 23, 0, + 8, 77, 68, 76, 73, 87, 1, 70, 69, 79, 17, 68, 1, + 74, 81, 67, 73, 65, 6, 5, 1, 12, 11, 74, 64, 0, + 67, 3, 72, 10, 70, 1, 10, 8, 19, 13, 6, 76, 2, + 69, 67, 64, 88, 68, 7, 2, 6, 7, 7, 22, 12, 3, 0, + 5, 0, 1, 6, 89, 69, 1, 78, 2, 66, 11, 5, 66, 12, + 6, 4, 70, 11, 66, 76, 3, 84, 17, 20, 14, 14, 17, + 16, 9, 16, 18, 65, 7, 9, 4, 2, 69, 2, 67, 64, 1, + 65, 0, 4, 66, 80, 66, 72, 71, 68, 82, 14, 20, + 13, 12, 9, 7, 7, 7, 0, 71, 70, 73, 82, 81, 98, + 68, 67, 82, 6, 1, 65, 66, 71, 76, 73, 74, 81, + 74, 79, 92, 85, 92, 65, 23, 18, 11, 5, 8, 0, 68, + 68, 73, 2, 34, 21, 15, 8, 15, 4, 1, 68, 68, 4, + 40, 29, 24, 17, 19, 5, 64, 68, 73, 70, 41, 24, + 10, 3, 9, 66, 75, 78, 5, 45, 33, 22, 15, 20, 5, + 64, 68, 71, 62, 84, 79, 70, 78, 79, 74, 72, 70, + 72, 71, 69, 68, 79, 74, 82, 77, 7, 77, 82, 73, + 68, 68, 71, 73, 69, 70, 75, 79, 82, 6, 6, 14, 3, + 65, 7, 6, 5, 1, 3, 3, 2, 70, 69, 74, 65, 8, 80, + 3, 17, 0, 5, 6, 9, 17, 2, 3, 8, 14, 71, 73, 86, + 29, 31, 37, 22, 19, 27, 24, 24, 27, 21, 26, 25, + 14, 11, 64, 12, 19, 8, 66, 10, 8, 4, 7, 3, 6, 5, + 66, 64, 65, 18, 16, 15, 12, 65, 3, 1, 74, 69, + 74, 76, 87, 85, 92, 94, 73, 73, 93, 4, 5, 66, + 77, 70, 71, 78, 72, 76, 80, 89, 90, 98, 104, 3, + 69, 82, 72, 4, 3, 9, 11, 12, 23, 15, 20, 22, 35, + 26, 
24, 36, 34, 50, 29, 17, 5, 67, 78, 91, 102, + 119, 8, 40, 33, 32, 24, 28, 14, 10, 11, 1, 76, + 70, 3, 69, 12, 21, 67, 66, 8, 12, 4, 11, 21, 4, + 1, 41, 22, 5, 75, 88, 107, 119, 126, 126 }, + + { + + 43, + 6, 79, 43, 6, 79, 78, 0, 21, 11, 0, 73, 21, 27, + 53, 14, 10, 65, 8, 18, 0, 70, 66, 79, 95, 5, 13, + 112, 118, 119, 17, 69, 68, 8, 18, 0, 75, 65, 13, + 3, 0, 70, 76, 65, 79, 77, 91, 5, 70, 76, 1, 76, + 75, 88, 9, 65, 67, 71, 4, 4, 22, 0, 0, 0, 67, + 93, 97, 4, 0, 67, 18, 71, 90, 78, 14, 6, 37, 30, + 4, 8, 71, 9, 70, 70, 76, 79, 80, 83, 85, 23, 0, + 8, 77, 68, 76, 73, 85, 1, 70, 69, 79, 17, 68, 1, + 74, 81, 67, 73, 64, 6, 5, 1, 12, 11, 74, 64, 0, + 67, 3, 72, 9, 71, 1, 9, 7, 18, 12, 5, 75, 3, 69, + 68, 64, 87, 68, 7, 2, 6, 7, 7, 23, 12, 3, 64, 5, + 64, 0, 5, 90, 69, 1, 78, 1, 67, 11, 4, 67, 12, + 6, 4, 71, 11, 67, 76, 3, 84, 16, 20, 14, 14, 17, + 16, 9, 16, 17, 65, 7, 9, 4, 2, 70, 2, 67, 65, 1, + 65, 64, 3, 67, 80, 67, 72, 72, 69, 82, 13, 18, + 11, 10, 7, 5, 5, 5, 65, 73, 72, 75, 84, 83, 99, + 68, 67, 82, 5, 0, 66, 67, 73, 78, 74, 76, 82, + 74, 79, 92, 85, 92, 65, 23, 18, 11, 5, 8, 0, 68, + 68, 72, 2, 34, 21, 15, 8, 15, 5, 1, 67, 66, 4, + 40, 29, 24, 17, 19, 5, 64, 68, 72, 70, 41, 24, + 9, 3, 9, 66, 75, 77, 5, 44, 32, 21, 14, 20, 5, + 64, 68, 71, 62, 83, 78, 69, 77, 78, 73, 71, 69, + 71, 70, 67, 66, 78, 74, 82, 77, 8, 77, 82, 73, + 67, 68, 72, 73, 69, 71, 76, 80, 83, 5, 5, 14, 2, + 66, 7, 6, 5, 0, 2, 3, 2, 71, 70, 74, 65, 8, 80, + 2, 17, 64, 5, 5, 8, 17, 1, 2, 7, 13, 72, 74, 88, + 28, 30, 37, 21, 18, 25, 22, 22, 25, 18, 23, 23, + 11, 9, 65, 9, 16, 5, 70, 7, 5, 2, 6, 1, 6, 6, + 66, 0, 0, 16, 14, 13, 10, 67, 1, 64, 76, 70, 76, + 77, 89, 87, 93, 95, 74, 73, 94, 3, 3, 68, 79, + 72, 73, 80, 74, 78, 82, 90, 91, 99, 105, 2, 70, + 83, 72, 5, 3, 9, 11, 13, 24, 15, 21, 23, 36, 27, + 25, 37, 35, 50, 27, 15, 2, 69, 81, 94, 105, 122, + 8, 41, 34, 32, 24, 28, 14, 10, 11, 1, 76, 70, 4, + 68, 13, 22, 67, 65, 8, 12, 4, 11, 22, 4, 1, 40, + 20, 2, 78, 91, 111, 123, 126, 126 }, + + { 
+ + 42, + 6, 79, 42, 6, 79, 76, 1, 21, 11, 0, 74, 20, 25, + 53, 14, 13, 66, 9, 18, 0, 70, 65, 80, 96, 4, 11, + 114, 119, 120, 20, 67, 68, 9, 18, 0, 75, 64, 13, + 2, 0, 70, 76, 65, 79, 76, 91, 5, 70, 75, 0, 76, + 75, 88, 9, 64, 66, 70, 4, 4, 22, 0, 0, 0, 66, + 93, 97, 4, 64, 67, 18, 70, 90, 77, 16, 8, 38, + 32, 6, 9, 70, 11, 69, 69, 74, 79, 79, 83, 84, + 23, 0, 8, 76, 68, 75, 72, 84, 0, 70, 69, 79, 17, + 68, 2, 73, 81, 67, 73, 64, 6, 4, 1, 12, 11, 73, + 64, 1, 67, 3, 72, 9, 71, 1, 9, 6, 18, 11, 4, 74, + 3, 69, 69, 65, 87, 69, 7, 1, 6, 7, 7, 24, 13, 3, + 64, 5, 64, 64, 5, 91, 69, 1, 79, 1, 68, 10, 4, + 67, 13, 6, 4, 72, 10, 68, 76, 3, 85, 16, 20, 14, + 14, 17, 16, 9, 16, 17, 65, 7, 9, 4, 2, 70, 2, + 67, 65, 1, 65, 64, 2, 67, 80, 67, 72, 73, 70, + 82, 11, 16, 10, 9, 5, 3, 3, 3, 67, 75, 74, 77, + 87, 84, 100, 68, 68, 83, 4, 64, 67, 68, 74, 79, + 75, 77, 82, 75, 80, 92, 85, 91, 65, 23, 18, 11, + 5, 8, 1, 67, 67, 71, 3, 34, 21, 15, 8, 15, 5, 2, + 67, 64, 4, 40, 29, 24, 17, 19, 5, 64, 68, 71, + 70, 42, 23, 9, 3, 9, 66, 75, 77, 5, 44, 31, 20, + 13, 20, 5, 64, 68, 70, 62, 82, 77, 68, 75, 77, + 72, 70, 67, 70, 68, 66, 64, 77, 73, 81, 76, 9, + 76, 83, 72, 66, 68, 72, 74, 70, 71, 77, 81, 84, + 4, 5, 14, 2, 66, 6, 6, 5, 64, 1, 3, 1, 72, 70, + 74, 65, 8, 80, 1, 16, 64, 5, 4, 7, 17, 0, 1, 7, + 13, 73, 75, 90, 27, 30, 36, 20, 16, 24, 20, 20, + 23, 16, 20, 20, 8, 7, 67, 6, 12, 2, 74, 5, 3, 0, + 5, 0, 6, 7, 65, 1, 2, 15, 13, 11, 9, 69, 64, 65, + 78, 71, 78, 79, 91, 88, 94, 96, 74, 73, 95, 1, + 2, 70, 80, 73, 74, 81, 75, 79, 83, 91, 92, 100, + 105, 2, 70, 84, 71, 5, 4, 10, 12, 14, 25, 16, + 22, 24, 38, 28, 26, 38, 37, 49, 25, 13, 0, 72, + 84, 97, 108, 124, 8, 41, 34, 33, 25, 29, 14, 11, + 12, 2, 75, 70, 5, 68, 14, 23, 66, 64, 9, 13, 4, + 12, 23, 4, 1, 38, 18, 0, 81, 94, 114, 126, 126, + 126 }, + + { + + 41, + 6, 79, 41, 6, 79, 74, 3, 22, 11, 64, 76, 18, 24, + 53, 14, 15, 66, 10, 19, 0, 71, 65, 81, 97, 4, 9, + 115, 120, 120, 23, 66, 68, 10, 19, 0, 74, 0, 14, + 2, 0, 69, 75, 66, 80, 
76, 91, 5, 70, 75, 0, 76, + 75, 88, 10, 64, 66, 70, 5, 4, 22, 0, 0, 0, 66, + 93, 97, 5, 65, 67, 18, 70, 90, 75, 18, 9, 40, + 34, 7, 10, 68, 12, 68, 68, 72, 78, 79, 82, 83, + 23, 0, 8, 76, 68, 75, 72, 82, 0, 71, 70, 80, 17, + 68, 2, 73, 81, 66, 73, 0, 6, 4, 1, 12, 10, 73, + 64, 1, 67, 3, 72, 8, 71, 1, 8, 5, 17, 10, 3, 74, + 4, 69, 70, 65, 86, 69, 7, 1, 6, 7, 7, 25, 13, 3, + 64, 6, 64, 65, 5, 92, 69, 1, 79, 0, 69, 10, 4, + 68, 13, 6, 4, 74, 10, 69, 76, 3, 85, 15, 20, 14, + 14, 17, 16, 9, 16, 17, 65, 6, 9, 4, 2, 70, 1, + 68, 66, 0, 66, 65, 1, 68, 80, 68, 72, 74, 72, + 82, 10, 14, 8, 7, 3, 1, 1, 1, 69, 77, 75, 78, + 89, 86, 101, 68, 68, 83, 3, 65, 68, 70, 76, 81, + 76, 78, 83, 75, 80, 92, 85, 91, 64, 23, 18, 11, + 5, 8, 1, 67, 67, 70, 3, 34, 21, 15, 8, 16, 6, 3, + 66, 1, 4, 40, 29, 24, 17, 20, 5, 64, 68, 71, 70, + 42, 23, 8, 3, 9, 66, 75, 76, 5, 43, 31, 19, 12, + 20, 5, 64, 67, 70, 62, 81, 76, 68, 74, 76, 71, + 68, 66, 69, 67, 64, 0, 77, 72, 81, 76, 11, 76, + 83, 72, 66, 68, 73, 74, 70, 72, 78, 82, 85, 4, + 4, 14, 1, 67, 6, 5, 5, 65, 1, 2, 1, 73, 70, 75, + 65, 8, 81, 1, 16, 65, 5, 3, 7, 17, 64, 0, 6, 12, + 74, 75, 91, 26, 29, 36, 19, 15, 22, 18, 18, 21, + 13, 18, 18, 5, 4, 68, 3, 9, 64, 78, 2, 0, 65, 4, + 64, 6, 8, 65, 2, 4, 13, 11, 9, 7, 71, 66, 67, + 80, 73, 79, 80, 92, 90, 95, 97, 75, 74, 96, 0, + 0, 71, 82, 75, 76, 83, 77, 81, 85, 92, 94, 102, + 106, 1, 71, 85, 71, 6, 4, 10, 12, 14, 26, 17, + 23, 25, 39, 29, 27, 40, 38, 49, 24, 11, 66, 74, + 87, 100, 111, 126, 9, 42, 35, 33, 25, 30, 15, + 11, 12, 2, 75, 69, 5, 67, 15, 24, 66, 64, 9, 13, + 4, 12, 24, 4, 1, 37, 16, 66, 84, 97, 118, 126, + 126, 126 }, + + { + + 40, + 6, 79, 40, 6, 79, 72, 4, 22, 11, 64, 77, 17, 23, + 53, 14, 17, 66, 11, 20, 0, 72, 65, 82, 98, 3, 7, + 117, 121, 121, 26, 65, 68, 11, 20, 0, 74, 1, 14, + 1, 0, 69, 74, 66, 80, 76, 91, 5, 70, 74, 0, 76, + 75, 88, 10, 64, 66, 70, 5, 4, 22, 0, 0, 0, 65, + 93, 97, 5, 66, 67, 18, 70, 90, 73, 20, 11, 41, + 36, 8, 11, 67, 13, 67, 67, 70, 78, 79, 82, 82, + 23, 0, 
8, 75, 68, 74, 71, 81, 0, 71, 70, 80, 17, + 68, 2, 72, 81, 66, 73, 0, 6, 4, 1, 12, 10, 73, + 64, 1, 67, 3, 72, 7, 71, 1, 7, 4, 16, 9, 2, 73, + 4, 69, 71, 66, 86, 70, 7, 1, 6, 7, 7, 26, 13, 3, + 64, 6, 64, 66, 5, 93, 69, 1, 80, 0, 70, 10, 4, + 69, 13, 6, 4, 75, 10, 70, 76, 3, 86, 14, 20, 14, + 14, 17, 16, 9, 16, 17, 65, 6, 9, 4, 2, 70, 1, + 68, 66, 0, 66, 66, 0, 68, 80, 68, 72, 75, 73, + 82, 8, 12, 6, 5, 1, 64, 64, 64, 71, 79, 77, 80, + 91, 88, 102, 68, 68, 84, 2, 66, 69, 71, 77, 82, + 77, 79, 84, 76, 81, 92, 85, 90, 64, 23, 18, 11, + 5, 8, 1, 67, 66, 69, 4, 34, 21, 15, 8, 16, 6, 4, + 65, 3, 4, 40, 29, 24, 17, 20, 5, 64, 68, 70, 70, + 42, 23, 8, 3, 9, 66, 75, 75, 5, 43, 30, 18, 11, + 20, 5, 64, 67, 69, 62, 80, 75, 67, 73, 75, 70, + 67, 65, 68, 66, 0, 2, 76, 71, 81, 75, 12, 76, + 84, 72, 65, 68, 73, 75, 70, 73, 79, 83, 86, 3, + 4, 14, 1, 67, 5, 5, 5, 66, 0, 2, 1, 74, 70, 75, + 65, 8, 81, 0, 15, 66, 5, 2, 6, 17, 65, 64, 6, + 12, 75, 76, 93, 25, 28, 35, 18, 14, 21, 16, 16, + 19, 11, 15, 15, 2, 2, 69, 0, 6, 67, 82, 64, 65, + 67, 3, 65, 6, 9, 65, 3, 6, 11, 9, 7, 5, 73, 68, + 68, 82, 74, 81, 81, 94, 91, 96, 98, 75, 74, 97, + 64, 64, 73, 84, 77, 78, 84, 78, 83, 86, 93, 95, + 103, 107, 0, 72, 86, 71, 6, 4, 11, 13, 15, 27, + 18, 24, 26, 40, 30, 28, 41, 39, 48, 22, 9, 68, + 77, 90, 103, 114, 126, 9, 42, 35, 34, 25, 31, + 15, 11, 13, 3, 75, 69, 6, 67, 16, 25, 66, 0, 10, + 14, 4, 13, 25, 4, 1, 36, 14, 68, 87, 100, 122, + 126, 126, 126 }, + + { + + 38, + 5, 80, 38, 5, 80, 71, 5, 22, 11, 65, 79, 15, 21, + 52, 14, 19, 67, 11, 20, 64, 73, 65, 84, 100, 2, + 5, 119, 122, 122, 28, 64, 69, 11, 20, 64, 74, 2, + 14, 0, 0, 69, 74, 67, 81, 76, 92, 5, 70, 74, 64, + 77, 75, 88, 10, 64, 66, 70, 5, 3, 22, 0, 0, 0, + 65, 94, 97, 5, 67, 68, 18, 70, 90, 72, 21, 12, + 42, 37, 9, 12, 66, 14, 66, 67, 69, 78, 79, 82, + 82, 23, 0, 8, 75, 68, 74, 71, 80, 64, 72, 71, + 81, 17, 68, 2, 72, 81, 66, 73, 0, 5, 3, 1, 11, + 9, 73, 65, 1, 67, 2, 72, 6, 72, 0, 6, 2, 15, 8, + 1, 73, 4, 69, 72, 67, 86, 71, 6, 0, 
6, 7, 7, 26, + 13, 3, 65, 6, 65, 67, 4, 94, 70, 0, 81, 64, 71, + 9, 3, 70, 13, 6, 4, 77, 9, 71, 76, 3, 87, 13, + 19, 13, 14, 17, 15, 8, 16, 16, 66, 5, 9, 3, 1, + 71, 0, 69, 67, 64, 67, 67, 64, 69, 80, 69, 72, + 76, 75, 82, 6, 10, 4, 3, 64, 67, 66, 66, 73, 81, + 79, 82, 94, 90, 104, 69, 69, 85, 1, 68, 71, 73, + 79, 84, 79, 81, 85, 77, 82, 92, 85, 90, 64, 23, + 18, 11, 5, 8, 1, 67, 66, 68, 4, 33, 21, 15, 8, + 16, 6, 4, 65, 4, 3, 40, 29, 23, 16, 20, 5, 64, + 68, 70, 70, 42, 22, 7, 2, 9, 66, 75, 75, 5, 42, + 29, 17, 10, 19, 5, 64, 67, 69, 62, 80, 74, 67, + 72, 74, 70, 66, 64, 67, 65, 1, 3, 76, 71, 81, + 75, 13, 76, 85, 72, 65, 68, 74, 76, 71, 74, 80, + 85, 88, 2, 3, 14, 0, 68, 4, 4, 4, 67, 64, 1, 0, + 75, 71, 76, 65, 8, 82, 64, 14, 67, 4, 1, 5, 17, + 66, 66, 5, 11, 76, 77, 95, 24, 27, 34, 16, 12, + 19, 14, 14, 16, 8, 12, 12, 64, 64, 71, 66, 2, + 70, 87, 67, 68, 69, 1, 67, 6, 9, 65, 4, 8, 9, 7, + 5, 3, 76, 70, 70, 85, 76, 83, 83, 96, 93, 97, + 99, 76, 75, 99, 66, 66, 75, 86, 79, 80, 86, 80, + 85, 88, 95, 97, 105, 108, 64, 73, 87, 71, 6, 4, + 11, 13, 15, 28, 18, 25, 26, 41, 31, 29, 42, 40, + 47, 20, 6, 71, 80, 93, 107, 117, 126, 9, 42, 35, + 34, 25, 31, 15, 11, 13, 3, 75, 69, 6, 67, 17, + 26, 66, 0, 10, 14, 4, 13, 25, 4, 0, 34, 11, 71, + 90, 104, 126, 126, 126, 126 }, + + { + + 37, + 5, 80, 37, 5, 80, 69, 7, 23, 12, 65, 80, 14, 20, + 52, 14, 22, 67, 12, 21, 64, 73, 64, 85, 101, 2, + 4, 120, 123, 122, 31, 1, 69, 12, 21, 64, 73, 4, + 15, 0, 1, 68, 73, 67, 81, 75, 92, 5, 69, 73, 64, + 77, 74, 88, 11, 0, 65, 69, 6, 3, 22, 0, 0, 0, + 64, 94, 97, 6, 67, 68, 18, 69, 89, 70, 23, 14, + 44, 39, 11, 13, 64, 16, 64, 66, 67, 77, 78, 81, + 81, 24, 1, 9, 74, 67, 73, 70, 78, 64, 72, 71, + 81, 18, 68, 3, 71, 80, 65, 72, 1, 5, 3, 2, 11, + 9, 72, 65, 2, 67, 2, 71, 6, 72, 0, 6, 1, 15, 8, + 1, 72, 5, 68, 72, 67, 85, 71, 6, 0, 7, 7, 7, 27, + 14, 4, 65, 7, 65, 67, 4, 94, 70, 0, 81, 64, 72, + 9, 3, 70, 14, 7, 4, 78, 9, 71, 75, 3, 87, 13, + 19, 13, 14, 17, 15, 8, 16, 16, 66, 5, 9, 3, 1, + 71, 0, 
69, 67, 64, 67, 67, 64, 69, 79, 69, 71, + 76, 76, 81, 5, 9, 3, 2, 66, 69, 67, 67, 75, 82, + 80, 83, 96, 91, 105, 69, 69, 85, 0, 69, 72, 74, + 80, 85, 80, 82, 85, 77, 82, 91, 84, 89, 0, 24, + 18, 11, 6, 9, 2, 66, 65, 66, 5, 33, 21, 15, 8, + 17, 7, 5, 64, 6, 3, 41, 30, 23, 16, 21, 5, 64, + 67, 69, 70, 43, 22, 7, 2, 10, 65, 74, 74, 5, 42, + 29, 17, 10, 19, 5, 0, 66, 68, 62, 79, 73, 66, + 70, 72, 69, 64, 1, 65, 0, 3, 5, 75, 70, 80, 74, + 15, 75, 85, 71, 64, 67, 74, 76, 71, 74, 80, 86, + 89, 2, 3, 15, 0, 68, 4, 4, 4, 67, 64, 1, 0, 75, + 71, 76, 64, 9, 82, 64, 14, 67, 4, 1, 5, 18, 66, + 67, 5, 11, 76, 77, 96, 24, 27, 34, 15, 11, 18, + 12, 12, 14, 6, 10, 10, 66, 66, 72, 68, 64, 73, + 91, 69, 70, 70, 0, 68, 6, 10, 64, 6, 11, 8, 6, + 4, 2, 78, 71, 71, 87, 77, 84, 84, 97, 94, 98, + 99, 76, 75, 100, 67, 67, 76, 87, 80, 81, 87, 81, + 86, 89, 96, 98, 106, 108, 64, 73, 87, 70, 7, 5, + 12, 14, 16, 30, 19, 26, 27, 43, 33, 30, 44, 42, + 47, 19, 4, 73, 82, 95, 110, 119, 126, 10, 43, + 36, 35, 26, 32, 16, 12, 14, 4, 74, 68, 7, 66, + 19, 28, 65, 1, 11, 15, 5, 14, 26, 4, 0, 33, 9, + 73, 92, 107, 126, 126, 126, 126 }, + + { + + 36, + 5, 80, 36, 5, 80, 67, 8, 23, 12, 65, 81, 13, 19, + 52, 14, 24, 67, 13, 22, 64, 74, 64, 86, 102, 1, + 2, 122, 124, 123, 34, 2, 69, 13, 22, 64, 73, 5, + 15, 64, 1, 68, 72, 67, 81, 75, 92, 5, 69, 72, + 64, 77, 74, 88, 11, 0, 65, 69, 6, 3, 22, 0, 0, + 0, 0, 94, 97, 6, 68, 68, 18, 69, 89, 68, 25, 16, + 45, 41, 12, 14, 0, 17, 0, 65, 65, 77, 78, 81, + 80, 24, 1, 9, 73, 67, 72, 70, 77, 64, 72, 71, + 81, 18, 68, 3, 71, 80, 65, 72, 1, 5, 3, 2, 11, + 9, 72, 65, 2, 67, 2, 71, 5, 72, 0, 5, 0, 14, 7, + 0, 71, 5, 68, 73, 68, 85, 72, 6, 0, 7, 7, 7, 28, + 14, 4, 65, 7, 65, 68, 4, 95, 70, 0, 82, 64, 73, + 9, 3, 71, 14, 7, 4, 79, 9, 72, 75, 3, 88, 12, + 19, 13, 14, 17, 15, 8, 16, 16, 66, 5, 9, 3, 1, + 71, 0, 69, 67, 64, 67, 68, 65, 70, 79, 69, 71, + 77, 77, 81, 3, 7, 1, 0, 68, 71, 69, 69, 77, 84, + 82, 85, 98, 93, 106, 69, 69, 86, 64, 70, 73, 75, + 82, 86, 81, 83, 86, 78, 83, 
91, 84, 88, 0, 24, + 18, 11, 6, 9, 2, 66, 65, 65, 6, 33, 21, 15, 8, + 17, 7, 6, 0, 8, 3, 41, 30, 23, 16, 21, 5, 64, + 67, 68, 70, 43, 22, 7, 2, 10, 65, 74, 73, 5, 41, + 28, 16, 9, 19, 5, 0, 66, 68, 62, 78, 72, 65, 69, + 71, 68, 0, 2, 64, 1, 4, 7, 74, 69, 80, 73, 16, + 75, 86, 71, 0, 67, 74, 77, 71, 75, 81, 87, 90, + 1, 3, 15, 0, 69, 3, 4, 4, 68, 65, 1, 0, 76, 71, + 76, 64, 9, 82, 65, 13, 68, 4, 0, 4, 18, 67, 68, + 5, 11, 77, 78, 98, 23, 26, 33, 14, 10, 17, 10, + 10, 12, 4, 7, 7, 69, 68, 73, 71, 67, 76, 95, 72, + 72, 72, 64, 69, 6, 11, 64, 7, 13, 6, 4, 2, 0, + 80, 73, 73, 89, 78, 86, 85, 99, 95, 99, 100, 77, + 75, 101, 68, 68, 78, 89, 82, 83, 88, 83, 88, 90, + 97, 99, 107, 109, 65, 74, 88, 70, 7, 5, 13, 14, + 17, 31, 20, 27, 28, 44, 34, 31, 45, 43, 46, 17, + 2, 75, 85, 98, 113, 122, 126, 10, 43, 36, 35, + 26, 33, 16, 12, 14, 4, 74, 68, 8, 66, 20, 29, + 65, 2, 12, 16, 5, 14, 27, 4, 0, 32, 7, 75, 95, + 110, 126, 126, 126, 126 }, + + { + + 35, + 5, 80, 35, 5, 80, 65, 10, 24, 12, 66, 83, 11, + 18, 52, 14, 26, 67, 14, 23, 64, 75, 64, 87, 103, + 1, 0, 123, 125, 123, 37, 3, 69, 14, 23, 64, 72, + 6, 16, 64, 1, 67, 71, 68, 82, 75, 92, 5, 69, 72, + 64, 77, 74, 88, 12, 0, 65, 69, 7, 3, 22, 0, 0, + 0, 0, 94, 97, 7, 69, 68, 18, 69, 89, 66, 27, 17, + 47, 43, 13, 15, 2, 18, 1, 64, 0, 76, 78, 80, 79, + 24, 1, 9, 73, 67, 72, 69, 75, 64, 73, 72, 82, + 18, 68, 3, 70, 80, 64, 72, 2, 5, 3, 2, 11, 8, + 72, 65, 2, 67, 2, 71, 4, 72, 0, 4, 64, 13, 6, + 64, 71, 6, 68, 74, 68, 84, 72, 6, 0, 7, 7, 7, + 29, 14, 4, 65, 8, 65, 69, 4, 96, 70, 0, 82, 65, + 74, 9, 3, 72, 14, 7, 4, 81, 9, 73, 75, 3, 88, + 11, 19, 13, 14, 17, 15, 8, 16, 16, 66, 4, 9, 3, + 1, 71, 64, 70, 68, 65, 68, 69, 66, 70, 79, 70, + 71, 78, 79, 81, 2, 5, 64, 65, 70, 73, 71, 71, + 79, 86, 83, 86, 100, 95, 107, 69, 69, 86, 65, + 71, 74, 77, 83, 88, 82, 84, 87, 78, 83, 91, 84, + 88, 1, 24, 18, 11, 6, 9, 2, 66, 64, 64, 6, 33, + 21, 15, 8, 18, 8, 7, 1, 10, 3, 41, 30, 23, 16, + 22, 5, 64, 67, 68, 70, 43, 22, 6, 2, 10, 65, 74, + 72, 5, 
41, 28, 15, 8, 19, 5, 0, 65, 67, 62, 77, + 71, 65, 68, 70, 67, 2, 3, 0, 2, 6, 8, 74, 68, + 80, 73, 18, 75, 86, 71, 0, 67, 75, 77, 71, 76, + 82, 88, 91, 1, 2, 15, 64, 69, 3, 3, 4, 69, 65, + 0, 0, 77, 71, 77, 64, 9, 83, 65, 13, 69, 4, 64, + 4, 18, 68, 69, 4, 10, 78, 78, 99, 22, 25, 33, + 13, 9, 15, 8, 8, 10, 1, 5, 5, 72, 71, 74, 74, + 70, 79, 99, 75, 75, 74, 65, 70, 6, 12, 64, 8, + 15, 4, 2, 0, 65, 82, 75, 74, 91, 80, 87, 86, + 100, 97, 100, 101, 77, 76, 102, 69, 70, 79, 91, + 84, 85, 90, 84, 90, 92, 98, 101, 109, 110, 66, + 75, 89, 70, 8, 5, 13, 15, 17, 32, 21, 28, 29, + 45, 35, 32, 47, 44, 46, 16, 0, 78, 87, 101, 116, + 125, 126, 11, 44, 37, 36, 26, 34, 17, 12, 15, 5, + 74, 67, 8, 65, 21, 30, 65, 2, 12, 16, 5, 15, 28, + 4, 0, 31, 5, 78, 98, 113, 126, 126, 126, 126 }, + + { + + 33, + 5, 80, 33, 5, 80, 64, 11, 24, 12, 66, 84, 10, + 16, 52, 14, 29, 68, 15, 23, 64, 76, 64, 88, 104, + 0, 65, 125, 126, 124, 40, 4, 69, 15, 23, 64, 72, + 7, 16, 65, 1, 67, 71, 68, 82, 74, 92, 5, 69, 71, + 65, 78, 74, 88, 12, 1, 65, 68, 7, 3, 22, 0, 0, + 0, 1, 95, 97, 7, 70, 68, 18, 69, 89, 65, 28, 19, + 48, 44, 14, 16, 3, 20, 2, 0, 2, 76, 77, 80, 78, + 24, 1, 9, 72, 67, 71, 69, 74, 65, 73, 72, 82, + 18, 68, 3, 70, 80, 64, 72, 2, 5, 2, 2, 11, 8, + 72, 65, 2, 67, 2, 71, 4, 73, 0, 4, 65, 12, 5, + 65, 70, 6, 68, 75, 69, 84, 73, 6, 64, 7, 7, 7, + 30, 15, 4, 66, 8, 66, 70, 3, 97, 70, 0, 83, 65, + 75, 8, 2, 72, 15, 7, 4, 82, 8, 74, 75, 3, 89, + 11, 19, 13, 14, 17, 15, 8, 16, 15, 66, 4, 9, 3, + 1, 72, 64, 70, 68, 65, 68, 69, 67, 71, 79, 70, + 71, 79, 80, 81, 0, 3, 66, 67, 72, 75, 73, 73, + 81, 88, 85, 88, 103, 97, 108, 69, 70, 87, 66, + 72, 75, 78, 85, 89, 83, 86, 87, 79, 84, 91, 84, + 87, 1, 24, 18, 11, 6, 9, 2, 66, 64, 0, 7, 33, + 21, 15, 8, 18, 8, 7, 1, 12, 3, 41, 30, 23, 16, + 22, 5, 64, 67, 67, 70, 43, 21, 6, 2, 10, 65, 74, + 72, 5, 40, 27, 14, 7, 19, 5, 0, 65, 67, 62, 76, + 70, 64, 66, 69, 66, 3, 5, 1, 4, 7, 10, 73, 68, + 79, 72, 19, 74, 87, 71, 1, 67, 75, 78, 72, 77, + 83, 89, 92, 0, 2, 15, 
64, 70, 2, 3, 4, 70, 66, + 0, 64, 78, 72, 77, 64, 9, 83, 66, 12, 70, 4, 65, + 3, 18, 69, 70, 4, 10, 79, 79, 101, 21, 25, 32, + 12, 7, 14, 6, 6, 8, 64, 2, 2, 75, 73, 76, 77, + 74, 82, 103, 77, 77, 76, 66, 72, 6, 13, 0, 9, + 17, 3, 0, 65, 66, 84, 77, 76, 93, 81, 89, 88, + 102, 98, 101, 102, 78, 76, 103, 71, 71, 81, 92, + 86, 87, 91, 86, 92, 93, 99, 102, 110, 110, 67, + 75, 90, 70, 8, 6, 14, 15, 18, 33, 21, 29, 30, + 46, 36, 33, 48, 45, 45, 14, 65, 80, 90, 104, + 119, 126, 126, 11, 44, 37, 36, 27, 34, 17, 13, + 15, 5, 73, 67, 9, 65, 22, 31, 65, 3, 13, 17, 5, + 15, 29, 4, 0, 29, 3, 80, 101, 116, 126, 126, + 126, 126 }, + + { + + 32, + 5, 80, 32, 5, 80, 1, 13, 24, 12, 67, 86, 8, 15, + 52, 14, 31, 68, 16, 24, 64, 76, 0, 89, 105, 0, + 67, 126, 126, 124, 43, 6, 69, 16, 24, 64, 72, 8, + 16, 65, 1, 67, 70, 68, 83, 74, 92, 5, 69, 70, + 65, 78, 74, 88, 13, 1, 64, 68, 8, 3, 22, 0, 0, + 0, 1, 95, 97, 8, 71, 68, 18, 68, 89, 0, 30, 20, + 50, 46, 16, 17, 4, 21, 3, 1, 4, 75, 77, 79, 77, + 24, 1, 9, 72, 67, 71, 68, 72, 65, 73, 72, 82, + 18, 68, 4, 69, 80, 64, 72, 3, 5, 2, 2, 11, 8, + 71, 65, 3, 67, 2, 71, 3, 73, 0, 3, 66, 12, 4, + 66, 69, 7, 68, 76, 69, 83, 73, 6, 64, 7, 7, 7, + 31, 15, 4, 66, 8, 66, 71, 3, 98, 70, 0, 83, 66, + 76, 8, 2, 73, 15, 7, 4, 83, 8, 75, 75, 3, 89, + 10, 19, 13, 14, 17, 15, 8, 16, 15, 66, 4, 9, 3, + 1, 72, 64, 70, 69, 65, 68, 70, 68, 71, 79, 71, + 71, 80, 81, 81, 64, 1, 67, 68, 74, 77, 75, 75, + 83, 90, 87, 90, 105, 98, 109, 69, 70, 87, 67, + 73, 76, 79, 86, 91, 84, 87, 88, 79, 84, 91, 84, + 87, 1, 24, 18, 11, 6, 9, 3, 65, 0, 1, 7, 33, 21, + 15, 8, 18, 9, 8, 2, 14, 3, 41, 30, 23, 16, 22, + 5, 64, 67, 66, 70, 44, 21, 5, 2, 10, 65, 74, 71, + 5, 40, 26, 13, 6, 19, 5, 0, 65, 66, 62, 75, 69, + 0, 65, 68, 65, 4, 6, 2, 5, 9, 12, 72, 67, 79, + 72, 20, 74, 87, 70, 2, 67, 76, 78, 72, 77, 84, + 90, 93, 64, 1, 15, 65, 70, 2, 3, 4, 71, 67, 0, + 64, 79, 72, 77, 64, 9, 83, 67, 12, 70, 4, 66, 2, + 18, 70, 71, 3, 9, 80, 80, 103, 20, 24, 32, 11, + 6, 12, 4, 4, 6, 67, 64, 0, 
78, 75, 77, 80, 77, + 85, 107, 80, 80, 78, 67, 73, 6, 14, 0, 10, 19, + 1, 64, 67, 68, 86, 79, 77, 95, 82, 91, 89, 104, + 100, 102, 103, 78, 76, 104, 72, 73, 83, 94, 87, + 88, 93, 87, 93, 95, 100, 103, 111, 111, 67, 76, + 91, 69, 9, 6, 14, 16, 19, 34, 22, 30, 31, 48, + 37, 34, 49, 47, 45, 12, 67, 83, 92, 107, 122, + 126, 126, 11, 45, 38, 37, 27, 35, 17, 13, 16, 6, + 73, 67, 10, 64, 23, 32, 64, 4, 13, 17, 5, 16, + 30, 4, 0, 28, 1, 83, 104, 119, 126, 126, 126, + 126 }, + + { + + 31, + 5, 81, 31, 5, 81, 3, 14, 25, 12, 67, 87, 7, 14, + 52, 14, 33, 68, 16, 25, 64, 77, 0, 90, 106, 64, + 68, 126, 126, 125, 46, 7, 69, 16, 25, 64, 71, 9, + 17, 66, 2, 66, 69, 69, 83, 74, 92, 5, 68, 70, + 65, 78, 73, 88, 13, 1, 64, 68, 8, 3, 22, 0, 0, + 0, 2, 95, 97, 8, 71, 69, 18, 68, 88, 2, 32, 22, + 51, 48, 17, 18, 6, 22, 4, 1, 5, 75, 77, 79, 77, + 25, 1, 9, 71, 66, 70, 68, 71, 65, 74, 73, 83, + 18, 68, 4, 69, 80, 0, 72, 3, 5, 2, 3, 11, 7, 71, + 65, 3, 67, 2, 71, 2, 73, 0, 2, 67, 11, 4, 66, + 69, 7, 68, 76, 70, 83, 74, 6, 64, 7, 7, 7, 31, + 15, 4, 66, 9, 66, 72, 3, 98, 71, 0, 84, 66, 77, + 8, 2, 74, 15, 7, 4, 85, 8, 76, 75, 3, 90, 9, 19, + 12, 14, 17, 15, 8, 16, 15, 66, 3, 9, 2, 1, 72, + 65, 71, 69, 66, 69, 71, 69, 72, 79, 71, 70, 81, + 83, 81, 66, 64, 69, 70, 76, 79, 77, 77, 85, 92, + 88, 91, 107, 100, 110, 69, 70, 88, 68, 74, 77, + 81, 88, 92, 85, 88, 89, 80, 85, 91, 84, 86, 2, + 24, 18, 11, 7, 9, 3, 65, 0, 2, 8, 32, 21, 15, 8, + 19, 9, 9, 3, 16, 3, 42, 30, 23, 16, 23, 5, 64, + 66, 66, 70, 44, 21, 5, 2, 10, 64, 73, 70, 5, 39, + 26, 12, 6, 19, 5, 1, 64, 66, 62, 75, 68, 0, 64, + 67, 64, 6, 7, 3, 6, 10, 13, 72, 66, 79, 71, 22, + 74, 88, 70, 2, 67, 76, 79, 72, 78, 85, 91, 94, + 64, 1, 15, 65, 71, 1, 2, 4, 71, 67, 64, 64, 79, + 72, 78, 64, 9, 84, 67, 11, 71, 3, 67, 2, 19, 70, + 72, 3, 9, 80, 80, 104, 19, 23, 31, 10, 5, 11, 2, + 2, 4, 69, 66, 66, 81, 78, 78, 82, 80, 88, 111, + 83, 82, 80, 68, 74, 6, 15, 0, 12, 22, 64, 66, + 69, 70, 88, 81, 79, 97, 84, 92, 90, 105, 101, + 103, 104, 79, 77, 105, 
73, 74, 84, 96, 89, 90, + 94, 89, 95, 96, 102, 105, 113, 112, 68, 77, 92, + 69, 9, 6, 15, 16, 19, 36, 23, 31, 32, 49, 39, + 35, 51, 48, 44, 11, 69, 85, 95, 109, 125, 126, + 126, 12, 45, 38, 37, 27, 36, 18, 13, 16, 6, 73, + 66, 10, 64, 24, 33, 64, 4, 14, 18, 5, 16, 31, 4, + 0, 27, 64, 85, 107, 123, 126, 126, 126, 126 }, + + { + + 30, + 5, 81, 30, 5, 81, 5, 16, 25, 12, 68, 89, 5, 12, + 52, 14, 36, 69, 17, 25, 64, 78, 0, 91, 107, 64, + 70, 126, 126, 125, 49, 8, 69, 17, 25, 64, 71, + 10, 17, 66, 2, 66, 69, 69, 84, 73, 92, 5, 68, + 69, 66, 78, 73, 88, 14, 2, 64, 67, 9, 3, 22, 0, + 0, 0, 2, 95, 97, 9, 72, 69, 18, 68, 88, 3, 34, + 23, 53, 50, 18, 19, 7, 24, 5, 2, 7, 74, 76, 78, + 76, 25, 1, 9, 71, 66, 70, 67, 69, 66, 74, 73, + 83, 18, 68, 4, 68, 80, 0, 72, 4, 5, 1, 3, 11, 7, + 71, 65, 3, 67, 2, 71, 2, 73, 0, 2, 68, 10, 3, + 67, 68, 8, 68, 77, 70, 82, 74, 6, 65, 7, 7, 7, + 32, 16, 4, 66, 9, 66, 73, 3, 99, 71, 0, 84, 67, + 78, 7, 2, 74, 16, 7, 4, 86, 7, 77, 75, 3, 90, 9, + 19, 12, 14, 17, 15, 8, 16, 15, 66, 3, 9, 2, 1, + 72, 65, 71, 70, 66, 69, 71, 70, 72, 79, 72, 70, + 82, 84, 81, 67, 66, 71, 72, 78, 81, 79, 79, 87, + 94, 90, 93, 110, 102, 111, 69, 71, 88, 69, 75, + 78, 82, 89, 94, 86, 89, 89, 80, 85, 91, 84, 86, + 2, 24, 18, 11, 7, 9, 3, 65, 1, 3, 8, 32, 21, 15, + 8, 19, 10, 10, 3, 18, 3, 42, 30, 23, 16, 23, 5, + 64, 66, 65, 70, 44, 20, 4, 2, 10, 64, 73, 70, 5, + 39, 25, 11, 5, 19, 5, 1, 64, 65, 62, 74, 67, 1, + 1, 66, 0, 7, 9, 4, 8, 12, 15, 71, 65, 78, 71, + 23, 73, 88, 70, 3, 67, 77, 79, 73, 79, 86, 92, + 95, 65, 0, 15, 66, 71, 1, 2, 4, 72, 68, 64, 65, + 80, 72, 78, 64, 9, 84, 68, 11, 72, 3, 68, 1, 19, + 71, 73, 2, 8, 81, 81, 106, 18, 23, 31, 9, 3, 9, + 0, 0, 2, 72, 69, 68, 84, 80, 80, 85, 84, 91, + 115, 85, 85, 82, 69, 75, 6, 16, 1, 13, 24, 65, + 68, 71, 71, 90, 83, 80, 99, 85, 94, 92, 107, + 103, 104, 105, 79, 77, 106, 75, 76, 86, 97, 91, + 92, 96, 90, 97, 98, 103, 106, 114, 112, 69, 77, + 93, 69, 10, 7, 15, 17, 20, 37, 24, 32, 33, 50, + 40, 36, 52, 49, 44, 9, 71, 88, 
97, 112, 126, + 126, 126, 12, 46, 39, 38, 28, 37, 18, 14, 17, 7, + 72, 66, 11, 0, 25, 34, 64, 5, 14, 18, 5, 17, 32, + 4, 0, 25, 66, 88, 110, 126, 126, 126, 126, 126 }, + + { + + 28, + 4, 81, 28, 4, 81, 6, 17, 25, 12, 68, 90, 4, 11, + 52, 14, 38, 69, 18, 26, 64, 79, 0, 92, 109, 65, + 72, 126, 126, 126, 51, 9, 69, 18, 26, 64, 71, + 11, 17, 67, 2, 66, 68, 70, 84, 73, 93, 5, 68, + 69, 66, 79, 73, 88, 14, 2, 64, 67, 9, 3, 22, 0, + 0, 0, 3, 96, 97, 9, 73, 69, 18, 68, 88, 5, 35, + 25, 54, 51, 19, 20, 8, 25, 6, 3, 9, 74, 76, 78, + 75, 25, 1, 9, 70, 66, 69, 67, 68, 66, 75, 74, + 84, 18, 68, 4, 68, 80, 0, 72, 4, 4, 1, 3, 11, 6, + 71, 65, 3, 67, 1, 71, 1, 74, 0, 1, 70, 9, 2, 68, + 68, 8, 68, 78, 71, 82, 75, 5, 65, 7, 7, 7, 33, + 16, 4, 67, 9, 67, 74, 2, 100, 71, 0, 85, 67, 79, + 7, 1, 75, 16, 7, 4, 88, 7, 78, 75, 3, 91, 8, 18, + 12, 14, 17, 14, 7, 16, 14, 67, 2, 9, 2, 0, 73, + 66, 72, 70, 67, 70, 72, 71, 73, 79, 72, 70, 83, + 86, 81, 69, 68, 73, 74, 80, 84, 81, 81, 89, 96, + 92, 95, 112, 104, 112, 69, 71, 89, 70, 77, 80, + 84, 91, 95, 88, 91, 90, 81, 86, 91, 84, 85, 2, + 24, 18, 11, 7, 9, 3, 65, 1, 4, 9, 32, 21, 15, 8, + 19, 10, 10, 4, 19, 3, 42, 30, 23, 15, 23, 5, 64, + 66, 65, 70, 44, 20, 4, 2, 10, 64, 73, 69, 5, 38, + 24, 10, 4, 18, 5, 1, 64, 65, 62, 73, 66, 1, 2, + 65, 0, 8, 10, 5, 9, 13, 16, 71, 65, 78, 70, 24, + 73, 89, 70, 3, 67, 77, 80, 73, 80, 87, 94, 96, + 66, 0, 15, 66, 72, 0, 1, 3, 73, 69, 65, 65, 81, + 73, 79, 64, 9, 85, 69, 10, 73, 3, 69, 0, 19, 72, + 74, 2, 8, 82, 82, 108, 17, 22, 30, 7, 2, 8, 65, + 65, 64, 74, 72, 71, 87, 83, 81, 88, 87, 94, 119, + 88, 87, 84, 71, 77, 6, 16, 1, 14, 26, 67, 70, + 73, 73, 93, 85, 82, 101, 87, 96, 93, 109, 104, + 105, 106, 80, 78, 107, 76, 77, 88, 99, 93, 94, + 97, 92, 99, 99, 104, 108, 116, 113, 70, 78, 94, + 69, 10, 7, 16, 17, 20, 38, 24, 33, 34, 51, 41, + 37, 53, 50, 43, 7, 73, 90, 100, 115, 126, 126, + 126, 12, 46, 39, 38, 28, 37, 18, 14, 17, 7, 72, + 66, 11, 0, 26, 35, 64, 5, 15, 19, 5, 17, 32, 4, + 64, 24, 68, 90, 113, 126, 
126, 126, 126, 126 }, + + { + + 27, + 4, 81, 27, 4, 81, 8, 18, 26, 12, 68, 91, 3, 10, + 52, 14, 40, 69, 19, 27, 64, 79, 1, 93, 110, 66, + 74, 126, 126, 126, 54, 11, 69, 19, 27, 64, 70, + 12, 18, 68, 2, 65, 67, 70, 84, 73, 93, 5, 68, + 68, 66, 79, 73, 88, 14, 2, 0, 67, 9, 3, 22, 0, + 0, 0, 4, 96, 97, 9, 74, 69, 18, 67, 88, 7, 37, + 27, 55, 53, 21, 21, 10, 26, 8, 4, 11, 74, 76, + 78, 74, 25, 1, 9, 69, 66, 68, 66, 67, 66, 75, + 74, 84, 18, 68, 5, 67, 79, 1, 72, 4, 4, 1, 3, + 11, 6, 70, 65, 4, 67, 1, 70, 0, 74, 0, 0, 71, 9, + 1, 69, 67, 8, 67, 79, 72, 82, 76, 5, 65, 8, 7, + 7, 34, 16, 4, 67, 10, 67, 74, 2, 101, 71, 0, 86, + 67, 80, 7, 1, 76, 16, 7, 4, 89, 7, 78, 75, 3, + 92, 7, 18, 12, 14, 17, 14, 7, 16, 14, 67, 2, 9, + 2, 0, 73, 66, 72, 70, 67, 70, 73, 71, 73, 79, + 72, 70, 84, 87, 81, 71, 69, 74, 75, 82, 86, 82, + 82, 91, 98, 93, 96, 114, 105, 113, 69, 71, 90, + 71, 78, 81, 85, 92, 96, 89, 92, 91, 82, 87, 91, + 83, 84, 3, 25, 18, 11, 7, 10, 4, 64, 2, 5, 10, + 32, 21, 15, 8, 20, 10, 11, 5, 21, 3, 42, 30, 23, + 15, 24, 5, 64, 66, 64, 70, 45, 20, 4, 2, 11, 64, + 73, 68, 5, 38, 24, 10, 3, 18, 5, 1, 0, 64, 62, + 72, 65, 2, 3, 0, 1, 10, 11, 7, 10, 14, 18, 70, + 64, 78, 69, 26, 73, 90, 69, 4, 67, 77, 81, 73, + 80, 88, 95, 97, 66, 0, 15, 66, 72, 64, 1, 3, 74, + 69, 65, 65, 82, 73, 79, 0, 10, 85, 69, 9, 73, 3, + 69, 0, 19, 73, 75, 2, 8, 83, 82, 109, 17, 21, + 29, 6, 1, 7, 67, 67, 66, 76, 74, 74, 89, 85, 82, + 91, 90, 97, 123, 91, 89, 85, 72, 78, 6, 17, 1, + 15, 28, 69, 71, 75, 75, 95, 86, 83, 103, 88, 97, + 94, 110, 105, 106, 106, 80, 78, 108, 77, 78, 89, + 101, 94, 95, 98, 93, 100, 100, 105, 109, 117, + 114, 70, 79, 94, 68, 10, 7, 17, 18, 21, 39, 25, + 34, 35, 53, 42, 38, 55, 52, 42, 6, 75, 92, 103, + 118, 126, 126, 126, 13, 46, 39, 39, 28, 38, 19, + 14, 18, 8, 72, 65, 12, 0, 27, 37, 0, 6, 16, 20, + 5, 18, 33, 4, 64, 23, 70, 92, 115, 126, 126, + 126, 126, 126 }, + + { + + 26, + 4, 81, 26, 4, 81, 10, 20, 26, 12, 69, 93, 1, 8, + 52, 14, 43, 70, 20, 27, 64, 80, 1, 94, 111, 66, 
+ 76, 126, 126, 126, 57, 12, 69, 20, 27, 64, 70, + 13, 18, 68, 2, 65, 67, 70, 85, 72, 93, 5, 68, + 67, 67, 79, 73, 88, 15, 3, 0, 66, 10, 3, 22, 0, + 0, 0, 4, 96, 97, 10, 75, 69, 18, 67, 88, 8, 39, + 28, 57, 55, 22, 22, 11, 28, 9, 5, 13, 73, 75, + 77, 73, 25, 1, 9, 69, 66, 68, 66, 65, 67, 75, + 74, 84, 18, 68, 5, 67, 79, 1, 72, 5, 4, 0, 3, + 11, 6, 70, 65, 4, 67, 1, 70, 0, 74, 0, 0, 72, 8, + 0, 70, 66, 9, 67, 80, 72, 81, 76, 5, 66, 8, 7, + 7, 35, 17, 4, 67, 10, 67, 75, 2, 102, 71, 0, 86, + 68, 81, 6, 1, 76, 17, 7, 4, 90, 6, 79, 75, 3, + 92, 7, 18, 12, 14, 17, 14, 7, 16, 14, 67, 2, 9, + 2, 0, 73, 66, 72, 71, 67, 70, 73, 72, 74, 79, + 73, 70, 85, 88, 81, 72, 71, 76, 77, 84, 88, 84, + 84, 93, 100, 95, 98, 117, 107, 114, 69, 72, 90, + 72, 79, 82, 86, 94, 98, 90, 93, 91, 82, 87, 91, + 83, 84, 3, 25, 18, 11, 7, 10, 4, 64, 2, 6, 10, + 32, 21, 15, 8, 20, 11, 12, 5, 23, 3, 42, 30, 23, + 15, 24, 5, 64, 66, 0, 70, 45, 19, 3, 2, 11, 64, + 73, 68, 5, 37, 23, 9, 2, 18, 5, 1, 0, 64, 62, + 71, 64, 3, 5, 1, 2, 11, 13, 8, 12, 16, 20, 69, + 0, 77, 69, 27, 72, 90, 69, 5, 67, 78, 81, 74, + 81, 89, 96, 98, 67, 64, 15, 67, 73, 64, 1, 3, + 75, 70, 65, 66, 83, 73, 79, 0, 10, 85, 70, 9, + 74, 3, 70, 64, 19, 74, 76, 1, 7, 84, 83, 111, + 16, 21, 29, 5, 64, 5, 69, 69, 68, 79, 77, 76, + 92, 87, 84, 94, 94, 100, 126, 93, 92, 87, 73, + 79, 6, 18, 2, 16, 30, 70, 73, 77, 76, 97, 88, + 85, 105, 89, 99, 96, 112, 107, 107, 107, 81, 78, + 109, 79, 80, 91, 102, 96, 97, 100, 95, 102, 102, + 106, 110, 118, 114, 71, 79, 95, 68, 11, 8, 17, + 18, 22, 40, 26, 35, 36, 54, 43, 39, 56, 53, 42, + 4, 77, 95, 105, 121, 126, 126, 126, 13, 47, 40, + 39, 29, 39, 19, 15, 18, 8, 71, 65, 13, 1, 28, + 38, 0, 7, 16, 20, 5, 18, 34, 4, 64, 21, 72, 95, + 118, 126, 126, 126, 126, 126 }, + + { + + 25, + 4, 82, 25, 4, 82, 12, 21, 27, 12, 69, 94, 0, 7, + 52, 14, 45, 70, 20, 28, 64, 81, 1, 95, 112, 67, + 77, 126, 126, 126, 60, 13, 69, 20, 28, 64, 69, + 14, 19, 69, 3, 64, 66, 71, 85, 72, 93, 5, 67, + 67, 67, 79, 72, 88, 15, 3, 0, 
66, 10, 3, 22, 0, + 0, 0, 5, 96, 97, 10, 75, 70, 18, 67, 87, 10, 41, + 30, 58, 57, 23, 23, 13, 29, 10, 5, 14, 73, 75, + 77, 73, 26, 1, 9, 68, 65, 67, 65, 64, 67, 76, + 75, 85, 18, 68, 5, 66, 79, 2, 72, 5, 4, 0, 4, + 11, 5, 70, 65, 4, 67, 1, 70, 64, 74, 0, 64, 73, + 7, 0, 70, 66, 9, 67, 80, 73, 81, 77, 5, 66, 8, + 7, 7, 35, 17, 4, 67, 11, 67, 76, 2, 102, 72, 0, + 87, 68, 82, 6, 1, 77, 17, 7, 4, 92, 6, 80, 75, + 3, 93, 6, 18, 11, 14, 17, 14, 7, 16, 14, 67, 1, + 9, 1, 0, 73, 67, 73, 71, 68, 71, 74, 73, 74, 79, + 73, 69, 86, 90, 81, 74, 73, 78, 79, 86, 90, 86, + 86, 95, 102, 96, 99, 119, 109, 115, 69, 72, 91, + 73, 80, 83, 88, 95, 99, 91, 94, 92, 83, 88, 91, + 83, 83, 4, 25, 18, 11, 8, 10, 4, 64, 3, 7, 11, + 31, 21, 15, 8, 21, 11, 13, 6, 25, 3, 43, 30, 23, + 15, 25, 5, 64, 65, 0, 70, 45, 19, 3, 2, 11, 0, + 72, 67, 5, 37, 23, 8, 2, 18, 5, 2, 1, 0, 62, 71, + 0, 3, 6, 2, 3, 13, 14, 9, 13, 17, 21, 69, 1, 77, + 68, 29, 72, 91, 69, 5, 67, 78, 82, 74, 82, 90, + 97, 99, 67, 64, 15, 67, 73, 65, 0, 3, 75, 70, + 66, 66, 83, 73, 80, 0, 10, 86, 70, 8, 75, 2, 71, + 64, 20, 74, 77, 1, 7, 84, 83, 112, 15, 20, 28, + 4, 65, 4, 71, 71, 70, 81, 79, 79, 95, 90, 85, + 96, 97, 103, 126, 96, 94, 89, 74, 80, 6, 19, 2, + 18, 33, 72, 75, 79, 78, 99, 90, 86, 107, 91, + 100, 97, 113, 108, 108, 108, 81, 79, 110, 80, + 81, 92, 104, 98, 99, 101, 96, 104, 103, 108, + 112, 120, 115, 72, 80, 96, 68, 11, 8, 18, 19, + 22, 42, 27, 36, 37, 55, 45, 40, 58, 54, 41, 3, + 79, 97, 108, 123, 126, 126, 126, 14, 47, 40, 40, + 29, 40, 20, 15, 19, 9, 71, 64, 13, 1, 29, 39, 0, + 7, 17, 21, 5, 19, 35, 4, 64, 20, 74, 97, 121, + 126, 126, 126, 126, 126 }, + + { + + 23, + 4, 82, 23, 4, 82, 13, 23, 27, 12, 70, 96, 65, 6, + 52, 14, 47, 70, 21, 29, 64, 82, 1, 96, 113, 67, + 79, 126, 126, 126, 62, 14, 69, 21, 29, 64, 69, + 15, 19, 69, 3, 64, 65, 71, 86, 72, 93, 5, 67, + 66, 67, 80, 72, 88, 16, 3, 0, 66, 11, 3, 22, 0, + 0, 0, 5, 97, 97, 11, 76, 70, 18, 67, 87, 12, 42, + 31, 60, 58, 24, 24, 14, 30, 11, 6, 16, 72, 75, + 76, 72, 
26, 1, 9, 68, 65, 67, 65, 1, 67, 76, 75, + 85, 18, 68, 5, 66, 79, 2, 72, 6, 4, 0, 4, 11, 5, + 70, 65, 4, 67, 1, 70, 65, 75, 0, 65, 74, 6, 64, + 71, 65, 10, 67, 81, 73, 80, 77, 5, 66, 8, 7, 7, + 36, 17, 4, 68, 11, 68, 77, 1, 103, 72, 0, 87, + 69, 83, 6, 0, 78, 17, 7, 4, 93, 6, 81, 75, 3, + 93, 5, 18, 11, 14, 17, 14, 7, 16, 13, 67, 1, 9, + 1, 0, 74, 67, 73, 72, 68, 71, 75, 74, 75, 79, + 74, 69, 87, 91, 81, 75, 75, 80, 81, 88, 92, 88, + 88, 97, 104, 98, 101, 121, 111, 116, 69, 72, 91, + 74, 81, 84, 89, 97, 101, 92, 96, 93, 83, 88, 91, + 83, 83, 4, 25, 18, 11, 8, 10, 4, 64, 3, 8, 11, + 31, 21, 15, 8, 21, 12, 13, 7, 27, 3, 43, 30, 23, + 15, 25, 5, 64, 65, 1, 70, 45, 19, 2, 2, 11, 0, + 72, 66, 5, 36, 22, 7, 1, 18, 5, 2, 1, 0, 62, 70, + 1, 4, 7, 3, 4, 14, 15, 10, 14, 19, 23, 68, 1, + 77, 68, 30, 72, 91, 69, 6, 67, 79, 82, 74, 83, + 91, 98, 100, 68, 65, 15, 68, 74, 65, 0, 3, 76, + 71, 66, 66, 84, 74, 80, 0, 10, 86, 71, 8, 76, 2, + 72, 65, 20, 75, 78, 0, 6, 85, 84, 114, 14, 19, + 28, 3, 66, 2, 73, 73, 72, 84, 82, 81, 98, 92, + 86, 99, 100, 106, 126, 99, 97, 91, 75, 82, 6, + 20, 2, 19, 35, 74, 77, 81, 80, 101, 92, 88, 109, + 92, 102, 98, 115, 110, 109, 109, 82, 79, 111, + 81, 83, 94, 106, 100, 101, 103, 98, 106, 105, + 109, 113, 121, 116, 73, 81, 97, 68, 12, 8, 18, + 19, 23, 43, 27, 37, 38, 56, 46, 41, 59, 55, 41, + 1, 81, 100, 110, 126, 126, 126, 126, 14, 48, 41, + 40, 29, 40, 20, 15, 19, 9, 71, 64, 14, 2, 30, + 40, 0, 8, 17, 21, 5, 19, 36, 4, 64, 19, 76, 100, + 124, 126, 126, 126, 126, 126 }, + + { + + 22, + 4, 82, 22, 4, 82, 15, 24, 27, 12, 70, 97, 66, 4, + 52, 14, 50, 71, 22, 29, 64, 82, 2, 97, 114, 68, + 81, 126, 126, 126, 62, 16, 69, 22, 29, 64, 69, + 16, 19, 70, 3, 64, 65, 71, 86, 71, 93, 5, 67, + 65, 68, 80, 72, 88, 16, 4, 1, 65, 11, 3, 22, 0, + 0, 0, 6, 97, 97, 11, 77, 70, 18, 66, 87, 13, 44, + 33, 61, 60, 26, 25, 15, 32, 12, 7, 18, 72, 74, + 76, 71, 26, 1, 9, 67, 65, 66, 64, 2, 68, 76, 75, + 85, 18, 68, 6, 65, 79, 2, 72, 6, 4, 64, 4, 11, + 5, 69, 65, 5, 67, 1, 70, 
65, 75, 0, 65, 75, 6, + 65, 72, 64, 10, 67, 82, 74, 80, 78, 5, 67, 8, 7, + 7, 37, 18, 4, 68, 11, 68, 78, 1, 104, 72, 0, 88, + 69, 84, 5, 0, 78, 18, 7, 4, 94, 5, 82, 75, 3, + 94, 5, 18, 11, 14, 17, 14, 7, 16, 13, 67, 1, 9, + 1, 0, 74, 67, 73, 72, 68, 71, 75, 75, 75, 79, + 74, 69, 88, 92, 81, 77, 77, 81, 82, 90, 94, 90, + 90, 99, 106, 100, 103, 124, 112, 117, 69, 73, + 92, 75, 82, 85, 90, 98, 102, 93, 97, 93, 84, 89, + 91, 83, 82, 4, 25, 18, 11, 8, 10, 5, 0, 4, 9, + 12, 31, 21, 15, 8, 21, 12, 14, 7, 29, 3, 43, 30, + 23, 15, 25, 5, 64, 65, 2, 70, 46, 18, 2, 2, 11, + 0, 72, 66, 5, 36, 21, 6, 0, 18, 5, 2, 1, 1, 62, + 69, 2, 5, 9, 4, 5, 15, 17, 11, 16, 20, 25, 67, + 2, 76, 67, 31, 71, 92, 68, 7, 67, 79, 83, 75, + 83, 92, 99, 101, 69, 65, 15, 68, 74, 66, 0, 3, + 77, 72, 66, 67, 85, 74, 80, 0, 10, 86, 72, 7, + 76, 2, 73, 66, 20, 76, 79, 0, 6, 86, 85, 116, + 13, 19, 27, 2, 68, 1, 75, 75, 74, 86, 85, 84, + 101, 94, 88, 102, 104, 109, 126, 101, 99, 93, + 76, 83, 6, 21, 3, 20, 37, 75, 78, 83, 81, 103, + 94, 89, 111, 93, 104, 100, 117, 111, 110, 110, + 82, 79, 112, 83, 84, 96, 107, 101, 102, 104, 99, + 107, 106, 110, 114, 122, 116, 73, 81, 98, 67, + 12, 9, 19, 20, 24, 44, 28, 38, 39, 58, 47, 42, + 60, 57, 40, 64, 83, 102, 113, 126, 126, 126, + 126, 14, 48, 41, 41, 30, 41, 20, 16, 20, 10, 70, + 64, 15, 2, 31, 41, 1, 9, 18, 22, 5, 20, 37, 4, + 64, 17, 78, 102, 126, 126, 126, 126, 126, 126 }, + + { + + 21, + 4, 82, 21, 4, 82, 17, 26, 28, 12, 71, 99, 68, 3, + 52, 14, 52, 71, 23, 30, 64, 83, 2, 98, 115, 68, + 83, 126, 126, 126, 62, 17, 69, 23, 30, 64, 68, + 17, 20, 70, 3, 0, 64, 72, 87, 71, 93, 5, 67, 65, + 68, 80, 72, 88, 17, 4, 1, 65, 12, 3, 22, 0, 0, + 0, 6, 97, 97, 12, 78, 70, 18, 66, 87, 15, 46, + 34, 62, 62, 27, 26, 17, 33, 13, 8, 20, 71, 74, + 75, 70, 26, 1, 9, 67, 65, 66, 64, 4, 68, 77, 76, + 86, 18, 68, 6, 65, 79, 3, 72, 7, 4, 64, 4, 11, + 4, 69, 65, 5, 67, 1, 70, 66, 75, 0, 66, 76, 5, + 66, 73, 64, 11, 67, 83, 74, 79, 78, 5, 67, 8, 7, + 7, 38, 18, 4, 68, 12, 68, 79, 1, 
105, 72, 0, 88, + 70, 85, 5, 0, 79, 18, 7, 4, 96, 5, 83, 75, 3, + 94, 4, 18, 11, 14, 17, 14, 7, 16, 13, 67, 0, 9, + 1, 0, 74, 68, 74, 73, 69, 72, 76, 76, 76, 79, + 75, 69, 89, 94, 81, 78, 79, 83, 84, 92, 96, 92, + 92, 101, 108, 101, 104, 126, 114, 118, 69, 73, + 92, 76, 83, 86, 92, 100, 104, 94, 98, 94, 84, + 89, 91, 83, 82, 5, 25, 18, 11, 8, 10, 5, 0, 4, + 10, 12, 31, 21, 15, 8, 22, 13, 15, 8, 31, 3, 43, + 30, 23, 15, 26, 5, 64, 65, 2, 70, 46, 18, 1, 2, + 11, 0, 72, 65, 5, 35, 21, 5, 64, 18, 5, 2, 2, 1, + 62, 68, 3, 5, 10, 5, 6, 17, 18, 12, 17, 22, 26, + 67, 3, 76, 67, 33, 71, 92, 68, 7, 67, 80, 83, + 75, 84, 93, 100, 102, 69, 66, 15, 69, 75, 66, + 64, 3, 78, 72, 67, 67, 86, 74, 81, 0, 10, 87, + 72, 7, 77, 2, 74, 66, 20, 77, 80, 64, 5, 87, 85, + 117, 12, 18, 27, 1, 69, 64, 77, 77, 76, 89, 87, + 86, 104, 97, 89, 105, 107, 112, 126, 104, 102, + 95, 77, 84, 6, 22, 3, 21, 39, 77, 80, 85, 83, + 105, 96, 91, 113, 95, 105, 101, 118, 113, 111, + 111, 83, 80, 113, 84, 86, 97, 109, 103, 104, + 106, 101, 109, 108, 111, 116, 124, 117, 74, 82, + 99, 67, 13, 9, 19, 20, 24, 45, 29, 39, 40, 59, + 48, 43, 62, 58, 40, 65, 85, 105, 115, 126, 126, + 126, 126, 15, 49, 42, 41, 30, 42, 21, 16, 20, + 10, 70, 0, 15, 3, 32, 42, 1, 9, 18, 22, 5, 20, + 38, 4, 64, 16, 80, 105, 126, 126, 126, 126, 126, + 126 }, + + { + + 20, + 4, 82, 20, 4, 82, 19, 27, 28, 12, 71, 100, 69, + 2, 52, 14, 54, 71, 24, 31, 64, 84, 2, 99, 116, + 69, 85, 126, 126, 126, 62, 18, 69, 24, 31, 64, + 68, 18, 20, 71, 3, 0, 0, 72, 87, 71, 93, 5, 67, + 64, 68, 80, 72, 88, 17, 4, 1, 65, 12, 3, 22, 0, + 0, 0, 7, 97, 97, 12, 79, 70, 18, 66, 87, 17, 48, + 36, 62, 62, 28, 27, 18, 34, 14, 9, 22, 71, 74, + 75, 69, 26, 1, 9, 66, 65, 65, 0, 5, 68, 77, 76, + 86, 18, 68, 6, 64, 79, 3, 72, 7, 4, 64, 4, 11, + 4, 69, 65, 5, 67, 1, 70, 67, 75, 0, 67, 77, 4, + 67, 74, 0, 11, 67, 84, 75, 79, 79, 5, 67, 8, 7, + 7, 39, 18, 4, 68, 12, 68, 80, 1, 106, 72, 0, 89, + 70, 86, 5, 0, 80, 18, 7, 4, 97, 5, 84, 75, 3, + 95, 3, 18, 11, 14, 17, 14, 7, 16, 
13, 67, 0, 9, + 1, 0, 74, 68, 74, 73, 69, 72, 77, 77, 76, 79, + 75, 69, 90, 95, 81, 80, 81, 85, 86, 94, 98, 94, + 94, 103, 110, 103, 106, 126, 116, 119, 69, 73, + 93, 77, 84, 87, 93, 101, 105, 95, 99, 95, 85, + 90, 91, 83, 81, 5, 25, 18, 11, 8, 10, 5, 0, 5, + 11, 13, 31, 21, 15, 8, 22, 13, 16, 9, 33, 3, 43, + 30, 23, 15, 26, 5, 64, 65, 3, 70, 46, 18, 1, 2, + 11, 0, 72, 64, 5, 35, 20, 4, 65, 18, 5, 2, 2, 2, + 62, 67, 4, 6, 11, 6, 7, 18, 19, 13, 18, 23, 28, + 66, 4, 76, 66, 34, 71, 93, 68, 8, 67, 80, 84, + 75, 85, 94, 101, 103, 70, 66, 15, 69, 75, 67, + 64, 3, 79, 73, 67, 67, 87, 74, 81, 0, 10, 87, + 73, 6, 78, 2, 75, 67, 20, 78, 81, 64, 5, 88, 86, + 119, 11, 17, 26, 0, 70, 65, 79, 79, 78, 91, 90, + 89, 107, 99, 90, 108, 110, 115, 126, 107, 104, + 97, 78, 85, 6, 23, 3, 22, 41, 79, 82, 87, 85, + 107, 98, 92, 115, 96, 107, 102, 120, 114, 112, + 112, 83, 80, 114, 85, 87, 99, 111, 105, 106, + 107, 102, 111, 109, 112, 117, 125, 118, 75, 83, + 100, 67, 13, 9, 20, 21, 25, 46, 30, 40, 41, 60, + 49, 44, 62, 59, 39, 67, 87, 107, 118, 126, 126, + 126, 126, 15, 49, 42, 42, 30, 43, 21, 16, 21, + 11, 70, 0, 16, 3, 33, 43, 1, 10, 19, 23, 5, 21, + 39, 4, 64, 15, 82, 107, 126, 126, 126, 126, 126, + 126 }, + + { + + 18, + 3, 83, 18, 3, 83, 20, 28, 28, 12, 72, 102, 71, + 0, 51, 14, 56, 72, 24, 31, 65, 85, 2, 101, 118, + 70, 87, 126, 126, 126, 62, 19, 70, 24, 31, 65, + 68, 19, 20, 72, 3, 0, 0, 73, 88, 71, 94, 5, 67, + 64, 69, 81, 72, 88, 17, 4, 1, 65, 12, 2, 22, 0, + 0, 0, 7, 98, 97, 12, 80, 71, 18, 66, 87, 18, 49, + 37, 62, 62, 29, 28, 19, 35, 15, 9, 23, 71, 74, + 75, 69, 26, 1, 9, 66, 65, 65, 0, 6, 69, 78, 77, + 87, 18, 68, 6, 64, 79, 3, 72, 7, 3, 65, 4, 10, + 3, 69, 66, 5, 67, 0, 70, 68, 76, 64, 68, 79, 3, + 68, 75, 0, 11, 67, 85, 76, 79, 80, 4, 68, 8, 7, + 7, 39, 18, 4, 69, 12, 69, 81, 0, 107, 73, 64, + 90, 71, 87, 4, 64, 81, 18, 7, 4, 99, 4, 85, 75, + 3, 96, 2, 17, 10, 14, 17, 13, 6, 16, 12, 68, 64, + 9, 0, 64, 75, 69, 75, 74, 70, 73, 78, 78, 77, + 79, 76, 69, 91, 97, 81, 82, 83, 
87, 88, 96, 101, + 96, 96, 105, 112, 105, 108, 126, 118, 121, 70, + 74, 94, 78, 86, 89, 95, 103, 107, 97, 101, 96, + 86, 91, 91, 83, 81, 5, 25, 18, 11, 8, 10, 5, 0, + 5, 12, 13, 30, 21, 15, 8, 22, 13, 16, 9, 34, 2, + 43, 30, 22, 14, 26, 5, 64, 65, 3, 70, 46, 17, 0, + 1, 11, 0, 72, 64, 5, 34, 19, 3, 66, 17, 5, 2, 2, + 2, 62, 67, 5, 6, 12, 7, 7, 19, 20, 14, 19, 24, + 29, 66, 4, 76, 66, 35, 71, 94, 68, 8, 67, 81, + 85, 76, 86, 95, 103, 105, 71, 67, 15, 70, 76, + 68, 65, 2, 80, 74, 68, 68, 88, 75, 82, 0, 10, + 88, 74, 5, 79, 1, 76, 68, 20, 79, 83, 65, 4, 89, + 87, 121, 10, 16, 25, 65, 72, 67, 81, 81, 81, 94, + 93, 92, 110, 102, 92, 111, 114, 118, 126, 110, + 107, 99, 80, 87, 6, 23, 3, 23, 43, 81, 84, 89, + 87, 110, 100, 94, 118, 98, 109, 104, 122, 116, + 113, 113, 84, 81, 116, 87, 89, 101, 113, 107, + 108, 109, 104, 113, 111, 114, 119, 126, 119, 76, + 84, 101, 67, 13, 9, 20, 21, 25, 47, 30, 41, 41, + 61, 50, 45, 62, 60, 38, 69, 90, 110, 121, 126, + 126, 126, 126, 15, 49, 42, 42, 30, 43, 21, 16, + 21, 11, 70, 0, 16, 3, 34, 44, 1, 10, 19, 23, 5, + 21, 39, 4, 65, 13, 85, 110, 126, 126, 126, 126, + 126, 126 }, + + { + + 17, + 3, 83, 17, 3, 83, 22, 30, 29, 13, 72, 103, 72, + 64, 51, 14, 59, 72, 25, 32, 65, 85, 3, 102, 119, + 70, 88, 126, 126, 126, 62, 21, 70, 25, 32, 65, + 67, 21, 21, 72, 4, 1, 1, 73, 88, 70, 94, 5, 66, + 0, 69, 81, 71, 88, 18, 5, 2, 64, 13, 2, 22, 0, + 0, 0, 8, 98, 97, 13, 80, 71, 18, 65, 86, 20, 51, + 39, 62, 62, 31, 29, 21, 37, 17, 10, 25, 70, 73, + 74, 68, 27, 2, 10, 65, 64, 64, 1, 8, 69, 78, 77, + 87, 19, 68, 7, 0, 78, 4, 71, 8, 3, 65, 5, 10, 3, + 68, 66, 6, 67, 0, 69, 68, 76, 64, 68, 80, 3, 68, + 75, 1, 12, 66, 85, 76, 78, 80, 4, 68, 9, 7, 7, + 40, 19, 5, 69, 13, 69, 81, 0, 107, 73, 64, 90, + 71, 88, 4, 64, 81, 19, 8, 4, 100, 4, 85, 74, 3, + 96, 2, 17, 10, 14, 17, 13, 6, 16, 12, 68, 64, 9, + 0, 64, 75, 69, 75, 74, 70, 73, 78, 78, 77, 78, + 76, 68, 91, 98, 80, 83, 84, 88, 89, 98, 103, 97, + 97, 107, 113, 106, 109, 126, 119, 122, 70, 74, + 94, 79, 87, 
90, 96, 104, 108, 98, 102, 96, 86, + 91, 90, 82, 80, 6, 26, 18, 11, 9, 11, 6, 1, 6, + 14, 14, 30, 21, 15, 8, 23, 14, 17, 10, 36, 2, + 44, 31, 22, 14, 27, 5, 64, 64, 4, 70, 47, 17, 0, + 1, 12, 1, 71, 0, 5, 34, 19, 3, 66, 17, 5, 3, 3, + 3, 62, 66, 6, 7, 14, 9, 8, 21, 22, 16, 21, 26, + 31, 65, 5, 75, 65, 37, 70, 94, 67, 9, 66, 81, + 85, 76, 86, 95, 104, 106, 71, 67, 16, 70, 76, + 68, 65, 2, 80, 74, 68, 68, 88, 75, 82, 1, 11, + 88, 74, 5, 79, 1, 76, 68, 21, 79, 84, 65, 4, 89, + 87, 122, 10, 16, 25, 66, 73, 68, 83, 83, 83, 96, + 95, 94, 112, 104, 93, 113, 117, 121, 126, 112, + 109, 100, 81, 88, 6, 24, 4, 25, 46, 82, 85, 90, + 88, 112, 101, 95, 120, 99, 110, 105, 123, 117, + 114, 113, 84, 81, 117, 88, 90, 102, 114, 108, + 109, 110, 105, 114, 112, 115, 120, 126, 119, 76, + 84, 101, 66, 14, 10, 21, 22, 26, 49, 31, 42, 42, + 62, 52, 46, 62, 62, 38, 70, 92, 112, 123, 126, + 126, 126, 126, 16, 50, 43, 43, 31, 44, 22, 17, + 22, 12, 69, 1, 17, 4, 36, 46, 2, 11, 20, 24, 6, + 22, 40, 4, 65, 12, 87, 112, 126, 126, 126, 126, + 126, 126 }, + + { + + 16, + 3, 83, 16, 3, 83, 24, 31, 29, 13, 72, 104, 73, + 65, 51, 14, 61, 72, 26, 33, 65, 86, 3, 103, 120, + 71, 90, 126, 126, 126, 62, 22, 70, 26, 33, 65, + 67, 22, 21, 73, 4, 1, 2, 73, 88, 70, 94, 5, 66, + 1, 69, 81, 71, 88, 18, 5, 2, 64, 13, 2, 22, 0, + 0, 0, 9, 98, 97, 13, 81, 71, 18, 65, 86, 22, 53, + 41, 62, 62, 32, 30, 22, 38, 18, 11, 27, 70, 73, + 74, 67, 27, 2, 10, 64, 64, 0, 1, 9, 69, 78, 77, + 87, 19, 68, 7, 0, 78, 4, 71, 8, 3, 65, 5, 10, 3, + 68, 66, 6, 67, 0, 69, 69, 76, 64, 69, 81, 2, 69, + 76, 2, 12, 66, 86, 77, 78, 81, 4, 68, 9, 7, 7, + 41, 19, 5, 69, 13, 69, 82, 0, 108, 73, 64, 91, + 71, 89, 4, 64, 82, 19, 8, 4, 101, 4, 86, 74, 3, + 97, 1, 17, 10, 14, 17, 13, 6, 16, 12, 68, 64, 9, + 0, 64, 75, 69, 75, 74, 70, 73, 79, 79, 78, 78, + 76, 68, 92, 99, 80, 85, 86, 90, 91, 100, 105, + 99, 99, 109, 115, 108, 111, 126, 121, 123, 70, + 74, 95, 80, 88, 91, 97, 106, 109, 99, 103, 97, + 87, 92, 90, 82, 79, 6, 26, 18, 11, 9, 11, 6, 1, + 
6, 15, 15, 30, 21, 15, 8, 23, 14, 18, 11, 38, 2, + 44, 31, 22, 14, 27, 5, 64, 64, 5, 70, 47, 17, 0, + 1, 12, 1, 71, 1, 5, 33, 18, 2, 67, 17, 5, 3, 3, + 3, 62, 65, 7, 8, 15, 10, 9, 22, 23, 17, 22, 27, + 33, 64, 6, 75, 64, 38, 70, 95, 67, 10, 66, 81, + 86, 76, 87, 96, 105, 107, 72, 67, 16, 70, 77, + 69, 65, 2, 81, 75, 68, 68, 89, 75, 82, 1, 11, + 88, 75, 4, 80, 1, 77, 69, 21, 80, 85, 65, 4, 90, + 88, 124, 9, 15, 24, 67, 74, 69, 85, 85, 85, 98, + 98, 97, 115, 106, 94, 116, 120, 124, 126, 115, + 111, 102, 82, 89, 6, 25, 4, 26, 48, 84, 87, 92, + 90, 114, 103, 97, 122, 100, 112, 106, 125, 118, + 115, 114, 85, 81, 118, 89, 91, 104, 116, 110, + 111, 111, 107, 116, 113, 116, 121, 126, 120, 77, + 85, 102, 66, 14, 10, 22, 22, 27, 50, 32, 43, 43, + 62, 53, 47, 62, 62, 37, 72, 94, 114, 126, 126, + 126, 126, 126, 16, 50, 43, 43, 31, 45, 22, 17, + 22, 12, 69, 1, 18, 4, 37, 47, 2, 12, 21, 25, 6, + 22, 41, 4, 65, 11, 89, 114, 126, 126, 126, 126, + 126, 126 }, + + { + + 15, + 3, 83, 15, 3, 83, 26, 33, 30, 13, 73, 106, 75, + 66, 51, 14, 62, 72, 27, 34, 65, 87, 3, 104, 121, + 71, 92, 126, 126, 126, 62, 23, 70, 27, 34, 65, + 66, 23, 22, 73, 4, 2, 3, 74, 89, 70, 94, 5, 66, + 1, 69, 81, 71, 88, 19, 5, 2, 64, 14, 2, 22, 0, + 0, 0, 9, 98, 97, 14, 82, 71, 18, 65, 86, 24, 55, + 42, 62, 62, 33, 31, 24, 39, 19, 12, 29, 69, 73, + 73, 66, 27, 2, 10, 64, 64, 0, 2, 11, 69, 79, 78, + 88, 19, 68, 7, 1, 78, 5, 71, 9, 3, 65, 5, 10, 2, + 68, 66, 6, 67, 0, 69, 70, 76, 64, 70, 82, 1, 70, + 77, 2, 13, 66, 87, 77, 77, 81, 4, 68, 9, 7, 7, + 42, 19, 5, 69, 14, 69, 83, 0, 109, 73, 64, 91, + 72, 90, 4, 64, 83, 19, 8, 4, 103, 4, 87, 74, 3, + 97, 0, 17, 10, 14, 17, 13, 6, 16, 12, 68, 65, 9, + 0, 64, 75, 70, 76, 75, 71, 74, 80, 80, 78, 78, + 77, 68, 93, 101, 80, 86, 88, 92, 93, 102, 107, + 101, 101, 111, 117, 109, 112, 126, 123, 124, 70, + 74, 95, 81, 89, 92, 99, 107, 111, 100, 104, 98, + 87, 92, 90, 82, 79, 7, 26, 18, 11, 9, 11, 6, 1, + 7, 16, 15, 30, 21, 15, 8, 24, 15, 19, 12, 40, 2, + 44, 31, 22, 14, 28, 5, 
64, 64, 5, 70, 47, 17, + 64, 1, 12, 1, 71, 2, 5, 33, 18, 1, 68, 17, 5, 3, + 4, 4, 62, 64, 8, 8, 16, 11, 10, 24, 24, 18, 23, + 29, 34, 64, 7, 75, 64, 40, 70, 95, 67, 10, 66, + 82, 86, 76, 88, 97, 106, 108, 72, 68, 16, 71, + 77, 69, 66, 2, 82, 75, 69, 68, 90, 75, 83, 1, + 11, 89, 75, 4, 81, 1, 78, 69, 21, 81, 86, 66, 3, + 91, 88, 125, 8, 14, 24, 68, 75, 71, 87, 87, 87, + 101, 100, 99, 118, 109, 95, 119, 123, 126, 126, + 118, 114, 104, 83, 90, 6, 26, 4, 27, 50, 86, 89, + 94, 92, 116, 105, 98, 124, 102, 113, 107, 126, + 120, 116, 115, 85, 82, 119, 90, 93, 105, 118, + 112, 113, 113, 108, 118, 115, 117, 123, 126, + 121, 78, 86, 103, 66, 15, 10, 22, 23, 27, 51, + 33, 44, 44, 62, 54, 48, 62, 62, 37, 73, 96, 117, + 126, 126, 126, 126, 126, 17, 51, 44, 44, 31, 46, + 23, 17, 23, 13, 69, 2, 18, 5, 38, 48, 2, 12, 21, + 25, 6, 23, 42, 4, 65, 10, 91, 117, 126, 126, + 126, 126, 126, 126 }, + + }, + + { + + { + + 62, + 9, 74, 62, 9, 74, 126, 104, 10, 9, 12, 38, 62, + 62, 54, 22, 118, 65, 71, 79, 11, 13, 70, 9, 29, + 41, 62, 61, 27, 69, 126, 101, 76, 71, 79, 11, + 69, 90, 11, 20, 69, 82, 96, 4, 75, 87, 100, 7, + 74, 85, 4, 81, 86, 95, 66, 77, 70, 86, 72, 2, + 22, 0, 0, 0, 83, 86, 97, 72, 22, 1, 48, 12, 80, + 126, 91, 96, 81, 98, 102, 97, 119, 99, 110, 102, + 126, 80, 89, 94, 92, 24, 65, 84, 126, 73, 104, + 91, 126, 8, 7, 8, 2, 10, 68, 74, 88, 103, 91, + 89, 92, 76, 87, 110, 105, 78, 112, 99, 126, 126, + 126, 126, 66, 78, 71, 72, 4, 8, 70, 75, 89, 119, + 75, 43, 41, 126, 9, 2, 5, 3, 2, 67, 84, 74, 65, + 11, 6, 2, 69, 70, 8, 71, 5, 2, 22, 38, 31, 20, + 16, 19, 12, 17, 25, 66, 25, 21, 29, 89, 18, 35, + 32, 62, 62, 48, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 53, 62, 62, 62, 62, 62, 62, 62, 56, 62, + 62, 62, 27, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 53, 45, 38, 22, 75, 72, 77, 28, 32, 28, + 33, 18, 21, 18, 37, 9, 66, 7, 73, 67, 116, 112, + 71, 2, 10, 66, 77, 80, 84, 87, 126, 101, 24, 10, + 2, 75, 77, 91, 107, 111, 122, 76, 19, 11, 6, 5, + 72, 69, 69, 74, 86, 66, 29, 31, 32, 11, 
8, 67, + 73, 89, 11, 59, 55, 55, 44, 26, 2, 73, 70, 78, + 62, 126, 124, 110, 126, 124, 105, 121, 117, 102, + 117, 116, 122, 95, 100, 95, 111, 114, 89, 80, + 82, 85, 81, 72, 64, 67, 7, 69, 69, 69, 69, 67, + 77, 64, 2, 67, 64, 6, 65, 66, 1, 12, 66, 71, 75, + 70, 72, 3, 26, 16, 28, 26, 22, 22, 15, 22, 22, + 4, 13, 23, 66, 13, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 54, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 49, 37, 26, 8, 65, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 43, 33, + 19, 15, 14, 18, 41, 41, 42, 43, 35, 39, 29, 21, + 24, 13, 70, 9, 71, 83, 31, 14, 9, 85, 81, 77, + 81, 80, 73, 74, 83, 71, 67, 2, 66, 66, 4, 4, 62, + 62, 62, 62, 62, 60, 53, 36, 6, 71, 39, 27, 21, + 11, 6, 0, 65, 67, 82, 81, 76, 72, 78, 72, 68, + 70, 76, 66, 1, 6, 2, 3, 9, 5, 62, 62, 62, 62, + 62, 60, 53, 36, 6 }, + + { + + 62, + 9, 74, 62, 9, 74, 125, 102, 11, 10, 12, 37, + 61, 62, 55, 22, 116, 65, 70, 78, 11, 13, 69, + 9, 28, 40, 61, 58, 25, 70, 124, 100, 75, 70, + 78, 11, 69, 89, 11, 20, 68, 81, 95, 4, 75, 86, + 99, 7, 73, 84, 4, 80, 85, 94, 65, 76, 70, 85, + 71, 2, 22, 0, 0, 0, 82, 86, 97, 71, 22, 1, 48, + 12, 80, 124, 89, 94, 79, 95, 100, 95, 117, 97, + 108, 100, 124, 80, 88, 93, 91, 24, 65, 83, + 124, 72, 103, 90, 125, 8, 7, 8, 2, 11, 68, 73, + 87, 102, 90, 88, 91, 75, 86, 108, 103, 77, + 110, 97, 122, 122, 123, 124, 65, 77, 70, 71, + 4, 9, 69, 74, 88, 116, 74, 41, 40, 124, 9, 3, + 5, 4, 3, 66, 82, 73, 64, 11, 6, 2, 68, 69, 7, + 70, 5, 2, 22, 37, 31, 20, 16, 19, 12, 17, 24, + 65, 25, 21, 29, 89, 18, 35, 32, 62, 62, 47, + 62, 62, 62, 61, 62, 62, 62, 62, 62, 62, 52, + 62, 62, 62, 62, 62, 62, 62, 54, 62, 60, 62, + 26, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 61, 52, 44, 37, 21, 75, 72, 77, 28, 31, 27, + 32, 17, 20, 17, 36, 8, 66, 6, 73, 67, 115, + 110, 70, 3, 10, 65, 76, 79, 83, 86, 124, 99, + 25, 11, 3, 74, 76, 89, 105, 109, 120, 75, 20, + 12, 7, 6, 71, 68, 68, 73, 85, 66, 30, 31, 32, + 11, 9, 66, 73, 88, 11, 59, 55, 54, 43, 26, 3, + 72, 69, 77, 62, 124, 122, 108, 
124, 122, 103, + 119, 115, 100, 115, 114, 119, 94, 99, 94, 109, + 112, 88, 79, 81, 84, 80, 71, 64, 67, 7, 69, + 69, 69, 68, 66, 76, 0, 2, 66, 0, 6, 64, 65, 1, + 12, 65, 70, 74, 69, 71, 3, 25, 16, 27, 26, 22, + 22, 15, 22, 22, 4, 13, 22, 66, 12, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 52, 62, 62, 62, 62, 62, 62, 62, 61, 62, 48, + 36, 25, 8, 65, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 42, 32, 18, 15, 14, 17, 40, + 40, 41, 41, 34, 38, 28, 20, 23, 12, 70, 8, 71, + 83, 30, 13, 8, 84, 80, 76, 80, 78, 71, 73, 82, + 70, 66, 3, 65, 65, 4, 4, 62, 62, 62, 62, 60, + 56, 49, 32, 4, 70, 39, 28, 22, 12, 7, 1, 64, + 66, 81, 80, 75, 71, 77, 71, 67, 69, 75, 65, 2, + 6, 3, 4, 9, 5, 62, 62, 62, 62, 60, 56, 49, 32, + 4 }, + + { + + 62, + 9, 74, 62, 9, 74, 123, 101, 11, 10, 12, 36, + 59, 61, 55, 22, 114, 65, 70, 77, 11, 12, 69, + 8, 26, 39, 58, 54, 22, 72, 121, 99, 75, 70, + 77, 11, 69, 88, 11, 19, 68, 81, 94, 4, 75, 86, + 99, 7, 73, 84, 4, 80, 85, 94, 65, 76, 70, 85, + 71, 2, 22, 0, 0, 0, 81, 86, 97, 71, 21, 1, 47, + 12, 80, 122, 88, 93, 77, 93, 99, 94, 115, 96, + 107, 99, 122, 80, 88, 93, 91, 24, 65, 82, 122, + 72, 102, 89, 123, 8, 7, 8, 1, 11, 68, 73, 86, + 101, 89, 87, 90, 75, 85, 107, 102, 76, 109, + 96, 117, 118, 120, 121, 65, 77, 70, 71, 4, 9, + 69, 74, 88, 114, 74, 39, 38, 121, 9, 3, 5, 4, + 3, 66, 80, 72, 64, 11, 6, 2, 67, 68, 6, 70, 5, + 2, 21, 36, 30, 20, 15, 19, 12, 17, 23, 65, 24, + 20, 28, 89, 18, 34, 31, 62, 62, 46, 60, 62, + 62, 59, 62, 62, 62, 62, 62, 62, 50, 62, 62, + 62, 62, 62, 62, 62, 52, 62, 58, 62, 24, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 59, 50, + 42, 35, 19, 75, 72, 78, 27, 30, 26, 31, 16, + 19, 16, 34, 7, 66, 5, 74, 68, 114, 109, 69, 3, + 10, 65, 75, 78, 82, 85, 122, 98, 25, 11, 3, + 73, 75, 88, 103, 107, 118, 74, 21, 13, 8, 7, + 70, 68, 68, 73, 84, 66, 31, 31, 31, 11, 9, 66, + 73, 88, 11, 59, 54, 53, 42, 26, 3, 72, 69, 77, + 62, 123, 121, 107, 122, 120, 102, 117, 113, + 99, 113, 112, 117, 93, 98, 94, 108, 110, 88, + 79, 81, 
83, 80, 71, 64, 67, 6, 69, 69, 69, 68, + 66, 75, 0, 2, 66, 0, 6, 64, 65, 1, 11, 65, 70, + 74, 69, 70, 2, 24, 16, 26, 25, 21, 21, 15, 21, + 21, 4, 13, 21, 66, 11, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 50, 62, 62, + 62, 62, 62, 62, 62, 59, 59, 46, 34, 24, 7, 66, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 40, 30, 16, 14, 13, 15, 39, 39, 39, 39, + 32, 36, 26, 19, 21, 11, 71, 7, 72, 84, 28, 12, + 7, 84, 80, 75, 80, 77, 70, 73, 81, 69, 65, 3, + 65, 64, 4, 4, 62, 62, 62, 62, 57, 52, 45, 28, + 1, 70, 39, 28, 22, 12, 8, 1, 64, 66, 81, 80, + 75, 71, 77, 70, 66, 69, 75, 65, 2, 6, 3, 5, 9, + 5, 62, 62, 62, 62, 57, 52, 45, 28, 1 }, + + { + + 62, + 9, 74, 62, 9, 74, 121, 99, 12, 10, 11, 34, 57, + 60, 55, 22, 112, 65, 69, 76, 11, 12, 69, 8, + 25, 38, 56, 51, 20, 73, 118, 98, 75, 69, 76, + 11, 70, 87, 11, 19, 68, 81, 94, 4, 75, 86, 99, + 7, 73, 83, 4, 80, 84, 94, 65, 76, 70, 85, 71, + 2, 22, 0, 0, 0, 81, 86, 97, 70, 20, 1, 46, 11, + 80, 119, 87, 92, 76, 91, 97, 92, 113, 94, 106, + 98, 120, 80, 88, 92, 91, 24, 65, 81, 120, 72, + 101, 89, 121, 8, 6, 7, 1, 11, 68, 72, 86, 100, + 88, 87, 89, 74, 84, 105, 100, 76, 108, 95, + 112, 113, 117, 118, 65, 77, 70, 70, 4, 9, 68, + 73, 87, 112, 74, 37, 36, 118, 9, 3, 5, 4, 3, + 65, 79, 71, 64, 11, 6, 2, 67, 67, 5, 70, 5, 1, + 21, 35, 30, 20, 15, 19, 12, 17, 22, 65, 23, + 19, 28, 89, 18, 34, 31, 62, 62, 45, 58, 62, + 62, 57, 62, 62, 62, 62, 62, 61, 48, 62, 62, + 62, 62, 62, 62, 60, 50, 62, 56, 62, 22, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 48, + 40, 34, 17, 75, 72, 78, 26, 29, 25, 30, 15, + 18, 15, 32, 6, 67, 4, 75, 68, 114, 107, 68, 4, + 10, 65, 74, 78, 82, 85, 120, 97, 25, 11, 4, + 72, 74, 87, 102, 106, 116, 73, 21, 13, 8, 7, + 69, 67, 68, 73, 84, 66, 31, 31, 30, 11, 9, 66, + 73, 87, 11, 58, 54, 52, 41, 26, 3, 72, 69, 77, + 62, 122, 119, 106, 121, 119, 101, 115, 111, + 98, 112, 110, 115, 93, 97, 93, 107, 108, 87, + 79, 81, 83, 79, 71, 64, 67, 6, 69, 69, 70, 67, + 65, 74, 0, 2, 65, 0, 6, 64, 65, 1, 11, 65, 70, 
+ 74, 69, 70, 1, 23, 16, 25, 24, 20, 21, 15, 20, + 20, 4, 13, 20, 66, 10, 62, 62, 61, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 48, 62, 62, + 62, 62, 62, 62, 62, 57, 57, 44, 32, 22, 6, 67, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 59, + 60, 38, 28, 15, 13, 12, 14, 37, 37, 37, 37, + 31, 34, 24, 18, 20, 10, 72, 6, 73, 85, 27, 11, + 6, 84, 79, 75, 79, 76, 69, 73, 81, 69, 65, 3, + 64, 0, 4, 4, 62, 62, 62, 59, 54, 48, 41, 24, + 65, 70, 39, 28, 22, 12, 8, 2, 64, 66, 80, 80, + 75, 70, 76, 69, 65, 69, 74, 65, 2, 6, 3, 5, 9, + 5, 62, 62, 62, 59, 54, 48, 41, 24, 65 }, + + { + + 62, + 9, 74, 62, 9, 74, 120, 98, 12, 10, 11, 33, 55, + 59, 55, 21, 110, 65, 69, 75, 10, 11, 69, 7, + 23, 37, 53, 47, 17, 75, 115, 97, 75, 69, 75, + 10, 70, 86, 11, 18, 68, 80, 93, 4, 75, 86, 99, + 7, 73, 83, 4, 80, 84, 93, 65, 76, 70, 85, 70, + 2, 22, 0, 0, 0, 80, 87, 97, 70, 19, 1, 45, 11, + 80, 117, 86, 91, 74, 89, 96, 91, 112, 93, 104, + 97, 118, 80, 87, 92, 91, 24, 65, 80, 118, 72, + 101, 88, 119, 8, 6, 7, 0, 11, 68, 72, 85, 99, + 87, 86, 88, 74, 84, 104, 99, 75, 107, 94, 107, + 109, 114, 115, 65, 76, 70, 70, 4, 9, 68, 73, + 87, 110, 74, 35, 34, 116, 9, 4, 5, 4, 3, 65, + 77, 70, 0, 10, 6, 2, 66, 67, 4, 70, 5, 1, 20, + 34, 29, 19, 14, 19, 12, 17, 21, 65, 22, 18, + 27, 89, 17, 33, 30, 62, 62, 44, 56, 62, 62, + 55, 62, 62, 62, 62, 62, 59, 46, 59, 62, 62, + 62, 62, 62, 57, 48, 62, 54, 62, 21, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 60, 55, 46, 38, + 32, 15, 75, 72, 79, 25, 28, 24, 28, 14, 16, + 14, 31, 5, 67, 3, 75, 69, 113, 106, 67, 4, 10, + 64, 74, 77, 81, 84, 118, 95, 25, 12, 4, 72, + 73, 86, 100, 104, 115, 73, 22, 14, 9, 8, 68, + 67, 68, 72, 83, 66, 32, 31, 30, 10, 9, 66, 73, + 87, 11, 58, 53, 51, 40, 26, 3, 71, 69, 77, 62, + 120, 118, 105, 119, 117, 100, 114, 110, 97, + 110, 109, 113, 92, 96, 93, 106, 107, 87, 79, + 81, 82, 79, 71, 65, 67, 5, 69, 69, 70, 67, 65, + 73, 0, 2, 65, 0, 6, 64, 65, 1, 10, 65, 70, 74, + 69, 69, 0, 22, 16, 24, 24, 19, 20, 15, 19, 19, + 4, 13, 19, 66, 9, 62, 62, 60, 62, 
62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 46, 62, 62, 62, + 62, 62, 62, 62, 54, 54, 42, 30, 21, 5, 67, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 57, + 36, 26, 13, 12, 12, 12, 36, 36, 36, 35, 29, + 32, 23, 17, 18, 9, 73, 4, 74, 85, 25, 9, 4, + 83, 79, 74, 79, 75, 68, 73, 80, 68, 64, 3, 64, + 1, 4, 4, 62, 62, 62, 56, 50, 44, 36, 20, 68, + 69, 39, 28, 22, 12, 9, 2, 64, 66, 80, 80, 75, + 70, 76, 69, 64, 69, 74, 64, 3, 6, 3, 6, 9, 5, + 62, 62, 62, 56, 50, 44, 36, 20, 68 }, + + { + + 62, + 9, 74, 62, 9, 74, 118, 96, 12, 10, 10, 32, 53, + 58, 55, 21, 108, 65, 69, 74, 10, 11, 69, 6, + 21, 36, 51, 44, 15, 77, 112, 96, 74, 69, 74, + 10, 70, 85, 11, 18, 68, 80, 92, 4, 75, 86, 99, + 7, 73, 83, 4, 80, 83, 93, 65, 76, 70, 85, 70, + 2, 22, 0, 0, 0, 80, 87, 97, 69, 18, 1, 44, 10, + 80, 114, 85, 90, 72, 87, 94, 89, 110, 91, 103, + 96, 115, 80, 87, 91, 90, 24, 65, 79, 116, 72, + 100, 88, 117, 8, 5, 6, 0, 11, 68, 71, 85, 98, + 86, 86, 87, 73, 83, 102, 97, 74, 105, 93, 102, + 105, 111, 112, 64, 76, 69, 69, 4, 9, 67, 73, + 86, 108, 74, 33, 32, 113, 9, 4, 5, 4, 3, 64, + 76, 69, 0, 10, 6, 2, 66, 66, 3, 69, 5, 0, 20, + 33, 29, 19, 14, 19, 12, 17, 20, 64, 21, 18, + 27, 89, 17, 32, 29, 62, 62, 43, 55, 62, 62, + 53, 62, 62, 62, 62, 61, 57, 44, 57, 62, 60, + 62, 62, 62, 55, 46, 62, 52, 62, 19, 62, 62, + 62, 62, 62, 62, 62, 62, 61, 58, 53, 44, 37, + 30, 13, 75, 72, 79, 24, 27, 23, 27, 13, 15, + 13, 29, 4, 68, 2, 76, 70, 112, 104, 66, 5, 10, + 64, 73, 77, 81, 83, 116, 94, 25, 12, 5, 71, + 72, 85, 99, 103, 113, 72, 23, 15, 10, 8, 67, + 66, 67, 72, 83, 66, 32, 31, 29, 10, 9, 66, 73, + 86, 11, 57, 52, 50, 39, 26, 3, 71, 69, 76, 62, + 119, 116, 103, 117, 116, 99, 112, 108, 96, + 108, 107, 111, 91, 95, 92, 105, 105, 87, 79, + 80, 82, 78, 71, 65, 67, 5, 69, 69, 71, 66, 65, + 72, 0, 2, 65, 0, 6, 64, 65, 1, 10, 65, 70, 74, + 69, 69, 64, 21, 16, 23, 23, 19, 19, 15, 19, + 18, 4, 13, 18, 66, 8, 62, 62, 59, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 44, 62, 62, + 62, 62, 62, 62, 61, 52, 52, 40, 29, 
19, 5, 68, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 61, 55, + 54, 34, 24, 12, 12, 11, 10, 35, 34, 34, 33, + 27, 30, 21, 16, 17, 8, 73, 3, 75, 86, 24, 8, + 3, 83, 79, 73, 78, 74, 67, 72, 79, 68, 64, 3, + 0, 2, 4, 4, 62, 62, 59, 53, 47, 40, 32, 16, + 71, 69, 39, 28, 22, 12, 9, 2, 0, 65, 79, 80, + 75, 69, 76, 68, 0, 69, 74, 64, 3, 6, 4, 6, 9, + 5, 62, 62, 59, 53, 47, 40, 32, 16, 71 }, + + { + + 62, + 9, 75, 62, 9, 75, 116, 95, 13, 10, 10, 30, 51, + 57, 55, 21, 107, 65, 68, 74, 10, 10, 68, 6, + 20, 34, 48, 40, 12, 78, 110, 95, 74, 68, 74, + 10, 71, 85, 11, 17, 68, 80, 92, 4, 75, 85, 98, + 7, 72, 82, 4, 79, 83, 93, 65, 76, 70, 85, 70, + 2, 22, 0, 0, 0, 79, 87, 97, 69, 18, 0, 44, 10, + 80, 112, 84, 89, 71, 84, 93, 88, 108, 90, 102, + 95, 113, 80, 87, 91, 90, 24, 65, 78, 113, 72, + 99, 87, 115, 7, 5, 6, 64, 12, 68, 71, 84, 98, + 86, 85, 86, 73, 82, 101, 96, 74, 104, 92, 97, + 100, 108, 109, 64, 76, 69, 69, 4, 9, 67, 72, + 86, 106, 73, 31, 30, 110, 9, 4, 5, 4, 4, 64, + 74, 68, 0, 10, 6, 2, 65, 65, 2, 69, 5, 0, 19, + 32, 28, 19, 13, 19, 12, 17, 18, 64, 20, 17, + 26, 89, 17, 32, 29, 62, 62, 42, 53, 62, 62, + 51, 62, 62, 62, 62, 57, 55, 43, 55, 62, 58, + 62, 62, 62, 52, 44, 62, 50, 62, 17, 62, 62, + 62, 62, 62, 62, 62, 62, 59, 56, 50, 42, 35, + 29, 12, 75, 72, 80, 23, 26, 22, 26, 12, 14, + 12, 27, 3, 68, 1, 77, 70, 112, 103, 65, 5, 10, + 64, 72, 76, 80, 83, 114, 93, 26, 12, 5, 70, + 71, 84, 97, 101, 111, 71, 23, 15, 10, 9, 66, + 66, 67, 72, 82, 66, 33, 31, 28, 10, 9, 66, 73, + 86, 10, 57, 52, 49, 38, 25, 3, 71, 69, 76, 62, + 118, 115, 102, 116, 114, 98, 110, 106, 95, + 107, 105, 109, 91, 94, 92, 104, 103, 86, 79, + 80, 81, 78, 71, 65, 67, 4, 69, 69, 71, 66, 64, + 71, 0, 2, 64, 1, 6, 0, 64, 1, 9, 65, 70, 74, + 69, 68, 65, 20, 16, 22, 22, 18, 19, 15, 18, + 18, 4, 12, 16, 67, 7, 62, 62, 58, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 42, 62, 62, + 62, 62, 62, 62, 58, 50, 49, 38, 27, 18, 4, 69, + 62, 62, 62, 62, 62, 62, 62, 62, 61, 58, 52, + 51, 32, 23, 10, 11, 10, 9, 33, 33, 32, 
31, 26, + 28, 19, 15, 15, 7, 74, 2, 76, 87, 22, 7, 2, + 83, 78, 73, 78, 73, 66, 72, 79, 67, 0, 3, 0, + 3, 4, 4, 62, 62, 57, 50, 44, 36, 28, 12, 74, + 69, 39, 28, 22, 12, 10, 3, 0, 65, 79, 79, 74, + 69, 75, 67, 1, 68, 73, 64, 3, 6, 4, 7, 9, 5, + 62, 62, 57, 50, 44, 36, 28, 12, 74 }, + + { + + 62, + 9, 75, 62, 9, 75, 114, 93, 13, 10, 9, 29, 49, + 56, 55, 21, 105, 65, 68, 73, 9, 10, 68, 5, 18, + 33, 46, 37, 10, 80, 107, 94, 74, 68, 73, 9, + 71, 84, 11, 17, 68, 79, 91, 4, 75, 85, 98, 7, + 72, 82, 4, 79, 82, 92, 65, 76, 70, 85, 69, 2, + 22, 0, 0, 0, 79, 87, 97, 68, 17, 0, 43, 9, 80, + 109, 83, 88, 69, 82, 91, 86, 107, 88, 100, 94, + 111, 80, 86, 90, 90, 24, 65, 77, 111, 72, 98, + 87, 113, 7, 4, 5, 64, 12, 68, 70, 84, 97, 85, + 85, 85, 72, 81, 99, 94, 73, 103, 91, 92, 96, + 105, 106, 64, 75, 69, 68, 4, 9, 66, 72, 85, + 104, 73, 29, 28, 107, 9, 5, 5, 4, 4, 0, 73, + 67, 1, 9, 6, 2, 65, 65, 1, 69, 5, 64, 19, 31, + 28, 18, 13, 19, 12, 17, 17, 64, 19, 16, 26, + 89, 17, 31, 28, 60, 62, 41, 51, 62, 62, 49, + 62, 61, 62, 62, 54, 53, 41, 52, 62, 55, 62, + 62, 62, 49, 42, 62, 48, 62, 16, 62, 62, 62, + 62, 62, 62, 62, 62, 57, 53, 48, 40, 33, 27, + 10, 75, 72, 80, 22, 25, 21, 24, 11, 13, 11, + 26, 2, 69, 0, 77, 71, 111, 101, 64, 6, 10, 0, + 72, 76, 80, 82, 112, 91, 26, 13, 6, 70, 70, + 83, 96, 100, 109, 71, 24, 16, 11, 9, 65, 65, + 67, 71, 82, 66, 33, 31, 28, 9, 9, 66, 73, 85, + 10, 56, 51, 48, 37, 25, 3, 70, 69, 76, 62, + 116, 113, 101, 114, 113, 97, 109, 105, 94, + 105, 104, 107, 90, 93, 91, 103, 101, 86, 79, + 80, 81, 77, 71, 66, 67, 4, 69, 69, 72, 65, 64, + 70, 0, 2, 64, 1, 6, 0, 64, 1, 9, 65, 70, 74, + 69, 68, 66, 19, 16, 21, 22, 17, 18, 15, 17, + 17, 4, 12, 15, 67, 6, 61, 62, 57, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 40, 62, 62, + 62, 62, 62, 62, 56, 48, 47, 36, 25, 16, 3, 69, + 62, 62, 62, 62, 62, 62, 62, 62, 59, 56, 50, + 48, 30, 21, 9, 10, 10, 7, 32, 31, 31, 29, 24, + 26, 18, 14, 14, 6, 75, 0, 77, 87, 21, 5, 0, + 82, 78, 72, 77, 72, 65, 72, 78, 67, 0, 3, 1, + 4, 4, 
4, 62, 62, 54, 47, 40, 32, 24, 8, 77, + 68, 39, 28, 22, 12, 10, 3, 0, 65, 78, 79, 74, + 68, 75, 66, 2, 68, 73, 0, 4, 6, 4, 7, 9, 5, + 62, 62, 54, 47, 40, 32, 24, 8, 77 }, + + { + + 62, + 8, 75, 62, 8, 75, 113, 92, 13, 10, 9, 27, 46, + 55, 55, 20, 103, 66, 68, 72, 9, 9, 68, 4, 16, + 32, 43, 33, 7, 82, 104, 93, 74, 68, 72, 9, 72, + 83, 11, 16, 68, 79, 91, 3, 76, 85, 98, 7, 72, + 82, 4, 79, 82, 92, 65, 76, 70, 85, 69, 2, 22, + 0, 0, 0, 78, 88, 97, 68, 16, 0, 42, 9, 81, + 107, 82, 87, 68, 80, 90, 85, 105, 87, 99, 93, + 109, 80, 86, 90, 90, 24, 65, 76, 109, 72, 98, + 86, 111, 7, 4, 5, 65, 12, 68, 70, 83, 96, 84, + 84, 85, 72, 81, 98, 93, 73, 102, 90, 88, 92, + 102, 104, 64, 75, 69, 68, 3, 9, 66, 72, 85, + 102, 73, 27, 26, 105, 9, 5, 5, 4, 4, 0, 71, + 67, 1, 9, 5, 2, 64, 64, 64, 69, 5, 64, 18, 29, + 27, 18, 12, 19, 12, 16, 16, 64, 18, 15, 25, + 89, 16, 30, 27, 58, 62, 39, 49, 62, 62, 46, + 62, 59, 62, 62, 50, 51, 39, 50, 62, 53, 62, + 62, 62, 46, 40, 62, 46, 62, 14, 62, 62, 62, + 62, 62, 62, 62, 60, 55, 51, 46, 38, 31, 25, 8, + 75, 73, 81, 21, 23, 20, 23, 10, 11, 9, 24, 1, + 69, 64, 78, 72, 111, 100, 0, 6, 10, 0, 71, 75, + 79, 82, 110, 90, 26, 13, 6, 69, 69, 82, 94, + 98, 108, 70, 24, 16, 11, 10, 64, 65, 67, 71, + 81, 67, 34, 31, 27, 9, 9, 66, 73, 85, 10, 56, + 50, 47, 36, 25, 3, 70, 69, 76, 62, 115, 112, + 100, 113, 111, 96, 107, 103, 93, 104, 102, + 105, 90, 93, 91, 102, 100, 86, 79, 80, 80, 77, + 71, 66, 67, 3, 69, 69, 72, 65, 64, 69, 0, 1, + 64, 1, 5, 0, 64, 1, 8, 65, 70, 74, 69, 67, 67, + 18, 16, 19, 21, 16, 17, 14, 16, 16, 4, 12, 14, + 67, 4, 60, 60, 56, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 60, 38, 62, 62, 62, 62, 62, 62, + 53, 45, 44, 34, 23, 15, 2, 70, 62, 62, 62, 62, + 62, 62, 62, 62, 56, 53, 47, 45, 28, 19, 7, 9, + 9, 5, 30, 30, 29, 27, 22, 24, 16, 12, 12, 4, + 76, 64, 78, 88, 19, 4, 64, 82, 78, 72, 77, 71, + 64, 72, 78, 66, 1, 3, 1, 4, 4, 3, 62, 60, 51, + 44, 37, 28, 19, 3, 80, 68, 39, 28, 22, 12, 11, + 3, 0, 65, 78, 79, 74, 68, 75, 66, 2, 68, 73, + 0, 4, 
6, 4, 8, 9, 4, 62, 60, 51, 44, 37, 28, + 19, 3, 80 }, + + { + + 62, + 8, 75, 62, 8, 75, 111, 91, 14, 10, 9, 26, 44, + 54, 56, 20, 101, 66, 67, 71, 9, 8, 68, 4, 15, + 31, 41, 29, 4, 83, 101, 92, 73, 67, 71, 9, 72, + 82, 11, 16, 67, 79, 90, 3, 76, 85, 98, 7, 72, + 81, 4, 79, 82, 92, 65, 76, 70, 84, 69, 2, 22, + 0, 0, 0, 77, 88, 97, 68, 15, 0, 41, 9, 81, + 105, 80, 86, 66, 78, 88, 84, 103, 85, 98, 91, + 106, 80, 86, 90, 89, 24, 65, 75, 107, 71, 97, + 85, 109, 7, 4, 5, 65, 12, 68, 70, 82, 95, 83, + 83, 84, 71, 80, 97, 91, 72, 100, 89, 83, 87, + 98, 101, 0, 75, 68, 67, 3, 9, 66, 71, 84, 99, + 73, 25, 25, 102, 9, 5, 5, 4, 4, 1, 69, 66, 1, + 9, 5, 2, 0, 0, 65, 68, 5, 64, 17, 28, 26, 18, + 11, 19, 12, 16, 15, 0, 17, 15, 24, 89, 16, 30, + 27, 56, 62, 38, 48, 62, 62, 44, 60, 57, 62, + 62, 47, 49, 37, 48, 62, 51, 62, 62, 62, 44, + 38, 62, 44, 62, 12, 62, 62, 62, 62, 62, 62, + 60, 58, 53, 49, 44, 37, 30, 24, 6, 75, 73, 81, + 21, 22, 19, 22, 9, 10, 8, 22, 0, 69, 65, 79, + 72, 110, 99, 1, 6, 10, 0, 70, 74, 78, 81, 107, + 89, 26, 13, 6, 68, 68, 81, 92, 96, 106, 69, + 25, 17, 12, 11, 0, 65, 66, 71, 80, 67, 35, 31, + 26, 9, 10, 65, 73, 84, 10, 56, 50, 46, 35, 25, + 3, 70, 69, 75, 62, 114, 111, 98, 111, 109, 95, + 105, 101, 92, 102, 100, 103, 89, 92, 90, 101, + 98, 85, 78, 79, 79, 76, 71, 66, 67, 2, 69, 69, + 72, 65, 0, 68, 1, 1, 0, 1, 5, 0, 64, 1, 7, 65, + 69, 73, 69, 66, 67, 17, 16, 18, 20, 16, 17, + 14, 16, 15, 4, 12, 13, 67, 3, 59, 59, 56, 61, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 36, + 62, 62, 62, 62, 62, 62, 50, 43, 42, 33, 22, + 14, 2, 71, 62, 62, 62, 62, 62, 62, 62, 62, 54, + 51, 45, 43, 26, 17, 5, 9, 8, 4, 29, 29, 27, + 25, 21, 23, 14, 11, 10, 3, 76, 65, 78, 89, 17, + 3, 65, 82, 77, 71, 77, 70, 1, 71, 77, 65, 2, + 3, 2, 5, 4, 3, 62, 58, 49, 41, 34, 24, 15, 64, + 83, 68, 39, 28, 23, 13, 12, 4, 1, 64, 78, 79, + 74, 68, 74, 65, 3, 68, 72, 0, 4, 6, 5, 9, 9, + 4, 62, 58, 49, 41, 34, 24, 15, 64, 83 }, + + { + + 62, + 8, 75, 62, 8, 75, 109, 89, 14, 10, 8, 25, 42, + 53, 56, 20, 
99, 66, 67, 70, 8, 8, 68, 3, 13, + 30, 38, 26, 2, 85, 98, 91, 73, 67, 70, 8, 72, + 81, 11, 15, 67, 78, 89, 3, 76, 85, 98, 7, 72, + 81, 4, 79, 81, 91, 65, 76, 70, 84, 68, 2, 22, + 0, 0, 0, 77, 88, 97, 67, 14, 0, 40, 8, 81, + 102, 79, 85, 64, 76, 87, 82, 102, 84, 96, 90, + 104, 80, 85, 89, 89, 24, 65, 74, 105, 71, 96, + 85, 107, 7, 3, 4, 66, 12, 68, 69, 82, 94, 82, + 83, 83, 71, 79, 95, 90, 71, 99, 88, 78, 83, + 95, 98, 0, 74, 68, 67, 3, 9, 65, 71, 84, 97, + 73, 23, 23, 99, 9, 6, 5, 4, 4, 1, 68, 65, 2, + 8, 5, 2, 0, 0, 66, 68, 5, 65, 17, 27, 26, 17, + 11, 19, 12, 16, 14, 0, 16, 14, 24, 89, 16, 29, + 26, 54, 62, 37, 46, 62, 62, 42, 57, 55, 62, + 62, 43, 47, 35, 45, 61, 48, 62, 62, 62, 41, + 36, 58, 42, 62, 11, 62, 62, 62, 62, 62, 60, + 58, 56, 51, 46, 42, 35, 28, 22, 4, 75, 73, 82, + 20, 21, 18, 20, 8, 9, 7, 21, 64, 70, 66, 79, + 73, 109, 97, 2, 7, 10, 1, 70, 74, 78, 80, 105, + 87, 26, 14, 7, 68, 67, 80, 91, 95, 104, 69, + 26, 18, 13, 11, 1, 64, 66, 70, 80, 67, 35, 31, + 26, 8, 10, 65, 73, 84, 10, 55, 49, 45, 34, 25, + 3, 69, 69, 75, 62, 112, 109, 97, 109, 108, 94, + 104, 100, 91, 100, 99, 101, 88, 91, 90, 100, + 96, 85, 78, 79, 79, 76, 71, 67, 67, 2, 69, 69, + 73, 64, 0, 67, 1, 1, 0, 1, 5, 0, 64, 1, 7, 65, + 69, 73, 69, 66, 68, 16, 16, 17, 20, 15, 16, + 14, 15, 14, 4, 12, 12, 67, 2, 58, 58, 55, 59, + 60, 62, 62, 62, 62, 62, 62, 62, 62, 55, 34, + 62, 62, 62, 62, 62, 62, 48, 41, 39, 31, 20, + 12, 1, 71, 62, 62, 62, 62, 62, 62, 62, 62, 52, + 48, 43, 40, 24, 15, 4, 8, 8, 2, 28, 27, 26, + 23, 19, 21, 13, 10, 9, 2, 77, 67, 79, 89, 16, + 1, 67, 81, 77, 70, 76, 69, 2, 71, 76, 65, 2, + 3, 2, 6, 4, 3, 62, 56, 46, 38, 30, 20, 11, 68, + 86, 67, 39, 28, 23, 13, 12, 4, 1, 64, 77, 79, + 74, 67, 74, 64, 4, 68, 72, 1, 5, 6, 5, 9, 9, + 4, 62, 56, 46, 38, 30, 20, 11, 68, 86 }, + + { + + 62, + 8, 76, 62, 8, 76, 107, 88, 15, 10, 8, 23, 40, + 52, 56, 20, 98, 66, 66, 70, 8, 7, 67, 3, 12, + 28, 36, 22, 64, 86, 96, 90, 73, 66, 70, 8, 73, + 81, 11, 15, 67, 78, 89, 3, 76, 84, 97, 7, 71, + 80, 
4, 78, 81, 91, 65, 76, 70, 84, 68, 2, 22, + 0, 0, 0, 76, 88, 97, 67, 14, 64, 40, 8, 81, + 100, 78, 84, 0, 73, 85, 81, 100, 82, 95, 89, + 102, 80, 85, 89, 89, 24, 65, 73, 102, 71, 95, + 84, 105, 6, 3, 4, 66, 13, 68, 69, 81, 94, 82, + 82, 82, 70, 78, 94, 88, 71, 98, 87, 73, 78, + 92, 95, 0, 74, 68, 66, 3, 9, 65, 70, 83, 95, + 72, 21, 21, 96, 9, 6, 5, 4, 5, 2, 66, 64, 2, + 8, 5, 2, 1, 1, 67, 68, 5, 65, 16, 26, 25, 17, + 10, 19, 12, 16, 12, 0, 15, 13, 23, 89, 16, 29, + 26, 52, 62, 36, 44, 61, 62, 40, 55, 53, 62, + 62, 40, 45, 34, 43, 57, 46, 62, 62, 62, 38, + 34, 55, 40, 62, 9, 62, 62, 62, 62, 62, 58, 55, + 54, 49, 44, 39, 33, 26, 21, 3, 75, 73, 82, 19, + 20, 17, 19, 7, 8, 6, 19, 65, 70, 67, 80, 73, + 109, 96, 3, 7, 10, 1, 69, 73, 77, 80, 103, 86, + 27, 14, 7, 67, 66, 79, 89, 93, 102, 68, 26, + 18, 13, 12, 2, 64, 66, 70, 79, 67, 36, 31, 25, + 8, 10, 65, 73, 83, 9, 55, 49, 44, 33, 24, 3, + 69, 69, 75, 62, 111, 108, 96, 108, 106, 93, + 102, 98, 90, 99, 97, 99, 88, 90, 89, 99, 94, + 84, 78, 79, 78, 75, 71, 67, 67, 1, 69, 69, 73, + 64, 1, 66, 1, 1, 1, 2, 5, 1, 0, 1, 6, 65, 69, + 73, 69, 65, 69, 15, 16, 16, 19, 14, 16, 14, + 14, 14, 4, 11, 10, 68, 1, 56, 57, 54, 58, 58, + 62, 62, 62, 62, 62, 62, 62, 62, 52, 32, 62, + 62, 62, 62, 62, 62, 45, 39, 37, 29, 18, 11, 0, + 72, 62, 62, 62, 62, 62, 62, 60, 59, 49, 46, + 40, 37, 22, 14, 2, 7, 7, 1, 26, 26, 24, 21, + 18, 19, 11, 9, 7, 1, 78, 68, 80, 90, 14, 0, + 68, 81, 76, 70, 76, 68, 3, 71, 76, 64, 3, 3, + 3, 7, 4, 3, 62, 54, 44, 35, 27, 16, 7, 72, 89, + 67, 39, 28, 23, 13, 13, 5, 1, 64, 77, 78, 73, + 67, 73, 0, 5, 67, 71, 1, 5, 6, 5, 10, 9, 4, + 62, 54, 44, 35, 27, 16, 7, 72, 89 }, + + { + + 62, + 8, 76, 62, 8, 76, 106, 86, 15, 10, 7, 22, 38, + 51, 56, 19, 96, 66, 66, 69, 8, 7, 67, 2, 10, + 27, 33, 19, 66, 88, 93, 89, 73, 66, 69, 8, 73, + 80, 11, 14, 67, 78, 88, 3, 76, 84, 97, 7, 71, + 80, 4, 78, 80, 91, 65, 76, 70, 84, 68, 2, 22, + 0, 0, 0, 76, 89, 97, 66, 13, 64, 39, 7, 81, + 97, 77, 83, 2, 71, 84, 79, 98, 81, 94, 88, + 100, 80, 
85, 88, 89, 24, 65, 72, 100, 71, 95, + 84, 103, 6, 2, 3, 67, 13, 68, 68, 81, 93, 81, + 82, 81, 70, 78, 92, 87, 70, 97, 86, 68, 74, + 89, 92, 0, 74, 68, 66, 3, 9, 64, 70, 83, 93, + 72, 19, 19, 94, 9, 6, 5, 4, 5, 2, 65, 0, 2, 8, + 5, 2, 1, 2, 68, 68, 5, 66, 16, 25, 25, 17, 10, + 19, 12, 16, 11, 0, 14, 12, 23, 89, 15, 28, 25, + 50, 62, 35, 42, 59, 60, 38, 52, 51, 62, 62, + 36, 43, 32, 41, 54, 43, 58, 62, 62, 35, 32, + 51, 38, 62, 7, 62, 62, 62, 62, 62, 56, 53, 52, + 47, 42, 37, 31, 24, 19, 1, 75, 73, 83, 18, 19, + 16, 18, 6, 6, 5, 17, 66, 71, 68, 81, 74, 108, + 94, 4, 8, 10, 1, 68, 73, 77, 79, 101, 85, 27, + 14, 8, 66, 65, 78, 88, 92, 101, 67, 27, 19, + 14, 12, 3, 0, 66, 70, 79, 67, 36, 31, 24, 8, + 10, 65, 73, 83, 9, 54, 48, 43, 32, 24, 3, 69, + 69, 75, 62, 110, 106, 95, 106, 105, 92, 100, + 96, 89, 97, 95, 97, 87, 89, 89, 98, 93, 84, + 78, 79, 78, 75, 71, 67, 67, 1, 69, 69, 74, 0, + 1, 65, 1, 1, 1, 2, 5, 1, 0, 1, 6, 65, 69, 73, + 69, 65, 70, 14, 16, 15, 18, 13, 15, 14, 13, + 13, 4, 11, 9, 68, 0, 55, 56, 53, 56, 56, 62, + 61, 62, 62, 62, 62, 62, 61, 50, 30, 62, 62, + 62, 62, 62, 59, 43, 36, 34, 27, 16, 9, 64, 73, + 62, 62, 62, 62, 62, 62, 57, 56, 47, 43, 38, + 34, 20, 12, 1, 6, 6, 64, 25, 24, 22, 19, 16, + 17, 9, 8, 6, 0, 79, 69, 81, 91, 13, 64, 69, + 81, 76, 69, 75, 67, 4, 71, 75, 64, 3, 3, 3, 8, + 4, 3, 61, 52, 41, 32, 24, 12, 2, 76, 92, 67, + 39, 28, 23, 13, 13, 5, 1, 64, 76, 78, 73, 66, + 73, 0, 6, 67, 71, 1, 5, 6, 5, 10, 9, 4, 61, + 52, 41, 32, 24, 12, 2, 76, 92 }, + + { + + 62, + 8, 76, 62, 8, 76, 104, 85, 15, 10, 7, 21, 36, + 50, 56, 19, 94, 66, 66, 68, 7, 6, 67, 1, 8, + 26, 31, 15, 69, 90, 90, 88, 72, 66, 68, 7, 73, + 79, 11, 14, 67, 77, 87, 3, 76, 84, 97, 7, 71, + 80, 4, 78, 80, 90, 65, 76, 70, 84, 67, 2, 22, + 0, 0, 0, 75, 89, 97, 66, 12, 64, 38, 7, 81, + 95, 76, 82, 4, 69, 82, 78, 97, 79, 92, 87, 97, + 80, 84, 88, 88, 24, 65, 71, 98, 71, 94, 83, + 101, 6, 2, 3, 67, 13, 68, 68, 80, 92, 80, 81, + 80, 69, 77, 91, 85, 69, 95, 85, 0, 70, 86, 89, + 1, 73, 67, 
65, 3, 9, 64, 70, 82, 91, 72, 17, + 17, 91, 9, 7, 5, 4, 5, 3, 0, 1, 3, 7, 5, 2, 2, + 2, 69, 67, 5, 66, 15, 24, 24, 16, 9, 19, 12, + 16, 10, 1, 13, 12, 22, 89, 15, 27, 24, 48, 62, + 34, 41, 57, 58, 36, 50, 49, 62, 62, 33, 41, + 30, 38, 51, 41, 55, 62, 62, 33, 30, 48, 36, + 62, 6, 62, 62, 62, 61, 60, 54, 51, 50, 45, 39, + 35, 29, 23, 17, 64, 75, 73, 83, 17, 18, 15, + 16, 5, 5, 4, 16, 67, 71, 69, 81, 75, 107, 93, + 5, 8, 10, 2, 68, 72, 76, 78, 99, 83, 27, 15, + 8, 66, 64, 77, 86, 90, 99, 67, 28, 20, 15, 13, + 4, 0, 65, 69, 78, 67, 37, 31, 24, 7, 10, 65, + 73, 82, 9, 54, 47, 42, 31, 24, 3, 68, 69, 74, + 62, 108, 105, 93, 104, 103, 91, 99, 95, 88, + 95, 94, 95, 86, 88, 88, 97, 91, 84, 78, 78, + 77, 74, 71, 68, 67, 0, 69, 69, 74, 0, 1, 64, + 1, 1, 1, 2, 5, 1, 0, 1, 5, 65, 69, 73, 69, 64, + 71, 13, 16, 14, 18, 13, 14, 14, 13, 12, 4, 11, + 8, 68, 64, 54, 55, 52, 54, 54, 62, 59, 61, 62, + 59, 62, 62, 58, 47, 28, 62, 62, 62, 62, 59, + 56, 40, 34, 32, 25, 15, 8, 64, 73, 62, 62, 62, + 62, 59, 59, 55, 53, 45, 41, 36, 31, 18, 10, + 64, 6, 6, 66, 24, 23, 21, 17, 14, 15, 8, 7, 4, + 64, 79, 71, 82, 91, 11, 66, 71, 80, 76, 68, + 75, 66, 5, 70, 74, 0, 4, 3, 4, 9, 4, 3, 60, + 50, 38, 29, 20, 8, 65, 80, 95, 66, 39, 28, 23, + 13, 14, 5, 2, 0, 76, 78, 73, 66, 73, 1, 7, 67, + 71, 2, 6, 6, 6, 11, 9, 4, 60, 50, 38, 29, 20, + 8, 65, 80, 95 }, + + { + + 61, + 8, 76, 61, 8, 76, 102, 83, 16, 10, 6, 19, 34, + 49, 56, 19, 92, 66, 65, 67, 7, 6, 67, 1, 7, + 25, 28, 12, 71, 91, 87, 87, 72, 65, 67, 7, 74, + 78, 11, 13, 67, 77, 87, 3, 76, 84, 97, 7, 71, + 79, 4, 78, 79, 90, 65, 76, 70, 84, 67, 2, 22, + 0, 0, 0, 75, 89, 97, 65, 11, 64, 37, 6, 81, + 92, 75, 81, 5, 67, 81, 76, 95, 78, 91, 86, 95, + 80, 84, 87, 88, 24, 65, 70, 96, 71, 93, 83, + 99, 6, 1, 2, 68, 13, 68, 67, 80, 91, 79, 81, + 79, 69, 76, 89, 84, 69, 94, 84, 5, 65, 83, 86, + 1, 73, 67, 65, 3, 9, 0, 69, 82, 89, 72, 15, + 15, 88, 9, 7, 5, 4, 5, 3, 1, 2, 3, 7, 5, 2, 2, + 3, 70, 67, 5, 67, 15, 23, 24, 16, 9, 19, 12, + 16, 9, 1, 12, 11, 22, 89, 
15, 27, 24, 46, 61, + 33, 39, 55, 55, 34, 47, 47, 62, 62, 29, 39, + 28, 36, 48, 38, 52, 61, 62, 30, 28, 44, 34, + 62, 4, 60, 62, 60, 58, 57, 52, 49, 48, 43, 37, + 33, 27, 21, 16, 66, 75, 73, 84, 16, 17, 14, + 15, 4, 4, 3, 14, 68, 72, 70, 82, 75, 107, 91, + 6, 9, 10, 2, 67, 72, 76, 78, 97, 82, 27, 15, + 9, 65, 0, 76, 85, 89, 97, 66, 28, 20, 15, 13, + 5, 1, 65, 69, 78, 67, 37, 31, 23, 7, 10, 65, + 73, 82, 9, 53, 47, 41, 30, 24, 3, 68, 69, 74, + 62, 107, 103, 92, 103, 102, 90, 97, 93, 87, + 94, 92, 93, 86, 87, 88, 96, 89, 83, 78, 78, + 77, 74, 71, 68, 67, 0, 69, 69, 75, 1, 2, 0, 1, + 1, 2, 2, 5, 1, 0, 1, 5, 65, 69, 73, 69, 64, + 72, 12, 16, 13, 17, 12, 14, 14, 12, 11, 4, 11, + 7, 68, 65, 53, 54, 51, 53, 52, 60, 57, 59, 59, + 57, 62, 60, 55, 45, 26, 62, 62, 62, 62, 55, + 53, 38, 32, 29, 23, 13, 6, 65, 74, 62, 62, 62, + 60, 56, 57, 52, 50, 42, 38, 33, 28, 16, 8, 65, + 5, 5, 67, 22, 21, 19, 15, 13, 13, 6, 6, 3, 65, + 80, 72, 83, 92, 10, 67, 72, 80, 75, 68, 74, + 65, 6, 70, 74, 0, 4, 3, 4, 10, 4, 3, 59, 48, + 36, 26, 17, 4, 69, 84, 98, 66, 39, 28, 23, 13, + 14, 6, 2, 0, 75, 78, 73, 65, 72, 2, 8, 67, 70, + 2, 6, 6, 6, 11, 9, 4, 59, 48, 36, 26, 17, 4, + 69, 84, 98 }, + + { + + 60, + 8, 76, 60, 8, 76, 100, 82, 16, 10, 6, 18, 32, + 48, 56, 19, 90, 66, 65, 66, 7, 5, 67, 0, 5, + 24, 26, 8, 74, 93, 84, 86, 72, 65, 66, 7, 74, + 77, 11, 13, 67, 77, 86, 3, 76, 84, 97, 7, 71, + 79, 4, 78, 79, 90, 65, 76, 70, 84, 67, 2, 22, + 0, 0, 0, 74, 89, 97, 65, 10, 64, 36, 6, 81, + 90, 74, 80, 7, 65, 79, 75, 93, 76, 90, 85, 93, + 80, 84, 87, 88, 24, 65, 69, 94, 71, 92, 82, + 97, 6, 1, 2, 68, 13, 68, 67, 79, 90, 78, 80, + 78, 68, 75, 88, 82, 68, 93, 83, 10, 2, 80, 83, + 1, 73, 67, 64, 3, 9, 0, 69, 81, 87, 72, 13, + 13, 85, 9, 7, 5, 4, 5, 4, 3, 3, 3, 7, 5, 2, 3, + 4, 71, 67, 5, 67, 14, 22, 23, 16, 8, 19, 12, + 16, 8, 1, 11, 10, 21, 89, 15, 26, 23, 44, 58, + 32, 37, 53, 53, 32, 45, 45, 62, 62, 26, 37, + 26, 34, 45, 36, 49, 57, 62, 27, 26, 41, 32, + 62, 2, 58, 62, 58, 56, 55, 50, 47, 46, 41, 35, + 
31, 25, 19, 14, 68, 75, 73, 84, 15, 16, 13, + 14, 3, 3, 2, 12, 69, 72, 71, 83, 76, 106, 90, + 7, 9, 10, 2, 66, 71, 75, 77, 95, 81, 27, 15, + 9, 64, 1, 75, 83, 87, 95, 65, 29, 21, 16, 14, + 6, 1, 65, 69, 77, 67, 38, 31, 22, 7, 10, 65, + 73, 81, 9, 53, 46, 40, 29, 24, 3, 68, 69, 74, + 62, 106, 102, 91, 101, 100, 89, 95, 91, 86, + 92, 90, 91, 85, 86, 87, 95, 87, 83, 78, 78, + 76, 73, 71, 68, 67, 64, 69, 69, 75, 1, 2, 1, + 1, 1, 2, 2, 5, 1, 0, 1, 4, 65, 69, 73, 69, 0, + 73, 11, 16, 12, 16, 11, 13, 14, 11, 10, 4, 11, + 6, 68, 66, 52, 53, 50, 51, 50, 58, 55, 57, 57, + 54, 61, 57, 52, 42, 24, 62, 62, 62, 62, 52, + 50, 35, 30, 27, 21, 11, 5, 66, 75, 62, 62, 62, + 58, 53, 54, 50, 47, 40, 36, 31, 25, 14, 6, 67, + 4, 4, 69, 21, 20, 17, 13, 11, 11, 4, 5, 1, 66, + 81, 73, 84, 93, 8, 68, 73, 80, 75, 67, 74, 64, + 7, 70, 73, 1, 5, 3, 5, 11, 4, 3, 58, 46, 33, + 23, 14, 0, 73, 88, 101, 66, 39, 28, 23, 13, + 15, 6, 2, 0, 75, 78, 73, 65, 72, 3, 9, 67, 70, + 2, 6, 6, 6, 12, 9, 4, 58, 46, 33, 23, 14, 0, + 73, 88, 101 }, + + { + + 58, + 7, 77, 58, 7, 77, 99, 81, 16, 10, 5, 16, 29, + 47, 56, 18, 89, 67, 65, 66, 6, 4, 67, 64, 3, + 22, 23, 4, 77, 95, 82, 86, 72, 65, 66, 6, 75, + 77, 11, 12, 67, 77, 86, 2, 77, 84, 97, 6, 71, + 79, 4, 78, 79, 90, 65, 76, 71, 84, 67, 2, 22, + 0, 0, 0, 74, 90, 97, 65, 9, 65, 35, 5, 82, 88, + 73, 79, 8, 0, 78, 74, 92, 75, 89, 84, 91, 80, + 84, 87, 88, 24, 65, 69, 92, 71, 92, 82, 96, 5, + 0, 1, 69, 13, 68, 67, 79, 90, 78, 80, 78, 68, + 75, 87, 81, 68, 92, 82, 14, 6, 77, 81, 1, 73, + 67, 64, 2, 9, 0, 69, 81, 85, 72, 11, 11, 83, + 9, 7, 5, 4, 5, 4, 4, 3, 3, 6, 4, 2, 3, 4, 73, + 67, 5, 68, 13, 20, 22, 15, 7, 19, 12, 15, 6, + 1, 10, 9, 20, 89, 14, 25, 22, 41, 54, 30, 35, + 50, 50, 29, 42, 43, 55, 62, 22, 34, 24, 31, + 41, 33, 45, 52, 59, 24, 24, 37, 30, 62, 0, 55, + 59, 55, 53, 52, 47, 44, 43, 39, 32, 28, 23, + 17, 12, 70, 75, 74, 85, 14, 14, 11, 12, 1, 1, + 0, 10, 70, 73, 72, 84, 77, 106, 89, 7, 9, 10, + 2, 66, 71, 75, 77, 93, 80, 27, 15, 9, 64, 1, + 74, 82, 86, 
94, 65, 29, 21, 16, 14, 7, 1, 65, + 69, 77, 68, 38, 30, 21, 6, 10, 65, 73, 81, 8, + 52, 45, 38, 28, 23, 3, 68, 69, 74, 62, 105, + 101, 90, 100, 99, 88, 94, 90, 85, 91, 89, 89, + 85, 86, 87, 94, 86, 83, 78, 78, 76, 73, 71, + 69, 68, 65, 69, 70, 76, 1, 2, 2, 1, 0, 2, 2, + 4, 1, 0, 1, 3, 65, 69, 73, 69, 0, 74, 10, 16, + 10, 15, 10, 12, 13, 10, 9, 4, 10, 4, 69, 68, + 50, 51, 49, 49, 48, 55, 52, 54, 54, 51, 58, + 54, 48, 39, 22, 62, 62, 61, 60, 48, 46, 32, + 27, 24, 19, 9, 3, 67, 76, 59, 60, 60, 55, 50, + 51, 47, 43, 37, 33, 28, 22, 12, 4, 69, 3, 3, + 71, 19, 18, 15, 10, 9, 9, 2, 3, 64, 68, 82, + 75, 85, 94, 6, 70, 75, 80, 75, 67, 74, 0, 8, + 70, 73, 1, 5, 3, 5, 11, 4, 2, 56, 44, 30, 19, + 10, 67, 78, 93, 104, 66, 39, 28, 23, 13, 15, + 6, 2, 0, 75, 78, 73, 65, 72, 3, 9, 67, 70, 2, + 6, 6, 6, 12, 8, 3, 56, 44, 30, 19, 10, 67, 78, + 93, 104 }, + + { + + 57, + 7, 77, 57, 7, 77, 97, 79, 17, 11, 5, 15, 27, + 46, 57, 18, 87, 67, 64, 65, 6, 4, 66, 64, 2, + 21, 21, 1, 79, 96, 79, 85, 71, 64, 65, 6, 75, + 76, 11, 12, 66, 76, 85, 2, 77, 83, 96, 6, 70, + 78, 4, 77, 78, 89, 64, 75, 71, 83, 66, 2, 22, + 0, 0, 0, 73, 90, 97, 64, 9, 65, 35, 5, 82, 85, + 71, 77, 10, 3, 76, 72, 90, 73, 87, 82, 88, 80, + 83, 86, 87, 24, 65, 68, 89, 70, 91, 81, 94, 5, + 0, 1, 69, 14, 68, 66, 78, 89, 77, 79, 77, 67, + 74, 85, 79, 67, 90, 80, 19, 11, 73, 78, 2, 72, + 66, 0, 2, 10, 1, 68, 80, 82, 71, 9, 10, 80, 9, + 8, 5, 5, 6, 5, 6, 4, 4, 6, 4, 2, 4, 5, 74, 66, + 5, 68, 13, 19, 22, 15, 7, 19, 12, 15, 5, 2, + 10, 9, 20, 89, 14, 25, 22, 39, 51, 29, 34, 48, + 48, 27, 40, 41, 49, 62, 19, 32, 23, 29, 38, + 31, 42, 48, 55, 22, 22, 34, 28, 62, 64, 53, + 57, 53, 51, 50, 45, 42, 41, 37, 30, 26, 22, + 16, 11, 71, 75, 74, 85, 14, 13, 10, 11, 0, 0, + 64, 9, 71, 73, 73, 84, 77, 105, 87, 8, 10, 10, + 3, 65, 70, 74, 76, 90, 78, 28, 16, 10, 0, 2, + 72, 80, 84, 92, 64, 30, 22, 17, 15, 8, 2, 64, + 68, 76, 68, 39, 30, 21, 6, 11, 64, 73, 80, 8, + 52, 45, 37, 27, 23, 4, 67, 68, 73, 62, 103, + 99, 88, 98, 97, 86, 92, 88, 83, 89, 
87, 86, + 84, 85, 86, 92, 84, 82, 77, 77, 75, 72, 70, + 69, 68, 65, 69, 70, 76, 2, 3, 3, 2, 0, 3, 3, + 4, 2, 1, 1, 3, 64, 68, 72, 68, 1, 74, 9, 16, + 9, 15, 10, 12, 13, 10, 9, 4, 10, 3, 69, 69, + 49, 50, 49, 48, 47, 53, 50, 52, 52, 49, 56, + 52, 45, 37, 20, 61, 60, 57, 56, 45, 43, 30, + 25, 22, 18, 8, 2, 67, 76, 57, 58, 58, 53, 48, + 49, 45, 40, 35, 31, 26, 20, 11, 3, 70, 3, 3, + 72, 18, 17, 14, 8, 8, 8, 1, 2, 65, 69, 82, 76, + 85, 94, 5, 71, 76, 79, 74, 66, 73, 2, 10, 69, + 72, 2, 6, 4, 6, 12, 4, 2, 55, 42, 28, 16, 7, + 71, 82, 97, 106, 65, 39, 29, 24, 14, 16, 7, 3, + 1, 74, 77, 72, 64, 71, 4, 10, 66, 69, 3, 7, 6, + 7, 13, 8, 3, 55, 42, 28, 16, 7, 71, 82, 97, + 106 }, + + { + + 56, + 7, 77, 56, 7, 77, 95, 78, 17, 11, 5, 14, 25, + 45, 57, 18, 85, 67, 64, 64, 6, 3, 66, 65, 0, + 20, 18, 66, 82, 98, 76, 84, 71, 64, 64, 6, 75, + 75, 11, 11, 66, 76, 84, 2, 77, 83, 96, 6, 70, + 78, 4, 77, 78, 89, 64, 75, 71, 83, 66, 2, 22, + 0, 0, 0, 72, 90, 97, 64, 8, 65, 34, 5, 82, 83, + 70, 76, 12, 5, 75, 71, 88, 72, 86, 81, 86, 80, + 83, 86, 87, 24, 65, 67, 87, 70, 90, 80, 92, 5, + 0, 1, 70, 14, 68, 66, 77, 88, 76, 78, 76, 67, + 73, 84, 78, 66, 89, 79, 24, 15, 70, 75, 2, 72, + 66, 0, 2, 10, 1, 68, 80, 80, 71, 7, 8, 77, 9, + 8, 5, 5, 6, 5, 8, 5, 4, 6, 4, 2, 5, 6, 75, 66, + 5, 68, 12, 18, 21, 15, 6, 19, 12, 15, 4, 2, 9, + 8, 19, 89, 14, 24, 21, 37, 48, 28, 32, 46, 46, + 25, 38, 39, 43, 62, 15, 30, 21, 27, 35, 29, + 39, 44, 51, 19, 20, 31, 26, 62, 66, 51, 55, + 51, 49, 48, 43, 40, 39, 35, 28, 24, 20, 14, 9, + 73, 75, 74, 86, 13, 12, 9, 10, 64, 64, 65, 7, + 72, 73, 74, 85, 78, 104, 86, 9, 10, 10, 3, 64, + 69, 73, 75, 88, 77, 28, 16, 10, 1, 3, 71, 78, + 82, 90, 0, 31, 23, 18, 16, 9, 2, 64, 68, 75, + 68, 40, 30, 20, 6, 11, 64, 73, 80, 8, 52, 44, + 36, 26, 23, 4, 67, 68, 73, 62, 102, 98, 87, + 96, 95, 85, 90, 86, 82, 87, 85, 84, 83, 84, + 86, 91, 82, 82, 77, 77, 74, 72, 70, 69, 68, + 66, 69, 70, 76, 2, 3, 4, 2, 0, 3, 3, 4, 2, 1, + 1, 2, 64, 68, 72, 68, 2, 75, 8, 16, 8, 14, 9, + 11, 13, 9, 
8, 4, 10, 2, 69, 70, 48, 49, 48, + 46, 45, 51, 48, 50, 50, 46, 53, 49, 42, 34, + 18, 57, 56, 53, 51, 42, 40, 27, 23, 19, 16, 6, + 1, 68, 77, 55, 56, 55, 51, 45, 46, 42, 37, 33, + 28, 24, 17, 9, 1, 72, 2, 2, 74, 17, 16, 12, 6, + 6, 6, 64, 1, 67, 70, 83, 77, 86, 95, 3, 72, + 77, 79, 74, 65, 73, 3, 11, 69, 71, 3, 7, 4, 6, + 13, 4, 2, 54, 40, 25, 13, 4, 75, 86, 101, 109, + 65, 39, 29, 24, 14, 17, 7, 3, 1, 74, 77, 72, + 64, 71, 5, 11, 66, 69, 3, 7, 6, 7, 14, 8, 3, + 54, 40, 25, 13, 4, 75, 86, 101, 109 }, + + { + + 55, + 7, 77, 55, 7, 77, 93, 76, 18, 11, 4, 12, 23, + 44, 57, 18, 83, 67, 0, 0, 6, 3, 66, 65, 64, + 19, 16, 69, 84, 99, 73, 83, 71, 0, 0, 6, 76, + 74, 11, 11, 66, 76, 84, 2, 77, 83, 96, 6, 70, + 77, 4, 77, 77, 89, 64, 75, 71, 83, 66, 2, 22, + 0, 0, 0, 72, 90, 97, 0, 7, 65, 33, 4, 82, 80, + 69, 75, 13, 7, 73, 69, 86, 70, 85, 80, 84, 80, + 83, 85, 87, 24, 65, 66, 85, 70, 89, 80, 90, 5, + 64, 0, 70, 14, 68, 65, 77, 87, 75, 78, 75, 66, + 72, 82, 76, 66, 88, 78, 29, 20, 67, 72, 2, 72, + 66, 1, 2, 10, 2, 67, 79, 78, 71, 5, 6, 74, 9, + 8, 5, 5, 6, 6, 9, 6, 4, 6, 4, 2, 5, 7, 76, 66, + 5, 69, 12, 17, 21, 15, 6, 19, 12, 15, 3, 2, 8, + 7, 19, 89, 14, 24, 21, 35, 45, 27, 30, 44, 43, + 23, 35, 37, 36, 62, 12, 28, 19, 25, 32, 26, + 36, 40, 47, 16, 18, 27, 24, 62, 68, 49, 53, + 49, 46, 45, 41, 38, 37, 33, 26, 22, 18, 12, 8, + 75, 75, 74, 86, 12, 11, 8, 9, 65, 65, 66, 5, + 73, 74, 75, 86, 78, 104, 84, 10, 11, 10, 3, 0, + 69, 73, 75, 86, 76, 28, 16, 11, 2, 4, 70, 77, + 81, 88, 1, 31, 23, 18, 16, 10, 3, 64, 68, 75, + 68, 40, 30, 19, 6, 11, 64, 73, 79, 8, 51, 44, + 35, 25, 23, 4, 67, 68, 73, 62, 101, 96, 86, + 95, 94, 84, 88, 84, 81, 86, 83, 82, 83, 83, + 85, 90, 80, 81, 77, 77, 74, 71, 70, 69, 68, + 66, 69, 70, 77, 3, 4, 5, 2, 0, 4, 3, 4, 2, 1, + 1, 2, 64, 68, 72, 68, 2, 76, 7, 16, 7, 13, 8, + 11, 13, 8, 7, 4, 10, 1, 69, 71, 47, 48, 47, + 45, 43, 49, 46, 48, 47, 44, 50, 46, 39, 32, + 16, 53, 52, 49, 46, 38, 37, 25, 21, 17, 14, 4, + 64, 69, 78, 53, 53, 53, 48, 42, 44, 40, 34, + 
30, 26, 21, 14, 7, 64, 73, 1, 1, 75, 15, 14, + 10, 4, 5, 4, 66, 0, 68, 71, 84, 78, 87, 96, 2, + 73, 78, 79, 73, 65, 72, 4, 12, 69, 71, 3, 7, + 4, 7, 14, 4, 2, 53, 38, 23, 10, 1, 79, 90, + 105, 112, 65, 39, 29, 24, 14, 17, 8, 3, 1, 73, + 77, 72, 0, 70, 6, 12, 66, 68, 3, 7, 6, 7, 14, + 8, 3, 53, 38, 23, 10, 1, 79, 90, 105, 112 }, + + { + + 53, + 7, 77, 53, 7, 77, 92, 75, 18, 11, 4, 11, 21, + 43, 57, 17, 81, 67, 0, 1, 5, 2, 66, 66, 66, + 18, 13, 73, 87, 101, 70, 82, 71, 0, 1, 5, 76, + 73, 11, 10, 66, 75, 83, 2, 77, 83, 96, 6, 70, + 77, 4, 77, 77, 88, 64, 75, 71, 83, 65, 2, 22, + 0, 0, 0, 71, 91, 97, 0, 6, 65, 32, 4, 82, 78, + 68, 74, 15, 9, 72, 68, 85, 69, 83, 79, 82, 80, + 82, 85, 87, 24, 65, 65, 83, 70, 89, 79, 88, 5, + 64, 0, 71, 14, 68, 65, 76, 86, 74, 77, 74, 66, + 72, 81, 75, 65, 87, 77, 34, 24, 64, 69, 2, 71, + 66, 1, 2, 10, 2, 67, 79, 76, 71, 3, 4, 72, 9, + 9, 5, 5, 6, 6, 11, 7, 5, 5, 4, 2, 6, 7, 77, + 66, 5, 69, 11, 16, 20, 14, 5, 19, 12, 15, 2, + 2, 7, 6, 18, 89, 13, 23, 20, 33, 41, 26, 28, + 42, 41, 21, 33, 35, 30, 62, 8, 26, 17, 22, 29, + 24, 32, 35, 43, 13, 16, 24, 22, 62, 69, 47, + 51, 46, 44, 43, 39, 36, 35, 31, 23, 20, 16, + 10, 6, 77, 75, 74, 87, 11, 10, 7, 7, 66, 67, + 67, 4, 74, 74, 76, 86, 79, 103, 83, 11, 11, + 10, 4, 0, 68, 72, 74, 84, 74, 28, 17, 11, 2, + 5, 69, 75, 79, 87, 1, 32, 24, 19, 17, 11, 3, + 64, 67, 74, 68, 41, 30, 19, 5, 11, 64, 73, 79, + 8, 51, 43, 34, 24, 23, 4, 66, 68, 73, 62, 99, + 95, 85, 93, 92, 83, 87, 83, 80, 84, 82, 80, + 82, 82, 85, 89, 79, 81, 77, 77, 73, 71, 70, + 70, 68, 67, 69, 70, 77, 3, 4, 6, 2, 0, 4, 3, + 4, 2, 1, 1, 1, 64, 68, 72, 68, 3, 77, 6, 16, + 6, 13, 7, 10, 13, 7, 6, 4, 10, 0, 69, 72, 46, + 47, 46, 43, 41, 47, 44, 45, 45, 41, 47, 43, + 36, 29, 14, 48, 48, 45, 41, 35, 33, 22, 18, + 14, 12, 2, 65, 70, 78, 50, 51, 50, 46, 39, 41, + 37, 31, 28, 23, 19, 11, 5, 66, 75, 0, 1, 77, + 14, 13, 9, 2, 3, 2, 67, 64, 70, 72, 85, 80, + 88, 96, 0, 75, 80, 78, 73, 64, 72, 5, 13, 69, + 70, 4, 8, 4, 7, 15, 4, 2, 52, 36, 20, 7, 
66, + 83, 95, 109, 115, 64, 39, 29, 24, 14, 18, 8, + 3, 1, 73, 77, 72, 0, 70, 6, 13, 66, 68, 4, 8, + 6, 7, 15, 8, 3, 52, 36, 20, 7, 66, 83, 95, + 109, 115 }, + + { + + 52, + 7, 77, 52, 7, 77, 90, 73, 18, 11, 3, 10, 19, + 42, 57, 17, 79, 67, 0, 2, 5, 2, 66, 67, 68, + 17, 11, 76, 89, 103, 67, 81, 70, 0, 2, 5, 76, + 72, 11, 10, 66, 75, 82, 2, 77, 83, 96, 6, 70, + 77, 4, 77, 76, 88, 64, 75, 71, 83, 65, 2, 22, + 0, 0, 0, 71, 91, 97, 1, 5, 65, 31, 3, 82, 75, + 67, 73, 17, 11, 70, 66, 83, 67, 82, 78, 79, + 80, 82, 84, 86, 24, 65, 64, 81, 70, 88, 79, + 86, 5, 65, 64, 71, 14, 68, 64, 76, 85, 73, 77, + 73, 65, 71, 79, 73, 64, 85, 76, 39, 28, 2, 66, + 3, 71, 65, 2, 2, 10, 3, 67, 78, 74, 71, 1, 2, + 69, 9, 9, 5, 5, 6, 7, 12, 8, 5, 5, 4, 2, 6, 8, + 78, 65, 5, 70, 11, 15, 20, 14, 5, 19, 12, 15, + 1, 3, 6, 6, 18, 89, 13, 22, 19, 31, 38, 25, + 27, 40, 39, 19, 30, 33, 24, 62, 5, 24, 15, 20, + 26, 21, 29, 31, 39, 11, 14, 20, 20, 62, 71, + 45, 49, 44, 42, 41, 37, 34, 33, 29, 21, 18, + 14, 9, 4, 79, 75, 74, 87, 10, 9, 6, 6, 67, 68, + 68, 2, 75, 75, 77, 87, 80, 102, 81, 12, 12, + 10, 4, 1, 68, 72, 73, 82, 73, 28, 17, 12, 3, + 6, 68, 74, 78, 85, 2, 33, 25, 20, 17, 12, 4, + 0, 67, 74, 68, 41, 30, 18, 5, 11, 64, 73, 78, + 8, 50, 42, 33, 23, 23, 4, 66, 68, 72, 62, 98, + 93, 83, 91, 91, 82, 85, 81, 79, 82, 80, 78, + 81, 81, 84, 88, 77, 81, 77, 76, 73, 70, 70, + 70, 68, 67, 69, 70, 78, 4, 4, 7, 2, 0, 4, 3, + 4, 2, 1, 1, 1, 64, 68, 72, 68, 3, 78, 5, 16, + 5, 12, 7, 9, 13, 7, 5, 4, 10, 64, 69, 73, 45, + 46, 45, 41, 39, 45, 42, 43, 42, 38, 44, 40, + 33, 27, 12, 44, 44, 41, 36, 32, 30, 20, 16, + 12, 10, 1, 67, 70, 79, 48, 48, 48, 44, 36, 38, + 35, 28, 26, 21, 17, 8, 3, 68, 76, 0, 0, 79, + 13, 11, 7, 0, 1, 0, 69, 65, 71, 73, 85, 81, + 89, 97, 64, 76, 81, 78, 73, 0, 71, 6, 14, 68, + 69, 4, 8, 4, 8, 16, 4, 2, 51, 34, 17, 4, 69, + 87, 99, 113, 118, 64, 39, 29, 24, 14, 18, 8, + 4, 2, 72, 77, 72, 1, 70, 7, 14, 66, 68, 4, 8, + 6, 8, 15, 8, 3, 51, 34, 17, 4, 69, 87, 99, + 113, 118 }, + + { + + 51, + 7, 
78, 51, 7, 78, 88, 72, 19, 11, 3, 8, 17, + 41, 57, 17, 78, 67, 1, 2, 5, 1, 65, 67, 69, + 15, 8, 80, 92, 104, 65, 80, 70, 1, 2, 5, 77, + 72, 11, 9, 66, 75, 82, 2, 77, 82, 95, 6, 69, + 76, 4, 76, 76, 88, 64, 75, 71, 83, 65, 2, 22, + 0, 0, 0, 70, 91, 97, 1, 5, 66, 31, 3, 82, 73, + 66, 72, 18, 14, 69, 65, 81, 66, 81, 77, 77, + 80, 82, 84, 86, 24, 65, 0, 78, 70, 87, 78, 84, + 4, 65, 64, 72, 15, 68, 64, 75, 85, 73, 76, 72, + 65, 70, 78, 72, 64, 84, 75, 44, 33, 5, 0, 3, + 71, 65, 2, 2, 10, 3, 66, 78, 72, 70, 64, 0, + 66, 9, 9, 5, 5, 7, 7, 14, 9, 5, 5, 4, 2, 7, 9, + 79, 65, 5, 70, 10, 14, 19, 14, 4, 19, 12, 15, + 64, 3, 5, 5, 17, 89, 13, 22, 19, 29, 35, 24, + 25, 37, 36, 17, 28, 31, 17, 62, 1, 22, 14, 18, + 22, 19, 26, 27, 34, 8, 12, 17, 18, 62, 73, 43, + 47, 42, 39, 38, 35, 31, 31, 27, 19, 15, 12, 7, + 3, 80, 75, 74, 88, 9, 8, 5, 5, 68, 69, 69, 0, + 76, 75, 78, 88, 80, 102, 80, 13, 12, 10, 4, 2, + 67, 71, 73, 80, 72, 29, 17, 12, 4, 7, 67, 72, + 76, 83, 3, 33, 25, 20, 18, 13, 4, 0, 67, 73, + 68, 42, 30, 17, 5, 11, 64, 73, 78, 7, 50, 42, + 32, 22, 22, 4, 66, 68, 72, 62, 97, 92, 82, 90, + 89, 81, 83, 79, 78, 81, 78, 76, 81, 80, 84, + 87, 75, 80, 77, 76, 72, 70, 70, 70, 68, 68, + 69, 70, 78, 4, 5, 8, 2, 0, 5, 4, 4, 3, 2, 1, + 0, 64, 68, 72, 68, 4, 79, 4, 16, 4, 11, 6, 9, + 13, 6, 5, 4, 9, 66, 70, 74, 43, 45, 44, 40, + 37, 43, 40, 41, 40, 36, 41, 38, 30, 24, 10, + 40, 40, 37, 32, 28, 27, 17, 14, 9, 8, 64, 68, + 71, 80, 46, 46, 45, 41, 33, 36, 32, 25, 23, + 18, 14, 5, 1, 69, 78, 64, 64, 80, 11, 10, 5, + 65, 0, 65, 71, 66, 73, 74, 86, 82, 90, 98, 66, + 77, 82, 78, 72, 0, 71, 7, 15, 68, 69, 5, 9, 4, + 8, 17, 4, 2, 50, 32, 15, 1, 72, 91, 103, 117, + 121, 64, 39, 29, 24, 14, 19, 9, 4, 2, 72, 76, + 71, 1, 69, 8, 15, 65, 67, 4, 8, 6, 8, 16, 8, + 3, 50, 32, 15, 1, 72, 91, 103, 117, 121 }, + + { + + 50, + 7, 78, 50, 7, 78, 86, 70, 19, 11, 2, 7, 15, + 40, 57, 17, 76, 67, 1, 3, 4, 1, 65, 68, 71, + 14, 6, 83, 94, 106, 1, 79, 70, 1, 3, 4, 77, + 71, 11, 9, 66, 74, 81, 2, 77, 82, 95, 6, 69, 
+ 76, 4, 76, 75, 87, 64, 75, 71, 83, 64, 2, 22, + 0, 0, 0, 70, 91, 97, 2, 4, 66, 30, 2, 82, 70, + 65, 71, 20, 16, 67, 0, 80, 64, 79, 76, 75, 80, + 81, 83, 86, 24, 65, 1, 76, 70, 86, 78, 82, 4, + 66, 65, 72, 15, 68, 0, 75, 84, 72, 76, 71, 64, + 69, 76, 70, 0, 83, 74, 49, 37, 8, 3, 3, 70, + 65, 3, 2, 10, 4, 66, 77, 70, 70, 66, 65, 0, 9, + 10, 5, 5, 7, 8, 15, 10, 6, 4, 4, 2, 7, 9, 80, + 65, 5, 71, 10, 13, 19, 13, 4, 19, 12, 15, 65, + 3, 4, 4, 17, 89, 13, 21, 18, 27, 32, 23, 23, + 35, 34, 15, 25, 29, 11, 62, 65, 20, 12, 15, + 19, 16, 23, 22, 30, 5, 10, 13, 16, 62, 74, 41, + 45, 40, 37, 36, 33, 29, 29, 25, 16, 13, 10, 5, + 1, 82, 75, 74, 88, 8, 7, 4, 3, 69, 70, 70, 64, + 77, 76, 79, 88, 81, 101, 78, 14, 13, 10, 5, 2, + 67, 71, 72, 78, 70, 29, 18, 13, 4, 8, 66, 71, + 75, 81, 3, 34, 26, 21, 18, 14, 5, 0, 66, 73, + 68, 42, 30, 17, 4, 11, 64, 73, 77, 7, 49, 41, + 31, 21, 22, 4, 65, 68, 72, 62, 95, 90, 81, 88, + 88, 80, 82, 78, 77, 79, 77, 74, 80, 79, 83, + 86, 73, 80, 77, 76, 72, 69, 70, 71, 68, 68, + 69, 70, 79, 5, 5, 9, 2, 0, 5, 4, 4, 3, 2, 1, + 0, 64, 68, 72, 68, 4, 80, 3, 16, 3, 11, 5, 8, + 13, 5, 4, 4, 9, 67, 70, 75, 42, 44, 43, 38, + 35, 41, 38, 38, 37, 33, 38, 35, 27, 22, 8, 35, + 36, 33, 27, 25, 24, 15, 12, 7, 6, 66, 70, 72, + 80, 43, 43, 43, 39, 30, 33, 30, 22, 21, 16, + 12, 2, 64, 71, 79, 65, 64, 82, 10, 8, 4, 67, + 65, 67, 72, 67, 74, 75, 87, 84, 91, 98, 67, + 79, 84, 77, 72, 1, 70, 8, 16, 68, 68, 5, 9, 4, + 9, 18, 4, 2, 49, 30, 12, 65, 76, 95, 107, 121, + 124, 0, 39, 29, 24, 14, 19, 9, 4, 2, 71, 76, + 71, 2, 69, 9, 16, 65, 67, 5, 9, 6, 8, 16, 8, + 3, 49, 30, 12, 65, 76, 95, 107, 121, 124 }, + + { + + 48, + 6, 78, 48, 6, 78, 85, 69, 19, 11, 2, 5, 12, + 39, 57, 16, 74, 68, 1, 4, 4, 0, 65, 69, 73, + 13, 3, 87, 97, 108, 4, 78, 70, 1, 4, 4, 78, + 70, 11, 8, 66, 74, 81, 1, 78, 82, 95, 6, 69, + 76, 4, 76, 75, 87, 64, 75, 71, 83, 64, 2, 22, + 0, 0, 0, 69, 92, 97, 2, 3, 66, 29, 2, 83, 68, + 64, 70, 21, 18, 66, 1, 78, 0, 78, 75, 73, 80, + 81, 83, 86, 24, 65, 2, 74, 70, 
86, 77, 80, 4, + 66, 65, 73, 15, 68, 0, 74, 83, 71, 75, 71, 64, + 69, 75, 69, 0, 82, 73, 53, 41, 11, 5, 3, 70, + 65, 3, 1, 10, 4, 66, 77, 68, 70, 68, 67, 2, 9, + 10, 5, 5, 7, 8, 17, 10, 6, 4, 3, 2, 8, 10, 82, + 65, 5, 71, 9, 11, 18, 13, 3, 19, 12, 14, 66, + 3, 3, 3, 16, 89, 12, 20, 17, 25, 28, 21, 21, + 33, 31, 12, 23, 27, 4, 62, 69, 18, 10, 13, 16, + 14, 19, 18, 26, 2, 8, 10, 14, 62, 76, 39, 42, + 37, 34, 33, 30, 27, 26, 23, 14, 11, 8, 3, 64, + 84, 75, 75, 89, 7, 5, 3, 2, 70, 72, 72, 66, + 78, 76, 80, 89, 82, 101, 77, 15, 13, 10, 5, 3, + 66, 70, 72, 76, 69, 29, 18, 13, 5, 9, 65, 69, + 73, 80, 4, 34, 26, 21, 19, 15, 5, 0, 66, 72, + 69, 43, 30, 16, 4, 11, 64, 73, 77, 7, 49, 40, + 30, 20, 22, 4, 65, 68, 72, 62, 94, 89, 80, 87, + 86, 79, 80, 76, 76, 78, 75, 72, 80, 79, 83, + 85, 72, 80, 77, 76, 71, 69, 70, 71, 68, 69, + 69, 70, 79, 5, 5, 10, 2, 64, 5, 4, 3, 3, 2, 1, + 64, 64, 68, 72, 68, 5, 81, 2, 16, 1, 10, 4, 7, + 12, 4, 3, 4, 9, 68, 70, 77, 41, 42, 42, 36, + 33, 39, 36, 36, 35, 30, 35, 32, 24, 19, 6, 31, + 32, 28, 22, 21, 20, 12, 9, 4, 4, 68, 71, 73, + 81, 41, 41, 40, 36, 27, 30, 27, 19, 18, 13, 9, + 64, 66, 73, 81, 66, 65, 84, 8, 7, 2, 69, 67, + 69, 74, 69, 76, 77, 88, 85, 92, 99, 69, 80, + 85, 77, 72, 1, 70, 9, 17, 68, 68, 6, 10, 4, 9, + 18, 4, 1, 48, 28, 9, 68, 79, 99, 112, 126, + 126, 0, 39, 29, 24, 14, 20, 9, 4, 2, 71, 76, + 71, 2, 69, 9, 16, 65, 67, 5, 9, 6, 8, 17, 8, + 2, 48, 28, 9, 68, 79, 99, 112, 126, 126 }, + + { + + 47, + 6, 78, 47, 6, 78, 83, 68, 20, 11, 2, 4, 10, + 38, 58, 16, 72, 68, 2, 5, 4, 64, 65, 69, 74, + 12, 1, 91, 100, 109, 7, 77, 69, 2, 5, 4, 78, + 69, 11, 8, 65, 74, 80, 1, 78, 82, 95, 6, 69, + 75, 4, 76, 75, 87, 64, 75, 71, 82, 64, 2, 22, + 0, 0, 0, 68, 92, 97, 2, 2, 66, 28, 2, 83, 66, + 1, 69, 23, 20, 64, 2, 76, 2, 77, 73, 70, 80, + 81, 83, 85, 24, 65, 3, 72, 69, 85, 76, 78, 4, + 66, 65, 73, 15, 68, 0, 73, 82, 70, 74, 70, 0, + 68, 74, 67, 1, 80, 72, 58, 46, 15, 8, 4, 70, + 64, 4, 1, 10, 4, 65, 76, 65, 70, 70, 68, 5, 9, + 10, 5, 5, 7, 9, 19, 
11, 6, 4, 3, 2, 9, 11, 83, + 64, 5, 71, 8, 10, 17, 13, 2, 19, 12, 14, 67, + 4, 2, 3, 15, 89, 12, 20, 17, 23, 25, 20, 20, + 31, 29, 10, 21, 25, 65, 62, 72, 16, 8, 11, 13, + 12, 16, 14, 22, 0, 6, 7, 12, 62, 78, 37, 40, + 35, 32, 31, 28, 25, 24, 21, 12, 9, 7, 2, 65, + 86, 75, 75, 89, 7, 4, 2, 1, 71, 73, 73, 68, + 79, 76, 81, 90, 82, 100, 76, 16, 13, 10, 5, 4, + 65, 69, 71, 73, 68, 29, 18, 13, 6, 10, 64, 67, + 71, 78, 5, 35, 27, 22, 20, 16, 5, 1, 66, 71, + 69, 44, 30, 15, 4, 12, 0, 73, 76, 7, 49, 40, + 29, 19, 22, 4, 65, 68, 71, 62, 93, 88, 78, 85, + 84, 78, 78, 74, 75, 76, 73, 70, 79, 78, 82, + 84, 70, 79, 76, 75, 70, 68, 70, 71, 68, 70, + 69, 70, 79, 5, 6, 11, 3, 64, 6, 4, 3, 3, 2, 1, + 65, 64, 67, 71, 68, 6, 81, 1, 16, 0, 9, 4, 7, + 12, 4, 2, 4, 9, 69, 70, 78, 40, 41, 42, 35, + 31, 37, 34, 34, 33, 28, 32, 29, 21, 16, 4, 27, + 28, 24, 17, 18, 17, 9, 7, 2, 3, 69, 72, 73, + 82, 39, 39, 38, 34, 25, 28, 25, 16, 16, 11, 7, + 66, 68, 75, 83, 66, 66, 85, 7, 6, 0, 71, 68, + 70, 76, 70, 78, 78, 88, 86, 92, 100, 71, 81, + 86, 77, 71, 2, 70, 10, 19, 67, 67, 7, 11, 4, + 10, 19, 4, 1, 47, 26, 7, 71, 82, 103, 116, + 126, 126, 0, 39, 29, 25, 15, 21, 10, 5, 3, 71, + 76, 71, 2, 68, 10, 17, 65, 66, 5, 9, 6, 9, 18, + 8, 2, 47, 26, 7, 71, 82, 103, 116, 126, 126 }, + + { + + 46, + 6, 78, 46, 6, 78, 81, 66, 20, 11, 1, 3, 8, 37, + 58, 16, 70, 68, 2, 6, 3, 64, 65, 70, 76, 11, + 65, 94, 102, 111, 10, 76, 69, 2, 6, 3, 78, 68, + 11, 7, 65, 73, 79, 1, 78, 82, 95, 6, 69, 75, + 4, 76, 74, 86, 64, 75, 71, 82, 0, 2, 22, 0, 0, + 0, 68, 92, 97, 3, 1, 66, 27, 1, 83, 0, 2, 68, + 25, 22, 0, 4, 75, 3, 75, 72, 68, 80, 80, 82, + 85, 24, 65, 4, 70, 69, 84, 76, 76, 4, 67, 66, + 74, 15, 68, 1, 73, 81, 69, 74, 69, 0, 67, 72, + 66, 2, 79, 71, 62, 50, 18, 11, 4, 69, 64, 4, + 1, 10, 5, 65, 76, 0, 70, 72, 70, 8, 9, 11, 5, + 5, 7, 9, 20, 12, 7, 3, 3, 2, 9, 11, 84, 64, 5, + 72, 8, 9, 17, 12, 2, 19, 12, 14, 68, 4, 1, 2, + 15, 89, 12, 19, 16, 21, 22, 19, 18, 29, 27, 8, + 18, 23, 71, 62, 76, 14, 6, 8, 10, 9, 13, 9, + 
18, 66, 4, 3, 10, 62, 79, 35, 38, 33, 30, 29, + 26, 23, 22, 19, 9, 7, 5, 0, 67, 88, 75, 75, + 90, 6, 3, 1, 64, 72, 74, 74, 69, 80, 77, 82, + 90, 83, 99, 74, 17, 14, 10, 6, 4, 65, 69, 70, + 71, 66, 29, 19, 14, 6, 11, 0, 66, 70, 76, 5, + 36, 28, 23, 20, 17, 6, 1, 65, 71, 69, 44, 30, + 15, 3, 12, 0, 73, 76, 7, 48, 39, 28, 18, 22, + 4, 64, 68, 71, 62, 91, 86, 77, 83, 83, 77, 77, + 73, 74, 74, 72, 68, 78, 77, 82, 83, 68, 79, + 76, 75, 70, 68, 70, 72, 68, 70, 69, 70, 80, 6, + 6, 12, 3, 64, 6, 4, 3, 3, 2, 1, 65, 64, 67, + 71, 68, 6, 82, 0, 16, 64, 9, 3, 6, 12, 3, 1, + 4, 9, 70, 70, 79, 39, 40, 41, 33, 29, 35, 32, + 31, 30, 25, 29, 26, 18, 14, 2, 22, 24, 20, 12, + 15, 14, 7, 5, 64, 1, 71, 74, 74, 82, 36, 36, + 35, 32, 22, 25, 22, 13, 14, 8, 5, 69, 70, 77, + 84, 67, 66, 87, 6, 4, 64, 73, 70, 72, 77, 71, + 79, 79, 89, 88, 93, 100, 72, 83, 88, 76, 71, + 3, 69, 11, 20, 67, 66, 7, 11, 4, 10, 20, 4, 1, + 46, 24, 4, 74, 86, 107, 120, 126, 126, 1, 39, + 29, 25, 15, 21, 10, 5, 3, 70, 76, 71, 3, 68, + 11, 18, 65, 66, 6, 10, 6, 9, 18, 8, 2, 46, 24, + 4, 74, 86, 107, 120, 126, 126 }, + + { + + 45, + 6, 79, 45, 6, 79, 79, 65, 21, 11, 1, 1, 6, 36, + 58, 16, 69, 68, 3, 6, 3, 65, 64, 70, 77, 9, + 67, 98, 105, 112, 12, 75, 69, 3, 6, 3, 79, 68, + 11, 7, 65, 73, 79, 1, 78, 81, 94, 6, 68, 74, + 4, 75, 74, 86, 64, 75, 71, 82, 0, 2, 22, 0, 0, + 0, 67, 92, 97, 3, 1, 67, 27, 1, 83, 2, 3, 67, + 26, 25, 2, 5, 73, 5, 74, 71, 66, 80, 80, 82, + 85, 24, 65, 5, 67, 69, 83, 75, 74, 3, 67, 66, + 74, 16, 68, 1, 72, 81, 69, 73, 68, 1, 66, 71, + 64, 2, 78, 70, 62, 55, 21, 14, 4, 69, 64, 5, + 1, 10, 5, 64, 75, 2, 69, 74, 72, 11, 9, 11, 5, + 5, 8, 10, 22, 13, 7, 3, 3, 2, 10, 12, 85, 64, + 5, 72, 7, 8, 16, 12, 1, 19, 12, 14, 70, 4, 0, + 1, 14, 89, 12, 19, 16, 19, 19, 18, 16, 26, 24, + 6, 16, 21, 78, 62, 79, 12, 5, 6, 6, 7, 10, 5, + 13, 69, 2, 0, 8, 62, 81, 33, 36, 31, 27, 26, + 24, 20, 20, 17, 7, 4, 3, 65, 68, 89, 75, 75, + 90, 5, 2, 0, 65, 73, 75, 75, 71, 81, 77, 83, + 91, 83, 99, 73, 18, 14, 10, 6, 5, 64, 68, 
70, + 69, 65, 30, 19, 14, 7, 12, 1, 64, 68, 74, 6, + 36, 28, 23, 21, 18, 6, 1, 65, 70, 69, 45, 30, + 14, 3, 12, 0, 73, 75, 6, 48, 39, 27, 17, 21, + 4, 64, 68, 71, 62, 90, 85, 76, 82, 81, 76, 75, + 71, 73, 73, 70, 66, 78, 76, 81, 82, 66, 78, + 76, 75, 69, 67, 70, 72, 68, 71, 69, 70, 80, 6, + 7, 13, 3, 64, 7, 5, 3, 4, 3, 1, 66, 64, 67, + 71, 68, 7, 83, 64, 16, 65, 8, 2, 6, 12, 2, 1, + 4, 8, 72, 71, 80, 37, 39, 40, 32, 27, 33, 30, + 29, 28, 23, 26, 24, 15, 11, 0, 18, 20, 16, 8, + 11, 11, 4, 3, 66, 64, 73, 75, 75, 83, 34, 34, + 33, 29, 19, 23, 20, 10, 11, 6, 2, 72, 72, 78, + 86, 68, 67, 88, 4, 3, 66, 75, 71, 74, 79, 72, + 81, 80, 90, 89, 94, 101, 74, 84, 89, 76, 70, + 3, 69, 12, 21, 67, 66, 8, 12, 4, 11, 21, 4, 1, + 45, 22, 2, 77, 89, 111, 124, 126, 126, 1, 39, + 29, 25, 15, 22, 11, 5, 3, 70, 75, 70, 3, 67, + 12, 19, 64, 65, 6, 10, 6, 9, 19, 8, 2, 45, 22, + 2, 77, 89, 111, 124, 126, 126 }, + + { + + 43, + 6, 79, 43, 6, 79, 78, 0, 21, 11, 0, 0, 4, 35, + 58, 15, 67, 68, 3, 7, 3, 65, 64, 71, 79, 8, + 70, 101, 107, 114, 15, 74, 69, 3, 7, 3, 79, + 67, 11, 6, 65, 73, 78, 1, 78, 81, 94, 6, 68, + 74, 4, 75, 73, 86, 64, 75, 71, 82, 0, 2, 22, + 0, 0, 0, 67, 93, 97, 4, 0, 67, 26, 0, 83, 5, + 4, 66, 28, 27, 3, 7, 71, 6, 73, 70, 64, 80, + 80, 81, 85, 24, 65, 6, 65, 69, 83, 75, 72, 3, + 68, 67, 75, 16, 68, 2, 72, 80, 68, 73, 67, 1, + 66, 69, 0, 3, 77, 69, 62, 59, 24, 17, 4, 69, + 64, 5, 1, 10, 6, 64, 75, 4, 69, 76, 74, 13, 9, + 11, 5, 5, 8, 10, 23, 14, 7, 3, 3, 2, 10, 13, + 86, 64, 5, 73, 7, 7, 16, 12, 1, 19, 12, 14, + 71, 4, 64, 0, 14, 89, 11, 18, 15, 17, 15, 17, + 14, 24, 22, 4, 13, 19, 84, 62, 83, 10, 3, 4, + 3, 4, 6, 1, 9, 72, 0, 67, 6, 62, 83, 31, 34, + 28, 25, 24, 22, 18, 18, 15, 5, 2, 1, 67, 70, + 91, 75, 75, 91, 4, 1, 64, 66, 74, 77, 76, 73, + 82, 78, 84, 92, 84, 98, 71, 19, 15, 10, 6, 6, + 64, 68, 69, 67, 64, 30, 19, 15, 8, 13, 2, 0, + 67, 73, 7, 37, 29, 24, 21, 19, 7, 1, 65, 70, + 69, 45, 30, 13, 3, 12, 0, 73, 75, 6, 47, 38, + 26, 16, 21, 4, 64, 68, 71, 62, 89, 83, 75, 80, 
+ 80, 75, 73, 69, 72, 71, 68, 64, 77, 75, 81, + 81, 65, 78, 76, 75, 69, 67, 70, 72, 68, 71, + 69, 70, 81, 7, 7, 14, 3, 64, 7, 5, 3, 4, 3, 1, + 66, 64, 67, 71, 68, 7, 84, 65, 16, 66, 7, 1, + 5, 12, 1, 0, 4, 8, 73, 71, 81, 36, 38, 39, 30, + 25, 31, 28, 27, 25, 20, 23, 21, 12, 9, 65, 14, + 16, 12, 3, 8, 7, 2, 0, 69, 66, 75, 77, 76, 84, + 32, 31, 30, 27, 16, 20, 17, 7, 9, 3, 0, 75, + 74, 80, 87, 69, 68, 90, 3, 1, 68, 77, 73, 76, + 81, 73, 82, 81, 91, 90, 95, 102, 75, 85, 90, + 76, 70, 4, 68, 13, 22, 67, 65, 8, 12, 4, 11, + 22, 4, 1, 44, 20, 64, 80, 92, 115, 126, 126, + 126, 1, 39, 29, 25, 15, 22, 11, 5, 3, 69, 75, + 70, 4, 67, 12, 20, 64, 65, 6, 10, 6, 9, 19, 8, + 2, 44, 20, 64, 80, 92, 115, 126, 126, 126 }, + + { + + 42, + 6, 79, 42, 6, 79, 76, 1, 21, 11, 0, 64, 2, 34, + 58, 15, 65, 68, 3, 8, 2, 66, 64, 72, 81, 7, + 72, 105, 110, 116, 18, 73, 68, 3, 8, 2, 79, + 66, 11, 6, 65, 72, 77, 1, 78, 81, 94, 6, 68, + 74, 4, 75, 73, 85, 64, 75, 71, 82, 1, 2, 22, + 0, 0, 0, 66, 93, 97, 4, 64, 67, 25, 0, 83, 7, + 5, 65, 30, 29, 5, 8, 70, 8, 71, 69, 2, 80, 79, + 81, 84, 24, 65, 7, 0, 69, 82, 74, 70, 3, 68, + 67, 75, 16, 68, 2, 71, 79, 67, 72, 66, 2, 65, + 68, 2, 4, 75, 68, 62, 62, 27, 20, 5, 68, 0, 6, + 1, 10, 6, 64, 74, 6, 69, 78, 76, 16, 9, 12, 5, + 5, 8, 11, 25, 15, 8, 2, 3, 2, 11, 13, 87, 0, + 5, 73, 6, 6, 15, 11, 0, 19, 12, 14, 72, 5, 65, + 0, 13, 89, 11, 17, 14, 15, 12, 16, 13, 22, 20, + 2, 11, 17, 90, 62, 86, 8, 1, 1, 0, 2, 3, 67, + 5, 74, 65, 70, 4, 62, 84, 29, 32, 26, 23, 22, + 20, 16, 16, 13, 2, 0, 64, 68, 72, 93, 75, 75, + 91, 3, 0, 65, 68, 75, 78, 77, 74, 83, 78, 85, + 92, 85, 97, 70, 20, 15, 10, 7, 6, 0, 67, 68, + 65, 1, 30, 20, 15, 8, 14, 3, 2, 65, 71, 7, 38, + 30, 25, 22, 20, 7, 2, 64, 69, 69, 46, 30, 13, + 2, 12, 0, 73, 74, 6, 47, 37, 25, 15, 21, 4, 0, + 68, 70, 62, 87, 82, 73, 78, 78, 74, 72, 68, + 71, 69, 67, 1, 76, 74, 80, 80, 0, 78, 76, 74, + 68, 66, 70, 73, 68, 72, 69, 70, 81, 7, 7, 15, + 3, 64, 7, 5, 3, 4, 3, 1, 67, 64, 67, 71, 68, + 8, 85, 66, 16, 67, 7, 1, 
4, 12, 1, 64, 4, 8, + 74, 71, 82, 35, 37, 38, 28, 23, 29, 26, 24, + 23, 17, 20, 18, 9, 6, 67, 9, 12, 8, 65, 5, 4, + 64, 65, 71, 68, 76, 78, 76, 84, 29, 29, 28, + 25, 13, 17, 15, 4, 7, 1, 65, 78, 76, 82, 89, + 69, 68, 92, 2, 0, 69, 79, 75, 78, 82, 74, 84, + 82, 91, 92, 96, 102, 77, 87, 92, 75, 70, 5, + 68, 14, 23, 66, 64, 9, 13, 4, 12, 23, 4, 1, + 43, 18, 67, 83, 96, 119, 126, 126, 126, 2, 39, + 29, 25, 15, 23, 11, 6, 4, 69, 75, 70, 4, 67, + 13, 21, 64, 65, 7, 11, 6, 10, 20, 8, 2, 43, + 18, 67, 83, 96, 119, 126, 126, 126 }, + + { + + 41, + 6, 79, 41, 6, 79, 74, 3, 22, 11, 64, 66, 0, + 33, 58, 15, 0, 68, 4, 9, 2, 66, 64, 72, 82, 6, + 75, 108, 112, 117, 21, 72, 68, 4, 9, 2, 80, + 65, 11, 5, 65, 72, 77, 1, 78, 81, 94, 6, 68, + 73, 4, 75, 72, 85, 64, 75, 71, 82, 1, 2, 22, + 0, 0, 0, 66, 93, 97, 5, 65, 67, 24, 64, 83, + 10, 6, 64, 31, 31, 6, 10, 68, 9, 70, 68, 4, + 80, 79, 80, 84, 24, 65, 8, 2, 69, 81, 74, 68, + 3, 69, 68, 76, 16, 68, 3, 71, 78, 66, 72, 65, + 2, 64, 66, 3, 4, 74, 67, 62, 62, 30, 23, 5, + 68, 0, 6, 1, 10, 7, 0, 74, 8, 69, 80, 78, 19, + 9, 12, 5, 5, 8, 11, 26, 16, 8, 2, 3, 2, 11, + 14, 88, 0, 5, 74, 6, 5, 15, 11, 0, 19, 12, 14, + 73, 5, 66, 64, 13, 89, 11, 17, 14, 13, 9, 15, + 11, 20, 17, 0, 8, 15, 97, 62, 90, 6, 64, 64, + 66, 64, 0, 71, 1, 77, 67, 74, 2, 62, 86, 27, + 30, 24, 20, 19, 18, 14, 14, 11, 0, 65, 66, 70, + 73, 95, 75, 75, 92, 2, 64, 66, 69, 76, 79, 78, + 76, 84, 79, 86, 93, 85, 97, 68, 21, 16, 10, 7, + 7, 0, 67, 68, 0, 2, 30, 20, 16, 9, 15, 4, 3, + 64, 69, 8, 38, 30, 25, 22, 21, 8, 2, 64, 69, + 69, 46, 30, 12, 2, 12, 0, 73, 74, 6, 46, 37, + 24, 14, 21, 4, 0, 68, 70, 62, 86, 80, 72, 77, + 77, 73, 70, 66, 70, 68, 65, 3, 76, 73, 80, 79, + 2, 77, 76, 74, 68, 66, 70, 73, 68, 72, 69, 70, + 82, 8, 8, 16, 3, 64, 8, 5, 3, 4, 3, 1, 67, 64, + 67, 71, 68, 8, 86, 67, 16, 68, 6, 0, 4, 12, 0, + 65, 4, 8, 75, 71, 83, 34, 36, 37, 27, 21, 27, + 24, 22, 20, 15, 17, 15, 6, 4, 69, 5, 8, 4, 70, + 1, 1, 66, 67, 74, 70, 78, 80, 77, 85, 27, 26, + 25, 22, 10, 15, 12, 1, 
4, 65, 68, 81, 78, 84, + 90, 70, 69, 93, 0, 65, 71, 81, 76, 80, 84, 75, + 85, 83, 92, 93, 97, 103, 78, 88, 93, 75, 69, + 5, 67, 15, 24, 66, 64, 9, 13, 4, 12, 24, 4, 1, + 42, 16, 69, 86, 99, 123, 126, 126, 126, 2, 39, + 29, 25, 15, 23, 12, 6, 4, 68, 75, 70, 5, 66, + 14, 22, 64, 64, 7, 11, 6, 10, 20, 8, 2, 42, + 16, 69, 86, 99, 123, 126, 126, 126 }, + + { + + 40, + 6, 79, 40, 6, 79, 72, 4, 22, 11, 64, 67, 65, + 32, 58, 15, 2, 68, 4, 10, 2, 67, 64, 73, 84, + 5, 77, 112, 115, 119, 24, 71, 68, 4, 10, 2, + 80, 64, 11, 5, 65, 72, 76, 1, 78, 81, 94, 6, + 68, 73, 4, 75, 72, 85, 64, 75, 71, 82, 1, 2, + 22, 0, 0, 0, 65, 93, 97, 5, 66, 67, 23, 64, + 83, 12, 7, 0, 33, 33, 8, 11, 66, 11, 69, 67, + 6, 80, 79, 80, 84, 24, 65, 9, 4, 69, 80, 73, + 66, 3, 69, 68, 76, 16, 68, 3, 70, 77, 65, 71, + 64, 3, 0, 65, 5, 5, 73, 66, 62, 62, 33, 26, 5, + 68, 0, 7, 1, 10, 7, 0, 73, 10, 69, 82, 80, 22, + 9, 12, 5, 5, 8, 12, 28, 17, 8, 2, 3, 2, 12, + 15, 89, 0, 5, 74, 5, 4, 14, 11, 64, 19, 12, + 14, 74, 5, 67, 65, 12, 89, 11, 16, 13, 11, 6, + 14, 9, 18, 15, 65, 6, 13, 103, 62, 93, 4, 66, + 66, 69, 66, 66, 75, 66, 80, 69, 77, 0, 62, 88, + 25, 28, 22, 18, 17, 16, 12, 12, 9, 65, 67, 68, + 72, 75, 97, 75, 75, 92, 1, 65, 67, 70, 77, 80, + 79, 78, 85, 79, 87, 94, 86, 96, 67, 22, 16, + 10, 7, 8, 1, 66, 67, 2, 3, 30, 20, 16, 10, 16, + 5, 5, 1, 67, 9, 39, 31, 26, 23, 22, 8, 2, 64, + 68, 69, 47, 30, 11, 2, 12, 0, 73, 73, 6, 46, + 36, 23, 13, 21, 4, 0, 68, 70, 62, 85, 79, 71, + 75, 75, 72, 68, 64, 69, 66, 0, 5, 75, 72, 79, + 78, 4, 77, 76, 74, 67, 65, 70, 73, 68, 73, 69, + 70, 82, 8, 8, 17, 3, 64, 8, 5, 3, 4, 3, 1, 68, + 64, 67, 71, 68, 9, 87, 68, 16, 69, 5, 64, 3, + 12, 64, 66, 4, 8, 76, 71, 84, 33, 35, 36, 25, + 19, 25, 22, 20, 18, 12, 14, 12, 3, 1, 71, 1, + 4, 0, 75, 65, 65, 69, 69, 76, 72, 80, 81, 78, + 86, 25, 24, 23, 20, 7, 12, 10, 65, 2, 67, 70, + 84, 80, 86, 92, 71, 70, 95, 64, 66, 73, 83, + 78, 82, 86, 76, 87, 84, 93, 94, 98, 104, 80, + 89, 94, 75, 69, 6, 67, 16, 25, 66, 0, 10, 14, + 4, 13, 25, 4, 
1, 41, 14, 72, 89, 102, 126, + 126, 126, 126, 2, 39, 29, 25, 15, 24, 12, 6, + 4, 68, 75, 70, 5, 66, 15, 23, 64, 64, 7, 11, + 6, 10, 21, 8, 2, 41, 14, 72, 89, 102, 126, + 126, 126, 126 }, + + { + + 38, + 5, 80, 38, 5, 80, 71, 5, 22, 11, 65, 69, 68, + 31, 58, 14, 3, 69, 4, 10, 1, 68, 64, 74, 86, + 3, 80, 116, 118, 121, 26, 71, 68, 4, 10, 1, + 81, 64, 11, 4, 65, 72, 76, 0, 79, 81, 94, 5, + 68, 73, 4, 75, 72, 85, 64, 75, 72, 82, 1, 2, + 22, 0, 0, 0, 65, 94, 97, 5, 67, 68, 22, 65, + 84, 14, 8, 1, 34, 35, 9, 12, 65, 12, 68, 66, + 8, 80, 79, 80, 84, 24, 65, 9, 6, 69, 80, 73, + 65, 2, 70, 69, 77, 16, 68, 3, 70, 77, 65, 71, + 64, 3, 0, 64, 6, 5, 72, 65, 62, 62, 36, 28, 5, + 68, 0, 7, 0, 10, 7, 0, 73, 12, 69, 84, 82, 24, + 9, 12, 5, 5, 8, 12, 29, 17, 8, 1, 2, 2, 12, + 15, 91, 0, 5, 75, 4, 2, 13, 10, 65, 19, 12, + 13, 76, 5, 68, 66, 11, 89, 10, 15, 12, 8, 2, + 12, 7, 15, 12, 68, 3, 11, 110, 62, 97, 1, 68, + 69, 73, 69, 70, 80, 71, 83, 71, 81, 65, 62, + 90, 22, 25, 19, 15, 14, 13, 9, 9, 7, 68, 70, + 70, 74, 77, 99, 75, 76, 93, 0, 67, 69, 72, 79, + 82, 81, 80, 86, 80, 88, 95, 87, 96, 66, 22, + 16, 10, 7, 8, 1, 66, 67, 4, 4, 30, 20, 16, 10, + 16, 6, 6, 2, 66, 9, 39, 31, 26, 23, 23, 8, 2, + 64, 68, 70, 47, 29, 10, 1, 12, 0, 73, 73, 5, + 45, 35, 21, 12, 20, 4, 0, 68, 70, 62, 84, 78, + 70, 74, 74, 71, 67, 0, 68, 65, 1, 7, 75, 72, + 79, 77, 5, 77, 76, 74, 67, 65, 70, 74, 69, 74, + 69, 71, 83, 8, 8, 18, 3, 65, 8, 5, 2, 4, 3, 1, + 69, 64, 67, 71, 68, 9, 88, 69, 16, 71, 4, 65, + 2, 11, 65, 67, 4, 7, 78, 72, 86, 31, 33, 35, + 23, 17, 22, 19, 17, 15, 9, 11, 9, 64, 65, 73, + 67, 0, 68, 80, 69, 69, 72, 72, 79, 74, 82, 83, + 79, 87, 22, 21, 20, 17, 4, 9, 7, 69, 64, 70, + 73, 87, 82, 88, 94, 72, 71, 97, 66, 68, 75, + 86, 80, 84, 88, 78, 89, 86, 94, 96, 99, 105, + 82, 91, 96, 75, 69, 6, 67, 17, 26, 66, 0, 10, + 14, 4, 13, 25, 4, 0, 39, 12, 75, 93, 106, 126, + 126, 126, 126, 2, 39, 29, 25, 15, 24, 12, 6, + 4, 68, 75, 70, 5, 66, 15, 23, 64, 64, 7, 11, + 6, 10, 21, 7, 1, 39, 12, 75, 93, 106, 
126, + 126, 126, 126 }, + + { + + 37, + 5, 80, 37, 5, 80, 69, 7, 23, 12, 65, 70, 70, + 30, 59, 14, 5, 69, 5, 11, 1, 68, 0, 74, 87, 2, + 82, 119, 120, 122, 29, 70, 67, 5, 11, 1, 81, + 0, 11, 4, 64, 71, 75, 0, 79, 80, 93, 5, 67, + 72, 4, 74, 71, 84, 0, 74, 72, 81, 2, 2, 22, 0, + 0, 0, 64, 94, 97, 6, 67, 68, 22, 65, 84, 17, + 10, 3, 36, 38, 11, 14, 0, 14, 66, 64, 11, 80, + 78, 79, 83, 24, 65, 10, 9, 68, 79, 72, 0, 2, + 70, 69, 77, 17, 68, 4, 69, 76, 64, 70, 0, 4, + 1, 1, 8, 6, 70, 0, 62, 62, 40, 31, 6, 67, 1, + 8, 0, 11, 8, 1, 72, 15, 68, 86, 83, 27, 9, 13, + 5, 6, 9, 13, 31, 18, 9, 1, 2, 2, 13, 16, 92, + 1, 5, 75, 4, 1, 13, 10, 65, 19, 12, 13, 77, 6, + 68, 66, 11, 89, 10, 15, 12, 6, 64, 11, 6, 13, + 10, 70, 1, 9, 116, 62, 100, 64, 69, 71, 76, + 71, 73, 84, 75, 85, 73, 84, 67, 62, 91, 20, + 23, 17, 13, 12, 11, 7, 7, 5, 70, 72, 71, 75, + 78, 100, 75, 76, 93, 0, 68, 70, 73, 80, 83, + 82, 81, 87, 80, 89, 95, 87, 95, 64, 23, 17, + 10, 8, 9, 2, 65, 66, 7, 6, 31, 21, 17, 11, 17, + 8, 8, 4, 64, 10, 40, 32, 27, 24, 24, 9, 3, 0, + 67, 70, 48, 29, 10, 1, 13, 1, 73, 72, 5, 45, + 35, 20, 11, 20, 5, 1, 67, 69, 62, 82, 76, 68, + 72, 72, 69, 65, 2, 66, 0, 3, 10, 74, 71, 78, + 75, 7, 76, 75, 73, 66, 64, 69, 74, 69, 74, 69, + 71, 83, 9, 9, 19, 4, 65, 9, 6, 2, 5, 4, 1, 69, + 0, 66, 70, 67, 10, 88, 70, 16, 72, 4, 65, 2, + 11, 65, 67, 4, 7, 79, 72, 87, 30, 32, 35, 22, + 16, 20, 17, 15, 13, 7, 9, 7, 67, 67, 75, 71, + 66, 72, 84, 72, 72, 74, 74, 81, 75, 83, 84, + 79, 87, 20, 19, 18, 15, 2, 7, 5, 72, 66, 72, + 75, 89, 83, 89, 95, 72, 71, 98, 67, 69, 76, + 88, 81, 85, 89, 79, 90, 87, 94, 97, 99, 105, + 83, 92, 97, 74, 68, 7, 66, 19, 28, 65, 1, 11, + 15, 5, 14, 26, 4, 0, 38, 10, 77, 96, 109, 126, + 126, 126, 126, 3, 39, 30, 26, 16, 25, 13, 7, + 5, 67, 74, 69, 6, 65, 16, 24, 0, 0, 8, 12, 6, + 11, 22, 7, 1, 38, 10, 77, 96, 109, 126, 126, + 126, 126 }, + + { + + 36, + 5, 80, 36, 5, 80, 67, 8, 23, 12, 65, 71, 72, + 29, 59, 14, 7, 69, 5, 12, 1, 69, 0, 75, 89, 1, + 85, 123, 123, 124, 32, 69, 67, 5, 
12, 1, 81, + 1, 11, 3, 64, 71, 74, 0, 79, 80, 93, 5, 67, + 72, 4, 74, 71, 84, 0, 74, 72, 81, 2, 2, 22, 0, + 0, 0, 0, 94, 97, 6, 68, 68, 21, 65, 84, 19, + 11, 4, 38, 40, 12, 15, 2, 15, 65, 0, 13, 80, + 78, 79, 83, 24, 65, 11, 11, 68, 78, 71, 2, 2, + 70, 69, 78, 17, 68, 4, 68, 75, 0, 69, 1, 4, 2, + 2, 9, 7, 69, 1, 62, 62, 43, 34, 6, 67, 1, 8, + 0, 11, 8, 1, 72, 17, 68, 88, 85, 30, 9, 13, 5, + 6, 9, 13, 33, 19, 9, 1, 2, 2, 14, 17, 93, 1, + 5, 75, 3, 0, 12, 10, 66, 19, 12, 13, 78, 6, + 69, 67, 10, 89, 10, 14, 11, 4, 67, 10, 4, 11, + 8, 72, 64, 7, 122, 62, 104, 66, 71, 73, 79, + 73, 76, 88, 79, 88, 75, 87, 69, 62, 93, 18, + 21, 15, 11, 10, 9, 5, 5, 3, 72, 74, 73, 77, + 80, 102, 75, 76, 94, 64, 69, 71, 74, 81, 84, + 83, 83, 88, 80, 90, 96, 88, 94, 0, 24, 17, 10, + 8, 10, 3, 64, 65, 9, 7, 31, 21, 17, 12, 18, 9, + 10, 6, 1, 11, 41, 33, 28, 25, 25, 9, 3, 0, 66, + 70, 49, 29, 9, 1, 13, 1, 73, 72, 5, 45, 34, + 19, 10, 20, 5, 1, 67, 69, 62, 81, 75, 67, 70, + 70, 68, 0, 4, 65, 2, 5, 12, 73, 70, 78, 74, 9, + 76, 75, 73, 65, 64, 69, 74, 69, 75, 69, 71, + 83, 9, 9, 20, 4, 65, 9, 6, 2, 5, 4, 1, 70, 0, + 66, 70, 67, 11, 89, 71, 16, 73, 3, 66, 1, 11, + 66, 68, 4, 7, 80, 72, 88, 29, 31, 34, 20, 14, + 18, 15, 13, 11, 4, 6, 4, 70, 70, 77, 75, 70, + 76, 89, 75, 75, 77, 76, 84, 77, 85, 85, 80, + 88, 18, 17, 15, 13, 64, 4, 2, 75, 68, 75, 77, + 92, 85, 91, 97, 73, 72, 100, 68, 70, 78, 90, + 83, 87, 91, 80, 92, 88, 95, 98, 100, 106, 85, + 93, 98, 74, 68, 8, 66, 20, 29, 65, 2, 12, 16, + 5, 14, 27, 4, 0, 37, 8, 80, 99, 112, 126, 126, + 126, 126, 3, 39, 30, 26, 16, 26, 13, 7, 5, 67, + 74, 69, 6, 65, 17, 25, 0, 0, 8, 12, 6, 11, 23, + 7, 1, 37, 8, 80, 99, 112, 126, 126, 126, 126 }, + + { + + 35, + 5, 80, 35, 5, 80, 65, 10, 24, 12, 66, 73, 74, + 28, 59, 14, 9, 69, 6, 13, 1, 69, 0, 75, 90, 0, + 87, 126, 125, 125, 35, 68, 67, 6, 13, 1, 82, + 2, 11, 3, 64, 71, 74, 0, 79, 80, 93, 5, 67, + 71, 4, 74, 70, 84, 0, 74, 72, 81, 2, 2, 22, 0, + 0, 0, 0, 94, 97, 7, 69, 68, 20, 66, 84, 22, + 12, 5, 39, 42, 14, 
17, 4, 17, 64, 1, 15, 80, + 78, 78, 83, 24, 65, 12, 13, 68, 77, 71, 4, 2, + 71, 70, 78, 17, 68, 5, 68, 74, 1, 69, 2, 5, 3, + 4, 11, 7, 68, 2, 62, 62, 46, 37, 6, 67, 1, 9, + 0, 11, 9, 2, 71, 19, 68, 90, 87, 33, 9, 13, 5, + 6, 9, 14, 34, 20, 9, 1, 2, 2, 14, 18, 94, 1, + 5, 76, 3, 64, 12, 10, 66, 19, 12, 13, 79, 6, + 70, 68, 10, 89, 10, 14, 11, 2, 70, 9, 2, 9, 5, + 74, 67, 5, 126, 62, 107, 68, 73, 75, 82, 76, + 79, 92, 83, 91, 77, 91, 71, 62, 95, 16, 19, + 13, 8, 7, 7, 3, 3, 1, 74, 76, 75, 79, 81, 104, + 75, 76, 94, 65, 70, 72, 75, 82, 85, 84, 85, + 89, 81, 91, 97, 88, 94, 2, 25, 18, 10, 8, 11, + 3, 64, 65, 11, 8, 31, 21, 18, 13, 19, 10, 11, + 7, 3, 12, 41, 33, 28, 25, 26, 10, 3, 0, 66, + 70, 49, 29, 8, 1, 13, 1, 73, 71, 5, 44, 34, + 18, 9, 20, 5, 1, 67, 69, 62, 80, 73, 66, 69, + 69, 67, 2, 6, 64, 3, 7, 14, 73, 69, 77, 73, + 11, 75, 75, 73, 65, 0, 69, 74, 69, 75, 69, 71, + 84, 10, 10, 21, 4, 65, 10, 6, 2, 5, 4, 1, 70, + 0, 66, 70, 67, 11, 90, 72, 16, 74, 2, 67, 1, + 11, 67, 69, 4, 7, 81, 72, 89, 28, 30, 33, 19, + 12, 16, 13, 11, 8, 2, 3, 1, 73, 72, 79, 79, + 74, 80, 94, 79, 78, 79, 78, 86, 79, 87, 87, + 81, 89, 16, 14, 13, 10, 67, 2, 0, 78, 71, 77, + 80, 95, 87, 93, 98, 74, 73, 101, 70, 72, 80, + 92, 84, 89, 93, 81, 93, 89, 96, 99, 101, 107, + 86, 94, 99, 74, 67, 8, 65, 21, 30, 65, 2, 12, + 16, 5, 15, 28, 4, 0, 36, 6, 82, 102, 115, 126, + 126, 126, 126, 3, 39, 30, 26, 16, 26, 14, 7, + 5, 66, 74, 69, 7, 64, 18, 26, 0, 1, 8, 12, 6, + 11, 23, 7, 1, 36, 6, 82, 102, 115, 126, 126, + 126, 126 }, + + { + + 33, + 5, 80, 33, 5, 80, 64, 11, 24, 12, 66, 74, 76, + 27, 59, 13, 11, 69, 6, 14, 0, 70, 0, 76, 92, + 64, 90, 126, 126, 126, 38, 67, 67, 6, 14, 0, + 82, 3, 11, 2, 64, 70, 73, 0, 79, 80, 93, 5, + 67, 71, 4, 74, 70, 83, 0, 74, 72, 81, 3, 2, + 22, 0, 0, 0, 1, 95, 97, 7, 70, 68, 19, 66, 84, + 24, 13, 6, 41, 44, 15, 18, 5, 18, 1, 2, 17, + 80, 77, 78, 83, 24, 65, 13, 15, 68, 77, 70, 6, + 2, 71, 70, 79, 17, 68, 5, 67, 73, 2, 68, 3, 5, + 3, 5, 12, 8, 67, 3, 62, 62, 49, 40, 6, 66, 
1, + 9, 0, 11, 9, 2, 71, 21, 68, 92, 89, 35, 9, 14, + 5, 6, 9, 14, 36, 21, 10, 0, 2, 2, 15, 18, 95, + 1, 5, 76, 2, 65, 11, 9, 67, 19, 12, 13, 80, 6, + 71, 69, 9, 89, 9, 13, 10, 0, 74, 8, 0, 7, 3, + 76, 69, 3, 126, 62, 111, 70, 75, 78, 85, 78, + 83, 97, 87, 94, 79, 94, 73, 62, 96, 14, 17, + 10, 6, 5, 5, 1, 1, 64, 77, 78, 77, 81, 83, + 106, 75, 76, 95, 66, 71, 73, 77, 83, 87, 85, + 86, 90, 81, 92, 97, 89, 93, 3, 26, 18, 10, 9, + 11, 4, 0, 64, 13, 10, 31, 22, 18, 13, 20, 11, + 13, 9, 4, 12, 42, 34, 29, 26, 27, 10, 3, 1, + 65, 70, 50, 29, 8, 0, 13, 1, 73, 71, 5, 44, + 33, 17, 8, 20, 5, 2, 67, 69, 62, 78, 72, 65, + 67, 67, 66, 3, 7, 0, 5, 8, 16, 72, 68, 77, 72, + 12, 75, 75, 73, 64, 0, 69, 75, 69, 76, 69, 71, + 84, 10, 10, 22, 4, 65, 10, 6, 2, 5, 4, 1, 71, + 0, 66, 70, 67, 12, 91, 73, 16, 75, 2, 68, 0, + 11, 68, 70, 4, 7, 82, 72, 90, 27, 29, 32, 17, + 10, 14, 11, 8, 6, 64, 0, 65, 76, 75, 81, 84, + 78, 84, 99, 82, 82, 82, 81, 89, 81, 89, 88, + 82, 89, 13, 12, 10, 8, 70, 64, 66, 81, 73, 80, + 82, 98, 89, 95, 100, 75, 73, 103, 71, 73, 81, + 94, 86, 91, 94, 82, 95, 90, 97, 101, 102, 107, + 88, 96, 101, 73, 67, 9, 65, 22, 31, 65, 3, 13, + 17, 5, 15, 29, 4, 0, 35, 4, 85, 105, 119, 126, + 126, 126, 126, 4, 39, 30, 26, 16, 27, 14, 7, + 5, 66, 74, 69, 7, 64, 18, 27, 0, 1, 9, 13, 6, + 11, 24, 7, 1, 35, 4, 85, 105, 119, 126, 126, + 126, 126 }, + + { + + 32, + 5, 80, 32, 5, 80, 1, 13, 24, 12, 67, 75, 78, + 26, 59, 13, 13, 69, 6, 15, 0, 70, 0, 77, 94, + 65, 92, 126, 126, 126, 41, 66, 66, 6, 15, 0, + 82, 4, 11, 2, 64, 70, 72, 0, 79, 80, 93, 5, + 67, 71, 4, 74, 69, 83, 0, 74, 72, 81, 3, 2, + 22, 0, 0, 0, 1, 95, 97, 8, 71, 68, 18, 67, 84, + 27, 14, 7, 43, 46, 17, 20, 7, 20, 2, 3, 20, + 80, 77, 77, 82, 24, 65, 14, 17, 68, 76, 70, 8, + 2, 72, 71, 79, 17, 68, 6, 67, 72, 3, 68, 4, 6, + 4, 7, 14, 9, 65, 4, 62, 62, 52, 43, 7, 66, 2, + 10, 0, 11, 10, 2, 70, 23, 68, 94, 91, 38, 9, + 14, 5, 6, 9, 15, 37, 22, 10, 0, 2, 2, 15, 19, + 96, 2, 5, 77, 2, 66, 11, 9, 67, 19, 12, 13, + 81, 7, 72, 69, 9, 
89, 9, 12, 9, 65, 77, 7, 64, + 5, 1, 78, 72, 1, 126, 62, 114, 72, 77, 80, 88, + 81, 86, 101, 91, 96, 81, 98, 75, 62, 98, 12, + 15, 8, 4, 3, 3, 64, 64, 66, 79, 80, 79, 82, + 85, 108, 75, 76, 95, 67, 72, 74, 78, 84, 88, + 86, 88, 91, 82, 93, 98, 90, 92, 5, 27, 19, 10, + 9, 12, 4, 0, 0, 15, 11, 31, 22, 19, 14, 21, + 12, 14, 10, 6, 13, 43, 35, 30, 26, 28, 11, 4, + 1, 65, 70, 50, 29, 7, 0, 13, 1, 73, 70, 5, 43, + 32, 16, 7, 20, 5, 2, 67, 68, 62, 77, 70, 0, + 65, 66, 65, 5, 9, 1, 7, 10, 18, 71, 67, 76, + 71, 14, 75, 75, 72, 64, 1, 69, 75, 69, 76, 69, + 71, 85, 11, 10, 23, 4, 65, 10, 6, 2, 5, 4, 1, + 71, 0, 66, 70, 67, 12, 92, 74, 16, 76, 1, 68, + 64, 11, 68, 71, 4, 7, 83, 72, 91, 26, 28, 31, + 15, 8, 12, 9, 6, 3, 67, 66, 68, 79, 77, 83, + 88, 82, 88, 104, 85, 85, 84, 83, 91, 83, 90, + 90, 82, 90, 11, 9, 8, 6, 73, 67, 68, 84, 75, + 82, 84, 101, 91, 97, 101, 75, 74, 105, 72, 75, + 83, 96, 88, 93, 96, 83, 96, 91, 97, 102, 103, + 108, 89, 97, 102, 73, 67, 10, 64, 23, 32, 64, + 4, 13, 17, 5, 16, 30, 4, 0, 34, 2, 88, 108, + 122, 126, 126, 126, 126, 4, 39, 30, 26, 16, + 27, 14, 8, 6, 65, 74, 69, 8, 64, 19, 28, 0, 1, + 9, 13, 6, 12, 24, 7, 1, 34, 2, 88, 108, 122, + 126, 126, 126, 126 }, + + { + + 31, + 5, 81, 31, 5, 81, 3, 14, 25, 12, 67, 77, 80, + 25, 59, 13, 14, 69, 7, 15, 0, 71, 1, 77, 95, + 67, 95, 126, 126, 126, 43, 65, 66, 7, 15, 0, + 83, 4, 11, 1, 64, 70, 72, 0, 79, 79, 92, 5, + 66, 70, 4, 73, 69, 83, 0, 74, 72, 81, 3, 2, + 22, 0, 0, 0, 2, 95, 97, 8, 71, 69, 18, 67, 84, + 29, 15, 8, 44, 49, 18, 21, 9, 21, 3, 4, 22, + 80, 77, 77, 82, 24, 65, 15, 20, 68, 75, 69, + 10, 1, 72, 71, 80, 18, 68, 6, 66, 72, 3, 67, + 5, 6, 5, 8, 15, 9, 64, 5, 62, 62, 55, 46, 7, + 66, 2, 10, 0, 11, 10, 3, 70, 25, 67, 96, 93, + 41, 9, 14, 5, 6, 10, 15, 39, 23, 10, 0, 2, 2, + 16, 20, 97, 2, 5, 77, 1, 67, 10, 9, 68, 19, + 12, 13, 83, 7, 73, 70, 8, 89, 9, 12, 9, 67, + 80, 6, 66, 2, 65, 80, 74, 64, 126, 62, 118, + 74, 78, 82, 92, 83, 89, 105, 96, 99, 83, 101, + 77, 62, 100, 10, 13, 6, 1, 0, 1, 67, 66, 
68, + 81, 83, 81, 84, 86, 109, 75, 76, 96, 68, 73, + 75, 79, 85, 89, 87, 90, 92, 82, 94, 99, 90, + 92, 6, 28, 19, 10, 9, 13, 5, 1, 0, 17, 12, 32, + 22, 19, 15, 22, 13, 16, 12, 8, 14, 43, 35, 30, + 27, 29, 11, 4, 1, 64, 70, 51, 29, 6, 0, 13, 1, + 73, 70, 4, 43, 32, 15, 6, 19, 5, 2, 67, 68, + 62, 76, 69, 1, 64, 64, 64, 7, 11, 2, 8, 12, + 20, 71, 66, 76, 70, 16, 74, 75, 72, 0, 1, 69, + 75, 69, 77, 69, 71, 85, 11, 11, 24, 4, 65, 11, + 7, 2, 6, 5, 1, 72, 0, 66, 70, 67, 13, 93, 75, + 16, 77, 0, 69, 64, 11, 69, 71, 4, 6, 85, 73, + 92, 24, 27, 30, 14, 6, 10, 7, 4, 1, 69, 69, + 70, 82, 80, 85, 92, 86, 92, 108, 89, 88, 87, + 85, 94, 85, 92, 91, 83, 91, 9, 7, 5, 3, 76, + 69, 71, 87, 78, 85, 87, 104, 93, 98, 103, 76, + 75, 106, 74, 76, 85, 98, 89, 95, 98, 84, 98, + 92, 98, 103, 104, 109, 91, 98, 103, 73, 66, + 10, 64, 24, 33, 64, 4, 14, 18, 5, 16, 31, 4, + 0, 33, 0, 90, 111, 125, 126, 126, 126, 126, 4, + 39, 30, 26, 16, 28, 15, 8, 6, 65, 73, 68, 8, + 0, 20, 29, 1, 2, 9, 13, 6, 12, 25, 7, 1, 33, + 0, 90, 111, 125, 126, 126, 126, 126 }, + + { + + 30, + 5, 81, 30, 5, 81, 5, 16, 25, 12, 68, 78, 82, + 24, 59, 13, 16, 69, 7, 16, 64, 71, 1, 78, 97, + 68, 97, 126, 126, 126, 46, 64, 66, 7, 16, 64, + 83, 5, 11, 1, 64, 69, 71, 0, 79, 79, 92, 5, + 66, 70, 4, 73, 68, 82, 0, 74, 72, 81, 4, 2, + 22, 0, 0, 0, 2, 95, 97, 9, 72, 69, 17, 68, 84, + 32, 16, 9, 46, 51, 20, 23, 10, 23, 5, 5, 24, + 80, 76, 76, 82, 24, 65, 16, 22, 68, 74, 69, + 12, 1, 73, 72, 80, 18, 68, 7, 66, 71, 4, 67, + 6, 7, 6, 10, 17, 10, 0, 6, 62, 62, 58, 49, 7, + 65, 2, 11, 0, 11, 11, 3, 69, 27, 67, 98, 95, + 44, 9, 15, 5, 6, 10, 16, 40, 24, 11, 64, 2, 2, + 16, 20, 98, 2, 5, 78, 1, 68, 10, 8, 68, 19, + 12, 13, 84, 7, 74, 71, 8, 89, 9, 11, 8, 69, + 83, 5, 68, 0, 67, 82, 77, 66, 126, 62, 121, + 76, 80, 85, 95, 86, 92, 110, 100, 102, 85, + 105, 79, 62, 101, 8, 11, 4, 64, 65, 64, 69, + 68, 70, 84, 85, 83, 86, 88, 111, 75, 76, 96, + 69, 74, 76, 81, 86, 90, 88, 91, 93, 83, 95, + 99, 91, 91, 8, 29, 20, 10, 10, 13, 5, 1, 1, + 19, 14, 
32, 23, 20, 15, 23, 14, 17, 13, 10, + 14, 44, 36, 31, 27, 30, 12, 4, 2, 64, 70, 51, + 29, 6, 64, 13, 1, 73, 69, 4, 42, 31, 14, 5, + 19, 5, 3, 67, 68, 62, 74, 67, 2, 1, 0, 0, 8, + 12, 3, 10, 13, 22, 70, 65, 75, 69, 18, 74, 75, + 72, 0, 2, 69, 76, 69, 77, 69, 71, 86, 12, 11, + 25, 4, 65, 11, 7, 2, 6, 5, 1, 72, 0, 66, 70, + 67, 13, 94, 76, 16, 78, 0, 70, 65, 11, 70, 72, + 4, 6, 86, 73, 93, 23, 26, 29, 12, 4, 8, 5, 1, + 65, 72, 72, 73, 85, 82, 87, 97, 90, 96, 113, + 92, 91, 89, 87, 96, 87, 94, 93, 84, 91, 6, 4, + 3, 1, 79, 72, 73, 90, 80, 87, 89, 107, 95, + 100, 104, 77, 75, 108, 75, 78, 86, 100, 91, + 97, 99, 85, 99, 93, 99, 105, 105, 109, 92, + 100, 105, 72, 66, 11, 0, 25, 34, 64, 5, 14, + 18, 5, 17, 32, 4, 0, 32, 65, 93, 114, 126, + 126, 126, 126, 126, 5, 39, 30, 26, 16, 28, 15, + 8, 6, 64, 73, 68, 9, 0, 21, 30, 1, 2, 10, 14, + 6, 12, 25, 7, 1, 32, 65, 93, 114, 126, 126, + 126, 126, 126 }, + + { + + 28, + 4, 81, 28, 4, 81, 6, 17, 25, 12, 68, 80, 85, + 23, 59, 12, 18, 70, 7, 17, 64, 72, 1, 79, 99, + 69, 100, 126, 126, 126, 49, 0, 66, 7, 17, 64, + 84, 6, 11, 0, 64, 69, 71, 64, 80, 79, 92, 5, + 66, 70, 4, 73, 68, 82, 0, 74, 72, 81, 4, 2, + 22, 0, 0, 0, 3, 96, 97, 9, 73, 69, 16, 68, 85, + 34, 17, 10, 47, 53, 21, 24, 12, 24, 6, 6, 26, + 80, 76, 76, 82, 24, 65, 17, 24, 68, 74, 68, + 14, 1, 73, 72, 81, 18, 68, 7, 65, 70, 5, 66, + 6, 7, 6, 11, 18, 10, 1, 7, 62, 62, 61, 51, 7, + 65, 2, 11, 64, 11, 11, 3, 69, 29, 67, 100, 97, + 46, 9, 15, 5, 6, 10, 16, 42, 24, 11, 64, 1, 2, + 17, 21, 100, 2, 5, 78, 0, 70, 9, 8, 69, 19, + 12, 12, 85, 7, 75, 72, 7, 89, 8, 10, 7, 71, + 87, 3, 70, 65, 70, 85, 79, 68, 126, 62, 125, + 78, 82, 87, 98, 88, 96, 114, 104, 105, 87, + 108, 81, 62, 103, 6, 8, 1, 67, 68, 67, 71, 71, + 72, 86, 87, 85, 88, 90, 113, 75, 77, 97, 70, + 76, 77, 82, 87, 92, 90, 93, 94, 83, 96, 100, + 92, 91, 9, 30, 20, 10, 10, 14, 6, 2, 1, 21, + 15, 32, 23, 20, 16, 24, 15, 19, 15, 11, 15, + 44, 36, 31, 28, 31, 12, 4, 2, 0, 71, 52, 29, + 5, 64, 13, 1, 73, 69, 4, 42, 30, 13, 4, 19, 
5, + 3, 67, 68, 62, 73, 66, 3, 2, 2, 1, 10, 14, 4, + 11, 15, 24, 70, 65, 75, 68, 19, 74, 75, 72, 1, + 2, 69, 76, 69, 78, 69, 71, 86, 12, 11, 26, 4, + 66, 11, 7, 1, 6, 5, 1, 73, 0, 66, 70, 67, 14, + 95, 77, 16, 80, 64, 71, 66, 10, 71, 73, 4, 6, + 87, 73, 95, 22, 24, 28, 10, 2, 6, 3, 64, 67, + 75, 75, 76, 88, 85, 89, 101, 94, 101, 118, 96, + 95, 92, 90, 99, 89, 96, 94, 85, 92, 4, 2, 0, + 65, 82, 75, 76, 93, 83, 90, 92, 110, 97, 102, + 106, 78, 76, 110, 77, 79, 88, 102, 93, 99, + 101, 87, 101, 95, 100, 106, 106, 110, 94, 101, + 106, 72, 66, 11, 0, 26, 35, 64, 5, 15, 19, 5, + 17, 32, 4, 64, 31, 67, 96, 117, 126, 126, 126, + 126, 126, 5, 39, 30, 26, 16, 29, 15, 8, 6, 64, + 73, 68, 9, 0, 21, 30, 1, 2, 10, 14, 6, 12, 26, + 7, 0, 31, 67, 96, 117, 126, 126, 126, 126, 126 }, + + { + + 27, + 4, 81, 27, 4, 81, 8, 18, 26, 12, 68, 81, 87, + 22, 60, 12, 20, 70, 8, 18, 64, 73, 1, 79, 100, + 70, 102, 126, 126, 126, 52, 1, 65, 8, 18, 64, + 84, 7, 11, 0, 0, 69, 70, 64, 80, 79, 92, 5, + 66, 69, 4, 73, 68, 82, 0, 74, 72, 80, 4, 2, + 22, 0, 0, 0, 4, 96, 97, 9, 74, 69, 15, 68, 85, + 36, 19, 11, 49, 55, 23, 25, 14, 26, 7, 8, 29, + 80, 76, 76, 81, 24, 65, 18, 26, 67, 73, 67, + 16, 1, 73, 72, 81, 18, 68, 7, 64, 69, 6, 65, + 7, 8, 7, 12, 20, 11, 3, 8, 62, 62, 62, 54, 8, + 65, 3, 12, 64, 11, 11, 4, 68, 32, 67, 102, 98, + 49, 9, 15, 5, 6, 10, 17, 44, 25, 11, 64, 1, 2, + 18, 22, 101, 3, 5, 78, 64, 71, 8, 8, 70, 19, + 12, 12, 86, 8, 76, 72, 6, 89, 8, 10, 7, 73, + 90, 2, 71, 67, 72, 87, 81, 70, 126, 62, 126, + 80, 84, 89, 101, 90, 99, 118, 108, 107, 89, + 111, 83, 62, 105, 4, 6, 64, 69, 70, 69, 73, + 73, 74, 88, 89, 86, 89, 91, 115, 75, 77, 97, + 70, 77, 78, 83, 88, 93, 91, 95, 95, 83, 97, + 101, 92, 90, 10, 31, 20, 10, 10, 15, 7, 3, 2, + 24, 16, 32, 23, 20, 17, 25, 16, 21, 17, 13, + 16, 45, 37, 32, 29, 32, 12, 5, 2, 1, 71, 53, + 29, 4, 64, 14, 2, 73, 68, 4, 42, 30, 12, 3, + 19, 5, 3, 67, 67, 62, 72, 65, 5, 4, 4, 2, 12, + 16, 5, 13, 17, 26, 69, 64, 74, 67, 21, 73, 74, + 71, 2, 3, 69, 76, 69, 79, 
69, 71, 86, 12, 12, + 27, 5, 66, 12, 7, 1, 6, 5, 1, 74, 0, 65, 69, + 67, 15, 95, 78, 16, 81, 65, 71, 66, 10, 71, + 74, 4, 6, 88, 73, 96, 21, 23, 28, 9, 0, 4, 1, + 66, 69, 77, 78, 79, 91, 88, 91, 105, 98, 105, + 123, 99, 98, 95, 92, 101, 90, 97, 95, 85, 93, + 2, 0, 65, 67, 84, 77, 78, 96, 85, 92, 94, 112, + 99, 104, 108, 78, 77, 111, 78, 80, 90, 104, + 94, 100, 103, 88, 103, 96, 100, 107, 106, 111, + 96, 102, 107, 72, 65, 12, 0, 27, 37, 0, 6, 16, + 20, 5, 18, 33, 4, 64, 30, 69, 98, 120, 126, + 126, 126, 126, 126, 5, 39, 30, 27, 17, 30, 16, + 9, 7, 64, 73, 68, 9, 1, 22, 31, 1, 3, 10, 14, + 6, 13, 27, 7, 0, 30, 69, 98, 120, 126, 126, + 126, 126, 126 }, + + { + + 26, + 4, 81, 26, 4, 81, 10, 20, 26, 12, 69, 82, 89, + 21, 60, 12, 22, 70, 8, 19, 65, 73, 1, 80, 102, + 71, 105, 126, 126, 126, 55, 2, 65, 8, 19, 65, + 84, 8, 11, 64, 0, 68, 69, 64, 80, 79, 92, 5, + 66, 69, 4, 73, 67, 81, 0, 74, 72, 80, 5, 2, + 22, 0, 0, 0, 4, 96, 97, 10, 75, 69, 14, 69, + 85, 39, 20, 12, 51, 57, 24, 27, 15, 27, 9, 9, + 31, 80, 75, 75, 81, 24, 65, 19, 28, 67, 72, + 67, 18, 1, 74, 73, 82, 18, 68, 8, 64, 68, 7, + 65, 8, 8, 8, 14, 21, 12, 4, 9, 62, 62, 62, 57, + 8, 64, 3, 12, 64, 11, 12, 4, 68, 34, 67, 104, + 100, 52, 9, 16, 5, 6, 10, 17, 45, 26, 12, 65, + 1, 2, 18, 22, 102, 3, 5, 79, 64, 72, 8, 7, 70, + 19, 12, 12, 87, 8, 77, 73, 6, 89, 8, 9, 6, 75, + 93, 1, 73, 69, 74, 89, 84, 72, 126, 62, 126, + 82, 86, 92, 104, 93, 102, 123, 112, 110, 91, + 115, 85, 62, 106, 2, 4, 66, 71, 72, 71, 75, + 75, 76, 91, 91, 88, 91, 93, 117, 75, 77, 98, + 71, 78, 79, 85, 89, 94, 92, 96, 96, 84, 98, + 101, 93, 89, 12, 32, 21, 10, 11, 15, 7, 3, 3, + 26, 18, 32, 24, 21, 17, 26, 17, 22, 18, 15, + 16, 46, 38, 33, 29, 33, 13, 5, 3, 1, 71, 53, + 29, 4, 65, 14, 2, 73, 68, 4, 41, 29, 11, 2, + 19, 5, 4, 67, 67, 62, 70, 0, 6, 6, 5, 3, 13, + 17, 6, 15, 18, 28, 68, 0, 74, 66, 23, 73, 74, + 71, 2, 3, 69, 77, 69, 79, 69, 71, 87, 13, 12, + 28, 5, 66, 12, 7, 1, 6, 5, 1, 74, 0, 65, 69, + 67, 15, 96, 79, 16, 82, 65, 72, 67, 10, 72, + 
75, 4, 6, 89, 73, 97, 20, 22, 27, 7, 65, 2, + 64, 69, 72, 80, 81, 82, 94, 90, 93, 110, 102, + 109, 126, 102, 101, 97, 94, 104, 92, 99, 97, + 86, 93, 64, 66, 68, 69, 87, 80, 81, 99, 87, + 95, 96, 115, 101, 106, 109, 79, 77, 113, 79, + 82, 91, 106, 96, 102, 104, 89, 104, 97, 101, + 109, 107, 111, 97, 104, 109, 71, 65, 13, 1, + 28, 38, 0, 7, 16, 20, 5, 18, 34, 4, 64, 29, + 71, 101, 123, 126, 126, 126, 126, 126, 6, 39, + 30, 27, 17, 30, 16, 9, 7, 0, 73, 68, 10, 1, + 23, 32, 1, 3, 11, 15, 6, 13, 27, 7, 0, 29, 71, + 101, 123, 126, 126, 126, 126, 126 }, + + { + + 25, + 4, 82, 25, 4, 82, 12, 21, 27, 12, 69, 84, 91, + 20, 60, 12, 23, 70, 9, 19, 65, 74, 2, 80, 103, + 73, 107, 126, 126, 126, 57, 3, 65, 9, 19, 65, + 85, 8, 11, 64, 0, 68, 69, 64, 80, 78, 91, 5, + 65, 68, 4, 72, 67, 81, 0, 74, 72, 80, 5, 2, + 22, 0, 0, 0, 5, 96, 97, 10, 75, 70, 14, 69, + 85, 41, 21, 13, 52, 60, 26, 28, 17, 29, 10, + 10, 33, 80, 75, 75, 81, 24, 65, 20, 31, 67, + 71, 66, 20, 0, 74, 73, 82, 19, 68, 8, 0, 68, + 7, 64, 9, 9, 9, 15, 23, 12, 5, 10, 62, 62, 62, + 60, 8, 64, 3, 13, 64, 11, 12, 5, 67, 36, 66, + 106, 102, 55, 9, 16, 5, 6, 11, 18, 47, 27, 12, + 65, 1, 2, 19, 23, 103, 3, 5, 79, 65, 73, 7, 7, + 71, 19, 12, 12, 89, 8, 78, 74, 5, 89, 8, 9, 6, + 77, 96, 0, 75, 72, 77, 91, 86, 74, 126, 62, + 126, 84, 87, 94, 108, 95, 105, 126, 117, 113, + 93, 118, 87, 62, 108, 0, 2, 68, 74, 75, 73, + 78, 77, 78, 93, 94, 90, 93, 94, 118, 75, 77, + 98, 72, 79, 80, 86, 90, 95, 93, 98, 97, 84, + 99, 102, 93, 89, 13, 33, 21, 10, 11, 16, 8, 4, + 3, 28, 19, 33, 24, 21, 18, 27, 18, 24, 20, 17, + 17, 46, 38, 33, 30, 34, 13, 5, 3, 2, 71, 54, + 29, 3, 65, 14, 2, 73, 67, 3, 41, 29, 10, 1, + 18, 5, 4, 67, 67, 62, 69, 1, 7, 7, 7, 4, 15, + 19, 7, 16, 20, 30, 68, 1, 73, 65, 25, 72, 74, + 71, 3, 4, 69, 77, 69, 80, 69, 71, 87, 13, 13, + 29, 5, 66, 13, 8, 1, 7, 6, 1, 75, 0, 65, 69, + 67, 16, 97, 80, 16, 83, 66, 73, 67, 10, 73, + 75, 4, 5, 91, 74, 98, 18, 21, 26, 6, 67, 0, + 66, 71, 74, 82, 84, 84, 97, 93, 95, 114, 106, + 113, 126, 
106, 104, 100, 96, 106, 94, 101, 98, + 87, 94, 66, 68, 70, 72, 90, 82, 83, 102, 90, + 97, 99, 118, 103, 107, 111, 80, 78, 114, 81, + 83, 93, 108, 97, 104, 106, 90, 106, 98, 102, + 110, 108, 112, 99, 105, 110, 71, 64, 13, 1, + 29, 39, 0, 7, 17, 21, 5, 19, 35, 4, 64, 28, + 73, 103, 126, 126, 126, 126, 126, 126, 6, 39, + 30, 27, 17, 31, 17, 9, 7, 0, 72, 67, 10, 2, + 24, 33, 2, 4, 11, 15, 6, 13, 28, 7, 0, 28, 73, + 103, 126, 126, 126, 126, 126, 126 }, + + { + + 23, + 4, 82, 23, 4, 82, 13, 23, 27, 12, 70, 85, 93, + 19, 60, 11, 25, 70, 9, 20, 65, 74, 2, 81, 105, + 74, 110, 126, 126, 126, 60, 4, 65, 9, 20, 65, + 85, 9, 11, 65, 0, 68, 68, 64, 80, 78, 91, 5, + 65, 68, 4, 72, 66, 81, 0, 74, 72, 80, 5, 2, + 22, 0, 0, 0, 5, 97, 97, 11, 76, 70, 13, 70, + 85, 44, 22, 14, 54, 62, 27, 30, 19, 30, 11, + 11, 35, 80, 75, 74, 81, 24, 65, 21, 33, 67, + 71, 66, 22, 0, 75, 74, 83, 19, 68, 9, 0, 67, + 8, 64, 10, 9, 9, 17, 24, 13, 6, 11, 62, 62, + 62, 62, 8, 64, 3, 13, 64, 11, 13, 5, 67, 38, + 66, 108, 104, 57, 9, 16, 5, 6, 11, 18, 48, 28, + 12, 65, 1, 2, 19, 24, 104, 3, 5, 80, 65, 74, + 7, 7, 71, 19, 12, 12, 90, 8, 79, 75, 5, 89, 7, + 8, 5, 79, 100, 64, 77, 74, 79, 93, 89, 76, + 126, 62, 126, 86, 89, 96, 111, 98, 109, 126, + 121, 116, 95, 122, 89, 62, 110, 65, 0, 71, 76, + 77, 75, 80, 79, 80, 95, 96, 92, 95, 96, 120, + 75, 77, 99, 73, 80, 81, 87, 91, 97, 94, 100, + 98, 85, 100, 103, 94, 88, 15, 34, 22, 10, 11, + 17, 8, 4, 4, 30, 20, 33, 24, 22, 19, 28, 19, + 25, 21, 18, 18, 47, 39, 34, 30, 35, 14, 5, 3, + 2, 71, 54, 29, 2, 65, 14, 2, 73, 67, 3, 40, + 28, 9, 0, 18, 5, 4, 67, 67, 62, 68, 3, 8, 9, + 8, 5, 17, 21, 8, 18, 22, 32, 67, 2, 73, 64, + 26, 72, 74, 71, 3, 4, 69, 77, 69, 80, 69, 71, + 88, 14, 13, 30, 5, 66, 13, 8, 1, 7, 6, 1, 75, + 0, 65, 69, 67, 16, 98, 81, 16, 84, 67, 74, 68, + 10, 74, 76, 4, 5, 92, 74, 99, 17, 20, 25, 4, + 69, 65, 68, 73, 77, 85, 87, 87, 100, 95, 97, + 118, 110, 117, 126, 109, 108, 102, 99, 109, + 96, 103, 100, 88, 95, 68, 71, 73, 74, 93, 85, + 86, 105, 92, 100, 
101, 121, 105, 109, 112, 81, + 79, 116, 82, 85, 95, 110, 99, 106, 108, 91, + 107, 99, 103, 111, 109, 113, 100, 106, 111, + 71, 64, 14, 2, 30, 40, 0, 8, 17, 21, 5, 19, + 36, 4, 64, 27, 75, 106, 126, 126, 126, 126, + 126, 126, 6, 39, 30, 27, 17, 31, 17, 9, 7, 1, + 72, 67, 11, 2, 24, 34, 2, 4, 11, 15, 6, 13, + 28, 7, 0, 27, 75, 106, 126, 126, 126, 126, + 126, 126 }, + + { + + 22, + 4, 82, 22, 4, 82, 15, 24, 27, 12, 70, 86, 95, + 18, 60, 11, 27, 70, 9, 21, 66, 75, 2, 82, 107, + 75, 112, 126, 126, 126, 62, 5, 64, 9, 21, 66, + 85, 10, 11, 65, 0, 67, 67, 64, 80, 78, 91, 5, + 65, 68, 4, 72, 66, 80, 0, 74, 72, 80, 6, 2, + 22, 0, 0, 0, 6, 97, 97, 11, 77, 70, 12, 70, + 85, 46, 23, 15, 56, 62, 29, 31, 20, 32, 13, + 12, 38, 80, 74, 74, 80, 24, 65, 22, 35, 67, + 70, 65, 24, 0, 75, 74, 83, 19, 68, 9, 1, 66, + 9, 0, 11, 10, 10, 18, 26, 14, 8, 12, 62, 62, + 62, 62, 9, 0, 4, 14, 64, 11, 13, 5, 66, 40, + 66, 110, 106, 60, 9, 17, 5, 6, 11, 19, 50, 29, + 13, 66, 1, 2, 20, 24, 105, 4, 5, 80, 66, 75, + 6, 6, 72, 19, 12, 12, 91, 9, 80, 75, 4, 89, 7, + 7, 4, 81, 103, 65, 78, 76, 81, 95, 91, 78, + 126, 62, 126, 88, 91, 99, 114, 100, 112, 126, + 125, 118, 97, 125, 91, 62, 111, 67, 65, 73, + 78, 79, 77, 82, 81, 82, 98, 98, 94, 96, 98, + 122, 75, 77, 99, 74, 81, 82, 89, 92, 98, 95, + 101, 99, 85, 101, 103, 95, 87, 16, 35, 22, 10, + 12, 17, 9, 5, 5, 32, 22, 33, 25, 22, 19, 29, + 20, 27, 23, 20, 18, 48, 40, 35, 31, 36, 14, 6, + 4, 3, 71, 55, 29, 2, 66, 14, 2, 73, 66, 3, 40, + 27, 8, 64, 18, 5, 5, 67, 66, 62, 66, 4, 10, + 11, 10, 6, 18, 22, 9, 20, 23, 34, 66, 3, 72, + 0, 28, 72, 74, 70, 4, 5, 69, 78, 69, 81, 69, + 71, 88, 14, 13, 31, 5, 66, 13, 8, 1, 7, 6, 1, + 76, 0, 65, 69, 67, 17, 99, 82, 16, 85, 67, 74, + 69, 10, 74, 77, 4, 5, 93, 74, 100, 16, 19, 24, + 2, 71, 67, 70, 76, 79, 88, 90, 90, 103, 98, + 99, 123, 114, 121, 126, 112, 111, 105, 101, + 111, 98, 104, 101, 88, 95, 71, 73, 75, 76, 96, + 88, 88, 108, 94, 102, 103, 124, 107, 111, 114, + 81, 79, 118, 83, 86, 96, 112, 101, 108, 109, + 92, 
109, 100, 103, 113, 110, 113, 102, 108, + 113, 70, 64, 15, 2, 31, 41, 1, 9, 18, 22, 5, + 20, 37, 4, 64, 26, 77, 109, 126, 126, 126, + 126, 126, 126, 7, 39, 30, 27, 17, 32, 17, 10, + 8, 1, 72, 67, 11, 2, 25, 35, 2, 4, 12, 16, 6, + 14, 29, 7, 0, 26, 77, 109, 126, 126, 126, 126, + 126, 126 }, + + { + + 21, + 4, 82, 21, 4, 82, 17, 26, 28, 12, 71, 88, 97, + 17, 60, 11, 29, 70, 10, 22, 66, 75, 2, 82, + 108, 76, 115, 126, 126, 126, 62, 6, 64, 10, + 22, 66, 86, 11, 11, 66, 0, 67, 67, 64, 80, 78, + 91, 5, 65, 67, 4, 72, 65, 80, 0, 74, 72, 80, + 6, 2, 22, 0, 0, 0, 6, 97, 97, 12, 78, 70, 11, + 71, 85, 49, 24, 16, 57, 62, 30, 33, 22, 33, + 14, 13, 40, 80, 74, 73, 80, 24, 65, 23, 37, + 67, 69, 65, 26, 0, 76, 75, 84, 19, 68, 10, 1, + 65, 10, 0, 12, 10, 11, 20, 27, 14, 9, 13, 62, + 62, 62, 62, 9, 0, 4, 14, 64, 11, 14, 6, 66, + 42, 66, 112, 108, 62, 9, 17, 5, 6, 11, 19, 51, + 30, 13, 66, 1, 2, 20, 25, 106, 4, 5, 81, 66, + 76, 6, 6, 72, 19, 12, 12, 92, 9, 81, 76, 4, + 89, 7, 7, 4, 83, 106, 66, 80, 78, 84, 97, 94, + 80, 126, 62, 126, 90, 93, 101, 117, 103, 115, + 126, 126, 121, 99, 126, 93, 62, 113, 69, 67, + 75, 81, 82, 79, 84, 83, 84, 100, 100, 96, 98, + 99, 124, 75, 77, 100, 75, 82, 83, 90, 93, 99, + 96, 103, 100, 86, 102, 104, 95, 87, 18, 36, + 23, 10, 12, 18, 9, 5, 5, 34, 23, 33, 25, 23, + 20, 30, 21, 28, 24, 22, 19, 48, 40, 35, 31, + 37, 15, 6, 4, 3, 71, 55, 29, 1, 66, 14, 2, 73, + 66, 3, 39, 27, 7, 65, 18, 5, 5, 67, 66, 62, + 65, 6, 11, 12, 11, 7, 20, 24, 10, 21, 25, 36, + 66, 4, 72, 1, 30, 71, 74, 70, 4, 5, 69, 78, + 69, 81, 69, 71, 89, 15, 14, 32, 5, 66, 14, 8, + 1, 7, 6, 1, 76, 0, 65, 69, 67, 17, 100, 83, + 16, 86, 68, 75, 69, 10, 75, 78, 4, 5, 94, 74, + 101, 15, 18, 23, 1, 73, 69, 72, 78, 82, 90, + 93, 93, 106, 100, 101, 126, 118, 125, 126, + 116, 114, 107, 103, 114, 100, 106, 103, 89, + 96, 73, 76, 78, 79, 99, 90, 91, 111, 97, 105, + 106, 126, 109, 113, 115, 82, 80, 119, 85, 88, + 98, 114, 102, 110, 111, 93, 110, 101, 104, + 114, 111, 114, 103, 109, 114, 70, 0, 15, 3, + 
32, 42, 1, 9, 18, 22, 5, 20, 38, 4, 64, 25, + 79, 111, 126, 126, 126, 126, 126, 126, 7, 39, + 30, 27, 17, 32, 18, 10, 8, 2, 72, 67, 12, 3, + 26, 36, 2, 5, 12, 16, 6, 14, 29, 7, 0, 25, 79, + 111, 126, 126, 126, 126, 126, 126 }, + + { + + 20, + 4, 82, 20, 4, 82, 19, 27, 28, 12, 71, 89, 99, + 16, 60, 11, 31, 70, 10, 23, 66, 76, 2, 83, + 110, 77, 117, 126, 126, 126, 62, 7, 64, 10, + 23, 66, 86, 12, 11, 66, 0, 67, 66, 64, 80, 78, + 91, 5, 65, 67, 4, 72, 65, 80, 0, 74, 72, 80, + 6, 2, 22, 0, 0, 0, 7, 97, 97, 12, 79, 70, 10, + 71, 85, 51, 25, 17, 59, 62, 32, 34, 24, 35, + 15, 14, 42, 80, 74, 73, 80, 24, 65, 24, 39, + 67, 68, 64, 28, 0, 76, 75, 84, 19, 68, 10, 2, + 64, 11, 1, 13, 11, 12, 21, 29, 15, 10, 14, 62, + 62, 62, 62, 9, 0, 4, 15, 64, 11, 14, 6, 65, + 44, 66, 114, 110, 62, 9, 17, 5, 6, 11, 20, 53, + 31, 13, 66, 1, 2, 21, 26, 107, 4, 5, 81, 67, + 77, 5, 6, 73, 19, 12, 12, 93, 9, 82, 77, 3, + 89, 7, 6, 3, 85, 109, 67, 82, 80, 86, 99, 96, + 82, 126, 62, 126, 92, 95, 103, 120, 105, 118, + 126, 126, 124, 101, 126, 95, 62, 115, 71, 69, + 77, 83, 84, 81, 86, 85, 86, 102, 102, 98, 100, + 101, 126, 75, 77, 100, 76, 83, 84, 91, 94, + 100, 97, 105, 101, 86, 103, 105, 96, 86, 19, + 37, 23, 10, 12, 19, 10, 6, 6, 36, 24, 33, 25, + 23, 21, 31, 22, 30, 26, 24, 20, 49, 41, 36, + 32, 38, 15, 6, 4, 4, 71, 56, 29, 0, 66, 14, 2, + 73, 65, 3, 39, 26, 6, 66, 18, 5, 5, 67, 66, + 62, 64, 7, 12, 14, 13, 8, 22, 26, 11, 23, 27, + 38, 65, 5, 71, 2, 32, 71, 74, 70, 5, 6, 69, + 78, 69, 82, 69, 71, 89, 15, 14, 33, 5, 66, 14, + 8, 1, 7, 6, 1, 77, 0, 65, 69, 67, 18, 101, 84, + 16, 87, 69, 76, 70, 10, 76, 79, 4, 5, 95, 74, + 102, 14, 17, 22, 64, 75, 71, 74, 80, 84, 93, + 96, 96, 109, 103, 103, 126, 122, 126, 126, + 119, 117, 110, 105, 116, 102, 108, 104, 90, + 97, 75, 78, 80, 81, 102, 93, 93, 114, 99, 107, + 108, 126, 111, 115, 117, 83, 81, 121, 86, 89, + 100, 116, 104, 112, 113, 94, 112, 102, 105, + 115, 112, 115, 105, 110, 115, 70, 0, 16, 3, + 33, 43, 1, 10, 19, 23, 5, 21, 39, 4, 64, 24, + 81, 114, 
126, 126, 126, 126, 126, 126, 7, 39, + 30, 27, 17, 33, 18, 10, 8, 2, 72, 67, 12, 3, + 27, 37, 2, 5, 12, 16, 6, 14, 30, 7, 0, 24, 81, + 114, 126, 126, 126, 126, 126, 126 }, + + { + + 18, + 3, 83, 18, 3, 83, 20, 28, 28, 12, 72, 91, 102, + 15, 60, 10, 32, 71, 10, 23, 67, 77, 2, 84, + 112, 79, 120, 126, 126, 126, 62, 7, 64, 10, + 23, 67, 87, 12, 11, 67, 0, 67, 66, 65, 81, 78, + 91, 4, 65, 67, 4, 72, 65, 80, 0, 74, 73, 80, + 6, 2, 22, 0, 0, 0, 7, 98, 97, 12, 80, 71, 9, + 72, 86, 53, 26, 18, 60, 62, 33, 35, 25, 36, + 16, 15, 44, 80, 74, 73, 80, 24, 65, 24, 41, + 67, 68, 64, 29, 64, 77, 76, 85, 19, 68, 10, 2, + 64, 11, 1, 13, 11, 12, 22, 30, 15, 11, 15, 62, + 62, 62, 62, 9, 0, 4, 15, 65, 11, 14, 6, 65, + 46, 66, 116, 112, 62, 9, 17, 5, 6, 11, 20, 54, + 31, 13, 67, 0, 2, 21, 26, 109, 4, 5, 82, 68, + 79, 4, 5, 74, 19, 12, 11, 95, 9, 83, 78, 2, + 89, 6, 5, 2, 88, 113, 69, 84, 83, 89, 102, 99, + 84, 126, 62, 126, 95, 97, 106, 124, 108, 122, + 126, 126, 126, 103, 126, 97, 62, 117, 74, 72, + 80, 86, 87, 84, 89, 88, 88, 105, 105, 100, + 102, 103, 126, 75, 78, 101, 77, 85, 86, 93, + 96, 102, 99, 107, 102, 87, 104, 106, 97, 86, + 20, 37, 23, 10, 12, 19, 10, 6, 6, 38, 25, 33, + 25, 23, 21, 31, 23, 31, 27, 25, 20, 49, 41, + 36, 32, 39, 15, 6, 4, 4, 72, 56, 28, 64, 67, + 14, 2, 73, 65, 2, 38, 25, 4, 67, 17, 5, 5, 67, + 66, 62, 0, 8, 13, 15, 14, 9, 23, 27, 12, 24, + 28, 40, 65, 5, 71, 3, 33, 71, 74, 70, 5, 6, + 69, 79, 70, 83, 69, 72, 90, 15, 14, 34, 5, 67, + 14, 8, 0, 7, 6, 1, 78, 0, 65, 69, 67, 18, 102, + 85, 16, 89, 70, 77, 71, 9, 77, 80, 4, 4, 97, + 75, 104, 12, 15, 21, 66, 77, 74, 77, 83, 87, + 96, 99, 99, 113, 106, 105, 126, 126, 126, 126, + 123, 121, 113, 108, 119, 104, 110, 106, 91, + 98, 78, 81, 83, 84, 105, 96, 96, 118, 102, + 110, 111, 126, 113, 117, 119, 84, 82, 123, 88, + 91, 102, 119, 106, 114, 115, 96, 114, 104, + 106, 117, 113, 116, 107, 112, 117, 70, 0, 16, + 3, 34, 44, 1, 10, 19, 23, 5, 21, 39, 4, 65, + 22, 83, 117, 126, 126, 126, 126, 126, 126, 7, + 39, 30, 27, 17, 
33, 18, 10, 8, 2, 72, 67, 12, + 3, 27, 37, 2, 5, 12, 16, 6, 14, 30, 6, 64, 22, + 83, 117, 126, 126, 126, 126, 126, 126 }, + + { + + 17, + 3, 83, 17, 3, 83, 22, 30, 29, 13, 72, 92, 104, + 14, 61, 10, 34, 71, 11, 24, 67, 77, 3, 84, + 113, 80, 122, 126, 126, 126, 62, 8, 0, 11, 24, + 67, 87, 13, 11, 67, 1, 66, 65, 65, 81, 77, 90, + 4, 64, 66, 4, 71, 64, 79, 1, 73, 73, 79, 7, 2, + 22, 0, 0, 0, 8, 98, 97, 13, 80, 71, 9, 72, 86, + 56, 28, 20, 62, 62, 35, 37, 27, 38, 18, 17, + 47, 80, 73, 72, 79, 24, 65, 25, 44, 66, 67, 0, + 31, 64, 77, 76, 85, 20, 68, 11, 3, 0, 12, 2, + 14, 12, 13, 24, 32, 16, 13, 17, 62, 62, 62, + 62, 10, 1, 5, 16, 65, 12, 15, 7, 64, 49, 65, + 118, 113, 62, 9, 18, 5, 7, 12, 21, 56, 32, 14, + 67, 0, 2, 22, 27, 110, 5, 5, 82, 68, 80, 4, 5, + 74, 19, 12, 11, 96, 10, 83, 78, 2, 89, 6, 5, + 2, 90, 116, 70, 85, 85, 91, 104, 101, 86, 126, + 62, 126, 97, 98, 108, 126, 110, 125, 126, 126, + 126, 105, 126, 99, 62, 118, 76, 74, 82, 88, + 89, 86, 91, 90, 90, 107, 107, 101, 103, 104, + 126, 75, 78, 101, 77, 86, 87, 94, 97, 103, + 100, 108, 103, 87, 105, 106, 97, 85, 22, 38, + 24, 10, 13, 20, 11, 7, 7, 41, 27, 34, 26, 24, + 22, 32, 25, 33, 29, 27, 21, 50, 42, 37, 33, + 40, 16, 7, 5, 5, 72, 57, 28, 64, 67, 15, 3, + 73, 64, 2, 38, 25, 3, 68, 17, 6, 6, 66, 65, + 62, 2, 10, 15, 17, 16, 11, 25, 29, 14, 26, 30, + 43, 64, 6, 70, 5, 35, 70, 73, 69, 6, 7, 68, + 79, 70, 83, 69, 72, 90, 16, 15, 35, 6, 67, 15, + 9, 0, 8, 7, 1, 78, 1, 64, 68, 66, 19, 102, 86, + 16, 90, 70, 77, 71, 9, 77, 80, 4, 4, 98, 75, + 105, 11, 14, 21, 67, 78, 76, 79, 85, 89, 98, + 101, 101, 116, 108, 107, 126, 126, 126, 126, + 126, 124, 115, 110, 121, 105, 111, 107, 91, + 98, 80, 83, 85, 86, 107, 98, 98, 121, 104, + 112, 113, 126, 114, 118, 120, 84, 82, 124, 89, + 92, 103, 121, 107, 115, 116, 97, 115, 105, + 106, 118, 113, 116, 108, 113, 118, 69, 1, 17, + 4, 36, 46, 2, 11, 20, 24, 6, 22, 40, 4, 65, + 21, 85, 119, 126, 126, 126, 126, 126, 126, 8, + 39, 31, 28, 18, 34, 19, 11, 9, 3, 71, 66, 13, + 4, 28, 38, 3, 
6, 13, 17, 6, 15, 31, 6, 64, 21, + 85, 119, 126, 126, 126, 126, 126, 126 }, + + { + + 16, + 3, 83, 16, 3, 83, 24, 31, 29, 13, 72, 93, 106, + 13, 61, 10, 36, 71, 11, 25, 67, 78, 3, 85, + 115, 81, 125, 126, 126, 126, 62, 9, 0, 11, 25, + 67, 87, 14, 11, 68, 1, 66, 64, 65, 81, 77, 90, + 4, 64, 66, 4, 71, 64, 79, 1, 73, 73, 79, 7, 2, + 22, 0, 0, 0, 9, 98, 97, 13, 81, 71, 8, 72, 86, + 58, 29, 21, 62, 62, 36, 38, 29, 39, 19, 18, + 49, 80, 73, 72, 79, 24, 65, 26, 46, 66, 66, 1, + 33, 64, 77, 76, 86, 20, 68, 11, 4, 1, 13, 3, + 15, 12, 14, 25, 33, 17, 14, 18, 62, 62, 62, + 62, 10, 1, 5, 16, 65, 12, 15, 7, 64, 51, 65, + 120, 115, 62, 9, 18, 5, 7, 12, 21, 58, 33, 14, + 67, 0, 2, 23, 28, 111, 5, 5, 82, 69, 81, 3, 5, + 75, 19, 12, 11, 97, 10, 84, 79, 1, 89, 6, 4, + 1, 92, 119, 71, 87, 87, 93, 106, 103, 88, 126, + 62, 126, 99, 100, 110, 126, 112, 126, 126, + 126, 126, 107, 126, 101, 62, 120, 78, 76, 84, + 90, 91, 88, 93, 92, 92, 109, 109, 103, 105, + 106, 126, 75, 78, 102, 78, 87, 88, 95, 98, + 104, 101, 110, 104, 87, 106, 107, 98, 84, 23, + 39, 24, 10, 13, 21, 12, 8, 8, 43, 28, 34, 26, + 24, 23, 33, 26, 35, 31, 29, 22, 51, 43, 38, + 34, 41, 16, 7, 5, 6, 72, 58, 28, 65, 67, 15, + 3, 73, 64, 2, 38, 24, 2, 69, 17, 6, 6, 66, 65, + 62, 3, 11, 16, 19, 18, 12, 27, 31, 15, 28, 32, + 45, 0, 7, 70, 6, 37, 70, 73, 69, 7, 7, 68, 79, + 70, 84, 69, 72, 90, 16, 15, 36, 6, 67, 15, 9, + 0, 8, 7, 1, 79, 1, 64, 68, 66, 20, 103, 87, + 16, 91, 71, 78, 72, 9, 78, 81, 4, 4, 99, 75, + 106, 10, 13, 20, 69, 80, 78, 81, 87, 91, 101, + 104, 104, 119, 111, 109, 126, 126, 126, 126, + 126, 126, 118, 112, 124, 107, 113, 108, 92, + 99, 82, 85, 88, 88, 110, 101, 101, 124, 106, + 115, 115, 126, 116, 120, 122, 85, 83, 126, 90, + 93, 105, 123, 109, 117, 118, 98, 117, 106, + 107, 119, 114, 117, 110, 114, 119, 69, 1, 18, + 4, 37, 47, 2, 12, 21, 25, 6, 22, 41, 4, 65, + 20, 87, 122, 126, 126, 126, 126, 126, 126, 8, + 39, 31, 28, 18, 35, 19, 11, 9, 3, 71, 66, 13, + 4, 29, 39, 3, 6, 13, 17, 6, 15, 32, 6, 64, 20, + 87, 
122, 126, 126, 126, 126, 126, 126 }, + + { + + 15, + 3, 83, 15, 3, 83, 26, 33, 30, 13, 73, 95, 108, + 12, 61, 10, 38, 71, 12, 26, 67, 78, 3, 85, + 116, 82, 126, 126, 126, 126, 62, 10, 0, 12, + 26, 67, 88, 15, 11, 68, 1, 66, 64, 65, 81, 77, + 90, 4, 64, 65, 4, 71, 0, 79, 1, 73, 73, 79, 7, + 2, 22, 0, 0, 0, 9, 98, 97, 14, 82, 71, 7, 73, + 86, 61, 30, 22, 62, 62, 38, 40, 31, 41, 20, + 19, 51, 80, 73, 71, 79, 24, 65, 27, 48, 66, + 65, 1, 35, 64, 78, 77, 86, 20, 68, 12, 4, 2, + 14, 3, 16, 13, 15, 27, 35, 17, 15, 19, 62, 62, + 62, 62, 10, 1, 5, 17, 65, 12, 16, 8, 0, 53, + 65, 122, 117, 62, 9, 18, 5, 7, 12, 22, 59, 34, + 14, 67, 0, 2, 23, 29, 112, 5, 5, 83, 69, 82, + 3, 5, 75, 19, 12, 11, 98, 10, 85, 80, 1, 89, + 6, 4, 1, 94, 122, 72, 89, 89, 96, 108, 106, + 90, 126, 62, 126, 101, 102, 112, 126, 115, + 126, 126, 126, 126, 109, 126, 103, 62, 122, + 80, 78, 86, 93, 94, 90, 95, 94, 94, 111, 111, + 105, 107, 107, 126, 75, 78, 102, 79, 88, 89, + 96, 99, 105, 102, 112, 105, 88, 107, 108, 98, + 84, 25, 40, 25, 10, 13, 22, 12, 8, 8, 45, 29, + 34, 26, 25, 24, 34, 27, 36, 32, 31, 23, 51, + 43, 38, 34, 42, 17, 7, 5, 6, 72, 58, 28, 66, + 67, 15, 3, 73, 0, 2, 37, 24, 1, 70, 17, 6, 6, + 66, 65, 62, 4, 13, 17, 20, 19, 13, 29, 33, 16, + 29, 34, 47, 0, 8, 69, 7, 39, 69, 73, 69, 7, 8, + 68, 79, 70, 84, 69, 72, 91, 17, 16, 37, 6, 67, + 16, 9, 0, 8, 7, 1, 79, 1, 64, 68, 66, 20, 104, + 88, 16, 92, 72, 79, 72, 9, 79, 82, 4, 4, 100, + 75, 107, 9, 12, 19, 70, 82, 80, 83, 89, 94, + 103, 107, 107, 122, 113, 111, 126, 126, 126, + 126, 126, 126, 120, 114, 126, 109, 115, 110, + 93, 100, 84, 88, 90, 91, 113, 103, 103, 126, + 109, 117, 118, 126, 118, 122, 123, 86, 84, + 126, 92, 95, 107, 125, 110, 119, 120, 99, 118, + 107, 108, 120, 115, 118, 111, 115, 120, 69, 2, + 18, 5, 38, 48, 2, 12, 21, 25, 6, 23, 42, 4, + 65, 19, 89, 124, 126, 126, 126, 126, 126, 126, + 8, 39, 31, 28, 18, 35, 20, 11, 9, 4, 71, 66, + 14, 5, 30, 40, 3, 7, 13, 17, 6, 15, 32, 6, 64, + 19, 89, 124, 126, 126, 126, 126, 126, 126 }, + + 
}, + + { + + { + + 62, + 9, 74, 62, 9, 74, 126, 104, 10, 9, 12, 47, 62, + 62, 12, 1, 99, 47, 85, 102, 6, 6, 73, 6, 23, 53, + 62, 62, 21, 97, 126, 117, 74, 85, 102, 6, 93, + 88, 19, 8, 89, 103, 116, 6, 5, 84, 96, 0, 85, + 106, 0, 75, 90, 101, 8, 79, 75, 97, 13, 3, 22, + 0, 0, 0, 83, 86, 97, 72, 22, 1, 29, 88, 126, + 126, 91, 95, 84, 86, 89, 91, 126, 76, 103, 90, + 126, 80, 76, 84, 78, 8, 2, 83, 126, 79, 104, 91, + 126, 65, 79, 72, 92, 7, 68, 71, 98, 86, 88, 82, + 72, 67, 72, 89, 69, 4, 66, 6, 71, 71, 5, 74, 19, + 69, 1, 12, 16, 21, 22, 10, 76, 78, 83, 11, 67, + 90, 67, 72, 75, 80, 83, 64, 32, 64, 94, 75, 0, + 74, 28, 36, 91, 65, 69, 77, 66, 1, 68, 81, 33, + 56, 40, 74, 66, 124, 26, 62, 62, 126, 24, 21, + 29, 34, 32, 26, 21, 23, 30, 20, 27, 16, 8, 5, 3, + 19, 19, 21, 15, 7, 11, 26, 14, 5, 15, 18, 69, + 30, 0, 62, 62, 62, 53, 62, 62, 62, 62, 46, 38, + 34, 30, 48, 43, 73, 29, 32, 19, 47, 27, 27, 35, + 42, 43, 51, 47, 21, 93, 7, 6, 25, 126, 115, 82, + 1, 10, 4, 85, 89, 94, 92, 126, 100, 6, 67, 71, + 77, 85, 88, 104, 98, 126, 82, 15, 2, 66, 70, 75, + 79, 83, 92, 108, 79, 69, 75, 5, 5, 78, 83, 81, + 99, 81, 25, 1, 5, 4, 73, 76, 86, 83, 87, 62, + 126, 126, 120, 126, 114, 117, 118, 117, 113, + 118, 120, 124, 94, 102, 99, 106, 126, 92, 6, 86, + 94, 91, 77, 71, 73, 64, 81, 64, 6, 67, 68, 67, + 68, 77, 64, 68, 78, 8, 4, 65, 9, 19, 3, 70, 76, + 86, 70, 64, 70, 8, 7, 69, 65, 74, 9, 9, 76, 82, + 77, 77, 21, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 52, 62, 62, 62, 62, 62, 62, + 48, 62, 62, 46, 25, 18, 9, 79, 62, 62, 62, 62, + 48, 48, 38, 41, 47, 45, 35, 22, 35, 16, 1, 32, + 37, 39, 40, 47, 33, 34, 22, 21, 3, 11, 3, 78, + 123, 10, 7, 2, 30, 13, 2, 78, 74, 72, 72, 75, + 71, 0, 70, 75, 72, 67, 10, 4, 11, 68, 62, 62, + 62, 62, 56, 51, 40, 25, 64, 71, 26, 19, 14, 7, + 4, 0, 67, 68, 79, 78, 74, 72, 72, 75, 71, 0, 70, + 75, 72, 67, 10, 4, 11, 68, 62, 62, 62, 62, 56, + 51, 40, 25, 64 }, + + { + + 62, + 9, 74, 62, 9, 74, 125, 102, 11, 10, 12, 46, + 62, 62, 13, 2, 97, 46, 
84, 100, 6, 6, 71, 6, + 22, 52, 62, 60, 19, 97, 125, 115, 73, 84, 100, + 6, 92, 87, 20, 8, 88, 102, 114, 5, 4, 84, 96, + 0, 84, 105, 0, 75, 89, 100, 8, 78, 74, 96, 14, + 3, 22, 0, 0, 0, 82, 86, 97, 71, 22, 1, 29, 87, + 125, 124, 89, 94, 82, 84, 88, 89, 125, 75, + 101, 89, 124, 80, 76, 84, 78, 9, 2, 82, 124, + 78, 103, 90, 125, 65, 78, 72, 91, 8, 68, 70, + 97, 85, 87, 81, 71, 66, 71, 88, 68, 5, 66, 6, + 70, 70, 5, 73, 20, 68, 1, 13, 17, 22, 23, 11, + 76, 77, 82, 11, 67, 89, 67, 71, 74, 79, 81, 1, + 33, 1, 92, 75, 64, 73, 29, 37, 91, 65, 68, 77, + 65, 1, 67, 79, 33, 56, 41, 72, 67, 122, 25, + 62, 62, 125, 24, 21, 29, 34, 32, 26, 21, 23, + 30, 20, 27, 16, 8, 5, 3, 19, 19, 21, 15, 7, + 11, 26, 14, 4, 15, 18, 69, 29, 0, 62, 62, 62, + 52, 62, 62, 62, 62, 45, 37, 32, 29, 46, 42, + 74, 28, 31, 18, 46, 27, 27, 34, 41, 42, 50, + 46, 20, 93, 7, 6, 24, 125, 113, 80, 2, 10, 4, + 84, 88, 93, 91, 125, 98, 7, 66, 70, 76, 83, + 87, 102, 97, 124, 81, 16, 3, 65, 69, 74, 78, + 82, 91, 106, 78, 67, 74, 6, 5, 77, 82, 80, 98, + 80, 26, 2, 6, 5, 72, 75, 85, 82, 86, 62, 125, + 125, 118, 125, 112, 115, 116, 115, 111, 116, + 118, 121, 93, 101, 98, 105, 123, 91, 5, 85, + 93, 90, 76, 71, 72, 64, 80, 64, 6, 67, 68, 66, + 68, 77, 64, 68, 77, 8, 4, 65, 9, 19, 3, 70, + 75, 84, 70, 64, 69, 8, 7, 69, 65, 73, 9, 9, + 75, 81, 76, 76, 20, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 50, 62, 62, + 62, 62, 62, 62, 47, 60, 60, 45, 24, 17, 9, 79, + 62, 62, 62, 60, 46, 47, 37, 39, 46, 43, 34, + 20, 33, 15, 0, 31, 36, 37, 39, 46, 32, 33, 21, + 20, 2, 11, 3, 78, 122, 9, 6, 1, 29, 12, 1, 77, + 73, 71, 71, 73, 70, 1, 69, 73, 71, 66, 11, 5, + 12, 67, 62, 62, 62, 62, 54, 50, 38, 24, 65, + 70, 27, 20, 15, 8, 5, 1, 66, 67, 78, 77, 73, + 71, 71, 73, 70, 1, 69, 73, 71, 66, 11, 5, 12, + 67, 62, 62, 62, 62, 54, 50, 38, 24, 65 }, + + { + + 62, + 9, 74, 62, 9, 74, 123, 101, 11, 10, 12, 44, + 60, 62, 14, 2, 95, 44, 84, 99, 6, 6, 70, 5, + 21, 51, 60, 57, 17, 98, 123, 114, 73, 84, 99, + 6, 92, 86, 20, 8, 87, 
101, 113, 4, 3, 84, 96, + 0, 84, 104, 0, 75, 89, 100, 8, 78, 74, 95, 14, + 3, 22, 0, 0, 0, 81, 86, 97, 71, 21, 1, 29, 86, + 124, 122, 88, 93, 80, 82, 87, 88, 123, 74, + 100, 88, 122, 81, 76, 84, 78, 9, 2, 81, 122, + 78, 102, 89, 123, 65, 78, 72, 91, 8, 68, 70, + 96, 85, 86, 81, 71, 66, 71, 87, 67, 5, 66, 6, + 70, 70, 5, 73, 20, 68, 1, 13, 17, 22, 23, 11, + 77, 76, 81, 10, 67, 89, 67, 70, 74, 79, 80, 2, + 34, 3, 90, 76, 65, 73, 29, 37, 92, 65, 68, 78, + 64, 1, 67, 78, 33, 56, 41, 71, 68, 121, 24, + 62, 62, 124, 24, 21, 29, 33, 31, 26, 21, 23, + 29, 19, 26, 16, 8, 5, 3, 18, 18, 20, 15, 7, + 11, 25, 13, 3, 14, 17, 69, 28, 64, 62, 62, 62, + 50, 60, 62, 62, 62, 44, 35, 30, 27, 44, 40, + 75, 27, 30, 16, 45, 26, 26, 33, 39, 40, 48, + 44, 18, 93, 6, 5, 22, 124, 112, 79, 3, 10, 4, + 83, 87, 92, 90, 123, 97, 8, 65, 69, 75, 82, + 86, 101, 96, 122, 80, 16, 3, 65, 69, 73, 77, + 81, 90, 105, 78, 66, 73, 6, 5, 76, 81, 80, 97, + 79, 26, 3, 6, 5, 71, 74, 84, 81, 85, 62, 124, + 123, 116, 123, 111, 114, 114, 113, 110, 114, + 116, 119, 92, 100, 97, 104, 120, 91, 4, 85, + 92, 89, 76, 71, 72, 64, 80, 64, 5, 67, 68, 65, + 68, 77, 64, 68, 77, 8, 4, 65, 8, 18, 3, 70, + 75, 83, 71, 64, 68, 7, 7, 69, 65, 73, 9, 9, + 75, 80, 76, 76, 18, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 48, 62, 62, + 62, 62, 62, 61, 45, 58, 58, 43, 23, 16, 8, 79, + 62, 62, 62, 58, 44, 45, 35, 37, 44, 41, 32, + 18, 31, 13, 64, 30, 35, 35, 37, 44, 30, 31, + 20, 19, 1, 10, 2, 78, 121, 8, 5, 64, 28, 11, + 0, 77, 73, 70, 70, 72, 69, 2, 69, 72, 70, 65, + 11, 6, 13, 66, 62, 62, 62, 60, 52, 48, 36, 22, + 66, 69, 27, 20, 16, 9, 6, 1, 65, 67, 77, 77, + 73, 70, 70, 72, 69, 2, 69, 72, 70, 65, 11, 6, + 13, 66, 62, 62, 62, 60, 52, 48, 36, 22, 66 }, + + { + + 62, + 9, 74, 62, 9, 74, 121, 99, 12, 10, 11, 42, 59, + 61, 14, 2, 93, 43, 84, 97, 6, 5, 69, 4, 20, + 50, 58, 53, 15, 99, 121, 112, 73, 84, 97, 6, + 91, 85, 21, 8, 86, 100, 112, 3, 2, 84, 97, 0, + 84, 103, 0, 76, 89, 100, 8, 78, 74, 94, 15, 3, + 22, 0, 0, 0, 81, 
86, 97, 70, 20, 1, 28, 86, + 123, 120, 87, 92, 79, 81, 86, 87, 121, 73, 99, + 87, 120, 82, 76, 84, 78, 10, 2, 80, 120, 78, + 101, 88, 121, 65, 78, 72, 91, 9, 68, 69, 95, + 85, 85, 81, 71, 66, 70, 86, 67, 5, 66, 6, 70, + 70, 5, 73, 20, 68, 1, 14, 17, 23, 23, 12, 77, + 76, 80, 10, 67, 89, 67, 69, 74, 78, 79, 3, 35, + 4, 88, 76, 66, 72, 29, 37, 93, 65, 67, 78, 64, + 1, 67, 77, 33, 56, 41, 70, 69, 119, 23, 62, + 62, 122, 24, 21, 28, 32, 31, 25, 20, 23, 29, + 18, 25, 16, 8, 5, 2, 18, 17, 19, 14, 7, 11, + 24, 13, 2, 14, 16, 69, 27, 64, 62, 62, 61, 49, + 58, 62, 62, 62, 43, 33, 28, 26, 42, 38, 77, + 26, 29, 14, 44, 25, 25, 32, 38, 38, 46, 42, + 17, 93, 5, 4, 21, 122, 110, 77, 3, 10, 4, 82, + 86, 91, 89, 121, 96, 9, 64, 68, 75, 81, 85, + 99, 95, 120, 80, 17, 4, 64, 68, 72, 77, 81, + 89, 104, 78, 64, 72, 6, 5, 75, 81, 80, 96, 78, + 27, 4, 7, 5, 70, 74, 83, 81, 85, 62, 122, 122, + 115, 121, 110, 112, 113, 112, 108, 112, 114, + 117, 92, 99, 97, 103, 117, 91, 3, 85, 91, 88, + 76, 71, 72, 64, 79, 64, 4, 67, 68, 65, 68, 77, + 64, 68, 77, 7, 4, 65, 7, 17, 3, 70, 75, 82, + 72, 64, 67, 6, 7, 69, 65, 72, 9, 8, 74, 79, + 76, 76, 17, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 46, 62, 62, 62, 62, + 62, 59, 43, 56, 55, 41, 22, 15, 7, 79, 62, 62, + 62, 56, 42, 43, 34, 35, 42, 39, 30, 16, 29, + 11, 65, 29, 34, 33, 36, 42, 29, 29, 18, 17, 0, + 9, 1, 78, 120, 7, 3, 65, 27, 10, 64, 77, 72, + 70, 70, 71, 68, 3, 69, 71, 69, 64, 12, 7, 13, + 65, 62, 62, 62, 58, 50, 46, 34, 20, 67, 69, + 28, 21, 17, 9, 7, 2, 65, 66, 77, 77, 72, 70, + 70, 71, 68, 3, 69, 71, 69, 64, 12, 7, 13, 65, + 62, 62, 62, 58, 50, 46, 34, 20, 67 }, + + { + + 62, + 9, 74, 62, 9, 74, 120, 98, 12, 10, 11, 40, 57, + 60, 15, 2, 92, 41, 84, 96, 5, 5, 68, 3, 18, + 48, 56, 50, 12, 100, 119, 111, 73, 84, 96, 5, + 91, 84, 21, 7, 86, 99, 110, 2, 0, 85, 97, 0, + 83, 102, 64, 76, 89, 100, 8, 78, 74, 94, 15, + 3, 22, 0, 0, 0, 80, 87, 97, 70, 19, 1, 28, 85, + 122, 118, 86, 91, 77, 79, 86, 86, 119, 72, 98, + 86, 117, 82, 77, 
84, 79, 10, 1, 79, 117, 77, + 101, 88, 119, 65, 78, 72, 91, 9, 68, 69, 94, + 85, 85, 80, 71, 66, 70, 85, 66, 5, 67, 5, 70, + 70, 5, 73, 20, 68, 1, 14, 17, 23, 23, 12, 78, + 75, 80, 9, 67, 88, 67, 68, 73, 78, 77, 5, 36, + 6, 86, 77, 67, 72, 30, 37, 94, 65, 67, 79, 0, + 1, 67, 76, 33, 56, 41, 68, 70, 118, 22, 62, + 62, 121, 23, 21, 28, 32, 30, 25, 20, 23, 28, + 17, 24, 15, 8, 5, 2, 17, 17, 18, 14, 6, 10, + 23, 12, 1, 13, 15, 69, 25, 65, 62, 62, 59, 47, + 57, 62, 62, 62, 42, 31, 25, 24, 40, 36, 78, + 24, 28, 13, 43, 24, 24, 30, 36, 36, 44, 41, + 15, 93, 4, 3, 19, 121, 109, 76, 4, 10, 4, 81, + 85, 90, 89, 119, 94, 10, 64, 68, 74, 79, 84, + 98, 94, 117, 79, 17, 4, 64, 68, 71, 76, 80, + 89, 103, 78, 0, 71, 6, 5, 74, 80, 80, 95, 77, + 27, 5, 7, 5, 69, 73, 82, 80, 84, 62, 121, 120, + 113, 120, 109, 111, 111, 110, 107, 111, 112, + 114, 91, 98, 96, 102, 114, 90, 2, 84, 90, 88, + 76, 71, 72, 65, 79, 65, 3, 67, 68, 64, 68, 77, + 64, 68, 76, 7, 3, 65, 6, 16, 2, 70, 75, 81, + 73, 65, 67, 6, 6, 69, 65, 72, 8, 8, 74, 79, + 76, 76, 15, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 44, 62, 62, 62, 62, + 62, 57, 41, 54, 53, 39, 20, 14, 6, 79, 62, 62, + 62, 54, 40, 41, 32, 33, 40, 37, 28, 14, 26, + 10, 67, 28, 33, 30, 34, 41, 27, 27, 17, 16, + 64, 8, 0, 78, 119, 5, 2, 67, 25, 9, 65, 77, + 72, 69, 69, 70, 68, 3, 68, 70, 68, 0, 12, 8, + 14, 65, 62, 62, 60, 56, 48, 44, 31, 18, 69, + 68, 28, 21, 17, 10, 7, 2, 64, 66, 76, 77, 72, + 69, 69, 70, 68, 3, 68, 70, 68, 0, 12, 8, 14, + 65, 62, 62, 60, 56, 48, 44, 31, 18, 69 }, + + { + + 62, + 9, 74, 62, 9, 74, 118, 96, 12, 10, 10, 38, 56, + 59, 16, 2, 90, 39, 83, 94, 5, 5, 67, 2, 17, + 47, 54, 47, 10, 100, 117, 110, 73, 83, 94, 5, + 91, 83, 21, 7, 85, 98, 109, 1, 64, 85, 97, 0, + 83, 101, 64, 76, 89, 100, 8, 77, 74, 93, 16, + 3, 22, 0, 0, 0, 80, 87, 97, 69, 18, 1, 27, 85, + 120, 115, 85, 90, 76, 78, 85, 85, 117, 71, 97, + 85, 115, 83, 77, 84, 79, 10, 1, 78, 115, 77, + 100, 87, 117, 65, 78, 72, 90, 9, 68, 68, 93, + 84, 84, 80, 71, 65, 
69, 84, 66, 5, 67, 5, 69, + 70, 5, 73, 21, 68, 1, 15, 18, 23, 23, 12, 78, + 75, 79, 9, 67, 88, 67, 67, 73, 77, 76, 6, 37, + 7, 84, 77, 68, 71, 30, 37, 95, 65, 66, 79, 1, + 1, 67, 74, 33, 56, 41, 67, 71, 116, 21, 62, + 62, 120, 23, 21, 27, 31, 30, 25, 19, 23, 28, + 16, 23, 15, 8, 5, 2, 17, 16, 17, 13, 6, 10, + 22, 12, 0, 12, 15, 69, 24, 65, 62, 62, 58, 46, + 55, 62, 62, 62, 41, 29, 23, 23, 38, 34, 79, + 23, 27, 11, 42, 23, 23, 29, 35, 34, 42, 39, + 14, 93, 3, 2, 17, 119, 107, 75, 4, 10, 4, 80, + 84, 89, 88, 117, 93, 11, 0, 67, 73, 78, 83, + 96, 93, 115, 78, 18, 5, 0, 67, 70, 75, 80, 88, + 102, 77, 1, 70, 6, 5, 73, 80, 79, 94, 76, 27, + 6, 7, 5, 68, 72, 81, 80, 83, 62, 120, 119, + 112, 118, 108, 109, 110, 108, 105, 109, 110, + 112, 90, 97, 95, 101, 111, 90, 1, 84, 89, 87, + 76, 71, 72, 65, 78, 65, 2, 67, 68, 0, 68, 77, + 64, 68, 76, 6, 3, 65, 5, 15, 2, 70, 75, 80, + 73, 65, 66, 5, 6, 69, 65, 72, 8, 7, 74, 78, + 76, 76, 14, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 42, 62, 62, 62, 62, + 62, 55, 40, 52, 50, 37, 19, 13, 5, 79, 62, 62, + 62, 52, 38, 39, 31, 31, 38, 35, 26, 12, 24, 8, + 68, 27, 32, 28, 33, 39, 26, 25, 16, 15, 65, 7, + 64, 78, 118, 4, 1, 68, 24, 8, 66, 77, 71, 69, + 68, 69, 67, 4, 68, 69, 67, 1, 13, 9, 14, 64, + 62, 62, 58, 54, 46, 42, 29, 16, 70, 68, 29, + 22, 18, 11, 8, 3, 64, 66, 75, 77, 71, 69, 68, + 69, 67, 4, 68, 69, 67, 1, 13, 9, 14, 64, 62, + 62, 58, 54, 46, 42, 29, 16, 70 }, + + { + + 62, + 9, 75, 62, 9, 75, 116, 95, 13, 10, 10, 37, 54, + 58, 16, 3, 88, 38, 83, 93, 5, 4, 66, 1, 16, + 46, 53, 43, 8, 101, 115, 108, 73, 83, 93, 5, + 90, 82, 22, 7, 84, 97, 108, 64, 65, 85, 98, 0, + 83, 101, 64, 77, 88, 100, 7, 77, 74, 92, 16, + 3, 22, 0, 0, 0, 79, 87, 97, 69, 18, 0, 27, 84, + 119, 113, 84, 89, 74, 76, 84, 84, 115, 70, 96, + 85, 113, 84, 77, 84, 79, 11, 1, 77, 113, 77, + 99, 86, 115, 65, 78, 72, 90, 10, 69, 68, 93, + 84, 83, 80, 70, 65, 69, 83, 65, 5, 67, 5, 69, + 70, 5, 73, 21, 68, 1, 15, 18, 24, 24, 13, 79, + 74, 78, 8, 67, 88, 67, 
66, 73, 77, 75, 7, 37, + 9, 83, 78, 69, 71, 30, 37, 95, 66, 66, 80, 1, + 0, 66, 73, 33, 56, 42, 66, 72, 115, 20, 62, + 62, 118, 23, 21, 27, 30, 29, 24, 19, 22, 27, + 16, 23, 15, 7, 5, 1, 16, 15, 16, 13, 6, 10, + 22, 11, 65, 12, 14, 69, 23, 66, 62, 62, 56, + 44, 53, 62, 62, 62, 39, 27, 21, 21, 36, 32, + 81, 22, 25, 9, 40, 22, 22, 28, 33, 32, 40, 37, + 12, 93, 2, 1, 16, 118, 106, 73, 5, 10, 4, 79, + 84, 89, 87, 116, 92, 12, 1, 66, 73, 77, 82, + 95, 92, 113, 78, 18, 5, 0, 67, 69, 75, 79, 87, + 101, 77, 3, 69, 6, 5, 73, 79, 79, 94, 76, 28, + 6, 8, 5, 67, 72, 81, 79, 83, 62, 118, 117, + 110, 116, 106, 108, 108, 107, 104, 107, 108, + 110, 90, 96, 95, 101, 108, 90, 0, 84, 89, 86, + 76, 71, 72, 65, 78, 65, 1, 67, 68, 0, 68, 77, + 64, 68, 76, 6, 3, 65, 4, 14, 2, 70, 75, 79, + 74, 65, 65, 4, 6, 69, 65, 71, 8, 7, 73, 77, + 76, 76, 12, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 40, 62, 62, 62, 62, + 62, 52, 38, 50, 48, 35, 18, 12, 4, 79, 62, 62, + 62, 50, 36, 38, 29, 29, 36, 32, 24, 10, 22, 6, + 69, 26, 30, 26, 31, 37, 24, 23, 14, 13, 66, 6, + 65, 79, 117, 3, 64, 70, 23, 6, 67, 76, 71, 68, + 68, 68, 66, 5, 68, 68, 66, 2, 13, 10, 15, 0, + 62, 62, 56, 52, 44, 40, 27, 14, 71, 67, 29, + 22, 19, 11, 9, 3, 0, 65, 75, 76, 71, 68, 68, + 68, 66, 5, 68, 68, 66, 2, 13, 10, 15, 0, 62, + 62, 56, 52, 44, 40, 27, 14, 71 }, + + { + + 62, + 9, 75, 62, 9, 75, 114, 93, 13, 10, 9, 35, 53, + 57, 17, 3, 87, 36, 83, 91, 4, 4, 65, 0, 15, + 45, 51, 40, 5, 102, 113, 107, 73, 83, 91, 4, + 90, 81, 22, 7, 84, 96, 106, 65, 66, 85, 98, 0, + 82, 100, 65, 77, 88, 100, 7, 77, 74, 91, 17, + 3, 22, 0, 0, 0, 79, 87, 97, 68, 17, 0, 26, 84, + 118, 111, 83, 88, 73, 75, 83, 83, 113, 69, 95, + 84, 110, 84, 78, 84, 80, 11, 1, 76, 110, 76, + 99, 86, 113, 65, 78, 72, 90, 10, 69, 67, 92, + 84, 82, 79, 70, 65, 68, 82, 65, 5, 68, 5, 69, + 70, 5, 73, 21, 68, 1, 16, 18, 24, 24, 13, 79, + 74, 78, 8, 67, 87, 67, 65, 72, 76, 73, 9, 38, + 10, 81, 78, 70, 70, 31, 37, 96, 66, 65, 80, 2, + 0, 66, 72, 33, 56, 42, 64, 
73, 113, 19, 62, + 62, 117, 23, 21, 26, 30, 29, 24, 18, 22, 27, + 15, 22, 15, 7, 5, 1, 16, 15, 15, 12, 6, 10, + 21, 11, 66, 11, 13, 69, 22, 66, 62, 62, 54, + 43, 52, 62, 62, 62, 38, 25, 19, 20, 34, 30, + 82, 21, 24, 8, 39, 21, 21, 26, 32, 30, 38, 36, + 11, 93, 1, 0, 14, 116, 104, 72, 5, 10, 4, 78, + 83, 88, 87, 114, 90, 13, 2, 66, 72, 75, 81, + 93, 91, 110, 77, 19, 6, 1, 66, 68, 74, 79, 86, + 100, 77, 4, 68, 6, 5, 72, 79, 79, 93, 75, 28, + 7, 8, 5, 66, 71, 80, 79, 82, 62, 117, 116, + 109, 115, 105, 106, 107, 105, 102, 105, 106, + 107, 89, 95, 94, 100, 105, 89, 64, 83, 88, 85, + 76, 71, 72, 65, 77, 66, 0, 67, 68, 1, 68, 77, + 64, 68, 75, 5, 2, 65, 3, 13, 1, 70, 75, 78, + 75, 66, 64, 4, 5, 69, 65, 71, 7, 6, 73, 77, + 76, 76, 11, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 38, 62, 62, 62, 62, + 62, 50, 36, 48, 45, 33, 17, 11, 3, 79, 62, 61, + 62, 48, 34, 36, 28, 27, 34, 30, 22, 8, 20, 5, + 71, 25, 29, 24, 30, 36, 23, 21, 13, 12, 67, 5, + 66, 79, 116, 1, 65, 71, 21, 5, 68, 76, 70, 68, + 67, 67, 65, 5, 67, 67, 65, 3, 14, 11, 15, 0, + 62, 60, 54, 50, 42, 38, 24, 12, 72, 67, 30, + 23, 19, 12, 10, 4, 0, 65, 74, 76, 70, 68, 67, + 67, 65, 5, 67, 67, 65, 3, 14, 11, 15, 0, 62, + 60, 54, 50, 42, 38, 24, 12, 72 }, + + { + + 62, + 8, 75, 62, 8, 75, 113, 92, 13, 10, 9, 33, 51, + 56, 17, 3, 85, 34, 83, 90, 4, 3, 64, 64, 13, + 43, 49, 36, 3, 103, 111, 106, 73, 83, 90, 4, + 90, 81, 22, 6, 83, 95, 105, 66, 68, 86, 99, 0, + 82, 99, 65, 78, 88, 100, 7, 77, 74, 91, 17, 3, + 22, 0, 0, 0, 78, 88, 97, 68, 16, 0, 26, 83, + 117, 109, 82, 88, 71, 73, 83, 82, 111, 69, 94, + 83, 108, 85, 78, 85, 80, 11, 0, 76, 108, 76, + 98, 85, 112, 65, 78, 72, 90, 10, 69, 67, 91, + 84, 82, 79, 70, 65, 68, 81, 64, 5, 68, 4, 69, + 70, 4, 73, 21, 68, 1, 16, 18, 24, 24, 13, 80, + 73, 77, 7, 67, 87, 67, 64, 72, 76, 72, 10, 39, + 12, 79, 79, 71, 70, 31, 37, 97, 66, 65, 81, 2, + 0, 66, 71, 33, 56, 42, 0, 74, 112, 18, 59, 62, + 116, 22, 21, 26, 29, 28, 23, 18, 22, 26, 14, + 21, 14, 7, 4, 0, 15, 14, 14, 
12, 5, 9, 20, 10, + 67, 10, 12, 69, 20, 67, 62, 62, 52, 41, 50, + 60, 62, 62, 37, 23, 16, 18, 31, 28, 84, 19, + 23, 6, 38, 20, 20, 25, 30, 28, 36, 34, 9, 93, + 0, 64, 12, 115, 103, 71, 6, 10, 4, 78, 82, 87, + 86, 112, 89, 13, 2, 65, 72, 74, 80, 92, 90, + 108, 77, 19, 6, 1, 66, 68, 74, 78, 86, 99, 77, + 5, 67, 6, 5, 71, 78, 79, 92, 74, 28, 8, 8, 5, + 65, 71, 79, 78, 82, 62, 116, 114, 107, 113, + 104, 105, 105, 104, 101, 104, 104, 105, 89, + 94, 94, 99, 102, 89, 65, 83, 87, 85, 76, 71, + 72, 66, 77, 66, 64, 67, 68, 1, 68, 77, 65, 68, + 75, 5, 2, 66, 2, 12, 1, 71, 75, 77, 76, 66, + 64, 3, 5, 69, 66, 71, 7, 6, 73, 76, 76, 76, 9, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 61, 36, 62, 62, 62, 62, 61, 48, 34, + 45, 43, 31, 15, 9, 2, 79, 61, 59, 62, 46, 31, + 34, 26, 24, 32, 28, 20, 6, 17, 3, 72, 23, 28, + 21, 28, 34, 21, 19, 11, 10, 68, 4, 67, 79, + 115, 0, 67, 73, 20, 4, 69, 76, 70, 67, 67, 66, + 65, 6, 67, 66, 65, 4, 14, 11, 16, 1, 61, 58, + 52, 48, 40, 36, 22, 10, 74, 66, 30, 23, 20, + 12, 10, 4, 1, 65, 74, 76, 70, 67, 67, 66, 65, + 6, 67, 66, 65, 4, 14, 11, 16, 1, 61, 58, 52, + 48, 40, 36, 22, 10, 74 }, + + { + + 62, + 8, 75, 62, 8, 75, 111, 91, 14, 10, 9, 31, 49, + 56, 18, 3, 83, 33, 82, 88, 4, 3, 0, 64, 12, + 42, 47, 33, 1, 103, 109, 104, 72, 82, 88, 4, + 89, 80, 23, 6, 82, 94, 104, 67, 69, 86, 99, 0, + 82, 98, 65, 78, 88, 100, 7, 76, 73, 90, 17, 3, + 22, 0, 0, 0, 77, 88, 97, 68, 15, 0, 26, 82, + 115, 106, 81, 87, 69, 71, 82, 81, 109, 68, 92, + 82, 106, 86, 78, 85, 80, 12, 0, 75, 106, 76, + 97, 84, 110, 65, 77, 72, 89, 11, 69, 66, 90, + 83, 81, 79, 70, 64, 67, 80, 0, 5, 68, 4, 68, + 69, 4, 73, 22, 68, 1, 16, 19, 25, 24, 14, 80, + 72, 76, 6, 67, 87, 67, 0, 72, 75, 71, 11, 40, + 14, 77, 80, 72, 69, 31, 38, 98, 66, 65, 81, 3, + 0, 66, 69, 33, 56, 42, 1, 75, 111, 17, 57, 62, + 114, 22, 21, 26, 28, 28, 23, 18, 22, 26, 13, + 20, 14, 7, 4, 0, 15, 13, 14, 12, 5, 9, 19, 9, + 68, 10, 12, 69, 19, 67, 62, 62, 51, 40, 48, + 58, 62, 62, 36, 21, 14, 17, 29, 27, 85, 
18, + 22, 4, 37, 19, 19, 24, 28, 27, 34, 32, 8, 93, + 0, 65, 11, 113, 101, 69, 7, 10, 4, 77, 81, 86, + 85, 110, 88, 14, 3, 64, 71, 73, 79, 91, 89, + 106, 76, 20, 7, 2, 66, 67, 73, 77, 85, 97, 76, + 7, 66, 7, 5, 70, 77, 78, 91, 73, 29, 9, 9, 6, + 64, 70, 78, 77, 81, 62, 114, 112, 105, 111, + 103, 104, 103, 102, 99, 102, 102, 103, 88, 93, + 93, 98, 98, 89, 66, 83, 86, 84, 75, 71, 72, + 66, 77, 66, 65, 67, 68, 2, 68, 77, 65, 68, 75, + 5, 2, 66, 2, 11, 1, 71, 74, 75, 76, 66, 0, 2, + 5, 69, 66, 70, 7, 6, 72, 75, 75, 75, 7, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 58, 34, 62, 62, 62, 62, 58, 46, 33, 43, + 41, 30, 14, 8, 1, 79, 59, 57, 60, 44, 29, 32, + 25, 22, 30, 26, 18, 4, 15, 1, 73, 22, 27, 19, + 27, 32, 20, 17, 10, 9, 69, 3, 67, 79, 114, 64, + 68, 75, 19, 3, 70, 76, 69, 66, 66, 64, 64, 7, + 67, 65, 64, 5, 15, 12, 17, 2, 60, 57, 50, 46, + 38, 34, 20, 8, 75, 65, 30, 24, 21, 13, 11, 5, + 2, 64, 73, 76, 69, 66, 66, 64, 64, 7, 67, 65, + 64, 5, 15, 12, 17, 2, 60, 57, 50, 46, 38, 34, + 20, 8, 75 }, + + { + + 62, + 8, 75, 62, 8, 75, 109, 89, 14, 10, 8, 29, 48, + 55, 19, 3, 82, 31, 82, 87, 3, 3, 1, 65, 11, + 41, 45, 30, 65, 104, 107, 103, 72, 82, 87, 3, + 89, 79, 23, 6, 82, 93, 102, 68, 70, 86, 99, 0, + 81, 97, 66, 78, 88, 100, 7, 76, 73, 89, 18, 3, + 22, 0, 0, 0, 77, 88, 97, 67, 14, 0, 25, 82, + 114, 104, 80, 86, 68, 70, 81, 80, 107, 67, 91, + 81, 103, 86, 79, 85, 81, 12, 0, 74, 103, 75, + 97, 84, 108, 65, 77, 72, 89, 11, 69, 66, 89, + 83, 80, 78, 70, 64, 67, 79, 0, 5, 69, 4, 68, + 69, 4, 73, 22, 68, 1, 17, 19, 25, 24, 14, 81, + 72, 76, 6, 67, 86, 67, 1, 71, 75, 69, 13, 41, + 15, 75, 80, 73, 69, 32, 38, 99, 66, 64, 82, 4, + 0, 66, 68, 33, 56, 42, 3, 76, 109, 16, 54, 62, + 113, 22, 21, 25, 28, 27, 23, 17, 22, 25, 12, + 19, 14, 7, 4, 0, 14, 13, 13, 11, 5, 9, 18, 9, + 69, 9, 11, 69, 18, 68, 60, 62, 49, 38, 47, 56, + 62, 62, 35, 19, 12, 15, 27, 25, 86, 17, 21, 3, + 36, 18, 18, 22, 27, 25, 32, 31, 6, 93, 64, 66, + 9, 112, 100, 68, 7, 10, 4, 76, 80, 85, 85, + 108, 
86, 15, 4, 64, 70, 71, 78, 89, 88, 103, + 75, 20, 7, 2, 65, 66, 72, 77, 84, 96, 76, 8, + 65, 7, 5, 69, 77, 78, 90, 72, 29, 10, 9, 6, 0, + 69, 77, 77, 80, 62, 113, 111, 104, 110, 102, + 102, 102, 100, 98, 100, 100, 100, 87, 92, 92, + 97, 95, 88, 67, 82, 85, 83, 75, 71, 72, 66, + 76, 67, 66, 67, 68, 3, 68, 77, 65, 68, 74, 4, + 1, 66, 1, 10, 0, 71, 74, 74, 77, 67, 1, 2, 4, + 69, 66, 70, 6, 5, 72, 75, 75, 75, 6, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 56, 32, 62, 62, 62, 62, 55, 44, 31, 41, 38, + 28, 13, 7, 0, 79, 57, 54, 57, 42, 27, 30, 23, + 20, 28, 24, 16, 2, 13, 0, 75, 21, 26, 17, 25, + 31, 18, 15, 9, 8, 70, 2, 68, 79, 113, 66, 69, + 76, 17, 2, 71, 76, 69, 66, 65, 0, 0, 7, 66, + 64, 0, 6, 15, 13, 17, 2, 60, 55, 48, 44, 36, + 32, 17, 6, 76, 65, 31, 24, 21, 14, 12, 5, 2, + 64, 72, 76, 69, 66, 65, 0, 0, 7, 66, 64, 0, 6, + 15, 13, 17, 2, 60, 55, 48, 44, 36, 32, 17, 6, + 76 }, + + { + + 62, + 8, 76, 62, 8, 76, 107, 88, 15, 10, 8, 28, 46, + 54, 19, 4, 80, 30, 82, 85, 3, 2, 2, 66, 10, + 40, 44, 26, 67, 105, 105, 101, 72, 82, 85, 3, + 88, 78, 24, 6, 81, 92, 101, 70, 71, 86, 100, + 0, 81, 97, 66, 79, 87, 100, 6, 76, 73, 88, 18, + 3, 22, 0, 0, 0, 76, 88, 97, 67, 14, 64, 25, + 81, 113, 102, 79, 85, 66, 68, 80, 79, 105, 66, + 90, 81, 101, 87, 79, 85, 81, 13, 0, 73, 101, + 75, 96, 83, 106, 65, 77, 72, 89, 12, 70, 65, + 89, 83, 79, 78, 69, 64, 66, 78, 1, 5, 69, 4, + 68, 69, 4, 73, 22, 68, 1, 17, 19, 26, 25, 15, + 81, 71, 75, 5, 67, 86, 67, 2, 71, 74, 68, 14, + 41, 17, 74, 81, 74, 68, 32, 38, 99, 67, 64, + 82, 4, 64, 65, 67, 33, 56, 43, 4, 77, 108, 15, + 51, 62, 111, 22, 21, 25, 27, 27, 22, 17, 21, + 25, 12, 19, 14, 6, 4, 64, 14, 12, 12, 11, 5, + 9, 18, 8, 71, 9, 10, 69, 17, 68, 57, 62, 47, + 37, 45, 54, 62, 61, 33, 17, 10, 14, 25, 23, + 88, 16, 19, 1, 34, 17, 17, 21, 25, 23, 30, 29, + 5, 93, 65, 67, 8, 110, 98, 66, 8, 10, 4, 75, + 80, 85, 84, 107, 85, 16, 5, 0, 70, 70, 77, 88, + 87, 101, 75, 21, 8, 3, 65, 65, 72, 76, 83, 95, + 76, 10, 64, 7, 5, 69, 76, 78, 90, 
72, 30, 10, + 10, 6, 1, 69, 77, 76, 80, 62, 111, 109, 102, + 108, 100, 101, 100, 99, 96, 98, 98, 98, 87, + 91, 92, 97, 92, 88, 68, 82, 85, 82, 75, 71, + 72, 66, 76, 67, 67, 67, 68, 3, 68, 77, 65, 68, + 74, 4, 1, 66, 0, 9, 0, 71, 74, 73, 78, 67, 2, + 1, 4, 69, 66, 69, 6, 5, 71, 74, 75, 75, 4, 62, + 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 53, 30, 62, 62, 62, 62, 53, 41, 29, 39, + 36, 26, 12, 6, 64, 79, 55, 52, 55, 40, 25, 29, + 22, 18, 26, 21, 14, 0, 11, 65, 76, 20, 24, 15, + 24, 29, 17, 13, 7, 6, 71, 1, 69, 80, 112, 67, + 71, 78, 16, 0, 72, 75, 68, 65, 65, 1, 1, 8, + 66, 0, 1, 7, 16, 14, 18, 3, 59, 53, 46, 42, + 34, 30, 15, 4, 77, 64, 31, 25, 22, 14, 13, 6, + 3, 0, 72, 75, 68, 65, 65, 1, 1, 8, 66, 0, 1, + 7, 16, 14, 18, 3, 59, 53, 46, 42, 34, 30, 15, + 4, 77 }, + + { + + 62, + 8, 76, 62, 8, 76, 106, 86, 15, 10, 7, 26, 45, + 53, 20, 4, 78, 28, 82, 84, 3, 2, 3, 67, 8, 38, + 42, 23, 69, 106, 103, 100, 72, 82, 84, 3, 88, + 77, 24, 5, 80, 91, 100, 71, 73, 87, 100, 0, + 81, 96, 66, 79, 87, 100, 6, 76, 73, 88, 19, 3, + 22, 0, 0, 0, 76, 89, 97, 66, 13, 64, 24, 81, + 112, 100, 78, 84, 65, 67, 80, 78, 103, 65, 89, + 80, 99, 88, 79, 85, 81, 13, 64, 72, 99, 75, + 95, 82, 104, 65, 77, 72, 89, 12, 70, 65, 88, + 83, 79, 78, 69, 64, 66, 77, 1, 5, 69, 3, 68, + 69, 4, 73, 22, 68, 1, 18, 19, 26, 25, 15, 82, + 71, 74, 5, 67, 86, 67, 3, 71, 74, 67, 15, 42, + 18, 72, 81, 75, 68, 32, 38, 100, 67, 0, 83, 5, + 64, 65, 66, 33, 56, 43, 5, 78, 106, 14, 48, + 60, 110, 21, 21, 24, 26, 26, 22, 16, 21, 24, + 11, 18, 13, 6, 4, 64, 13, 11, 11, 10, 4, 8, + 17, 8, 72, 8, 9, 69, 15, 69, 55, 62, 45, 35, + 43, 52, 62, 58, 32, 15, 7, 12, 23, 21, 89, 14, + 18, 64, 33, 16, 16, 20, 24, 21, 28, 27, 3, 93, + 66, 68, 6, 109, 97, 65, 8, 10, 4, 74, 79, 84, + 83, 105, 84, 17, 5, 1, 69, 69, 76, 86, 86, 99, + 74, 21, 8, 3, 64, 64, 71, 76, 83, 94, 76, 11, + 0, 7, 5, 68, 76, 78, 89, 71, 30, 11, 10, 6, 2, + 68, 76, 76, 79, 62, 110, 108, 101, 106, 99, + 99, 99, 97, 95, 97, 96, 96, 86, 90, 91, 96, + 89, 88, 69, 
82, 84, 82, 75, 71, 72, 67, 75, + 67, 68, 67, 68, 4, 68, 77, 65, 68, 74, 3, 1, + 66, 64, 8, 0, 71, 74, 72, 79, 67, 2, 0, 4, 69, + 66, 69, 6, 4, 71, 73, 75, 75, 3, 62, 60, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 50, + 28, 62, 62, 62, 62, 50, 39, 27, 37, 33, 24, + 10, 5, 65, 79, 52, 50, 53, 38, 23, 27, 20, 16, + 24, 19, 12, 65, 8, 67, 77, 19, 23, 12, 22, 27, + 15, 11, 6, 5, 72, 0, 70, 80, 111, 68, 72, 79, + 15, 64, 73, 75, 68, 65, 64, 2, 1, 9, 66, 1, 2, + 8, 16, 15, 18, 4, 59, 51, 44, 40, 32, 28, 13, + 2, 79, 64, 32, 25, 23, 15, 13, 6, 3, 0, 71, + 75, 68, 65, 64, 2, 1, 9, 66, 1, 2, 8, 16, 15, + 18, 4, 59, 51, 44, 40, 32, 28, 13, 2, 79 }, + + { + + 62, + 8, 76, 62, 8, 76, 104, 85, 15, 10, 7, 24, 43, + 52, 21, 4, 77, 26, 81, 82, 2, 2, 4, 68, 7, 37, + 40, 20, 72, 106, 101, 99, 72, 81, 82, 2, 88, + 76, 24, 5, 80, 90, 98, 72, 74, 87, 100, 0, 80, + 95, 67, 79, 87, 100, 6, 75, 73, 87, 19, 3, 22, + 0, 0, 0, 75, 89, 97, 66, 12, 64, 24, 80, 110, + 97, 77, 83, 0, 65, 79, 77, 101, 64, 88, 79, + 96, 88, 80, 85, 82, 13, 64, 71, 96, 74, 95, + 82, 102, 65, 77, 72, 88, 12, 70, 64, 87, 82, + 78, 77, 69, 0, 65, 76, 2, 5, 70, 3, 67, 69, 4, + 73, 23, 68, 1, 18, 20, 26, 25, 15, 82, 70, 74, + 4, 67, 85, 67, 4, 70, 73, 65, 17, 43, 20, 70, + 82, 76, 67, 33, 38, 101, 67, 0, 83, 6, 64, 65, + 64, 33, 56, 43, 7, 79, 105, 13, 46, 57, 109, + 21, 21, 24, 26, 26, 22, 16, 21, 24, 10, 17, + 13, 6, 4, 64, 13, 11, 10, 10, 4, 8, 16, 7, 73, + 7, 9, 69, 14, 69, 53, 62, 44, 34, 42, 50, 62, + 56, 31, 13, 5, 11, 21, 19, 90, 13, 17, 65, 32, + 15, 15, 18, 22, 19, 26, 26, 2, 93, 67, 69, 4, + 107, 95, 64, 9, 10, 4, 73, 78, 83, 83, 103, + 82, 18, 6, 1, 68, 67, 75, 85, 85, 96, 73, 22, + 9, 4, 64, 0, 70, 75, 82, 93, 75, 12, 1, 7, 5, + 67, 75, 77, 88, 70, 30, 12, 10, 6, 3, 67, 75, + 75, 78, 62, 109, 106, 99, 105, 98, 98, 97, 95, + 93, 95, 94, 93, 85, 89, 90, 95, 86, 87, 70, + 81, 83, 81, 75, 71, 72, 67, 75, 68, 69, 67, + 68, 5, 68, 77, 65, 68, 73, 3, 0, 66, 65, 7, + 64, 71, 74, 71, 79, 68, 3, 0, 3, 69, 66, 69, 
+ 5, 4, 71, 73, 75, 75, 1, 62, 59, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 60, 48, 26, 62, + 62, 62, 62, 47, 37, 26, 35, 31, 22, 9, 4, 66, + 79, 50, 47, 50, 36, 21, 25, 19, 14, 22, 17, + 10, 67, 6, 68, 79, 18, 22, 10, 21, 26, 14, 9, + 5, 4, 73, 64, 71, 80, 110, 70, 73, 81, 13, 65, + 74, 75, 67, 64, 0, 3, 2, 9, 65, 2, 3, 9, 17, + 16, 19, 4, 58, 49, 42, 38, 30, 26, 10, 0, 80, + 0, 32, 26, 23, 16, 14, 7, 4, 0, 70, 75, 67, + 64, 0, 3, 2, 9, 65, 2, 3, 9, 17, 16, 19, 4, + 58, 49, 42, 38, 30, 26, 10, 0, 80 }, + + { + + 61, + 8, 76, 61, 8, 76, 102, 83, 16, 10, 6, 22, 42, + 51, 21, 4, 75, 25, 81, 81, 2, 1, 5, 69, 6, 36, + 38, 16, 74, 107, 99, 97, 72, 81, 81, 2, 87, + 75, 25, 5, 79, 89, 97, 73, 75, 87, 101, 0, 80, + 94, 67, 80, 87, 100, 6, 75, 73, 86, 20, 3, 22, + 0, 0, 0, 75, 89, 97, 65, 11, 64, 23, 80, 109, + 95, 76, 82, 1, 64, 78, 76, 99, 0, 87, 78, 94, + 89, 80, 85, 82, 14, 64, 70, 94, 74, 94, 81, + 100, 65, 77, 72, 88, 13, 70, 64, 86, 82, 77, + 77, 69, 0, 65, 75, 2, 5, 70, 3, 67, 69, 4, 73, + 23, 68, 1, 19, 20, 27, 25, 16, 83, 70, 73, 4, + 67, 85, 67, 5, 70, 73, 64, 18, 44, 21, 68, 82, + 77, 67, 33, 38, 102, 67, 1, 84, 6, 64, 65, 0, + 33, 56, 43, 8, 80, 103, 12, 43, 54, 107, 21, + 21, 23, 25, 25, 21, 15, 21, 23, 9, 16, 13, 6, + 4, 65, 12, 10, 9, 9, 4, 8, 15, 7, 74, 7, 8, + 69, 13, 70, 51, 60, 42, 32, 40, 48, 62, 53, + 30, 11, 3, 9, 19, 17, 92, 12, 16, 67, 31, 14, + 14, 17, 21, 17, 24, 24, 0, 93, 68, 70, 3, 106, + 94, 1, 9, 10, 4, 72, 77, 82, 82, 101, 81, 19, + 7, 2, 68, 66, 74, 83, 84, 94, 73, 22, 9, 4, 0, + 1, 70, 75, 81, 92, 75, 14, 2, 7, 5, 66, 75, + 77, 87, 69, 31, 13, 11, 6, 4, 67, 74, 75, 78, + 62, 107, 105, 98, 103, 97, 96, 96, 94, 92, 93, + 92, 91, 85, 88, 90, 94, 83, 87, 71, 81, 82, + 80, 75, 71, 72, 67, 74, 68, 70, 67, 68, 5, 68, + 77, 65, 68, 73, 2, 0, 66, 66, 6, 64, 71, 74, + 70, 80, 68, 4, 64, 3, 69, 66, 68, 5, 3, 70, + 72, 75, 75, 0, 62, 58, 61, 61, 61, 62, 62, 62, + 61, 62, 62, 62, 57, 45, 24, 62, 60, 59, 60, + 44, 35, 24, 33, 28, 20, 8, 3, 67, 79, 48, 
45, + 48, 34, 19, 23, 17, 12, 20, 15, 8, 69, 4, 70, + 80, 17, 21, 8, 19, 24, 12, 7, 3, 2, 74, 65, + 72, 80, 109, 71, 75, 82, 12, 66, 75, 75, 67, + 64, 0, 4, 3, 10, 65, 3, 4, 10, 17, 17, 19, 5, + 58, 47, 40, 36, 28, 24, 8, 65, 81, 0, 33, 26, + 24, 16, 15, 7, 4, 1, 70, 75, 67, 64, 0, 4, 3, + 10, 65, 3, 4, 10, 17, 17, 19, 5, 58, 47, 40, + 36, 28, 24, 8, 65, 81 }, + + { + + 60, + 8, 76, 60, 8, 76, 100, 82, 16, 10, 6, 20, 40, + 50, 22, 4, 73, 23, 81, 79, 2, 1, 6, 70, 5, 35, + 36, 13, 76, 108, 97, 96, 72, 81, 79, 2, 87, + 74, 25, 5, 78, 88, 96, 74, 76, 87, 101, 0, 80, + 93, 67, 80, 87, 100, 6, 75, 73, 85, 20, 3, 22, + 0, 0, 0, 74, 89, 97, 65, 10, 64, 23, 79, 108, + 93, 75, 81, 3, 1, 77, 75, 97, 1, 86, 77, 92, + 90, 80, 85, 82, 14, 64, 69, 92, 74, 93, 80, + 98, 65, 77, 72, 88, 13, 70, 0, 85, 82, 76, 77, + 69, 0, 64, 74, 3, 5, 70, 3, 67, 69, 4, 73, 23, + 68, 1, 19, 20, 27, 25, 16, 83, 69, 72, 3, 67, + 85, 67, 6, 70, 72, 0, 19, 45, 23, 66, 83, 78, + 66, 33, 38, 103, 67, 1, 84, 7, 64, 65, 1, 33, + 56, 43, 9, 81, 102, 11, 40, 51, 106, 21, 21, + 23, 24, 25, 21, 15, 21, 23, 8, 15, 13, 6, 4, + 65, 12, 9, 8, 9, 4, 8, 14, 6, 75, 6, 7, 69, + 12, 70, 49, 58, 40, 31, 38, 46, 59, 51, 29, 9, + 1, 8, 17, 15, 93, 11, 15, 69, 30, 13, 13, 16, + 19, 15, 22, 22, 64, 93, 69, 71, 1, 104, 92, 2, + 10, 10, 4, 71, 76, 81, 81, 99, 80, 20, 8, 3, + 67, 65, 73, 82, 83, 92, 72, 23, 10, 5, 0, 2, + 69, 74, 80, 91, 75, 15, 3, 7, 5, 65, 74, 77, + 86, 68, 31, 14, 11, 6, 5, 66, 73, 74, 77, 62, + 106, 103, 96, 101, 96, 95, 94, 92, 90, 91, 90, + 89, 84, 87, 89, 93, 80, 87, 72, 81, 81, 79, + 75, 71, 72, 67, 74, 68, 71, 67, 68, 6, 68, 77, + 65, 68, 73, 2, 0, 66, 67, 5, 64, 71, 74, 69, + 81, 68, 5, 65, 3, 69, 66, 68, 5, 3, 70, 71, + 75, 75, 65, 61, 57, 60, 59, 59, 62, 62, 62, + 59, 60, 62, 61, 54, 42, 22, 61, 57, 55, 55, + 41, 33, 22, 31, 26, 18, 7, 2, 68, 79, 46, 43, + 46, 32, 17, 21, 16, 10, 18, 13, 6, 71, 2, 72, + 81, 16, 20, 6, 18, 22, 11, 5, 2, 1, 75, 66, + 73, 80, 108, 72, 76, 84, 11, 67, 76, 75, 66, + 0, 
1, 5, 4, 11, 65, 4, 5, 11, 18, 18, 20, 6, + 57, 45, 38, 34, 26, 22, 6, 67, 82, 1, 33, 27, + 25, 17, 16, 8, 5, 1, 69, 75, 66, 0, 1, 5, 4, + 11, 65, 4, 5, 11, 18, 18, 20, 6, 57, 45, 38, + 34, 26, 22, 6, 67, 82 }, + + { + + 58, + 7, 77, 58, 7, 77, 99, 81, 16, 10, 5, 18, 38, + 49, 22, 4, 72, 21, 81, 78, 1, 0, 7, 71, 3, 33, + 34, 9, 79, 109, 95, 95, 72, 81, 78, 1, 87, 74, + 25, 4, 78, 88, 95, 76, 78, 88, 102, 64, 80, + 93, 68, 81, 87, 100, 5, 75, 73, 85, 20, 2, 22, + 0, 0, 0, 74, 90, 97, 65, 9, 65, 22, 79, 107, + 91, 74, 81, 4, 2, 77, 74, 96, 1, 85, 77, 90, + 91, 81, 86, 83, 14, 65, 69, 90, 74, 93, 80, + 97, 65, 77, 72, 88, 13, 71, 0, 85, 82, 76, 77, + 69, 0, 64, 73, 3, 5, 71, 2, 67, 69, 3, 73, 23, + 68, 1, 19, 20, 27, 25, 16, 84, 69, 72, 2, 67, + 85, 68, 6, 70, 72, 1, 20, 45, 24, 65, 84, 80, + 66, 33, 38, 104, 68, 1, 85, 7, 65, 65, 2, 33, + 55, 43, 10, 82, 101, 9, 37, 47, 105, 20, 21, + 22, 23, 24, 20, 14, 20, 22, 7, 14, 12, 5, 3, + 66, 11, 8, 7, 8, 3, 7, 13, 5, 77, 5, 6, 69, + 10, 71, 46, 55, 38, 29, 36, 43, 55, 48, 27, 7, + 65, 6, 14, 13, 95, 9, 13, 71, 28, 12, 12, 14, + 17, 13, 20, 20, 66, 93, 70, 72, 64, 103, 91, + 3, 10, 10, 4, 71, 76, 81, 81, 98, 79, 20, 8, + 3, 67, 64, 72, 81, 83, 90, 72, 23, 10, 5, 0, + 2, 69, 74, 80, 90, 75, 16, 4, 7, 4, 65, 74, + 77, 86, 68, 31, 14, 11, 6, 6, 66, 73, 74, 77, + 62, 105, 102, 95, 100, 95, 94, 93, 91, 89, 90, + 89, 87, 84, 87, 89, 93, 77, 87, 74, 81, 81, + 79, 75, 71, 72, 68, 74, 69, 72, 68, 68, 6, 69, + 77, 66, 68, 73, 1, 64, 67, 68, 4, 65, 72, 74, + 68, 82, 69, 5, 66, 2, 69, 67, 68, 4, 2, 70, + 71, 75, 75, 67, 59, 56, 58, 57, 56, 62, 62, + 62, 56, 57, 62, 58, 50, 39, 20, 57, 53, 51, + 49, 38, 30, 20, 28, 23, 16, 5, 0, 69, 79, 43, + 40, 43, 30, 14, 19, 14, 7, 16, 10, 4, 74, 64, + 74, 83, 14, 18, 3, 16, 20, 9, 3, 0, 64, 76, + 67, 74, 81, 107, 74, 78, 86, 9, 69, 78, 75, + 66, 0, 1, 6, 4, 11, 65, 5, 5, 12, 18, 18, 20, + 6, 56, 43, 36, 31, 23, 20, 3, 69, 84, 1, 33, + 27, 25, 17, 16, 8, 5, 1, 69, 75, 66, 0, 1, 6, + 4, 11, 65, 5, 5, 
12, 18, 18, 20, 6, 56, 43, + 36, 31, 23, 20, 3, 69, 84 }, + + { + + 57, + 7, 77, 57, 7, 77, 97, 79, 17, 11, 5, 17, 37, + 49, 23, 5, 70, 20, 80, 76, 1, 0, 9, 71, 2, 32, + 33, 6, 81, 109, 93, 93, 71, 80, 76, 1, 86, 73, + 26, 4, 77, 87, 93, 77, 79, 88, 102, 64, 79, + 92, 68, 81, 86, 99, 5, 74, 72, 84, 21, 2, 22, + 0, 0, 0, 73, 90, 97, 64, 9, 65, 22, 78, 105, + 88, 72, 80, 6, 4, 76, 72, 94, 2, 83, 76, 87, + 91, 81, 86, 83, 15, 65, 68, 87, 73, 92, 79, + 95, 65, 76, 72, 87, 14, 71, 1, 84, 81, 75, 76, + 68, 1, 0, 72, 4, 6, 71, 2, 66, 68, 3, 72, 24, + 67, 1, 20, 21, 28, 26, 17, 84, 68, 71, 2, 67, + 84, 68, 7, 69, 71, 3, 22, 46, 26, 0, 84, 81, + 65, 34, 39, 104, 68, 2, 85, 8, 65, 64, 4, 33, + 55, 44, 12, 83, 99, 8, 35, 44, 103, 20, 21, + 22, 23, 24, 20, 14, 20, 22, 7, 14, 12, 5, 3, + 66, 11, 8, 7, 8, 3, 7, 13, 5, 78, 5, 6, 69, 9, + 71, 44, 53, 37, 28, 35, 41, 52, 46, 26, 6, 67, + 5, 12, 12, 96, 8, 12, 72, 27, 12, 12, 13, 16, + 12, 19, 19, 67, 93, 70, 72, 65, 101, 89, 5, + 11, 10, 4, 70, 75, 80, 80, 96, 77, 21, 9, 4, + 66, 1, 71, 79, 82, 87, 71, 24, 11, 6, 1, 3, + 68, 73, 79, 88, 74, 18, 5, 8, 4, 64, 73, 76, + 85, 67, 32, 15, 12, 7, 7, 65, 72, 73, 76, 62, + 103, 100, 93, 98, 93, 92, 91, 89, 87, 88, 87, + 84, 83, 86, 88, 92, 73, 86, 75, 80, 80, 78, + 74, 71, 71, 68, 73, 69, 72, 68, 68, 7, 69, 77, + 66, 68, 72, 1, 64, 67, 68, 4, 65, 72, 73, 66, + 82, 69, 6, 66, 2, 69, 67, 67, 4, 2, 69, 70, + 74, 74, 68, 58, 55, 57, 56, 54, 60, 60, 59, + 54, 55, 59, 56, 47, 37, 18, 54, 50, 48, 44, + 36, 28, 19, 26, 21, 15, 4, 64, 69, 79, 41, 38, + 41, 28, 12, 18, 13, 5, 15, 8, 3, 76, 66, 75, + 84, 13, 17, 1, 15, 19, 8, 2, 64, 65, 77, 67, + 74, 81, 106, 75, 79, 87, 8, 70, 79, 74, 65, 1, + 2, 8, 5, 12, 64, 7, 6, 13, 19, 19, 21, 7, 56, + 42, 35, 29, 21, 19, 1, 70, 85, 2, 34, 28, 26, + 18, 17, 9, 6, 2, 68, 74, 65, 1, 2, 8, 5, 12, + 64, 7, 6, 13, 19, 19, 21, 7, 56, 42, 35, 29, + 21, 19, 1, 70, 85 }, + + { + + 56, + 7, 77, 56, 7, 77, 95, 78, 17, 11, 5, 15, 35, + 48, 24, 5, 68, 18, 80, 75, 1, 0, 10, 
72, 1, + 31, 31, 3, 83, 110, 91, 92, 71, 80, 75, 1, 86, + 72, 26, 4, 76, 86, 92, 78, 80, 88, 102, 64, + 79, 91, 68, 81, 86, 99, 5, 74, 72, 83, 21, 2, + 22, 0, 0, 0, 72, 90, 97, 64, 8, 65, 22, 77, + 104, 86, 71, 79, 8, 6, 75, 71, 92, 3, 82, 75, + 85, 92, 81, 86, 83, 15, 65, 67, 85, 73, 91, + 78, 93, 65, 76, 72, 87, 14, 71, 1, 83, 81, 74, + 76, 68, 1, 0, 71, 5, 6, 71, 2, 66, 68, 3, 72, + 24, 67, 1, 20, 21, 28, 26, 17, 85, 67, 70, 1, + 67, 84, 68, 8, 69, 71, 4, 23, 47, 28, 2, 85, + 82, 65, 34, 39, 105, 68, 2, 86, 9, 65, 64, 5, + 33, 55, 44, 13, 84, 98, 7, 32, 41, 102, 20, + 21, 22, 22, 23, 20, 14, 20, 21, 6, 13, 12, 5, + 3, 66, 10, 7, 6, 8, 3, 7, 12, 4, 79, 4, 5, 69, + 8, 72, 42, 51, 35, 26, 33, 39, 49, 44, 25, 4, + 69, 3, 10, 10, 97, 7, 11, 74, 26, 11, 11, 12, + 14, 10, 17, 17, 69, 93, 71, 73, 67, 100, 88, + 6, 12, 10, 4, 69, 74, 79, 79, 94, 76, 22, 10, + 5, 65, 2, 70, 78, 81, 85, 70, 24, 11, 6, 1, 4, + 67, 72, 78, 87, 74, 19, 6, 8, 4, 0, 72, 76, + 84, 66, 32, 16, 12, 7, 8, 64, 71, 72, 75, 62, + 102, 98, 91, 96, 92, 91, 89, 87, 86, 86, 85, + 82, 82, 85, 87, 91, 70, 86, 76, 80, 79, 77, + 74, 71, 71, 68, 73, 69, 73, 68, 68, 8, 69, 77, + 66, 68, 72, 1, 64, 67, 69, 3, 65, 72, 73, 65, + 83, 69, 7, 67, 2, 69, 67, 67, 4, 2, 69, 69, + 74, 74, 70, 57, 54, 56, 54, 52, 57, 57, 56, + 52, 52, 56, 53, 44, 34, 16, 50, 46, 44, 39, + 33, 26, 17, 24, 19, 13, 3, 65, 70, 79, 39, 36, + 39, 26, 10, 16, 11, 3, 13, 6, 1, 78, 68, 77, + 85, 12, 16, 64, 13, 17, 6, 0, 65, 66, 78, 68, + 75, 81, 105, 76, 80, 89, 7, 71, 80, 74, 65, 2, + 3, 9, 6, 13, 64, 8, 7, 14, 19, 20, 22, 8, 55, + 40, 33, 27, 19, 17, 64, 72, 86, 3, 34, 28, 27, + 19, 18, 9, 7, 2, 67, 74, 65, 2, 3, 9, 6, 13, + 64, 8, 7, 14, 19, 20, 22, 8, 55, 40, 33, 27, + 19, 17, 64, 72, 86 }, + + { + + 55, + 7, 77, 55, 7, 77, 93, 76, 18, 11, 4, 13, 34, + 47, 24, 5, 66, 17, 80, 73, 1, 64, 11, 73, 0, + 30, 29, 64, 85, 111, 89, 90, 71, 80, 73, 1, + 85, 71, 27, 4, 75, 85, 91, 79, 81, 88, 103, + 64, 79, 90, 68, 82, 86, 99, 5, 74, 72, 82, 22, + 2, 
22, 0, 0, 0, 72, 90, 97, 0, 7, 65, 21, 77, + 103, 84, 70, 78, 9, 7, 74, 70, 90, 4, 81, 74, + 83, 93, 81, 86, 83, 16, 65, 66, 83, 73, 90, + 77, 91, 65, 76, 72, 87, 15, 71, 2, 82, 81, 73, + 76, 68, 1, 1, 70, 5, 6, 71, 2, 66, 68, 3, 72, + 24, 67, 1, 21, 21, 29, 26, 18, 85, 67, 69, 1, + 67, 84, 68, 9, 69, 70, 5, 24, 48, 29, 4, 85, + 83, 64, 34, 39, 106, 68, 3, 86, 9, 65, 64, 6, + 33, 55, 44, 14, 85, 96, 6, 29, 38, 100, 20, + 21, 21, 21, 23, 19, 13, 20, 21, 5, 12, 12, 5, + 3, 67, 10, 6, 5, 7, 3, 7, 11, 4, 80, 4, 4, 69, + 7, 72, 40, 49, 33, 25, 31, 37, 46, 41, 24, 2, + 71, 2, 8, 8, 99, 6, 10, 76, 25, 10, 10, 11, + 13, 8, 15, 15, 70, 93, 72, 74, 68, 98, 86, 8, + 12, 10, 4, 68, 73, 78, 78, 92, 75, 23, 11, 6, + 65, 3, 69, 76, 80, 83, 70, 25, 12, 7, 2, 5, + 67, 72, 77, 86, 74, 21, 7, 8, 4, 1, 72, 76, + 83, 65, 33, 17, 13, 7, 9, 64, 70, 72, 75, 62, + 100, 97, 90, 94, 91, 89, 88, 86, 84, 84, 83, + 80, 82, 84, 87, 90, 67, 86, 77, 80, 78, 76, + 74, 71, 71, 68, 72, 69, 74, 68, 68, 8, 69, 77, + 66, 68, 72, 0, 64, 67, 70, 2, 65, 72, 73, 64, + 84, 69, 8, 68, 2, 69, 67, 66, 4, 1, 68, 68, + 74, 74, 71, 56, 53, 55, 52, 50, 55, 55, 53, + 49, 49, 53, 50, 41, 31, 14, 46, 43, 40, 34, + 30, 24, 15, 22, 16, 11, 2, 66, 71, 79, 37, 34, + 37, 24, 8, 14, 10, 1, 11, 4, 64, 80, 70, 79, + 86, 11, 15, 66, 12, 15, 5, 65, 67, 68, 79, 69, + 76, 81, 104, 77, 82, 90, 6, 72, 81, 74, 64, 2, + 3, 10, 7, 14, 64, 9, 8, 15, 20, 21, 22, 9, 55, + 38, 31, 25, 17, 15, 66, 74, 87, 3, 35, 29, 28, + 19, 19, 10, 7, 3, 67, 74, 64, 2, 3, 10, 7, 14, + 64, 9, 8, 15, 20, 21, 22, 9, 55, 38, 31, 25, + 17, 15, 66, 74, 87 }, + + { + + 53, + 7, 77, 53, 7, 77, 92, 75, 18, 11, 4, 11, 32, + 46, 25, 5, 65, 15, 80, 72, 0, 64, 12, 74, 65, + 28, 27, 67, 88, 112, 87, 89, 71, 80, 72, 0, + 85, 70, 27, 3, 75, 84, 89, 80, 83, 89, 103, + 64, 78, 89, 69, 82, 86, 99, 5, 74, 72, 82, 22, + 2, 22, 0, 0, 0, 71, 91, 97, 0, 6, 65, 21, 76, + 102, 82, 69, 77, 11, 9, 74, 69, 88, 5, 80, 73, + 80, 93, 82, 86, 84, 16, 66, 65, 80, 72, 90, + 77, 89, 65, 
76, 72, 87, 15, 71, 2, 81, 81, 73, + 75, 68, 1, 1, 69, 6, 6, 72, 1, 66, 68, 3, 72, + 24, 67, 1, 21, 21, 29, 26, 18, 86, 66, 69, 0, + 67, 83, 68, 10, 68, 70, 7, 26, 49, 31, 6, 86, + 84, 64, 35, 39, 107, 68, 3, 87, 10, 65, 64, 7, + 33, 55, 44, 16, 86, 95, 5, 26, 35, 99, 19, 21, + 21, 21, 22, 19, 13, 20, 20, 4, 11, 11, 5, 3, + 67, 9, 6, 4, 7, 2, 6, 10, 3, 81, 3, 3, 69, 5, + 73, 38, 47, 31, 23, 30, 35, 42, 39, 23, 0, 74, + 0, 6, 6, 100, 4, 9, 77, 24, 9, 9, 9, 11, 6, + 13, 14, 72, 93, 73, 75, 70, 97, 85, 9, 13, 10, + 4, 67, 72, 77, 78, 90, 73, 24, 11, 6, 64, 5, + 68, 75, 79, 80, 69, 25, 12, 7, 2, 6, 66, 71, + 77, 85, 74, 22, 8, 8, 4, 2, 71, 76, 82, 64, + 33, 18, 13, 7, 10, 0, 69, 71, 74, 62, 99, 95, + 88, 93, 90, 88, 86, 84, 83, 83, 81, 77, 81, + 83, 86, 89, 64, 85, 78, 79, 77, 76, 74, 71, + 71, 69, 72, 70, 75, 68, 68, 9, 69, 77, 66, 68, + 71, 0, 65, 67, 71, 1, 66, 72, 73, 0, 85, 70, + 8, 68, 1, 69, 67, 66, 3, 1, 68, 68, 74, 74, + 73, 55, 52, 54, 51, 47, 52, 52, 50, 47, 46, + 49, 47, 37, 29, 12, 42, 39, 36, 29, 27, 22, + 13, 20, 14, 9, 0, 67, 72, 79, 34, 31, 34, 22, + 6, 12, 8, 64, 9, 2, 66, 82, 73, 80, 88, 10, + 14, 69, 10, 14, 3, 67, 68, 69, 80, 70, 77, 81, + 103, 79, 83, 92, 4, 73, 82, 74, 64, 3, 4, 11, + 7, 14, 0, 10, 9, 16, 20, 22, 23, 9, 54, 36, + 29, 23, 15, 13, 69, 76, 89, 4, 35, 29, 28, 20, + 19, 10, 8, 3, 66, 74, 64, 3, 4, 11, 7, 14, 0, + 10, 9, 16, 20, 22, 23, 9, 54, 36, 29, 23, 15, + 13, 69, 76, 89 }, + + { + + 52, + 7, 77, 52, 7, 77, 90, 73, 18, 11, 3, 9, 31, + 45, 26, 5, 0, 13, 79, 70, 0, 64, 13, 75, 66, + 27, 25, 70, 90, 112, 85, 88, 71, 79, 70, 0, + 85, 69, 27, 3, 74, 83, 88, 81, 84, 89, 103, + 64, 78, 88, 69, 82, 86, 99, 5, 73, 72, 81, 23, + 2, 22, 0, 0, 0, 71, 91, 97, 1, 5, 65, 20, 76, + 100, 79, 68, 76, 12, 10, 73, 68, 86, 6, 79, + 72, 78, 94, 82, 86, 84, 16, 66, 64, 78, 72, + 89, 76, 87, 65, 76, 72, 86, 15, 71, 3, 80, 80, + 72, 75, 68, 2, 2, 68, 6, 6, 72, 1, 65, 68, 3, + 72, 25, 67, 1, 22, 22, 29, 26, 18, 86, 66, 68, + 0, 67, 83, 68, 11, 68, 69, 8, 
27, 50, 32, 8, + 86, 85, 0, 35, 39, 108, 68, 4, 87, 11, 65, 64, + 9, 33, 55, 44, 17, 87, 93, 4, 24, 32, 98, 19, + 21, 20, 20, 22, 19, 12, 20, 20, 3, 10, 11, 5, + 3, 67, 9, 5, 3, 6, 2, 6, 9, 3, 82, 2, 3, 69, + 4, 73, 36, 45, 30, 22, 28, 33, 39, 36, 22, 65, + 76, 64, 4, 4, 101, 3, 8, 79, 23, 8, 8, 8, 10, + 4, 11, 12, 73, 93, 74, 76, 72, 95, 83, 10, 13, + 10, 4, 66, 71, 76, 77, 88, 72, 25, 12, 7, 0, + 6, 67, 73, 78, 78, 68, 26, 13, 8, 3, 7, 65, + 71, 76, 84, 73, 23, 9, 8, 4, 3, 71, 75, 81, 0, + 33, 19, 13, 7, 11, 1, 68, 71, 73, 62, 98, 94, + 87, 91, 89, 86, 85, 82, 81, 81, 79, 75, 80, + 82, 85, 88, 2, 85, 79, 79, 76, 75, 74, 71, 71, + 69, 71, 70, 76, 68, 68, 10, 69, 77, 66, 68, + 71, 64, 65, 67, 72, 0, 66, 72, 73, 1, 85, 70, + 9, 69, 1, 69, 67, 66, 3, 0, 68, 67, 74, 74, + 74, 54, 51, 53, 49, 45, 50, 49, 47, 44, 43, + 46, 44, 34, 26, 10, 38, 36, 32, 24, 24, 20, + 12, 18, 11, 7, 64, 68, 73, 79, 32, 29, 32, 20, + 4, 10, 7, 66, 7, 0, 68, 84, 75, 82, 89, 9, 13, + 71, 9, 12, 2, 69, 69, 70, 81, 71, 78, 81, 102, + 80, 84, 93, 3, 74, 83, 74, 0, 3, 5, 12, 8, 15, + 0, 11, 10, 17, 21, 23, 23, 10, 54, 34, 27, 21, + 13, 11, 71, 78, 90, 4, 36, 30, 29, 21, 20, 11, + 8, 3, 65, 74, 0, 3, 5, 12, 8, 15, 0, 11, 10, + 17, 21, 23, 23, 10, 54, 34, 27, 21, 13, 11, + 71, 78, 90 }, + + { + + 51, + 7, 78, 51, 7, 78, 88, 72, 19, 11, 3, 8, 29, + 44, 26, 6, 2, 12, 79, 69, 0, 65, 14, 76, 67, + 26, 24, 74, 92, 113, 83, 86, 71, 79, 69, 0, + 84, 68, 28, 3, 73, 82, 87, 83, 85, 89, 104, + 64, 78, 88, 69, 83, 85, 99, 4, 73, 72, 80, 23, + 2, 22, 0, 0, 0, 70, 91, 97, 1, 5, 66, 20, 75, + 99, 77, 67, 75, 14, 12, 72, 67, 84, 7, 78, 72, + 76, 95, 82, 86, 84, 17, 66, 0, 76, 72, 88, 75, + 85, 65, 76, 72, 86, 16, 72, 3, 80, 80, 71, 75, + 67, 2, 2, 67, 7, 6, 72, 1, 65, 68, 3, 72, 25, + 67, 1, 22, 22, 30, 27, 19, 87, 65, 67, 64, 67, + 83, 68, 12, 68, 69, 9, 28, 50, 34, 9, 87, 86, + 0, 35, 39, 108, 69, 4, 88, 11, 66, 0, 10, 33, + 55, 45, 18, 88, 92, 3, 21, 29, 96, 19, 21, 20, + 19, 21, 18, 12, 19, 19, 3, 10, 11, 4, 
3, 68, + 8, 4, 2, 6, 2, 6, 9, 2, 84, 2, 2, 69, 3, 74, + 33, 43, 28, 20, 26, 31, 36, 34, 20, 67, 78, + 66, 2, 2, 103, 2, 6, 81, 21, 7, 7, 7, 8, 2, 9, + 10, 75, 93, 75, 77, 73, 94, 82, 12, 14, 10, 4, + 65, 71, 76, 76, 87, 71, 26, 13, 8, 0, 7, 66, + 72, 77, 76, 68, 26, 13, 8, 3, 8, 65, 70, 75, + 83, 73, 25, 10, 8, 4, 3, 70, 75, 81, 0, 34, + 19, 14, 7, 12, 1, 68, 70, 73, 62, 96, 92, 85, + 89, 87, 85, 83, 81, 80, 79, 77, 73, 80, 81, + 85, 88, 5, 85, 80, 79, 76, 74, 74, 71, 71, 69, + 71, 70, 77, 68, 68, 10, 69, 77, 66, 68, 71, + 64, 65, 67, 73, 64, 66, 72, 73, 2, 86, 70, 10, + 70, 1, 69, 67, 65, 3, 0, 67, 66, 74, 74, 76, + 53, 50, 52, 47, 43, 47, 47, 44, 42, 40, 43, + 41, 31, 23, 8, 35, 32, 28, 19, 22, 17, 10, 16, + 9, 5, 65, 69, 74, 79, 30, 27, 30, 18, 2, 9, 5, + 68, 5, 66, 70, 86, 77, 84, 90, 8, 11, 73, 7, + 10, 0, 71, 71, 72, 82, 72, 79, 82, 101, 81, + 86, 95, 2, 76, 84, 73, 0, 4, 5, 13, 9, 16, 0, + 12, 11, 18, 21, 24, 24, 11, 53, 32, 25, 19, + 11, 9, 73, 80, 91, 5, 36, 30, 30, 21, 21, 11, + 9, 4, 65, 73, 0, 4, 5, 13, 9, 16, 0, 12, 11, + 18, 21, 24, 24, 11, 53, 32, 25, 19, 11, 9, 73, + 80, 91 }, + + { + + 50, + 7, 78, 50, 7, 78, 86, 70, 19, 11, 2, 6, 28, + 43, 27, 6, 3, 10, 79, 67, 64, 65, 15, 77, 68, + 25, 22, 77, 95, 114, 81, 85, 71, 79, 67, 64, + 84, 67, 28, 3, 73, 81, 85, 84, 86, 89, 104, + 64, 77, 87, 70, 83, 85, 99, 4, 73, 72, 79, 24, + 2, 22, 0, 0, 0, 70, 91, 97, 2, 4, 66, 19, 75, + 98, 75, 66, 74, 15, 13, 71, 66, 82, 8, 77, 71, + 73, 95, 83, 86, 85, 17, 66, 1, 73, 71, 88, 75, + 83, 65, 76, 72, 86, 16, 72, 4, 79, 80, 70, 74, + 67, 2, 3, 66, 7, 6, 73, 1, 65, 68, 3, 72, 25, + 67, 1, 23, 22, 30, 27, 19, 87, 65, 67, 64, 67, + 82, 68, 13, 67, 68, 11, 30, 51, 35, 11, 87, + 87, 1, 36, 39, 109, 69, 5, 88, 12, 66, 0, 11, + 33, 55, 45, 20, 89, 90, 2, 18, 26, 95, 19, 21, + 19, 19, 21, 18, 11, 19, 19, 2, 9, 11, 4, 3, + 68, 8, 4, 1, 5, 2, 6, 8, 2, 85, 1, 1, 69, 2, + 74, 31, 41, 26, 19, 25, 29, 33, 31, 19, 69, + 80, 67, 0, 0, 104, 1, 5, 82, 20, 6, 6, 5, 7, + 0, 7, 9, 76, 93, 
76, 78, 75, 92, 80, 13, 14, + 10, 4, 64, 70, 75, 76, 85, 69, 27, 14, 8, 1, + 9, 65, 70, 76, 73, 67, 27, 14, 9, 4, 9, 64, + 70, 74, 82, 73, 26, 11, 8, 4, 4, 70, 75, 80, + 1, 34, 20, 14, 7, 13, 2, 67, 70, 72, 62, 95, + 91, 84, 88, 86, 83, 82, 79, 78, 77, 75, 70, + 79, 80, 84, 87, 8, 84, 81, 78, 75, 73, 74, 71, + 71, 69, 70, 71, 78, 68, 68, 11, 69, 77, 66, + 68, 70, 65, 66, 67, 74, 65, 67, 72, 73, 3, 87, + 71, 11, 70, 0, 69, 67, 65, 2, 64, 67, 66, 74, + 74, 77, 52, 49, 51, 46, 40, 45, 44, 41, 39, + 37, 40, 38, 28, 21, 6, 31, 29, 24, 14, 19, 15, + 8, 14, 6, 3, 66, 70, 75, 79, 28, 24, 27, 16, + 0, 7, 4, 70, 3, 68, 72, 88, 79, 85, 92, 7, 10, + 75, 6, 9, 64, 73, 72, 73, 83, 73, 80, 82, 100, + 83, 87, 96, 0, 77, 85, 73, 1, 4, 6, 14, 10, + 16, 1, 13, 12, 19, 22, 25, 24, 11, 53, 30, 23, + 17, 9, 7, 76, 82, 92, 5, 37, 31, 30, 22, 22, + 12, 9, 4, 64, 73, 1, 4, 6, 14, 10, 16, 1, 13, + 12, 19, 22, 25, 24, 11, 53, 30, 23, 17, 9, 7, + 76, 82, 92 }, + + { + + 48, + 6, 78, 48, 6, 78, 85, 69, 19, 11, 2, 4, 26, + 42, 27, 6, 5, 8, 79, 66, 64, 66, 16, 78, 70, + 23, 20, 81, 97, 115, 79, 84, 71, 79, 66, 64, + 84, 67, 28, 2, 72, 80, 84, 85, 88, 90, 105, + 64, 77, 86, 70, 84, 85, 99, 4, 73, 72, 79, 24, + 2, 22, 0, 0, 0, 69, 92, 97, 2, 3, 66, 19, 74, + 97, 73, 65, 74, 17, 15, 71, 65, 80, 8, 76, 70, + 71, 96, 83, 87, 85, 17, 67, 1, 71, 71, 87, 74, + 82, 65, 76, 72, 86, 16, 72, 4, 78, 80, 70, 74, + 67, 2, 3, 65, 8, 6, 73, 0, 65, 68, 2, 72, 25, + 67, 1, 23, 22, 30, 27, 19, 88, 64, 66, 65, 67, + 82, 68, 14, 67, 68, 12, 31, 52, 37, 13, 88, + 88, 1, 36, 39, 110, 69, 5, 89, 12, 66, 0, 12, + 33, 55, 45, 21, 90, 89, 1, 15, 22, 94, 18, 21, + 19, 18, 20, 17, 11, 19, 18, 1, 8, 10, 4, 2, + 69, 7, 3, 0, 5, 1, 5, 7, 1, 86, 0, 0, 69, 0, + 75, 29, 39, 24, 17, 23, 26, 29, 29, 18, 71, + 83, 69, 66, 65, 106, 64, 4, 84, 19, 5, 5, 4, + 5, 65, 5, 7, 78, 93, 77, 79, 77, 91, 79, 14, + 15, 10, 4, 64, 69, 74, 75, 83, 68, 27, 14, 9, + 1, 10, 64, 69, 75, 71, 67, 27, 14, 9, 4, 9, + 64, 69, 74, 81, 73, 27, 12, 8, 4, 5, 
69, 75, + 79, 2, 34, 21, 14, 7, 14, 2, 66, 69, 72, 62, + 94, 89, 82, 86, 85, 82, 80, 78, 77, 76, 73, + 68, 79, 79, 84, 86, 11, 84, 82, 78, 74, 73, + 74, 71, 71, 70, 70, 71, 79, 68, 68, 11, 69, + 77, 67, 68, 70, 65, 66, 68, 75, 66, 67, 73, + 73, 4, 88, 71, 11, 71, 0, 69, 68, 65, 2, 64, + 67, 65, 74, 74, 79, 51, 48, 50, 44, 38, 42, + 41, 38, 37, 34, 36, 35, 24, 18, 4, 27, 25, 20, + 9, 16, 13, 6, 11, 4, 1, 68, 72, 76, 79, 25, + 22, 25, 14, 66, 5, 2, 73, 1, 70, 74, 90, 82, + 87, 93, 5, 9, 78, 4, 7, 66, 75, 74, 75, 84, + 74, 81, 82, 99, 84, 89, 98, 64, 78, 86, 73, 1, + 5, 6, 15, 10, 17, 1, 14, 12, 20, 22, 25, 25, + 12, 52, 28, 21, 15, 7, 5, 78, 84, 94, 6, 37, + 31, 31, 22, 22, 12, 10, 4, 64, 73, 1, 5, 6, + 15, 10, 17, 1, 14, 12, 20, 22, 25, 25, 12, 52, + 28, 21, 15, 7, 5, 78, 84, 94 }, + + { + + 47, + 6, 78, 47, 6, 78, 83, 68, 20, 11, 2, 2, 24, + 42, 28, 6, 7, 7, 78, 64, 64, 66, 17, 78, 71, + 22, 18, 84, 99, 115, 77, 82, 70, 78, 64, 64, + 83, 66, 29, 2, 71, 79, 83, 86, 89, 90, 105, + 64, 77, 85, 70, 84, 85, 99, 4, 72, 71, 78, 24, + 2, 22, 0, 0, 0, 68, 92, 97, 2, 2, 66, 19, 73, + 95, 70, 64, 73, 19, 17, 70, 64, 78, 9, 74, 69, + 69, 97, 83, 87, 85, 18, 67, 2, 69, 71, 86, 73, + 80, 65, 75, 72, 85, 17, 72, 5, 77, 79, 69, 74, + 67, 3, 4, 64, 9, 6, 73, 0, 64, 67, 2, 72, 26, + 67, 1, 23, 23, 31, 27, 20, 88, 0, 65, 66, 67, + 82, 68, 15, 67, 67, 13, 32, 53, 39, 15, 89, + 89, 2, 36, 40, 111, 69, 5, 89, 13, 66, 0, 14, + 33, 55, 45, 22, 91, 88, 0, 13, 19, 92, 18, 21, + 19, 17, 20, 17, 11, 19, 18, 0, 7, 10, 4, 2, + 69, 7, 2, 0, 5, 1, 5, 6, 0, 87, 0, 0, 69, 64, + 75, 27, 37, 23, 16, 21, 24, 26, 27, 17, 73, + 85, 70, 68, 66, 107, 65, 3, 86, 18, 4, 4, 3, + 3, 66, 3, 5, 79, 93, 77, 80, 78, 89, 77, 16, + 16, 10, 4, 0, 68, 73, 74, 81, 67, 28, 15, 10, + 2, 11, 0, 68, 74, 69, 66, 28, 15, 10, 4, 10, + 0, 68, 73, 79, 72, 29, 13, 9, 4, 6, 68, 74, + 78, 3, 35, 22, 15, 8, 15, 3, 65, 68, 71, 62, + 92, 87, 80, 84, 84, 81, 78, 76, 75, 74, 71, + 66, 78, 78, 83, 85, 15, 84, 83, 78, 73, 72, + 73, 71, 
71, 70, 70, 71, 80, 68, 68, 12, 69, + 77, 67, 68, 70, 65, 66, 68, 75, 67, 67, 73, + 72, 6, 88, 71, 12, 72, 0, 69, 68, 64, 2, 64, + 66, 64, 73, 73, 81, 50, 47, 49, 42, 36, 39, + 39, 35, 35, 32, 33, 33, 21, 15, 2, 23, 22, 17, + 4, 13, 11, 5, 9, 2, 0, 69, 73, 77, 79, 23, 20, + 23, 12, 68, 3, 1, 75, 64, 72, 76, 92, 84, 89, + 94, 4, 8, 80, 3, 5, 67, 77, 75, 76, 85, 75, + 81, 82, 98, 85, 90, 100, 65, 79, 87, 73, 2, 6, + 7, 17, 11, 18, 1, 15, 13, 21, 23, 26, 26, 13, + 51, 27, 19, 13, 5, 3, 80, 86, 95, 7, 37, 32, + 32, 23, 23, 13, 11, 5, 0, 73, 2, 6, 7, 17, 11, + 18, 1, 15, 13, 21, 23, 26, 26, 13, 51, 27, 19, + 13, 5, 3, 80, 86, 95 }, + + { + + 46, + 6, 78, 46, 6, 78, 81, 66, 20, 11, 1, 0, 23, + 41, 29, 6, 8, 5, 78, 0, 65, 66, 18, 79, 72, + 21, 16, 87, 102, 116, 75, 81, 70, 78, 0, 65, + 83, 65, 29, 2, 71, 78, 81, 87, 90, 90, 105, + 64, 76, 84, 71, 84, 85, 99, 4, 72, 71, 77, 25, + 2, 22, 0, 0, 0, 68, 92, 97, 3, 1, 66, 18, 73, + 94, 68, 0, 72, 20, 18, 69, 0, 76, 10, 73, 68, + 66, 97, 84, 87, 86, 18, 67, 3, 66, 70, 86, 73, + 78, 65, 75, 72, 85, 17, 72, 5, 76, 79, 68, 73, + 67, 3, 4, 0, 9, 6, 74, 0, 64, 67, 2, 72, 26, + 67, 1, 24, 23, 31, 27, 20, 89, 0, 65, 66, 67, + 81, 68, 16, 66, 67, 15, 34, 54, 40, 17, 89, + 90, 2, 37, 40, 112, 69, 6, 90, 14, 66, 0, 15, + 33, 55, 45, 24, 92, 86, 64, 10, 16, 91, 18, + 21, 18, 17, 19, 17, 10, 19, 17, 64, 6, 10, 4, + 2, 69, 6, 2, 64, 4, 1, 5, 5, 0, 88, 64, 64, + 69, 65, 76, 25, 35, 21, 14, 20, 22, 23, 24, + 16, 75, 87, 72, 70, 68, 108, 66, 2, 87, 17, 3, + 3, 1, 2, 68, 1, 4, 81, 93, 78, 81, 80, 88, 76, + 17, 16, 10, 4, 1, 67, 72, 74, 79, 65, 29, 16, + 10, 3, 13, 1, 66, 73, 66, 65, 28, 15, 10, 5, + 11, 1, 68, 72, 78, 72, 30, 14, 9, 4, 7, 68, + 74, 77, 4, 35, 23, 15, 8, 16, 4, 64, 68, 70, + 62, 91, 86, 79, 83, 83, 79, 77, 74, 74, 72, + 69, 0, 77, 77, 82, 84, 18, 83, 84, 77, 72, 71, + 73, 71, 71, 70, 69, 72, 81, 68, 68, 13, 69, + 77, 67, 68, 69, 66, 67, 68, 76, 68, 68, 73, + 72, 7, 89, 72, 13, 72, 64, 69, 68, 64, 1, 65, + 66, 64, 73, 73, 82, 49, 
46, 48, 41, 33, 37, + 36, 32, 32, 29, 30, 30, 18, 13, 0, 19, 18, 13, + 64, 10, 9, 3, 7, 64, 65, 70, 74, 78, 79, 21, + 17, 20, 10, 70, 1, 64, 77, 66, 74, 78, 94, 86, + 90, 96, 3, 7, 82, 1, 4, 69, 79, 76, 77, 86, + 76, 82, 82, 97, 87, 91, 101, 67, 80, 88, 73, + 2, 6, 8, 18, 12, 18, 2, 16, 14, 22, 23, 27, + 26, 13, 51, 25, 17, 11, 3, 1, 83, 88, 96, 7, + 38, 32, 32, 24, 24, 13, 11, 5, 1, 73, 2, 6, 8, + 18, 12, 18, 2, 16, 14, 22, 23, 27, 26, 13, 51, + 25, 17, 11, 3, 1, 83, 88, 96 }, + + { + + 45, + 6, 79, 45, 6, 79, 79, 65, 21, 11, 1, 64, 21, + 40, 29, 7, 10, 4, 78, 2, 65, 67, 19, 80, 73, + 20, 15, 91, 104, 117, 73, 79, 70, 78, 2, 65, + 82, 64, 30, 2, 70, 77, 80, 89, 91, 90, 106, + 64, 76, 84, 71, 85, 84, 99, 3, 72, 71, 76, 25, + 2, 22, 0, 0, 0, 67, 92, 97, 3, 1, 67, 18, 72, + 93, 66, 1, 71, 22, 20, 68, 1, 74, 11, 72, 68, + 64, 98, 84, 87, 86, 19, 67, 4, 64, 70, 85, 72, + 76, 65, 75, 72, 85, 18, 73, 6, 76, 79, 67, 73, + 66, 3, 5, 1, 10, 6, 74, 0, 64, 67, 2, 72, 26, + 67, 1, 24, 23, 32, 28, 21, 89, 1, 64, 67, 67, + 81, 68, 17, 66, 66, 16, 35, 54, 42, 18, 90, + 91, 3, 37, 40, 112, 70, 6, 90, 14, 67, 1, 16, + 33, 55, 46, 25, 93, 85, 65, 7, 13, 89, 18, 21, + 18, 16, 19, 16, 10, 18, 17, 64, 6, 10, 3, 2, + 70, 6, 1, 65, 4, 1, 5, 5, 64, 90, 64, 65, 69, + 66, 76, 22, 33, 19, 13, 18, 20, 20, 22, 14, + 77, 89, 73, 72, 70, 110, 67, 0, 89, 15, 2, 2, + 0, 0, 70, 64, 2, 82, 93, 79, 82, 81, 86, 74, + 19, 17, 10, 4, 2, 67, 72, 73, 78, 64, 30, 17, + 11, 3, 14, 2, 65, 72, 64, 65, 29, 16, 11, 5, + 12, 1, 67, 71, 77, 72, 32, 15, 9, 4, 7, 67, + 74, 77, 4, 36, 23, 16, 8, 17, 4, 64, 67, 70, + 62, 89, 84, 77, 81, 81, 78, 75, 73, 72, 70, + 67, 2, 77, 76, 82, 84, 21, 83, 85, 77, 72, 70, + 73, 71, 71, 70, 69, 72, 82, 68, 68, 13, 69, + 77, 67, 68, 69, 66, 67, 68, 77, 69, 68, 73, + 72, 8, 90, 72, 14, 73, 64, 69, 68, 0, 1, 65, + 65, 0, 73, 73, 84, 48, 45, 47, 39, 31, 34, 34, + 29, 30, 26, 27, 27, 15, 10, 65, 16, 15, 9, 69, + 8, 6, 1, 5, 66, 67, 71, 75, 79, 79, 19, 15, + 18, 8, 72, 0, 65, 79, 68, 77, 
80, 96, 88, 92, + 97, 2, 5, 84, 0, 2, 70, 81, 78, 79, 87, 77, + 83, 83, 96, 88, 93, 103, 68, 82, 89, 72, 3, 7, + 8, 19, 13, 19, 2, 17, 15, 23, 24, 28, 27, 14, + 50, 23, 15, 9, 1, 64, 85, 90, 97, 8, 38, 33, + 33, 24, 25, 14, 12, 6, 1, 72, 3, 7, 8, 19, 13, + 19, 2, 17, 15, 23, 24, 28, 27, 14, 50, 23, 15, + 9, 1, 64, 85, 90, 97 }, + + { + + 43, + 6, 79, 43, 6, 79, 78, 0, 21, 11, 0, 66, 20, + 39, 30, 7, 12, 2, 78, 3, 65, 67, 20, 81, 75, + 18, 13, 94, 106, 118, 71, 78, 70, 78, 3, 65, + 82, 0, 30, 1, 69, 76, 79, 90, 93, 91, 106, 64, + 76, 83, 71, 85, 84, 99, 3, 72, 71, 76, 26, 2, + 22, 0, 0, 0, 67, 93, 97, 4, 0, 67, 17, 72, 92, + 64, 2, 70, 23, 21, 68, 2, 72, 12, 71, 67, 1, + 99, 84, 87, 86, 19, 68, 5, 1, 70, 84, 71, 74, + 65, 75, 72, 85, 18, 73, 6, 75, 79, 67, 73, 66, + 3, 5, 2, 10, 6, 74, 64, 64, 67, 2, 72, 26, 67, + 1, 25, 23, 32, 28, 21, 90, 1, 0, 67, 67, 81, + 68, 18, 66, 66, 17, 36, 55, 43, 20, 90, 92, 3, + 37, 40, 113, 70, 7, 91, 15, 67, 1, 17, 33, 55, + 46, 26, 94, 83, 66, 4, 10, 88, 17, 21, 17, 15, + 18, 16, 9, 18, 16, 65, 5, 9, 3, 2, 70, 5, 0, + 66, 3, 0, 4, 4, 64, 91, 65, 66, 69, 68, 77, + 20, 31, 17, 11, 16, 18, 16, 19, 13, 79, 92, + 75, 74, 72, 111, 69, 64, 91, 14, 1, 1, 64, 64, + 72, 66, 0, 84, 93, 80, 83, 83, 85, 73, 20, 17, + 10, 4, 3, 66, 71, 72, 76, 0, 31, 17, 12, 4, + 15, 3, 0, 71, 1, 64, 29, 16, 11, 6, 13, 2, 67, + 71, 76, 72, 33, 16, 9, 4, 8, 67, 74, 76, 5, + 36, 24, 16, 8, 18, 5, 0, 67, 69, 62, 88, 83, + 76, 79, 80, 76, 74, 71, 71, 69, 65, 4, 76, 75, + 81, 83, 24, 83, 86, 77, 71, 70, 73, 71, 71, + 71, 68, 72, 83, 68, 68, 14, 69, 77, 67, 68, + 69, 67, 67, 68, 78, 70, 68, 73, 72, 9, 91, 72, + 14, 74, 64, 69, 68, 0, 1, 66, 65, 1, 73, 73, + 85, 47, 44, 46, 37, 29, 32, 31, 26, 27, 23, + 23, 24, 11, 7, 67, 12, 11, 5, 74, 5, 4, 64, 3, + 69, 69, 73, 76, 80, 79, 16, 13, 16, 6, 74, 65, + 67, 81, 70, 79, 82, 98, 91, 94, 98, 1, 4, 87, + 65, 0, 72, 83, 79, 80, 88, 78, 84, 83, 95, 89, + 94, 104, 69, 83, 90, 72, 3, 7, 9, 20, 13, 20, + 2, 18, 16, 24, 24, 29, 27, 
15, 50, 21, 13, 7, + 64, 66, 87, 92, 99, 8, 39, 33, 34, 25, 25, 14, + 12, 6, 2, 72, 3, 7, 9, 20, 13, 20, 2, 18, 16, + 24, 24, 29, 27, 15, 50, 21, 13, 7, 64, 66, 87, + 92, 99 }, + + { + + 42, + 6, 79, 42, 6, 79, 76, 1, 21, 11, 0, 68, 18, + 38, 31, 7, 13, 0, 77, 5, 66, 67, 21, 82, 76, + 17, 11, 97, 109, 118, 69, 77, 70, 77, 5, 66, + 82, 1, 30, 1, 69, 75, 77, 91, 94, 91, 106, 64, + 75, 82, 72, 85, 84, 99, 3, 71, 71, 75, 26, 2, + 22, 0, 0, 0, 66, 93, 97, 4, 64, 67, 17, 71, + 90, 2, 3, 69, 25, 23, 67, 3, 70, 13, 70, 66, + 4, 99, 85, 87, 87, 19, 68, 6, 4, 69, 84, 71, + 72, 65, 75, 72, 84, 18, 73, 7, 74, 78, 66, 72, + 66, 4, 6, 3, 11, 6, 75, 64, 0, 67, 2, 72, 27, + 67, 1, 25, 24, 32, 28, 21, 90, 2, 0, 68, 67, + 80, 68, 19, 65, 65, 19, 38, 56, 45, 22, 91, + 93, 4, 38, 40, 114, 70, 7, 91, 16, 67, 1, 19, + 33, 55, 46, 28, 95, 82, 67, 2, 7, 87, 17, 21, + 17, 15, 18, 16, 9, 18, 16, 66, 4, 9, 3, 2, 70, + 5, 0, 67, 3, 0, 4, 3, 65, 92, 66, 66, 69, 69, + 77, 18, 29, 16, 10, 15, 16, 13, 17, 12, 81, + 94, 76, 76, 74, 112, 70, 65, 92, 13, 0, 0, 66, + 66, 74, 68, 64, 85, 93, 81, 84, 85, 83, 71, + 21, 18, 10, 4, 4, 65, 70, 72, 74, 2, 32, 18, + 12, 5, 17, 4, 1, 70, 4, 0, 30, 17, 12, 6, 14, + 3, 66, 70, 75, 71, 34, 17, 9, 4, 9, 66, 73, + 75, 6, 36, 25, 16, 8, 19, 6, 1, 66, 68, 62, + 87, 81, 74, 78, 79, 75, 72, 69, 69, 67, 0, 7, + 75, 74, 80, 82, 27, 82, 87, 76, 70, 69, 73, + 71, 71, 71, 68, 73, 84, 68, 68, 15, 69, 77, + 67, 68, 68, 67, 68, 68, 79, 71, 69, 73, 72, + 10, 91, 73, 15, 74, 65, 69, 68, 0, 0, 66, 65, + 1, 73, 73, 87, 46, 43, 45, 36, 26, 29, 28, 23, + 25, 20, 20, 21, 8, 5, 69, 8, 8, 1, 79, 2, 2, + 65, 1, 71, 71, 74, 77, 81, 79, 14, 10, 13, 4, + 76, 67, 68, 83, 72, 81, 84, 100, 93, 95, 100, + 0, 3, 89, 66, 64, 73, 85, 80, 81, 89, 79, 85, + 83, 94, 91, 95, 106, 71, 84, 91, 72, 4, 8, 10, + 21, 14, 20, 3, 19, 17, 25, 25, 30, 28, 15, 49, + 19, 11, 5, 66, 68, 90, 94, 100, 9, 39, 34, 34, + 26, 26, 15, 13, 6, 3, 72, 4, 8, 10, 21, 14, + 20, 3, 19, 17, 25, 25, 30, 28, 15, 49, 19, 11, + 5, 
66, 68, 90, 94, 100 }, + + { + + 41, + 6, 79, 41, 6, 79, 74, 3, 22, 11, 64, 70, 17, + 37, 31, 7, 15, 64, 77, 6, 66, 68, 22, 83, 77, + 16, 9, 101, 111, 119, 67, 75, 70, 77, 6, 66, + 81, 2, 31, 1, 68, 74, 76, 92, 95, 91, 107, 64, + 75, 81, 72, 86, 84, 99, 3, 71, 71, 74, 27, 2, + 22, 0, 0, 0, 66, 93, 97, 5, 65, 67, 16, 71, + 89, 4, 4, 68, 26, 24, 66, 4, 68, 14, 69, 65, + 6, 100, 85, 87, 87, 20, 68, 7, 6, 69, 83, 70, + 70, 65, 75, 72, 84, 19, 73, 7, 73, 78, 65, 72, + 66, 4, 6, 4, 11, 6, 75, 64, 0, 67, 2, 72, 27, + 67, 1, 26, 24, 33, 28, 22, 91, 2, 1, 68, 67, + 80, 68, 20, 65, 65, 20, 39, 57, 46, 24, 91, + 94, 4, 38, 40, 115, 70, 8, 92, 16, 67, 1, 20, + 33, 55, 46, 29, 96, 80, 68, 64, 4, 85, 17, 21, + 16, 14, 17, 15, 8, 18, 15, 67, 3, 9, 3, 2, 71, + 4, 64, 68, 2, 0, 4, 2, 65, 93, 66, 67, 69, 70, + 78, 16, 27, 14, 8, 13, 14, 10, 14, 11, 83, 96, + 78, 78, 76, 114, 71, 66, 94, 12, 64, 64, 67, + 67, 76, 70, 66, 87, 93, 82, 85, 86, 82, 70, + 23, 18, 10, 4, 5, 64, 69, 71, 72, 3, 33, 19, + 13, 5, 18, 5, 3, 69, 6, 0, 30, 17, 12, 7, 15, + 3, 66, 69, 74, 71, 36, 18, 9, 4, 10, 66, 73, + 74, 7, 37, 26, 17, 8, 20, 6, 2, 66, 68, 62, + 85, 80, 73, 76, 78, 73, 71, 68, 68, 65, 2, 9, + 75, 73, 80, 81, 30, 82, 88, 76, 69, 68, 73, + 71, 71, 71, 67, 73, 85, 68, 68, 15, 69, 77, + 67, 68, 68, 68, 68, 68, 80, 72, 69, 73, 72, + 11, 92, 73, 16, 75, 65, 69, 68, 1, 0, 67, 64, + 2, 73, 73, 88, 45, 42, 44, 34, 24, 27, 26, 20, + 22, 17, 17, 18, 5, 2, 71, 4, 4, 66, 84, 64, 0, + 67, 64, 74, 73, 75, 78, 82, 79, 12, 8, 11, 2, + 78, 69, 70, 85, 74, 83, 86, 102, 95, 97, 101, + 64, 2, 91, 68, 66, 75, 87, 82, 83, 90, 80, 86, + 83, 93, 92, 97, 107, 72, 85, 92, 72, 4, 8, 10, + 22, 15, 21, 3, 20, 18, 26, 25, 31, 28, 16, 49, + 17, 9, 3, 68, 70, 92, 96, 101, 9, 40, 34, 35, + 26, 27, 15, 13, 7, 3, 72, 4, 8, 10, 22, 15, + 21, 3, 20, 18, 26, 25, 31, 28, 16, 49, 17, 9, + 3, 68, 70, 92, 96, 101 }, + + { + + 40, + 6, 79, 40, 6, 79, 72, 4, 22, 11, 64, 72, 15, + 36, 32, 7, 17, 66, 77, 8, 66, 68, 23, 84, 78, + 15, 7, 104, 
113, 120, 65, 74, 70, 77, 8, 66, + 81, 3, 31, 1, 67, 73, 75, 93, 96, 91, 107, 64, + 75, 80, 72, 86, 84, 99, 3, 71, 71, 73, 27, 2, + 22, 0, 0, 0, 65, 93, 97, 5, 66, 67, 16, 70, + 88, 6, 5, 67, 28, 26, 65, 5, 66, 15, 68, 64, + 8, 101, 85, 87, 87, 20, 68, 8, 8, 69, 82, 69, + 68, 65, 75, 72, 84, 19, 73, 8, 72, 78, 64, 72, + 66, 4, 7, 5, 12, 6, 75, 64, 0, 67, 2, 72, 27, + 67, 1, 26, 24, 33, 28, 22, 91, 3, 2, 69, 67, + 80, 68, 21, 65, 64, 21, 40, 58, 48, 26, 92, + 95, 5, 38, 40, 116, 70, 8, 92, 17, 67, 1, 21, + 33, 55, 46, 30, 97, 79, 69, 67, 1, 84, 17, 21, + 16, 13, 17, 15, 8, 18, 15, 68, 2, 9, 3, 2, 71, + 4, 65, 69, 2, 0, 4, 1, 66, 94, 67, 68, 69, 71, + 78, 14, 25, 12, 7, 11, 12, 7, 12, 10, 85, 98, + 79, 80, 78, 115, 72, 67, 96, 11, 65, 65, 68, + 69, 78, 72, 68, 88, 93, 83, 86, 88, 80, 68, + 24, 19, 10, 4, 6, 0, 68, 70, 70, 4, 34, 20, + 14, 6, 19, 6, 4, 68, 8, 1, 31, 18, 13, 7, 16, + 4, 65, 68, 73, 71, 37, 19, 9, 4, 11, 65, 73, + 73, 8, 37, 27, 17, 8, 21, 7, 3, 65, 67, 62, + 84, 78, 71, 74, 77, 72, 69, 66, 66, 0, 4, 11, + 74, 72, 79, 80, 33, 82, 89, 76, 68, 67, 73, + 71, 71, 71, 67, 73, 86, 68, 68, 16, 69, 77, + 67, 68, 68, 68, 68, 68, 81, 73, 69, 73, 72, + 12, 93, 73, 17, 76, 65, 69, 68, 1, 0, 67, 64, + 3, 73, 73, 90, 44, 41, 43, 32, 22, 24, 23, 17, + 20, 14, 14, 15, 2, 64, 73, 0, 1, 70, 89, 67, + 65, 69, 66, 76, 75, 76, 79, 83, 79, 10, 6, 9, + 0, 80, 71, 71, 87, 76, 85, 88, 104, 97, 99, + 102, 65, 1, 93, 69, 68, 76, 89, 83, 84, 91, + 81, 87, 83, 92, 93, 98, 109, 73, 86, 93, 72, + 5, 9, 11, 23, 16, 22, 3, 21, 19, 27, 26, 32, + 29, 17, 48, 15, 7, 1, 70, 72, 94, 98, 102, 10, + 40, 35, 36, 27, 28, 16, 14, 7, 4, 72, 5, 9, + 11, 23, 16, 22, 3, 21, 19, 27, 26, 32, 29, 17, + 48, 15, 7, 1, 70, 72, 94, 98, 102 }, + + { + + 38, + 5, 80, 38, 5, 80, 71, 5, 22, 11, 65, 74, 13, + 35, 32, 7, 18, 68, 77, 9, 67, 69, 24, 85, 80, + 13, 5, 108, 116, 121, 0, 73, 70, 77, 9, 67, + 81, 3, 31, 0, 67, 73, 74, 95, 98, 92, 108, 65, + 75, 80, 73, 87, 84, 99, 2, 71, 71, 73, 27, 1, + 22, 0, 0, 0, 
65, 94, 97, 5, 67, 68, 15, 70, + 87, 8, 6, 67, 29, 27, 65, 6, 65, 15, 67, 64, + 10, 102, 86, 88, 88, 20, 69, 8, 10, 69, 82, + 69, 67, 65, 75, 72, 84, 19, 74, 8, 72, 78, 64, + 72, 66, 4, 7, 6, 12, 6, 76, 65, 0, 67, 1, 72, + 27, 67, 1, 26, 24, 33, 28, 22, 92, 3, 2, 70, + 67, 80, 69, 21, 65, 64, 22, 41, 58, 49, 27, + 93, 97, 5, 38, 40, 117, 71, 8, 93, 17, 68, 1, + 22, 33, 54, 46, 31, 98, 78, 71, 70, 66, 83, + 16, 21, 15, 12, 16, 14, 7, 17, 14, 69, 1, 8, + 2, 1, 72, 3, 66, 70, 1, 64, 3, 0, 67, 96, 68, + 69, 69, 73, 79, 11, 22, 10, 5, 9, 9, 3, 9, 8, + 87, 101, 81, 83, 80, 117, 74, 69, 98, 9, 66, + 66, 70, 71, 80, 74, 70, 90, 93, 84, 87, 90, + 79, 67, 25, 19, 10, 4, 6, 0, 68, 70, 69, 5, + 34, 20, 14, 6, 20, 7, 5, 68, 10, 1, 31, 18, + 13, 7, 16, 4, 65, 68, 72, 71, 38, 20, 9, 3, + 11, 65, 73, 73, 8, 37, 27, 17, 8, 22, 7, 3, + 65, 67, 62, 83, 77, 70, 73, 76, 71, 68, 65, + 65, 1, 5, 13, 74, 72, 79, 80, 36, 82, 91, 76, + 68, 67, 73, 71, 71, 72, 67, 74, 87, 69, 68, + 16, 70, 77, 68, 68, 68, 69, 69, 69, 82, 74, + 70, 74, 72, 13, 94, 74, 17, 77, 66, 69, 69, 1, + 64, 68, 64, 3, 73, 73, 92, 42, 40, 41, 30, 19, + 21, 20, 14, 17, 11, 10, 12, 65, 67, 75, 67, + 66, 74, 95, 70, 68, 71, 69, 79, 77, 78, 81, + 84, 79, 7, 3, 6, 65, 83, 73, 73, 90, 78, 88, + 90, 107, 100, 101, 104, 67, 64, 96, 71, 70, + 78, 91, 85, 86, 92, 82, 88, 84, 91, 95, 100, + 111, 75, 88, 95, 72, 5, 9, 11, 24, 16, 22, 3, + 22, 19, 28, 26, 32, 29, 17, 47, 13, 5, 65, 73, + 74, 97, 100, 104, 10, 40, 35, 36, 27, 28, 16, + 14, 7, 4, 72, 5, 9, 11, 24, 16, 22, 3, 22, 19, + 28, 26, 32, 29, 17, 47, 13, 5, 65, 73, 74, 97, + 100, 104 }, + + { + + 37, + 5, 80, 37, 5, 80, 69, 7, 23, 12, 65, 75, 12, + 35, 33, 8, 20, 69, 76, 11, 67, 69, 26, 85, 81, + 12, 4, 111, 118, 121, 2, 71, 69, 76, 11, 67, + 80, 4, 32, 0, 66, 72, 72, 96, 99, 92, 108, 65, + 74, 79, 73, 87, 83, 98, 2, 70, 70, 72, 28, 1, + 22, 0, 0, 0, 64, 94, 97, 6, 67, 68, 15, 69, + 85, 11, 8, 66, 31, 29, 64, 8, 0, 16, 65, 0, + 13, 102, 86, 88, 88, 21, 69, 9, 13, 68, 81, + 68, 
65, 65, 74, 72, 83, 20, 74, 9, 71, 77, 0, + 71, 65, 5, 8, 7, 13, 7, 76, 65, 1, 66, 1, 71, + 28, 66, 1, 27, 25, 34, 29, 23, 92, 4, 3, 70, + 67, 79, 69, 22, 64, 0, 24, 43, 59, 51, 29, 93, + 98, 6, 39, 41, 117, 71, 9, 93, 18, 68, 2, 24, + 33, 54, 47, 33, 99, 76, 72, 72, 69, 81, 16, + 21, 15, 12, 16, 14, 7, 17, 14, 69, 1, 8, 2, 1, + 72, 3, 66, 70, 1, 64, 3, 0, 67, 97, 68, 69, + 69, 74, 79, 9, 20, 9, 4, 8, 7, 0, 7, 7, 88, + 103, 82, 85, 81, 118, 75, 70, 99, 8, 66, 66, + 71, 72, 81, 75, 71, 91, 93, 84, 87, 91, 77, + 65, 27, 20, 10, 4, 7, 1, 67, 69, 67, 7, 35, + 21, 15, 7, 22, 8, 7, 67, 13, 2, 32, 19, 14, 8, + 17, 5, 64, 67, 70, 70, 40, 21, 10, 3, 12, 64, + 72, 72, 9, 38, 28, 18, 9, 23, 8, 4, 64, 66, + 62, 81, 75, 68, 71, 74, 69, 66, 0, 0, 3, 7, + 16, 73, 71, 78, 79, 40, 81, 92, 75, 67, 66, + 72, 71, 70, 72, 66, 74, 87, 69, 68, 17, 70, + 77, 68, 68, 67, 69, 69, 69, 82, 74, 70, 74, + 71, 15, 94, 74, 18, 77, 66, 69, 69, 2, 64, 68, + 0, 4, 72, 72, 93, 41, 39, 40, 29, 17, 19, 18, + 11, 15, 9, 7, 10, 68, 69, 77, 70, 69, 77, 100, + 72, 70, 72, 71, 81, 78, 79, 82, 84, 79, 5, 1, + 4, 67, 85, 74, 74, 92, 79, 90, 91, 109, 102, + 102, 105, 68, 65, 98, 72, 71, 79, 92, 86, 87, + 93, 82, 88, 84, 90, 96, 101, 112, 76, 89, 96, + 71, 6, 10, 12, 26, 17, 23, 4, 24, 20, 29, 27, + 33, 30, 18, 47, 12, 4, 67, 75, 75, 99, 101, + 105, 11, 41, 36, 37, 28, 29, 17, 15, 8, 5, 71, + 6, 10, 12, 26, 17, 23, 4, 24, 20, 29, 27, 33, + 30, 18, 47, 12, 4, 67, 75, 75, 99, 101, 105 }, + + { + + 36, + 5, 80, 36, 5, 80, 67, 8, 23, 12, 65, 77, 10, + 34, 34, 8, 22, 71, 76, 12, 67, 69, 27, 86, 82, + 11, 2, 114, 120, 122, 4, 70, 69, 76, 12, 67, + 80, 5, 32, 0, 65, 71, 71, 97, 100, 92, 108, + 65, 74, 78, 73, 87, 83, 98, 2, 70, 70, 71, 28, + 1, 22, 0, 0, 0, 0, 94, 97, 6, 68, 68, 15, 68, + 84, 13, 9, 65, 33, 31, 0, 9, 2, 17, 64, 1, 15, + 103, 86, 88, 88, 21, 69, 10, 15, 68, 80, 67, + 0, 65, 74, 72, 83, 20, 74, 9, 70, 77, 1, 71, + 65, 5, 8, 8, 14, 7, 76, 65, 1, 66, 1, 71, 28, + 66, 1, 27, 25, 34, 29, 23, 93, 5, 4, 71, 
67, + 79, 69, 23, 64, 0, 25, 44, 60, 53, 31, 94, 99, + 6, 39, 41, 118, 71, 9, 94, 19, 68, 2, 25, 33, + 54, 47, 34, 100, 75, 73, 75, 72, 80, 16, 21, + 15, 11, 15, 14, 7, 17, 13, 70, 0, 8, 2, 1, 72, + 2, 67, 71, 1, 64, 3, 64, 68, 98, 69, 70, 69, + 75, 80, 7, 18, 7, 2, 6, 5, 66, 5, 6, 90, 105, + 84, 87, 83, 119, 76, 71, 101, 7, 67, 67, 72, + 74, 83, 77, 73, 93, 93, 85, 88, 93, 76, 64, + 28, 21, 10, 4, 8, 2, 66, 68, 65, 8, 36, 22, + 16, 8, 23, 9, 8, 66, 15, 3, 32, 19, 14, 8, 18, + 6, 0, 66, 69, 70, 41, 22, 10, 3, 13, 0, 72, + 71, 10, 38, 29, 18, 9, 24, 9, 5, 0, 65, 62, + 80, 73, 66, 69, 73, 68, 64, 2, 1, 5, 9, 18, + 72, 70, 77, 78, 43, 81, 93, 75, 66, 65, 72, + 71, 70, 72, 66, 74, 88, 69, 68, 18, 70, 77, + 68, 68, 67, 69, 69, 69, 83, 75, 70, 74, 71, + 16, 95, 74, 19, 78, 66, 69, 69, 2, 64, 68, 0, + 5, 72, 72, 95, 40, 38, 39, 27, 15, 16, 15, 8, + 13, 6, 4, 7, 71, 72, 79, 74, 73, 81, 105, 75, + 72, 74, 73, 83, 80, 80, 83, 85, 79, 3, 64, 2, + 69, 87, 76, 76, 94, 81, 92, 93, 111, 104, 104, + 106, 69, 66, 100, 74, 73, 81, 94, 87, 88, 94, + 83, 89, 84, 89, 97, 102, 114, 77, 90, 97, 71, + 6, 11, 13, 27, 18, 24, 4, 25, 21, 30, 27, 34, + 31, 19, 46, 10, 2, 69, 77, 77, 101, 103, 106, + 12, 41, 36, 38, 29, 30, 17, 16, 8, 6, 71, 6, + 11, 13, 27, 18, 24, 4, 25, 21, 30, 27, 34, 31, + 19, 46, 10, 2, 69, 77, 77, 101, 103, 106 }, + + { + + 35, + 5, 80, 35, 5, 80, 65, 10, 24, 12, 66, 79, 9, + 33, 34, 8, 24, 72, 76, 14, 67, 70, 28, 87, 83, + 10, 0, 118, 122, 123, 6, 68, 69, 76, 14, 67, + 79, 6, 33, 0, 64, 70, 70, 98, 101, 92, 109, + 65, 74, 77, 73, 88, 83, 98, 2, 70, 70, 70, 29, + 1, 22, 0, 0, 0, 0, 94, 97, 7, 69, 68, 14, 68, + 83, 15, 10, 64, 34, 32, 1, 10, 4, 18, 0, 2, + 17, 104, 86, 88, 88, 22, 69, 11, 17, 68, 79, + 66, 2, 65, 74, 72, 83, 21, 74, 10, 69, 77, 2, + 71, 65, 5, 9, 9, 14, 7, 76, 65, 1, 66, 1, 71, + 28, 66, 1, 28, 25, 35, 29, 24, 93, 5, 5, 71, + 67, 79, 69, 24, 64, 1, 26, 45, 61, 54, 33, 94, + 100, 7, 39, 41, 119, 71, 10, 94, 19, 68, 2, + 26, 33, 54, 47, 35, 101, 73, 74, 
78, 75, 78, + 16, 21, 14, 10, 15, 13, 6, 17, 13, 71, 64, 8, + 2, 1, 73, 2, 68, 72, 0, 64, 3, 65, 68, 99, 69, + 71, 69, 76, 80, 5, 16, 5, 1, 4, 3, 69, 2, 5, + 92, 107, 85, 89, 85, 121, 77, 72, 103, 6, 68, + 68, 73, 75, 85, 79, 75, 94, 93, 86, 89, 94, + 74, 1, 30, 21, 10, 4, 9, 3, 65, 67, 0, 9, 37, + 23, 17, 8, 24, 10, 10, 65, 17, 3, 33, 20, 15, + 9, 19, 6, 0, 65, 68, 70, 43, 23, 10, 3, 14, 0, + 72, 70, 11, 39, 30, 19, 9, 25, 9, 6, 0, 65, + 62, 78, 72, 65, 67, 72, 66, 0, 3, 3, 7, 11, + 20, 72, 69, 77, 77, 46, 81, 94, 75, 65, 64, + 72, 71, 70, 72, 65, 74, 89, 69, 68, 18, 70, + 77, 68, 68, 67, 70, 69, 69, 84, 76, 70, 74, + 71, 17, 96, 74, 20, 79, 66, 69, 69, 3, 64, 69, + 1, 6, 72, 72, 96, 39, 37, 38, 25, 13, 14, 13, + 5, 10, 3, 1, 4, 74, 75, 81, 78, 76, 85, 110, + 78, 74, 76, 75, 86, 82, 81, 84, 86, 79, 1, 66, + 0, 71, 89, 78, 77, 96, 83, 94, 95, 113, 106, + 106, 107, 70, 67, 102, 75, 75, 82, 96, 89, 90, + 95, 84, 90, 84, 88, 98, 104, 115, 78, 91, 98, + 71, 7, 11, 13, 28, 19, 25, 4, 26, 22, 31, 28, + 35, 31, 20, 46, 8, 0, 71, 79, 79, 103, 105, + 107, 12, 42, 37, 39, 29, 31, 18, 16, 9, 6, 71, + 7, 11, 13, 28, 19, 25, 4, 26, 22, 31, 28, 35, + 31, 20, 46, 8, 0, 71, 79, 79, 103, 105, 107 }, + + { + + 33, + 5, 80, 33, 5, 80, 64, 11, 24, 12, 66, 81, 7, + 32, 35, 8, 25, 74, 76, 15, 68, 70, 29, 88, 85, + 8, 65, 121, 125, 124, 8, 67, 69, 76, 15, 68, + 79, 7, 33, 64, 64, 69, 68, 99, 103, 93, 109, + 65, 73, 76, 74, 88, 83, 98, 2, 70, 70, 70, 29, + 1, 22, 0, 0, 0, 1, 95, 97, 7, 70, 68, 14, 67, + 82, 17, 11, 0, 36, 34, 1, 11, 6, 19, 1, 3, 20, + 104, 87, 88, 89, 22, 70, 12, 20, 67, 79, 66, + 4, 65, 74, 72, 83, 21, 74, 10, 68, 77, 2, 70, + 65, 5, 9, 10, 15, 7, 77, 66, 1, 66, 1, 71, 28, + 66, 1, 28, 25, 35, 29, 24, 94, 6, 5, 72, 67, + 78, 69, 25, 0, 1, 28, 47, 62, 56, 35, 95, 101, + 7, 40, 41, 120, 71, 10, 95, 20, 68, 2, 27, 33, + 54, 47, 37, 102, 72, 75, 81, 78, 77, 15, 21, + 14, 10, 14, 13, 6, 17, 12, 72, 65, 7, 2, 1, + 73, 1, 68, 73, 0, 65, 2, 66, 69, 100, 70, 72, + 69, 78, 81, 3, 
14, 3, 64, 3, 1, 73, 0, 4, 94, + 110, 87, 91, 87, 122, 79, 73, 104, 5, 69, 69, + 75, 77, 87, 81, 76, 96, 93, 87, 90, 96, 73, 2, + 31, 22, 10, 4, 10, 4, 64, 67, 2, 11, 38, 23, + 17, 9, 26, 11, 11, 64, 20, 4, 33, 20, 15, 9, + 20, 7, 1, 65, 67, 70, 44, 24, 10, 3, 15, 1, + 72, 69, 12, 39, 31, 19, 9, 26, 10, 7, 1, 64, + 62, 77, 70, 0, 66, 71, 65, 2, 5, 4, 8, 13, 23, + 71, 68, 76, 76, 49, 80, 95, 74, 64, 64, 72, + 71, 70, 73, 65, 75, 90, 69, 68, 19, 70, 77, + 68, 68, 66, 70, 70, 69, 85, 77, 71, 74, 71, + 18, 97, 75, 20, 79, 67, 69, 69, 3, 65, 69, 1, + 6, 72, 72, 98, 38, 36, 37, 24, 10, 11, 10, 2, + 8, 0, 66, 1, 78, 77, 83, 82, 80, 89, 115, 81, + 76, 78, 77, 88, 84, 83, 85, 87, 79, 65, 69, + 66, 73, 91, 80, 79, 98, 85, 96, 97, 115, 109, + 107, 109, 71, 68, 105, 77, 76, 84, 98, 90, 91, + 96, 85, 91, 84, 87, 100, 105, 117, 80, 92, 99, + 71, 7, 12, 14, 29, 19, 25, 5, 27, 23, 32, 28, + 36, 32, 20, 45, 6, 65, 73, 81, 81, 106, 107, + 109, 13, 42, 37, 39, 30, 31, 18, 17, 9, 7, 71, + 7, 12, 14, 29, 19, 25, 5, 27, 23, 32, 28, 36, + 32, 20, 45, 6, 65, 73, 81, 81, 106, 107, 109 }, + + { + + 32, + 5, 80, 32, 5, 80, 1, 13, 24, 12, 67, 83, 6, + 31, 36, 8, 27, 76, 75, 17, 68, 70, 30, 89, 86, + 7, 67, 124, 126, 124, 10, 66, 69, 75, 17, 68, + 79, 8, 33, 64, 0, 68, 67, 100, 104, 93, 109, + 65, 73, 75, 74, 88, 83, 98, 2, 69, 70, 69, 30, + 1, 22, 0, 0, 0, 1, 95, 97, 8, 71, 68, 13, 67, + 80, 20, 12, 1, 37, 35, 2, 12, 8, 20, 2, 4, 22, + 105, 87, 88, 89, 22, 70, 13, 22, 67, 78, 65, + 6, 65, 74, 72, 82, 21, 74, 11, 67, 76, 3, 70, + 65, 6, 10, 11, 15, 7, 77, 66, 2, 66, 1, 71, + 29, 66, 1, 29, 26, 35, 29, 24, 94, 6, 6, 72, + 67, 78, 69, 26, 0, 2, 29, 48, 62, 57, 37, 95, + 102, 8, 40, 41, 121, 71, 11, 95, 21, 68, 2, + 29, 33, 54, 47, 38, 103, 70, 76, 83, 81, 76, + 15, 21, 13, 9, 14, 13, 5, 17, 12, 73, 66, 7, + 2, 1, 73, 1, 69, 74, 64, 65, 2, 67, 69, 101, + 71, 72, 69, 79, 81, 1, 12, 2, 65, 1, 64, 76, + 66, 3, 96, 112, 88, 93, 89, 123, 80, 74, 106, + 4, 70, 70, 76, 78, 89, 83, 78, 97, 93, 88, 91, + 
98, 71, 4, 32, 22, 10, 4, 11, 5, 0, 66, 4, 12, + 39, 24, 18, 10, 27, 12, 13, 0, 22, 5, 34, 21, + 16, 10, 21, 8, 1, 64, 66, 69, 45, 25, 10, 3, + 16, 1, 71, 68, 13, 39, 32, 19, 9, 27, 11, 8, + 1, 0, 62, 76, 69, 1, 64, 70, 0, 3, 7, 6, 10, + 15, 25, 70, 67, 75, 75, 52, 80, 96, 74, 0, 0, + 72, 71, 70, 73, 64, 75, 91, 69, 68, 20, 70, + 77, 68, 68, 66, 71, 70, 69, 86, 78, 71, 74, + 71, 19, 97, 75, 21, 80, 67, 69, 69, 3, 65, 70, + 1, 7, 72, 72, 99, 37, 35, 36, 22, 8, 9, 7, 64, + 5, 66, 69, 65, 81, 80, 85, 86, 83, 93, 120, + 84, 78, 79, 79, 91, 86, 84, 86, 88, 79, 67, + 71, 68, 75, 93, 82, 80, 100, 87, 98, 99, 117, + 111, 109, 110, 72, 69, 107, 78, 78, 85, 100, + 91, 92, 97, 86, 92, 84, 86, 101, 106, 118, 81, + 93, 100, 71, 8, 12, 15, 30, 20, 26, 5, 28, 24, + 33, 29, 37, 32, 21, 45, 4, 67, 75, 83, 83, + 108, 109, 110, 13, 43, 38, 40, 31, 32, 19, 17, + 9, 8, 71, 8, 12, 15, 30, 20, 26, 5, 28, 24, + 33, 29, 37, 32, 21, 45, 4, 67, 75, 83, 83, + 108, 109, 110 }, + + { + + 31, + 5, 81, 31, 5, 81, 3, 14, 25, 12, 67, 84, 4, + 30, 36, 9, 29, 77, 75, 18, 68, 71, 31, 90, 87, + 6, 68, 126, 126, 125, 12, 64, 69, 75, 18, 68, + 78, 9, 34, 64, 1, 67, 66, 102, 105, 93, 110, + 65, 73, 75, 74, 89, 82, 98, 1, 69, 70, 68, 30, + 1, 22, 0, 0, 0, 2, 95, 97, 8, 71, 69, 13, 66, + 79, 22, 13, 2, 39, 37, 3, 13, 10, 21, 3, 4, + 24, 106, 87, 88, 89, 23, 70, 14, 24, 67, 77, + 64, 8, 65, 74, 72, 82, 22, 75, 11, 67, 76, 4, + 70, 64, 6, 10, 12, 16, 7, 77, 66, 2, 66, 1, + 71, 29, 66, 1, 29, 26, 36, 30, 25, 95, 7, 7, + 73, 67, 78, 69, 27, 0, 2, 30, 49, 62, 59, 38, + 96, 103, 8, 40, 41, 121, 72, 11, 96, 21, 69, + 3, 30, 33, 54, 48, 39, 104, 69, 77, 86, 84, + 74, 15, 21, 13, 8, 13, 12, 5, 16, 11, 73, 66, + 7, 1, 1, 74, 0, 70, 75, 64, 65, 2, 67, 70, + 103, 71, 73, 69, 80, 82, 65, 10, 0, 67, 64, + 66, 79, 68, 1, 98, 114, 90, 95, 91, 125, 81, + 76, 108, 2, 71, 71, 77, 80, 91, 85, 80, 99, + 93, 89, 92, 99, 70, 5, 34, 23, 10, 4, 12, 5, + 0, 65, 5, 13, 40, 25, 19, 10, 28, 13, 14, 1, + 24, 5, 34, 21, 16, 10, 22, 8, 2, 
0, 65, 69, + 47, 26, 10, 3, 16, 2, 71, 68, 13, 40, 32, 20, + 9, 28, 11, 8, 2, 0, 62, 74, 67, 3, 1, 68, 1, + 5, 8, 7, 12, 17, 27, 70, 66, 75, 75, 55, 80, + 97, 74, 0, 1, 72, 71, 70, 73, 64, 75, 92, 69, + 68, 20, 70, 77, 68, 68, 66, 71, 70, 69, 87, + 79, 71, 74, 71, 20, 98, 75, 22, 81, 67, 69, + 69, 4, 65, 70, 2, 8, 72, 72, 101, 36, 34, 35, + 20, 6, 6, 5, 67, 3, 69, 72, 68, 84, 83, 87, + 89, 87, 97, 125, 86, 81, 81, 81, 93, 88, 85, + 87, 89, 79, 69, 73, 70, 77, 95, 83, 82, 102, + 89, 101, 101, 119, 113, 111, 111, 73, 71, 109, + 80, 80, 87, 102, 93, 94, 98, 87, 93, 85, 85, + 102, 108, 120, 82, 95, 101, 70, 8, 13, 15, 31, + 21, 27, 5, 29, 25, 34, 29, 38, 33, 22, 44, 2, + 69, 77, 85, 85, 110, 111, 111, 14, 43, 38, 41, + 31, 33, 19, 18, 10, 8, 70, 8, 13, 15, 31, 21, + 27, 5, 29, 25, 34, 29, 38, 33, 22, 44, 2, 69, + 77, 85, 85, 110, 111, 111 }, + + { + + 30, + 5, 81, 30, 5, 81, 5, 16, 25, 12, 68, 86, 3, + 29, 37, 9, 30, 79, 75, 20, 69, 71, 32, 91, 88, + 5, 70, 126, 126, 126, 14, 0, 69, 75, 20, 69, + 78, 10, 34, 64, 1, 66, 64, 103, 106, 93, 110, + 65, 72, 74, 75, 89, 82, 98, 1, 69, 70, 67, 31, + 1, 22, 0, 0, 0, 2, 95, 97, 9, 72, 69, 12, 66, + 78, 24, 14, 3, 40, 38, 4, 14, 12, 22, 4, 5, + 27, 106, 88, 88, 90, 23, 70, 15, 27, 66, 77, + 64, 10, 65, 74, 72, 82, 22, 75, 12, 66, 76, 5, + 69, 64, 6, 11, 13, 16, 7, 78, 66, 2, 66, 1, + 71, 29, 66, 1, 30, 26, 36, 30, 25, 95, 7, 7, + 73, 67, 77, 69, 28, 1, 3, 32, 51, 62, 60, 40, + 96, 104, 9, 41, 41, 122, 72, 12, 96, 22, 69, + 3, 31, 33, 54, 48, 41, 105, 67, 78, 89, 87, + 73, 15, 21, 12, 8, 13, 12, 4, 16, 11, 74, 67, + 7, 1, 1, 74, 0, 70, 76, 65, 65, 2, 68, 70, + 104, 72, 74, 69, 81, 82, 67, 8, 65, 68, 65, + 68, 82, 71, 0, 100, 116, 91, 97, 93, 126, 82, + 77, 109, 1, 72, 72, 79, 81, 93, 87, 81, 100, + 93, 90, 93, 101, 68, 7, 35, 23, 10, 4, 13, 6, + 1, 65, 7, 15, 41, 26, 19, 11, 30, 14, 16, 2, + 27, 6, 35, 22, 17, 11, 23, 9, 2, 1, 64, 69, + 48, 27, 10, 3, 17, 2, 71, 67, 14, 40, 33, 20, + 9, 29, 12, 9, 2, 1, 62, 73, 66, 4, 2, 67, 3, + 
6, 10, 9, 14, 19, 30, 69, 65, 74, 74, 58, 79, + 98, 73, 1, 2, 72, 71, 70, 73, 0, 76, 93, 69, + 68, 21, 70, 77, 68, 68, 65, 72, 71, 69, 88, + 80, 72, 74, 71, 21, 99, 76, 23, 81, 68, 69, + 69, 4, 66, 71, 2, 8, 72, 72, 102, 35, 33, 34, + 19, 3, 4, 2, 70, 0, 72, 75, 71, 87, 85, 89, + 93, 90, 101, 126, 89, 83, 83, 83, 96, 90, 86, + 88, 90, 79, 71, 76, 73, 79, 97, 85, 83, 104, + 91, 103, 103, 121, 115, 112, 113, 74, 72, 111, + 81, 81, 88, 104, 94, 95, 99, 88, 94, 85, 84, + 104, 109, 121, 84, 96, 102, 70, 9, 13, 16, 32, + 22, 27, 6, 30, 26, 35, 30, 39, 33, 22, 44, 0, + 71, 79, 87, 87, 113, 113, 112, 14, 44, 39, 41, + 32, 34, 20, 18, 10, 9, 70, 9, 13, 16, 32, 22, + 27, 6, 30, 26, 35, 30, 39, 33, 22, 44, 0, 71, + 79, 87, 87, 113, 113, 112 }, + + { + + 28, + 4, 81, 28, 4, 81, 6, 17, 25, 12, 68, 88, 1, + 28, 37, 9, 32, 81, 75, 21, 69, 72, 33, 92, 90, + 3, 72, 126, 126, 126, 16, 1, 69, 75, 21, 69, + 78, 10, 34, 65, 2, 65, 0, 104, 108, 94, 111, + 65, 72, 73, 75, 90, 82, 98, 1, 69, 70, 67, 31, + 1, 22, 0, 0, 0, 3, 96, 97, 9, 73, 69, 12, 65, + 77, 26, 15, 3, 42, 40, 4, 15, 14, 22, 5, 6, + 29, 107, 88, 89, 90, 23, 71, 15, 29, 66, 76, + 0, 11, 65, 74, 72, 82, 22, 75, 12, 65, 76, 5, + 69, 64, 6, 11, 14, 17, 7, 78, 67, 2, 66, 0, + 71, 29, 66, 1, 30, 26, 36, 30, 25, 96, 8, 8, + 74, 67, 77, 69, 29, 1, 3, 33, 52, 62, 62, 42, + 97, 105, 9, 41, 41, 123, 72, 12, 97, 22, 69, + 3, 32, 33, 54, 48, 42, 106, 66, 79, 92, 91, + 72, 14, 21, 12, 7, 12, 11, 4, 16, 10, 75, 68, + 6, 1, 0, 75, 64, 71, 77, 65, 66, 1, 69, 71, + 105, 73, 75, 69, 83, 83, 69, 6, 67, 70, 67, + 71, 86, 73, 64, 102, 119, 93, 100, 95, 126, + 84, 78, 111, 0, 73, 73, 80, 83, 95, 89, 83, + 102, 93, 91, 94, 103, 67, 8, 36, 24, 10, 4, + 13, 7, 2, 64, 9, 16, 41, 26, 20, 11, 31, 15, + 17, 3, 29, 6, 35, 22, 17, 11, 23, 9, 3, 1, 0, + 69, 49, 28, 10, 3, 18, 3, 71, 66, 15, 40, 34, + 20, 9, 30, 12, 10, 3, 1, 62, 72, 64, 6, 4, 66, + 4, 8, 11, 10, 15, 21, 32, 69, 64, 74, 73, 61, + 79, 99, 73, 2, 2, 72, 71, 70, 74, 0, 76, 94, + 69, 68, 21, 
70, 77, 69, 68, 65, 72, 71, 70, + 89, 81, 72, 75, 71, 22, 100, 76, 23, 82, 68, + 69, 70, 4, 66, 71, 2, 9, 72, 72, 104, 34, 32, + 33, 17, 1, 1, 64, 73, 65, 75, 79, 74, 91, 88, + 91, 97, 94, 105, 126, 92, 85, 85, 86, 98, 92, + 88, 90, 91, 79, 74, 78, 75, 81, 100, 87, 85, + 107, 93, 105, 105, 123, 118, 114, 114, 76, 73, + 114, 83, 83, 90, 106, 96, 97, 100, 89, 95, 85, + 83, 105, 111, 123, 85, 97, 103, 70, 9, 14, 16, + 33, 22, 28, 6, 31, 26, 36, 30, 39, 34, 23, 43, + 65, 73, 81, 89, 89, 115, 115, 114, 15, 44, 39, + 42, 32, 34, 20, 19, 10, 9, 70, 9, 14, 16, 33, + 22, 28, 6, 31, 26, 36, 30, 39, 34, 23, 43, 65, + 73, 81, 89, 89, 115, 115, 114 }, + + { + + 27, + 4, 81, 27, 4, 81, 8, 18, 26, 12, 68, 90, 64, + 28, 38, 9, 34, 82, 74, 23, 69, 72, 34, 92, 91, + 2, 74, 126, 126, 126, 18, 3, 68, 74, 23, 69, + 77, 11, 35, 65, 3, 64, 1, 105, 109, 94, 111, + 65, 72, 72, 75, 90, 82, 98, 1, 68, 69, 66, 31, + 1, 22, 0, 0, 0, 4, 96, 97, 9, 74, 69, 12, 64, + 75, 29, 16, 4, 44, 42, 5, 16, 16, 23, 7, 7, + 31, 108, 88, 89, 90, 24, 71, 16, 31, 66, 75, + 1, 13, 65, 73, 72, 81, 23, 75, 13, 64, 75, 6, + 69, 64, 7, 12, 15, 18, 7, 78, 67, 3, 65, 0, + 71, 30, 66, 1, 30, 27, 37, 30, 26, 96, 9, 9, + 75, 67, 77, 69, 30, 1, 4, 34, 53, 62, 62, 44, + 98, 106, 10, 41, 42, 124, 72, 12, 97, 23, 69, + 3, 34, 33, 54, 48, 43, 107, 65, 80, 94, 94, + 70, 14, 21, 12, 6, 12, 11, 4, 16, 10, 76, 69, + 6, 1, 0, 75, 64, 72, 77, 65, 66, 1, 70, 72, + 106, 73, 75, 69, 84, 83, 71, 4, 68, 71, 69, + 73, 89, 75, 65, 104, 121, 94, 102, 96, 126, + 85, 79, 113, 64, 74, 74, 81, 85, 96, 91, 85, + 103, 93, 91, 95, 104, 65, 10, 38, 25, 10, 4, + 14, 8, 3, 0, 11, 17, 42, 27, 21, 12, 32, 16, + 18, 4, 31, 7, 36, 23, 18, 11, 24, 10, 4, 2, 2, + 68, 51, 29, 11, 3, 19, 4, 70, 65, 16, 41, 35, + 21, 10, 31, 13, 11, 4, 2, 62, 70, 1, 8, 6, 65, + 5, 10, 13, 12, 17, 23, 34, 68, 0, 73, 72, 62, + 79, 100, 73, 3, 3, 71, 71, 70, 74, 0, 76, 95, + 69, 68, 22, 70, 77, 69, 68, 65, 72, 71, 70, + 89, 82, 72, 75, 70, 24, 100, 76, 24, 83, 68, + 69, 70, 5, 
66, 71, 3, 10, 71, 71, 106, 33, 31, + 32, 15, 64, 65, 66, 76, 67, 77, 82, 76, 94, + 91, 93, 101, 97, 108, 126, 95, 87, 86, 88, + 100, 93, 89, 91, 92, 79, 76, 80, 77, 83, 102, + 89, 86, 109, 95, 107, 107, 125, 120, 116, 115, + 77, 74, 116, 84, 85, 91, 108, 97, 98, 101, 90, + 95, 85, 82, 106, 112, 125, 86, 98, 104, 70, + 10, 15, 17, 35, 23, 29, 6, 32, 27, 37, 31, 40, + 35, 24, 42, 66, 75, 83, 91, 91, 117, 117, 115, + 16, 44, 40, 43, 33, 35, 21, 20, 11, 10, 70, + 10, 15, 17, 35, 23, 29, 6, 32, 27, 37, 31, 40, + 35, 24, 42, 66, 75, 83, 91, 91, 117, 117, 115 }, + + { + + 26, + 4, 81, 26, 4, 81, 10, 20, 26, 12, 69, 92, 65, + 27, 39, 9, 35, 84, 74, 24, 70, 72, 35, 93, 92, + 1, 76, 126, 126, 126, 20, 4, 68, 74, 24, 70, + 77, 12, 35, 65, 3, 0, 3, 106, 110, 94, 111, + 65, 71, 71, 76, 90, 82, 98, 1, 68, 69, 65, 32, + 1, 22, 0, 0, 0, 4, 96, 97, 10, 75, 69, 11, 64, + 74, 31, 17, 5, 45, 43, 6, 17, 18, 24, 8, 8, + 34, 108, 89, 89, 91, 24, 71, 17, 34, 65, 75, + 1, 15, 65, 73, 72, 81, 23, 75, 13, 0, 75, 7, + 68, 64, 7, 12, 16, 18, 7, 79, 67, 3, 65, 0, + 71, 30, 66, 1, 31, 27, 37, 30, 26, 97, 9, 9, + 75, 67, 76, 69, 31, 2, 4, 36, 55, 62, 62, 46, + 98, 107, 10, 42, 42, 125, 72, 13, 98, 24, 69, + 3, 35, 33, 54, 48, 45, 108, 0, 81, 97, 97, 69, + 14, 21, 11, 6, 11, 11, 3, 16, 9, 77, 70, 6, 1, + 0, 75, 65, 72, 78, 66, 66, 1, 71, 72, 107, 74, + 76, 69, 85, 84, 73, 2, 70, 73, 70, 75, 92, 78, + 66, 106, 123, 96, 104, 98, 126, 86, 80, 114, + 65, 75, 75, 83, 86, 98, 93, 86, 105, 93, 92, + 96, 106, 64, 11, 39, 25, 10, 4, 15, 9, 4, 0, + 13, 19, 43, 28, 21, 13, 34, 17, 20, 5, 34, 8, + 36, 23, 18, 12, 25, 11, 4, 3, 3, 68, 52, 30, + 11, 3, 20, 4, 70, 64, 17, 41, 36, 21, 10, 32, + 14, 12, 4, 3, 62, 69, 2, 9, 7, 64, 7, 11, 15, + 13, 19, 25, 37, 67, 1, 72, 71, 62, 78, 101, + 72, 4, 4, 71, 71, 70, 74, 1, 77, 96, 69, 68, + 23, 70, 77, 69, 68, 64, 73, 72, 70, 90, 83, + 73, 75, 70, 25, 101, 77, 25, 83, 69, 69, 70, + 5, 67, 72, 3, 10, 71, 71, 107, 32, 30, 31, 14, + 67, 67, 69, 79, 70, 80, 85, 79, 97, 93, 
95, + 105, 101, 112, 126, 98, 89, 88, 90, 103, 95, + 90, 92, 93, 79, 78, 83, 80, 85, 104, 91, 88, + 111, 97, 109, 109, 126, 122, 117, 117, 78, 75, + 118, 86, 86, 93, 110, 98, 99, 102, 91, 96, 85, + 81, 108, 113, 126, 88, 99, 105, 70, 10, 15, + 18, 36, 24, 29, 7, 33, 28, 38, 31, 41, 35, 24, + 42, 68, 77, 85, 93, 93, 120, 119, 116, 16, 45, + 40, 43, 34, 36, 21, 20, 11, 11, 70, 10, 15, + 18, 36, 24, 29, 7, 33, 28, 38, 31, 41, 35, 24, + 42, 68, 77, 85, 93, 93, 120, 119, 116 }, + + { + + 25, + 4, 82, 25, 4, 82, 12, 21, 27, 12, 69, 93, 67, + 26, 39, 10, 37, 85, 74, 26, 70, 73, 36, 94, + 93, 0, 77, 126, 126, 126, 22, 6, 68, 74, 26, + 70, 76, 13, 36, 65, 4, 1, 4, 108, 111, 94, + 112, 65, 71, 71, 76, 91, 81, 98, 0, 68, 69, + 64, 32, 1, 22, 0, 0, 0, 5, 96, 97, 10, 75, 70, + 11, 0, 73, 33, 18, 6, 47, 45, 7, 18, 20, 25, + 9, 8, 36, 109, 89, 89, 91, 25, 71, 18, 36, 65, + 74, 2, 17, 65, 73, 72, 81, 24, 76, 14, 0, 75, + 8, 68, 0, 7, 13, 17, 19, 7, 79, 67, 3, 65, 0, + 71, 30, 66, 1, 31, 27, 38, 31, 27, 97, 10, 10, + 76, 67, 76, 69, 32, 2, 5, 37, 56, 62, 62, 47, + 99, 108, 11, 42, 42, 125, 73, 13, 98, 24, 70, + 4, 36, 33, 54, 49, 46, 109, 1, 82, 100, 100, + 67, 14, 21, 11, 5, 11, 10, 3, 15, 9, 77, 70, + 6, 0, 0, 76, 65, 73, 79, 66, 66, 1, 71, 73, + 109, 74, 77, 69, 86, 84, 76, 0, 72, 74, 72, + 77, 95, 80, 68, 108, 125, 97, 106, 100, 126, + 87, 82, 116, 67, 76, 76, 84, 88, 100, 95, 88, + 106, 93, 93, 97, 107, 1, 13, 41, 26, 10, 4, + 16, 9, 4, 1, 14, 20, 44, 29, 22, 13, 35, 18, + 21, 6, 36, 8, 37, 24, 19, 12, 26, 11, 5, 4, 4, + 68, 54, 31, 11, 3, 20, 5, 70, 64, 17, 42, 36, + 22, 10, 33, 14, 12, 5, 3, 62, 67, 4, 11, 9, 1, + 8, 13, 16, 15, 21, 27, 39, 67, 2, 72, 71, 62, + 78, 102, 72, 4, 5, 71, 71, 70, 74, 1, 77, 97, + 69, 68, 23, 70, 77, 69, 68, 64, 73, 72, 70, + 91, 84, 73, 75, 70, 26, 102, 77, 26, 84, 69, + 69, 70, 6, 67, 72, 4, 11, 71, 71, 109, 31, 29, + 30, 12, 69, 70, 71, 82, 72, 83, 88, 82, 100, + 96, 97, 108, 104, 116, 126, 100, 92, 90, 92, + 105, 97, 91, 93, 94, 79, 80, 85, 
82, 87, 106, + 92, 89, 113, 99, 112, 111, 126, 124, 119, 118, + 79, 77, 120, 87, 88, 94, 112, 100, 101, 103, + 92, 97, 86, 80, 109, 115, 126, 89, 101, 106, + 69, 11, 16, 18, 37, 25, 30, 7, 34, 29, 39, 32, + 42, 36, 25, 41, 70, 79, 87, 95, 95, 122, 121, + 117, 17, 45, 41, 44, 34, 37, 22, 21, 12, 11, + 69, 11, 16, 18, 37, 25, 30, 7, 34, 29, 39, 32, + 42, 36, 25, 41, 70, 79, 87, 95, 95, 122, 121, + 117 }, + + { + + 23, + 4, 82, 23, 4, 82, 13, 23, 27, 12, 70, 95, 68, + 25, 40, 10, 39, 87, 74, 27, 70, 73, 37, 95, + 95, 65, 79, 126, 126, 126, 24, 7, 68, 74, 27, + 70, 76, 14, 36, 66, 5, 2, 5, 109, 113, 95, + 112, 65, 71, 70, 76, 91, 81, 98, 0, 68, 69, + 64, 33, 1, 22, 0, 0, 0, 5, 97, 97, 11, 76, 70, + 10, 0, 72, 35, 19, 7, 48, 46, 7, 19, 22, 26, + 10, 9, 38, 110, 89, 89, 91, 25, 72, 19, 38, + 65, 73, 3, 19, 65, 73, 72, 81, 24, 76, 14, 1, + 75, 8, 68, 0, 7, 13, 18, 19, 7, 79, 68, 3, 65, + 0, 71, 30, 66, 1, 32, 27, 38, 31, 27, 98, 10, + 11, 76, 67, 76, 69, 33, 2, 5, 38, 57, 62, 62, + 49, 99, 109, 11, 42, 42, 126, 73, 14, 99, 25, + 70, 4, 37, 33, 54, 49, 47, 110, 3, 83, 103, + 103, 66, 13, 21, 10, 4, 10, 10, 2, 15, 8, 78, + 71, 5, 0, 0, 76, 66, 74, 80, 67, 67, 0, 72, + 73, 110, 75, 78, 69, 88, 85, 78, 65, 74, 76, + 74, 79, 99, 83, 69, 110, 126, 99, 108, 102, + 126, 89, 83, 118, 68, 77, 77, 85, 89, 102, 97, + 90, 108, 93, 94, 98, 109, 2, 14, 42, 26, 10, + 4, 17, 10, 5, 2, 16, 21, 45, 29, 23, 14, 36, + 19, 23, 7, 38, 9, 37, 24, 19, 13, 27, 12, 5, + 4, 5, 68, 55, 32, 11, 3, 21, 5, 70, 0, 18, 42, + 37, 22, 10, 34, 15, 13, 5, 4, 62, 66, 5, 12, + 11, 2, 10, 14, 18, 16, 22, 29, 41, 66, 3, 71, + 70, 62, 78, 103, 72, 5, 5, 71, 71, 70, 75, 2, + 77, 98, 69, 68, 24, 70, 77, 69, 68, 64, 74, + 72, 70, 92, 85, 73, 75, 70, 27, 103, 77, 26, + 85, 69, 69, 70, 6, 67, 73, 4, 12, 71, 71, 110, + 30, 28, 29, 10, 71, 72, 74, 85, 75, 86, 92, + 85, 104, 99, 99, 112, 108, 120, 126, 103, 94, + 92, 94, 108, 99, 93, 94, 95, 79, 83, 87, 84, + 89, 108, 94, 91, 115, 101, 114, 113, 126, 126, + 121, 119, 80, 
78, 123, 89, 90, 96, 114, 101, + 102, 104, 93, 98, 86, 79, 110, 116, 126, 90, + 102, 107, 69, 11, 16, 19, 38, 25, 31, 7, 35, + 30, 40, 32, 43, 36, 26, 41, 72, 81, 89, 97, + 97, 124, 123, 119, 17, 46, 41, 45, 35, 37, 22, + 21, 12, 12, 69, 11, 16, 19, 38, 25, 31, 7, 35, + 30, 40, 32, 43, 36, 26, 41, 72, 81, 89, 97, + 97, 124, 123, 119 }, + + { + + 22, + 4, 82, 22, 4, 82, 15, 24, 27, 12, 70, 97, 70, + 24, 41, 10, 40, 89, 73, 29, 71, 73, 38, 96, + 96, 66, 81, 126, 126, 126, 26, 8, 68, 73, 29, + 71, 76, 15, 36, 66, 5, 3, 7, 110, 114, 95, + 112, 65, 70, 69, 77, 91, 81, 98, 0, 67, 69, 0, + 33, 1, 22, 0, 0, 0, 6, 97, 97, 11, 77, 70, 10, + 1, 70, 38, 20, 8, 50, 48, 8, 20, 24, 27, 11, + 10, 41, 110, 90, 89, 92, 25, 72, 20, 41, 64, + 73, 3, 21, 65, 73, 72, 80, 24, 76, 15, 2, 74, + 9, 67, 0, 8, 14, 19, 20, 7, 80, 68, 4, 65, 0, + 71, 31, 66, 1, 32, 28, 38, 31, 27, 98, 11, 11, + 77, 67, 75, 69, 34, 3, 6, 40, 59, 62, 62, 51, + 100, 110, 12, 43, 42, 126, 73, 14, 99, 26, 70, + 4, 39, 33, 54, 49, 49, 111, 4, 84, 105, 106, + 65, 13, 21, 10, 4, 10, 10, 2, 15, 8, 79, 72, + 5, 0, 0, 76, 66, 74, 81, 67, 67, 0, 73, 74, + 111, 76, 78, 69, 89, 85, 80, 67, 75, 77, 75, + 81, 102, 85, 70, 112, 126, 100, 110, 104, 126, + 90, 84, 119, 69, 78, 78, 87, 91, 104, 99, 91, + 109, 93, 95, 99, 111, 4, 16, 43, 27, 10, 4, + 18, 11, 6, 2, 18, 23, 46, 30, 23, 15, 38, 20, + 24, 8, 41, 10, 38, 25, 20, 13, 28, 13, 6, 5, + 6, 67, 56, 33, 11, 3, 22, 6, 69, 1, 19, 42, + 38, 22, 10, 35, 16, 14, 6, 5, 62, 65, 7, 14, + 12, 3, 11, 16, 20, 18, 24, 31, 44, 65, 4, 70, + 69, 62, 77, 104, 71, 6, 6, 71, 71, 70, 75, 2, + 78, 99, 69, 68, 25, 70, 77, 69, 68, 0, 74, 73, + 70, 93, 86, 74, 75, 70, 28, 103, 78, 27, 85, + 70, 69, 70, 6, 68, 73, 4, 12, 71, 71, 112, 29, + 27, 28, 9, 74, 75, 77, 88, 77, 89, 95, 88, + 107, 101, 101, 116, 111, 124, 126, 106, 96, + 93, 96, 110, 101, 94, 95, 96, 79, 85, 90, 87, + 91, 110, 96, 92, 117, 103, 116, 115, 126, 126, + 122, 121, 81, 79, 125, 90, 91, 97, 116, 102, + 103, 105, 94, 99, 86, 78, 112, 
117, 126, 92, + 103, 108, 69, 12, 17, 20, 39, 26, 31, 8, 36, + 31, 41, 33, 44, 37, 26, 40, 74, 83, 91, 99, + 99, 126, 125, 120, 18, 46, 42, 45, 36, 38, 23, + 22, 12, 13, 69, 12, 17, 20, 39, 26, 31, 8, 36, + 31, 41, 33, 44, 37, 26, 40, 74, 83, 91, 99, + 99, 126, 125, 120 }, + + { + + 21, + 4, 82, 21, 4, 82, 17, 26, 28, 12, 71, 99, 71, + 23, 41, 10, 42, 90, 73, 30, 71, 74, 39, 97, + 97, 67, 83, 126, 126, 126, 28, 10, 68, 73, 30, + 71, 75, 16, 37, 66, 6, 4, 8, 111, 115, 95, + 113, 65, 70, 68, 77, 92, 81, 98, 0, 67, 69, 1, + 34, 1, 22, 0, 0, 0, 6, 97, 97, 12, 78, 70, 9, + 1, 69, 40, 21, 9, 51, 49, 9, 21, 26, 28, 12, + 11, 43, 111, 90, 89, 92, 26, 72, 21, 43, 64, + 72, 4, 23, 65, 73, 72, 80, 25, 76, 15, 3, 74, + 10, 67, 0, 8, 14, 20, 20, 7, 80, 68, 4, 65, 0, + 71, 31, 66, 1, 33, 28, 39, 31, 28, 99, 11, 12, + 77, 67, 75, 69, 35, 3, 6, 41, 60, 62, 62, 53, + 100, 111, 12, 43, 42, 126, 73, 15, 100, 26, + 70, 4, 40, 33, 54, 49, 50, 112, 6, 85, 108, + 109, 0, 13, 21, 9, 3, 9, 9, 1, 15, 7, 80, 73, + 5, 0, 0, 77, 67, 75, 82, 68, 67, 0, 74, 74, + 112, 76, 79, 69, 90, 86, 82, 69, 77, 79, 77, + 83, 105, 88, 71, 114, 126, 102, 112, 106, 126, + 91, 85, 121, 70, 79, 79, 88, 92, 106, 101, 93, + 111, 93, 96, 100, 112, 5, 17, 45, 27, 10, 4, + 19, 12, 7, 3, 20, 24, 47, 31, 24, 15, 39, 21, + 26, 9, 43, 10, 38, 25, 20, 14, 29, 13, 6, 6, + 7, 67, 58, 34, 11, 3, 23, 6, 69, 2, 20, 43, + 39, 23, 10, 36, 16, 15, 6, 5, 62, 0, 8, 15, + 14, 4, 13, 17, 21, 19, 26, 33, 46, 65, 5, 70, + 68, 62, 77, 105, 71, 7, 7, 71, 71, 70, 75, 3, + 78, 100, 69, 68, 25, 70, 77, 69, 68, 0, 75, + 73, 70, 94, 87, 74, 75, 70, 29, 104, 78, 28, + 86, 70, 69, 70, 7, 68, 74, 5, 13, 71, 71, 113, + 28, 26, 27, 7, 76, 77, 79, 91, 80, 92, 98, 91, + 110, 104, 103, 120, 115, 126, 126, 109, 98, + 95, 98, 113, 103, 95, 96, 97, 79, 87, 92, 89, + 93, 112, 98, 94, 119, 105, 118, 117, 126, 126, + 124, 122, 82, 80, 126, 92, 93, 99, 118, 104, + 105, 106, 95, 100, 86, 77, 113, 119, 126, 93, + 104, 109, 69, 12, 17, 20, 40, 27, 32, 8, 37, + 
32, 42, 33, 45, 37, 27, 40, 76, 85, 93, 101, + 101, 126, 126, 121, 18, 47, 42, 46, 36, 39, + 23, 22, 13, 13, 69, 12, 17, 20, 40, 27, 32, 8, + 37, 32, 42, 33, 45, 37, 27, 40, 76, 85, 93, + 101, 101, 126, 126, 121 }, + + { + + 20, + 4, 82, 20, 4, 82, 19, 27, 28, 12, 71, 101, 73, + 22, 42, 10, 44, 92, 73, 32, 71, 74, 40, 98, + 98, 68, 85, 126, 126, 126, 30, 11, 68, 73, 32, + 71, 75, 17, 37, 66, 7, 5, 9, 112, 116, 95, + 113, 65, 70, 67, 77, 92, 81, 98, 0, 67, 69, 2, + 34, 1, 22, 0, 0, 0, 7, 97, 97, 12, 79, 70, 9, + 2, 68, 42, 22, 10, 53, 51, 10, 22, 28, 29, 13, + 12, 45, 112, 90, 89, 92, 26, 72, 22, 45, 64, + 71, 5, 25, 65, 73, 72, 80, 25, 76, 16, 4, 74, + 11, 67, 0, 8, 15, 21, 21, 7, 80, 68, 4, 65, 0, + 71, 31, 66, 1, 33, 28, 39, 31, 28, 99, 12, 13, + 78, 67, 75, 69, 36, 3, 7, 42, 61, 62, 62, 55, + 101, 112, 13, 43, 42, 126, 73, 15, 100, 27, + 70, 4, 41, 33, 54, 49, 51, 113, 7, 86, 111, + 112, 1, 13, 21, 9, 2, 9, 9, 1, 15, 7, 81, 74, + 5, 0, 0, 77, 67, 76, 83, 68, 67, 0, 75, 75, + 113, 77, 80, 69, 91, 86, 84, 71, 79, 80, 79, + 85, 108, 90, 72, 116, 126, 103, 114, 108, 126, + 92, 86, 123, 71, 80, 80, 89, 94, 108, 103, 95, + 112, 93, 97, 101, 114, 7, 19, 46, 28, 10, 4, + 20, 13, 8, 4, 22, 25, 48, 32, 25, 16, 40, 22, + 27, 10, 45, 11, 39, 26, 21, 14, 30, 14, 7, 7, + 8, 67, 59, 35, 11, 3, 24, 7, 69, 3, 21, 43, + 40, 23, 10, 37, 17, 16, 7, 6, 62, 1, 10, 17, + 16, 5, 14, 19, 23, 21, 28, 35, 48, 64, 6, 69, + 67, 62, 77, 106, 71, 8, 8, 71, 71, 70, 75, 3, + 78, 101, 69, 68, 26, 70, 77, 69, 68, 0, 75, + 73, 70, 95, 88, 74, 75, 70, 30, 105, 78, 29, + 87, 70, 69, 70, 7, 68, 74, 5, 14, 71, 71, 115, + 27, 25, 26, 5, 78, 80, 82, 94, 82, 95, 101, + 94, 113, 107, 105, 124, 118, 126, 126, 112, + 100, 97, 100, 115, 105, 96, 97, 98, 79, 89, + 94, 91, 95, 114, 100, 95, 121, 107, 120, 119, + 126, 126, 126, 123, 83, 81, 126, 93, 95, 100, + 120, 105, 106, 107, 96, 101, 86, 76, 114, 120, + 126, 94, 105, 110, 69, 13, 18, 21, 41, 28, 33, + 8, 38, 33, 43, 34, 46, 38, 28, 39, 78, 87, 95, + 103, 
103, 126, 126, 122, 19, 47, 43, 47, 37, + 40, 24, 23, 13, 14, 69, 13, 18, 21, 41, 28, + 33, 8, 38, 33, 43, 34, 46, 38, 28, 39, 78, 87, + 95, 103, 103, 126, 126, 122 }, + + { + + 18, + 3, 83, 18, 3, 83, 20, 28, 28, 12, 72, 103, 75, + 21, 42, 10, 45, 94, 73, 33, 72, 75, 41, 99, + 100, 70, 87, 126, 126, 126, 32, 12, 68, 73, + 33, 72, 75, 17, 37, 67, 7, 5, 10, 114, 118, + 96, 114, 66, 70, 67, 78, 93, 81, 98, 64, 67, + 69, 2, 34, 0, 22, 0, 0, 0, 7, 98, 97, 12, 80, + 71, 8, 2, 67, 44, 23, 10, 54, 52, 10, 23, 29, + 29, 14, 12, 47, 113, 91, 90, 93, 26, 73, 22, + 47, 64, 71, 5, 26, 65, 73, 72, 80, 25, 77, 16, + 4, 74, 11, 67, 0, 8, 15, 22, 21, 7, 81, 69, 4, + 65, 64, 71, 31, 66, 1, 33, 28, 39, 31, 28, + 100, 12, 13, 79, 67, 75, 70, 36, 3, 7, 43, 62, + 62, 62, 56, 102, 114, 13, 43, 42, 126, 74, 15, + 101, 27, 71, 4, 42, 33, 53, 49, 52, 114, 8, + 88, 114, 116, 2, 12, 21, 8, 1, 8, 8, 0, 14, 6, + 82, 75, 4, 64, 64, 78, 68, 77, 84, 69, 68, 64, + 76, 76, 115, 78, 81, 69, 93, 87, 87, 74, 81, + 82, 81, 88, 112, 93, 74, 118, 126, 105, 117, + 110, 126, 94, 88, 125, 73, 81, 81, 91, 96, + 110, 105, 97, 114, 93, 98, 102, 116, 8, 20, + 47, 28, 10, 4, 20, 13, 8, 4, 23, 26, 48, 32, + 25, 16, 41, 23, 28, 10, 47, 11, 39, 26, 21, + 14, 30, 14, 7, 7, 9, 67, 60, 36, 11, 2, 24, 7, + 69, 3, 21, 43, 40, 23, 10, 38, 17, 16, 7, 6, + 62, 2, 11, 18, 17, 6, 15, 20, 24, 22, 29, 36, + 50, 64, 6, 69, 67, 62, 77, 108, 71, 8, 8, 71, + 71, 70, 76, 3, 79, 102, 70, 68, 26, 71, 77, + 70, 68, 0, 76, 74, 71, 96, 89, 75, 76, 70, 31, + 106, 79, 29, 88, 71, 69, 71, 7, 69, 75, 5, 14, + 71, 71, 117, 25, 24, 24, 3, 81, 83, 85, 97, + 85, 98, 105, 97, 117, 110, 107, 126, 122, 126, + 126, 115, 103, 99, 103, 118, 107, 98, 99, 99, + 79, 92, 97, 94, 97, 117, 102, 97, 124, 109, + 123, 121, 126, 126, 126, 125, 85, 83, 126, 95, + 97, 102, 122, 107, 108, 108, 97, 102, 87, 75, + 116, 122, 126, 96, 107, 112, 69, 13, 18, 21, + 42, 28, 33, 8, 39, 33, 44, 34, 46, 38, 28, 38, + 80, 89, 98, 106, 105, 126, 126, 124, 19, 47, + 43, 47, 
37, 40, 24, 23, 13, 14, 69, 13, 18, + 21, 42, 28, 33, 8, 39, 33, 44, 34, 46, 38, 28, + 38, 80, 89, 98, 106, 105, 126, 126, 124 }, + + { + + 17, + 3, 83, 17, 3, 83, 22, 30, 29, 13, 72, 104, 76, + 21, 43, 11, 47, 95, 72, 35, 72, 75, 43, 99, + 101, 71, 88, 126, 126, 126, 34, 14, 67, 72, + 35, 72, 74, 18, 38, 67, 8, 6, 12, 115, 119, + 96, 114, 66, 69, 66, 78, 93, 80, 97, 64, 66, + 68, 3, 35, 0, 22, 0, 0, 0, 8, 98, 97, 13, 80, + 71, 8, 3, 65, 47, 25, 11, 56, 54, 11, 25, 31, + 30, 16, 13, 50, 113, 91, 90, 93, 27, 73, 23, + 50, 0, 70, 6, 28, 65, 72, 72, 79, 26, 77, 17, + 5, 73, 12, 66, 1, 9, 16, 23, 22, 8, 81, 69, 5, + 64, 64, 70, 32, 65, 1, 34, 29, 40, 32, 29, + 100, 13, 14, 79, 67, 74, 70, 37, 4, 8, 45, 62, + 62, 62, 58, 102, 115, 14, 44, 43, 126, 74, 16, + 101, 28, 71, 5, 44, 33, 53, 50, 54, 115, 10, + 89, 116, 119, 4, 12, 21, 8, 1, 8, 8, 0, 14, 6, + 82, 75, 4, 64, 64, 78, 68, 77, 84, 69, 68, 64, + 76, 76, 116, 78, 81, 69, 94, 87, 89, 76, 82, + 83, 82, 90, 115, 95, 75, 119, 126, 106, 119, + 111, 126, 95, 89, 126, 74, 81, 81, 92, 97, + 111, 106, 98, 115, 93, 98, 102, 117, 10, 22, + 49, 29, 10, 4, 21, 14, 9, 5, 25, 28, 49, 33, + 26, 17, 43, 24, 30, 11, 50, 12, 40, 27, 22, + 15, 31, 15, 8, 8, 11, 66, 62, 37, 12, 2, 25, + 8, 68, 4, 22, 44, 41, 24, 11, 39, 18, 17, 8, + 7, 62, 4, 13, 20, 19, 8, 17, 22, 26, 24, 31, + 38, 53, 0, 7, 68, 66, 62, 76, 109, 70, 9, 9, + 70, 71, 69, 76, 4, 79, 102, 70, 68, 27, 71, + 77, 70, 68, 1, 76, 74, 71, 96, 89, 75, 76, 69, + 33, 106, 79, 30, 88, 71, 69, 71, 8, 69, 75, 6, + 15, 70, 70, 118, 24, 23, 23, 2, 83, 85, 87, + 100, 87, 100, 108, 99, 120, 112, 109, 126, + 125, 126, 126, 117, 105, 100, 105, 120, 108, + 99, 100, 99, 79, 94, 99, 96, 99, 119, 103, 98, + 126, 110, 125, 122, 126, 126, 126, 126, 86, + 84, 126, 96, 98, 103, 123, 108, 109, 109, 97, + 102, 87, 74, 117, 123, 126, 97, 108, 113, 68, + 14, 19, 22, 44, 29, 34, 9, 41, 34, 45, 35, 47, + 39, 29, 38, 81, 90, 100, 108, 106, 126, 126, + 125, 20, 48, 44, 48, 38, 41, 25, 24, 14, 15, + 68, 14, 
19, 22, 44, 29, 34, 9, 41, 34, 45, 35, + 47, 39, 29, 38, 81, 90, 100, 108, 106, 126, + 126, 125 }, + + { + + 16, + 3, 83, 16, 3, 83, 24, 31, 29, 13, 72, 106, 78, + 20, 44, 11, 49, 97, 72, 36, 72, 75, 44, 100, + 102, 72, 90, 126, 126, 126, 36, 15, 67, 72, + 36, 72, 74, 19, 38, 67, 9, 7, 13, 116, 120, + 96, 114, 66, 69, 65, 78, 93, 80, 97, 64, 66, + 68, 4, 35, 0, 22, 0, 0, 0, 9, 98, 97, 13, 81, + 71, 8, 4, 64, 49, 26, 12, 58, 56, 12, 26, 33, + 31, 17, 14, 52, 114, 91, 90, 93, 27, 73, 24, + 52, 0, 69, 7, 30, 65, 72, 72, 79, 26, 77, 17, + 6, 73, 13, 66, 1, 9, 16, 24, 23, 8, 81, 69, 5, + 64, 64, 70, 32, 65, 1, 34, 29, 40, 32, 29, + 101, 14, 15, 80, 67, 74, 70, 38, 4, 8, 46, 62, + 62, 62, 60, 103, 116, 14, 44, 43, 126, 74, 16, + 102, 29, 71, 5, 45, 33, 53, 50, 55, 116, 11, + 90, 119, 122, 5, 12, 21, 8, 0, 7, 8, 0, 14, 5, + 83, 76, 4, 64, 64, 78, 69, 78, 85, 69, 68, 64, + 77, 77, 117, 79, 82, 69, 95, 88, 91, 78, 84, + 85, 84, 92, 118, 97, 76, 121, 126, 108, 121, + 113, 126, 96, 90, 126, 75, 82, 82, 93, 99, + 113, 108, 100, 117, 93, 99, 103, 119, 11, 23, + 50, 30, 10, 4, 22, 15, 10, 6, 27, 29, 50, 34, + 27, 18, 44, 25, 31, 12, 52, 13, 40, 27, 22, + 15, 32, 16, 9, 9, 12, 66, 62, 38, 12, 2, 26, + 9, 68, 5, 23, 44, 42, 24, 11, 40, 19, 18, 9, + 8, 62, 5, 15, 22, 21, 9, 18, 24, 28, 25, 33, + 40, 55, 1, 8, 67, 65, 62, 76, 110, 70, 10, 10, + 70, 71, 69, 76, 4, 79, 103, 70, 68, 28, 71, + 77, 70, 68, 1, 76, 74, 71, 97, 90, 75, 76, 69, + 34, 107, 79, 31, 89, 71, 69, 71, 8, 69, 75, 6, + 16, 70, 70, 120, 23, 22, 22, 0, 85, 88, 90, + 103, 89, 103, 111, 102, 123, 115, 111, 126, + 126, 126, 126, 120, 107, 102, 107, 122, 110, + 100, 101, 100, 79, 96, 101, 98, 101, 121, 105, + 100, 126, 112, 126, 124, 126, 126, 126, 126, + 87, 85, 126, 98, 100, 105, 125, 109, 110, 110, + 98, 103, 87, 73, 118, 124, 126, 98, 109, 114, + 68, 14, 20, 23, 45, 30, 35, 9, 42, 35, 46, 35, + 48, 40, 30, 37, 83, 92, 102, 110, 108, 126, + 126, 126, 21, 48, 44, 49, 39, 42, 25, 25, 14, + 16, 68, 14, 20, 23, 45, 30, 35, 
9, 42, 35, 46, + 35, 48, 40, 30, 37, 83, 92, 102, 110, 108, + 126, 126, 126 }, + + { + + 15, + 3, 83, 15, 3, 83, 26, 33, 30, 13, 73, 108, 79, + 19, 44, 11, 51, 98, 72, 38, 72, 76, 45, 101, + 103, 73, 92, 126, 126, 126, 38, 17, 67, 72, + 38, 72, 73, 20, 39, 67, 10, 8, 14, 117, 121, + 96, 115, 66, 69, 64, 78, 94, 80, 97, 64, 66, + 68, 5, 36, 0, 22, 0, 0, 0, 9, 98, 97, 14, 82, + 71, 7, 4, 0, 51, 27, 13, 59, 57, 13, 27, 35, + 32, 18, 15, 54, 115, 91, 90, 93, 28, 73, 25, + 54, 0, 68, 8, 32, 65, 72, 72, 79, 27, 77, 18, + 7, 73, 14, 66, 1, 9, 17, 25, 23, 8, 81, 69, 5, + 64, 64, 70, 32, 65, 1, 35, 29, 41, 32, 30, + 101, 14, 16, 80, 67, 74, 70, 39, 4, 9, 47, 62, + 62, 62, 62, 103, 117, 15, 44, 43, 126, 74, 17, + 102, 29, 71, 5, 46, 33, 53, 50, 56, 117, 13, + 91, 122, 125, 7, 12, 21, 7, 64, 7, 7, 64, 14, + 5, 84, 77, 4, 64, 64, 79, 69, 79, 86, 70, 68, + 64, 78, 77, 118, 79, 83, 69, 96, 88, 93, 80, + 86, 86, 86, 94, 121, 100, 77, 123, 126, 109, + 123, 115, 126, 97, 91, 126, 76, 83, 83, 94, + 100, 115, 110, 102, 118, 93, 100, 104, 120, + 13, 25, 52, 30, 10, 4, 23, 16, 11, 7, 29, 30, + 51, 35, 28, 18, 45, 26, 33, 13, 54, 13, 41, + 28, 23, 16, 33, 16, 9, 10, 13, 66, 62, 39, 12, + 2, 27, 9, 68, 6, 24, 45, 43, 25, 11, 41, 19, + 19, 9, 8, 62, 7, 16, 23, 23, 10, 20, 25, 29, + 27, 35, 42, 57, 1, 9, 67, 64, 62, 76, 111, 70, + 11, 11, 70, 71, 69, 76, 5, 79, 104, 70, 68, + 28, 71, 77, 70, 68, 1, 77, 74, 71, 98, 91, 75, + 76, 69, 35, 108, 79, 32, 90, 71, 69, 71, 9, + 69, 76, 7, 17, 70, 70, 121, 22, 21, 21, 65, + 87, 90, 92, 106, 92, 106, 114, 105, 126, 118, + 113, 126, 126, 126, 126, 123, 109, 104, 109, + 125, 112, 101, 102, 101, 79, 98, 103, 100, + 103, 123, 107, 101, 126, 114, 126, 126, 126, + 126, 126, 126, 88, 86, 126, 99, 102, 106, 126, + 111, 112, 111, 99, 104, 87, 72, 119, 126, 126, + 99, 110, 115, 68, 15, 20, 23, 46, 31, 36, 9, + 43, 36, 47, 36, 49, 40, 31, 37, 85, 94, 104, + 112, 110, 126, 126, 126, 21, 49, 45, 50, 39, + 43, 26, 25, 15, 16, 68, 15, 20, 23, 46, 31, + 36, 9, 43, 
36, 47, 36, 49, 40, 31, 37, 85, 94, + 104, 112, 110, 126, 126, 126 }, + + }, + + { + + { + + 62, + 9, 74, 62, 9, 74, 126, 104, 10, 9, 12, 30, 61, + 62, 54, 14, 118, 6, 78, 65, 1, 14, 73, 13, 64, + 20, 62, 67, 90, 104, 126, 104, 67, 78, 65, 1, + 86, 95, 2, 18, 69, 81, 96, 8, 67, 86, 88, 5, 76, + 94, 9, 69, 81, 88, 67, 74, 74, 80, 72, 5, 22, 0, + 0, 0, 83, 86, 97, 72, 22, 1, 52, 8, 69, 126, + 102, 82, 74, 107, 126, 126, 126, 95, 126, 114, + 126, 123, 115, 122, 115, 0, 68, 84, 104, 70, 93, + 90, 126, 74, 97, 91, 126, 7, 82, 76, 125, 93, + 87, 77, 71, 0, 68, 84, 1, 65, 2, 7, 66, 64, 2, + 78, 13, 11, 28, 19, 25, 18, 17, 19, 46, 12, 13, + 44, 30, 1, 108, 100, 101, 91, 94, 88, 84, 86, + 83, 87, 94, 70, 72, 74, 4, 102, 100, 95, 75, 72, + 75, 71, 17, 69, 1, 65, 26, 72, 6, 9, 1, 72, 62, + 54, 38, 45, 54, 44, 26, 45, 34, 30, 33, 18, 5, + 1, 2, 25, 18, 24, 21, 19, 18, 22, 14, 29, 21, 8, + 12, 17, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 46, 62, 60, 41, 62, 62, 62, + 62, 60, 58, 62, 47, 41, 15, 26, 3, 68, 97, 71, + 21, 13, 9, 1, 5, 0, 72, 74, 91, 67, 36, 24, 19, + 17, 64, 68, 78, 77, 86, 92, 8, 3, 1, 65, 73, 76, + 80, 88, 110, 97, 84, 79, 73, 74, 86, 96, 97, + 117, 78, 30, 15, 10, 1, 71, 79, 86, 90, 97, 62, + 93, 84, 79, 66, 71, 1, 3, 4, 75, 1, 5, 66, 79, + 71, 68, 19, 1, 27, 23, 36, 34, 19, 27, 31, 21, + 15, 1, 17, 64, 104, 97, 96, 88, 85, 85, 85, 88, + 66, 77, 76, 76, 5, 76, 83, 99, 95, 95, 76, 74, + 70, 75, 68, 65, 73, 1, 1, 68, 75, 8, 64, 70, 57, + 44, 47, 49, 50, 52, 48, 47, 40, 40, 43, 37, 19, + 23, 16, 46, 42, 41, 36, 34, 28, 13, 6, 0, 77, + 82, 94, 69, 109, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 61, 50, 28, 5, 62, 62, 33, 62, 62, + 62, 60, 62, 58, 52, 58, 51, 52, 34, 37, 24, 66, + 42, 32, 13, 120, 112, 114, 85, 92, 89, 71, 81, + 80, 68, 70, 7, 68, 13, 74, 62, 62, 62, 62, 60, + 57, 29, 9, 82, 75, 40, 29, 20, 9, 8, 2, 64, 68, + 92, 106, 97, 90, 90, 88, 73, 79, 86, 73, 70, 69, + 66, 64, 5, 4, 62, 62, 62, 62, 60, 54, 43, 27, 67 }, + + { + + 62, + 
9, 74, 62, 9, 74, 125, 102, 11, 10, 12, 29, + 60, 62, 54, 14, 115, 6, 77, 64, 1, 14, 72, 12, + 65, 20, 62, 68, 91, 104, 124, 102, 67, 77, 64, + 1, 85, 93, 3, 18, 68, 80, 95, 8, 67, 85, 88, + 5, 75, 93, 9, 69, 80, 88, 66, 73, 73, 79, 71, + 5, 22, 0, 0, 0, 82, 86, 97, 71, 22, 1, 52, 8, + 69, 125, 101, 82, 73, 105, 125, 125, 125, 93, + 125, 112, 125, 121, 114, 121, 114, 1, 67, 83, + 103, 69, 92, 89, 125, 73, 96, 90, 125, 8, 81, + 75, 123, 92, 86, 76, 70, 1, 67, 83, 2, 64, 2, + 7, 65, 64, 2, 77, 13, 11, 28, 19, 25, 18, 17, + 19, 45, 12, 13, 43, 29, 1, 107, 99, 100, 90, + 93, 87, 83, 85, 82, 86, 92, 70, 72, 73, 3, + 101, 99, 95, 74, 72, 74, 70, 17, 68, 1, 65, + 25, 71, 6, 8, 1, 72, 62, 54, 38, 45, 54, 44, + 26, 45, 34, 29, 33, 18, 5, 1, 2, 25, 18, 24, + 21, 19, 17, 22, 14, 28, 20, 8, 11, 16, 89, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 60, 44, 62, 59, 40, 62, 62, 62, 62, 58, + 56, 61, 45, 39, 15, 25, 2, 68, 97, 70, 22, 14, + 10, 2, 5, 0, 71, 73, 90, 66, 37, 25, 20, 17, + 0, 67, 77, 76, 85, 91, 9, 4, 2, 64, 72, 75, + 79, 87, 108, 96, 82, 78, 72, 73, 85, 95, 96, + 115, 77, 31, 16, 11, 2, 70, 78, 85, 89, 96, + 62, 92, 83, 78, 66, 70, 1, 4, 5, 74, 2, 6, 65, + 78, 71, 68, 19, 2, 27, 23, 35, 34, 19, 26, 30, + 21, 15, 1, 16, 64, 103, 96, 95, 87, 84, 84, + 84, 87, 66, 76, 75, 75, 5, 75, 82, 98, 94, 95, + 76, 73, 70, 74, 68, 65, 72, 1, 1, 67, 74, 8, + 64, 70, 57, 44, 47, 49, 49, 52, 48, 47, 40, + 40, 43, 37, 19, 22, 15, 45, 41, 40, 35, 33, + 27, 13, 6, 0, 76, 81, 93, 69, 108, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 61, 59, 48, 27, 5, + 62, 62, 32, 62, 62, 62, 58, 62, 56, 50, 56, + 49, 50, 33, 35, 23, 67, 41, 31, 12, 118, 110, + 112, 84, 91, 88, 69, 80, 79, 68, 69, 9, 66, + 15, 73, 62, 62, 62, 62, 58, 55, 27, 7, 83, 74, + 41, 29, 20, 9, 9, 2, 64, 68, 91, 105, 96, 89, + 89, 86, 72, 78, 85, 72, 69, 68, 65, 0, 6, 4, + 62, 62, 62, 62, 59, 53, 41, 26, 67 }, + + { + + 62, + 9, 74, 62, 9, 74, 123, 101, 11, 10, 12, 28, + 59, 61, 54, 14, 113, 6, 76, 0, 1, 13, 72, 11, + 
66, 19, 60, 70, 92, 105, 121, 101, 67, 76, 0, + 1, 85, 92, 3, 17, 68, 80, 94, 8, 67, 85, 88, + 5, 75, 92, 9, 69, 80, 88, 66, 73, 73, 79, 71, + 5, 22, 0, 0, 0, 81, 86, 97, 71, 21, 1, 52, 8, + 69, 124, 100, 82, 73, 104, 123, 123, 124, 92, + 123, 111, 123, 120, 113, 120, 113, 2, 67, 82, + 102, 69, 92, 88, 123, 73, 96, 90, 124, 8, 81, + 75, 122, 92, 85, 76, 70, 1, 67, 82, 2, 64, 1, + 7, 65, 64, 2, 77, 13, 11, 27, 19, 24, 18, 17, + 19, 43, 12, 13, 41, 28, 0, 106, 98, 99, 89, + 92, 86, 82, 84, 82, 85, 91, 70, 72, 73, 2, + 101, 98, 95, 74, 72, 73, 70, 16, 67, 1, 65, + 24, 70, 5, 7, 1, 73, 60, 53, 37, 44, 53, 43, + 25, 44, 34, 28, 32, 18, 5, 1, 2, 24, 17, 23, + 20, 18, 16, 21, 13, 26, 19, 7, 10, 15, 89, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 58, 41, 62, 57, 38, 62, 62, 62, 62, 56, + 54, 58, 43, 37, 14, 23, 1, 69, 97, 70, 22, 14, + 10, 2, 5, 0, 71, 73, 89, 66, 37, 25, 20, 17, + 1, 67, 76, 76, 84, 90, 10, 5, 2, 64, 71, 75, + 79, 86, 107, 95, 81, 77, 72, 73, 84, 94, 95, + 114, 77, 31, 16, 11, 2, 69, 77, 84, 88, 95, + 62, 92, 83, 78, 66, 70, 1, 4, 5, 74, 2, 6, 64, + 78, 71, 68, 18, 2, 26, 22, 34, 33, 19, 25, 29, + 21, 15, 0, 15, 65, 102, 95, 94, 87, 84, 84, + 83, 86, 66, 76, 75, 75, 4, 75, 82, 98, 93, 95, + 76, 73, 70, 73, 68, 65, 71, 1, 1, 67, 73, 7, + 64, 71, 56, 44, 47, 48, 48, 51, 47, 46, 39, + 39, 42, 36, 18, 21, 14, 43, 40, 38, 33, 32, + 26, 12, 5, 0, 76, 81, 93, 70, 107, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 59, 57, 46, 26, 4, + 62, 60, 31, 62, 62, 62, 56, 60, 54, 48, 54, + 47, 48, 31, 33, 21, 68, 39, 29, 10, 117, 109, + 111, 83, 90, 87, 67, 79, 78, 68, 68, 10, 65, + 16, 72, 62, 62, 62, 62, 55, 52, 24, 5, 84, 74, + 41, 29, 20, 9, 9, 2, 64, 68, 90, 104, 95, 88, + 88, 85, 71, 77, 84, 71, 68, 67, 65, 1, 6, 4, + 62, 62, 62, 61, 57, 51, 39, 24, 68 }, + + { + + 62, + 9, 74, 62, 9, 74, 121, 99, 12, 10, 11, 26, 57, + 60, 54, 14, 111, 6, 75, 1, 1, 12, 72, 10, 67, + 19, 58, 71, 93, 105, 118, 100, 67, 75, 1, 1, + 84, 91, 4, 17, 68, 79, 93, 7, 68, 85, 88, 5, + 
75, 92, 9, 69, 80, 88, 65, 73, 73, 79, 70, 5, + 22, 0, 0, 0, 81, 86, 97, 70, 20, 1, 52, 8, 69, + 123, 99, 82, 72, 103, 121, 121, 122, 91, 121, + 110, 121, 119, 112, 119, 112, 3, 67, 81, 101, + 69, 91, 88, 121, 73, 95, 89, 123, 8, 81, 74, + 120, 91, 84, 76, 70, 1, 67, 81, 3, 0, 1, 7, + 65, 64, 2, 77, 13, 10, 27, 19, 23, 18, 17, 19, + 41, 12, 12, 39, 27, 64, 105, 97, 98, 88, 91, + 86, 81, 84, 81, 84, 90, 70, 72, 73, 1, 100, + 97, 95, 74, 72, 72, 70, 15, 66, 1, 65, 23, 69, + 5, 6, 1, 74, 59, 52, 37, 43, 52, 42, 25, 43, + 33, 27, 31, 18, 5, 1, 1, 23, 16, 22, 19, 17, + 15, 20, 13, 24, 18, 7, 9, 14, 89, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 55, + 39, 62, 55, 37, 62, 61, 62, 59, 54, 51, 56, + 41, 34, 13, 21, 0, 70, 97, 70, 23, 14, 10, 2, + 5, 0, 71, 73, 89, 66, 37, 25, 20, 17, 2, 66, + 76, 75, 84, 89, 11, 5, 3, 64, 70, 74, 78, 86, + 106, 94, 80, 76, 71, 73, 83, 93, 94, 113, 76, + 31, 16, 11, 2, 68, 77, 83, 87, 94, 62, 91, 82, + 77, 66, 70, 1, 4, 5, 74, 2, 6, 64, 78, 71, 68, + 18, 3, 25, 21, 33, 32, 19, 24, 28, 21, 15, 0, + 14, 65, 101, 94, 93, 86, 83, 83, 83, 85, 66, + 76, 75, 74, 4, 75, 82, 97, 92, 95, 76, 73, 70, + 72, 68, 65, 70, 1, 1, 67, 72, 6, 64, 72, 55, + 43, 46, 47, 47, 50, 46, 45, 38, 38, 41, 35, + 17, 20, 13, 42, 39, 37, 31, 30, 25, 11, 5, 64, + 76, 81, 93, 70, 106, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 57, 54, 44, 24, 3, 61, 59, 29, + 62, 62, 60, 54, 58, 52, 46, 52, 45, 45, 29, + 31, 19, 69, 37, 27, 9, 116, 108, 110, 82, 89, + 86, 66, 78, 77, 68, 67, 12, 0, 18, 71, 62, 62, + 62, 62, 52, 49, 21, 3, 85, 74, 41, 29, 20, 9, + 9, 2, 64, 68, 90, 103, 94, 87, 87, 84, 71, 77, + 83, 71, 68, 67, 65, 1, 6, 4, 62, 62, 62, 59, + 55, 49, 37, 22, 69 }, + + { + + 62, + 9, 74, 62, 9, 74, 120, 98, 12, 10, 11, 25, 56, + 58, 54, 14, 108, 5, 74, 1, 1, 11, 72, 9, 68, + 18, 56, 73, 94, 106, 115, 99, 67, 74, 1, 1, + 84, 90, 4, 16, 68, 79, 93, 7, 68, 84, 88, 5, + 75, 91, 8, 70, 80, 88, 65, 72, 73, 78, 70, 5, + 22, 0, 0, 0, 80, 87, 97, 70, 19, 1, 52, 8, 69, + 122, 
98, 82, 72, 101, 120, 119, 121, 90, 120, + 108, 119, 118, 112, 118, 112, 3, 67, 80, 100, + 69, 91, 87, 119, 73, 95, 89, 122, 8, 80, 74, + 119, 91, 84, 76, 69, 1, 67, 81, 3, 0, 0, 6, + 65, 64, 2, 77, 13, 10, 26, 19, 23, 18, 17, 18, + 39, 12, 12, 37, 26, 65, 104, 96, 97, 87, 91, + 85, 80, 83, 81, 83, 89, 70, 72, 72, 0, 100, + 96, 95, 74, 72, 72, 70, 14, 65, 1, 65, 21, 68, + 4, 5, 1, 75, 57, 51, 36, 42, 51, 41, 24, 42, + 33, 25, 30, 17, 5, 1, 1, 22, 16, 21, 19, 16, + 14, 19, 12, 22, 17, 6, 8, 13, 89, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 59, 53, + 36, 62, 54, 35, 62, 59, 62, 57, 51, 49, 53, + 39, 32, 12, 20, 65, 71, 97, 70, 23, 15, 10, 2, + 5, 0, 71, 73, 88, 65, 38, 25, 20, 17, 3, 66, + 75, 75, 83, 89, 12, 6, 3, 64, 70, 74, 78, 85, + 105, 94, 79, 76, 71, 73, 82, 92, 94, 112, 76, + 32, 16, 11, 2, 67, 76, 83, 86, 93, 62, 91, 82, + 77, 66, 70, 1, 4, 5, 73, 2, 6, 0, 78, 71, 68, + 17, 3, 24, 20, 32, 31, 19, 22, 27, 20, 15, 64, + 13, 66, 101, 94, 92, 86, 83, 83, 82, 84, 67, + 76, 75, 74, 3, 75, 82, 97, 91, 95, 76, 72, 70, + 72, 68, 65, 69, 1, 0, 67, 71, 6, 65, 73, 54, + 43, 46, 46, 46, 49, 45, 44, 37, 37, 40, 34, + 16, 19, 12, 40, 37, 35, 29, 29, 24, 10, 4, 64, + 76, 81, 93, 71, 106, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 60, 55, 52, 42, 23, 2, 59, 57, 28, + 62, 62, 58, 52, 55, 50, 44, 50, 43, 43, 27, + 29, 17, 70, 35, 25, 7, 115, 107, 109, 82, 88, + 85, 64, 77, 76, 68, 66, 13, 1, 19, 71, 62, 62, + 62, 62, 49, 46, 18, 1, 86, 74, 41, 29, 20, 9, + 9, 2, 64, 68, 89, 102, 93, 86, 87, 83, 70, 76, + 82, 70, 67, 66, 64, 2, 7, 4, 62, 62, 62, 57, + 53, 47, 35, 20, 70 }, + + { + + 62, + 9, 74, 62, 9, 74, 118, 96, 12, 10, 10, 23, 54, + 57, 54, 14, 106, 5, 73, 2, 1, 11, 71, 8, 69, + 18, 54, 75, 95, 106, 112, 97, 67, 73, 2, 1, + 84, 89, 4, 16, 68, 79, 92, 7, 69, 84, 88, 5, + 75, 90, 8, 70, 80, 88, 64, 72, 72, 78, 69, 5, + 22, 0, 0, 0, 80, 87, 97, 69, 18, 1, 52, 8, 69, + 121, 97, 82, 71, 100, 118, 117, 119, 89, 118, + 107, 117, 117, 111, 117, 111, 4, 67, 79, 99, + 69, 90, 86, 
117, 73, 95, 88, 120, 9, 80, 73, + 118, 90, 83, 76, 69, 2, 66, 80, 4, 1, 0, 6, + 65, 64, 2, 77, 13, 9, 25, 19, 22, 18, 17, 18, + 37, 12, 11, 36, 25, 66, 103, 95, 96, 86, 90, + 84, 79, 82, 80, 82, 88, 70, 72, 72, 64, 99, + 95, 95, 73, 72, 71, 70, 13, 64, 1, 65, 20, 67, + 4, 4, 1, 75, 56, 50, 36, 41, 50, 40, 23, 42, + 33, 24, 29, 17, 5, 1, 0, 22, 15, 20, 18, 15, + 13, 19, 11, 20, 16, 5, 7, 12, 89, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 51, + 34, 60, 52, 33, 62, 57, 60, 55, 49, 47, 50, + 37, 29, 11, 18, 66, 71, 97, 70, 23, 15, 10, 2, + 5, 0, 71, 73, 88, 65, 38, 25, 20, 17, 4, 65, + 74, 75, 82, 88, 13, 7, 3, 0, 69, 73, 77, 85, + 104, 93, 77, 75, 71, 72, 81, 91, 93, 111, 75, + 32, 17, 11, 2, 66, 75, 82, 85, 92, 62, 91, 82, + 76, 66, 70, 1, 4, 5, 73, 2, 7, 0, 78, 71, 68, + 16, 4, 23, 19, 31, 31, 19, 21, 26, 20, 15, 65, + 12, 66, 100, 93, 91, 85, 82, 82, 82, 83, 67, + 76, 75, 74, 2, 75, 82, 96, 90, 95, 76, 72, 70, + 71, 68, 65, 68, 1, 0, 67, 70, 5, 65, 73, 53, + 43, 45, 46, 45, 48, 44, 43, 37, 36, 39, 33, + 15, 18, 11, 39, 36, 34, 27, 28, 23, 9, 3, 65, + 76, 80, 93, 71, 105, 62, 62, 62, 62, 62, 62, + 62, 62, 60, 58, 53, 50, 40, 21, 1, 57, 55, 27, + 61, 62, 56, 50, 53, 48, 42, 48, 41, 40, 25, + 27, 15, 71, 33, 23, 6, 114, 105, 108, 81, 87, + 84, 1, 76, 75, 68, 65, 15, 3, 21, 70, 62, 62, + 62, 62, 47, 43, 16, 64, 87, 74, 41, 29, 20, 9, + 9, 2, 64, 68, 89, 101, 92, 85, 86, 82, 69, 76, + 81, 69, 66, 65, 64, 2, 7, 4, 62, 62, 62, 56, + 51, 45, 33, 18, 71 }, + + { + + 62, + 9, 75, 62, 9, 75, 116, 95, 13, 10, 10, 22, 53, + 56, 54, 14, 104, 5, 73, 3, 1, 10, 71, 7, 70, + 17, 53, 76, 96, 107, 109, 96, 67, 73, 3, 1, + 83, 88, 5, 15, 67, 78, 91, 6, 69, 84, 88, 5, + 74, 90, 8, 70, 79, 88, 64, 72, 72, 78, 69, 5, + 22, 0, 0, 0, 79, 87, 97, 69, 18, 0, 52, 8, 69, + 120, 97, 82, 71, 99, 116, 115, 118, 88, 116, + 106, 115, 116, 110, 116, 110, 5, 67, 78, 99, + 68, 90, 86, 115, 73, 94, 88, 119, 9, 80, 73, + 116, 90, 82, 75, 69, 2, 66, 79, 4, 1, 64, 6, + 65, 64, 2, 77, 13, 9, 25, 
19, 21, 18, 17, 18, + 35, 12, 11, 34, 24, 67, 103, 94, 96, 86, 89, + 84, 78, 82, 80, 82, 86, 70, 72, 72, 65, 99, + 94, 95, 73, 72, 70, 69, 12, 64, 1, 65, 19, 66, + 3, 3, 1, 76, 54, 49, 35, 41, 49, 40, 23, 41, + 32, 23, 28, 17, 5, 1, 0, 21, 14, 19, 17, 15, + 12, 18, 11, 18, 15, 5, 6, 11, 89, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 54, 48, + 31, 58, 50, 32, 62, 54, 57, 52, 47, 44, 48, + 34, 27, 10, 16, 67, 72, 97, 69, 24, 15, 11, 2, + 5, 0, 71, 73, 87, 65, 38, 26, 20, 17, 5, 65, + 74, 74, 82, 87, 14, 7, 4, 0, 68, 73, 77, 84, + 103, 92, 76, 74, 70, 72, 81, 91, 92, 109, 75, + 32, 17, 11, 3, 66, 75, 81, 85, 91, 62, 90, 81, + 76, 66, 70, 1, 4, 5, 73, 3, 7, 1, 78, 71, 69, + 16, 4, 22, 18, 30, 30, 19, 20, 25, 20, 15, 65, + 11, 67, 99, 92, 90, 85, 82, 82, 81, 83, 67, + 75, 74, 73, 2, 75, 82, 96, 89, 95, 76, 72, 70, + 70, 68, 65, 67, 0, 0, 67, 70, 4, 65, 74, 52, + 42, 45, 45, 44, 48, 44, 42, 36, 36, 38, 32, + 14, 17, 10, 37, 35, 32, 25, 26, 21, 8, 3, 65, + 76, 80, 92, 72, 104, 62, 62, 62, 62, 62, 62, + 62, 62, 58, 55, 51, 47, 38, 20, 1, 56, 54, 25, + 59, 62, 54, 48, 51, 46, 40, 45, 39, 38, 23, + 25, 14, 73, 31, 21, 4, 113, 104, 107, 80, 86, + 83, 2, 75, 74, 68, 64, 16, 4, 22, 69, 62, 62, + 62, 59, 44, 41, 13, 66, 89, 73, 41, 29, 20, 9, + 9, 2, 64, 68, 88, 100, 92, 84, 85, 81, 69, 75, + 80, 69, 66, 65, 64, 3, 7, 4, 62, 62, 61, 54, + 50, 44, 30, 17, 72 }, + + { + + 62, + 9, 75, 62, 9, 75, 114, 93, 13, 10, 9, 20, 51, + 54, 54, 14, 101, 4, 72, 3, 1, 9, 71, 6, 71, + 17, 51, 78, 97, 107, 106, 95, 67, 72, 3, 1, + 83, 87, 5, 15, 67, 78, 91, 6, 70, 83, 88, 5, + 74, 89, 7, 70, 79, 88, 0, 71, 72, 77, 68, 5, + 22, 0, 0, 0, 79, 87, 97, 68, 17, 0, 52, 8, 69, + 119, 96, 82, 70, 97, 115, 113, 116, 87, 115, + 104, 113, 115, 109, 115, 110, 6, 67, 77, 98, + 68, 89, 85, 113, 73, 94, 87, 118, 9, 79, 72, + 115, 89, 82, 75, 68, 2, 66, 78, 5, 2, 64, 5, + 65, 64, 2, 77, 13, 8, 24, 19, 21, 18, 17, 17, + 33, 12, 10, 32, 23, 68, 102, 93, 95, 85, 88, + 83, 77, 81, 79, 81, 85, 70, 72, 71, 66, 
98, + 93, 95, 73, 72, 70, 69, 11, 0, 1, 65, 17, 65, + 3, 2, 1, 77, 53, 48, 35, 40, 48, 39, 22, 40, + 32, 22, 27, 17, 5, 1, 64, 20, 14, 18, 17, 14, + 11, 17, 10, 16, 14, 4, 5, 10, 89, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 60, 61, 52, 46, + 29, 56, 49, 30, 62, 52, 55, 50, 44, 42, 45, + 32, 24, 9, 15, 69, 73, 97, 69, 24, 16, 11, 2, + 5, 0, 71, 73, 87, 64, 39, 26, 20, 17, 6, 64, + 73, 74, 81, 86, 15, 8, 4, 0, 67, 72, 76, 84, + 102, 92, 75, 74, 70, 72, 80, 90, 92, 108, 74, + 33, 17, 11, 3, 65, 74, 80, 84, 90, 62, 90, 81, + 75, 66, 70, 1, 4, 5, 72, 3, 7, 1, 78, 71, 69, + 15, 5, 21, 17, 29, 29, 19, 19, 24, 19, 15, 66, + 10, 67, 98, 92, 89, 84, 81, 81, 81, 82, 67, + 75, 74, 73, 1, 75, 82, 95, 88, 95, 76, 71, 70, + 70, 68, 65, 66, 0, 0, 67, 69, 4, 66, 75, 51, + 42, 44, 44, 43, 47, 43, 41, 35, 35, 37, 31, + 13, 16, 9, 36, 33, 31, 23, 25, 20, 7, 2, 66, + 76, 80, 92, 72, 103, 62, 62, 62, 62, 62, 62, + 62, 61, 56, 53, 49, 45, 36, 18, 0, 54, 52, 24, + 57, 62, 52, 46, 49, 44, 38, 43, 37, 35, 21, + 23, 12, 74, 29, 19, 3, 112, 103, 106, 80, 85, + 82, 4, 74, 73, 68, 0, 18, 6, 24, 69, 62, 62, + 61, 56, 41, 38, 10, 68, 90, 73, 41, 29, 20, 9, + 9, 2, 64, 68, 88, 99, 91, 83, 84, 80, 68, 75, + 79, 68, 65, 64, 0, 3, 8, 4, 62, 62, 59, 52, + 48, 42, 28, 15, 73 }, + + { + + 62, + 8, 75, 62, 8, 75, 113, 92, 13, 10, 9, 19, 50, + 53, 54, 14, 99, 4, 71, 4, 1, 8, 71, 5, 73, 16, + 49, 80, 98, 108, 104, 94, 67, 71, 4, 1, 83, + 86, 5, 14, 67, 78, 90, 5, 70, 83, 89, 5, 74, + 89, 7, 71, 79, 88, 0, 71, 72, 77, 68, 5, 22, + 0, 0, 0, 78, 88, 97, 68, 16, 0, 52, 8, 69, + 118, 95, 82, 70, 96, 113, 111, 115, 86, 113, + 103, 112, 114, 109, 114, 109, 6, 67, 76, 97, + 68, 89, 85, 112, 73, 94, 87, 117, 9, 79, 72, + 114, 89, 81, 75, 68, 2, 66, 78, 5, 2, 65, 5, + 65, 64, 2, 77, 13, 8, 23, 19, 20, 18, 17, 17, + 31, 12, 10, 30, 22, 69, 101, 92, 94, 84, 88, + 83, 76, 81, 79, 80, 84, 70, 72, 71, 68, 98, + 92, 95, 73, 73, 69, 69, 10, 1, 1, 65, 16, 64, + 2, 1, 1, 78, 51, 47, 34, 39, 47, 38, 21, 39, + 31, 20, 26, 16, 
5, 1, 64, 19, 13, 17, 16, 13, + 10, 16, 9, 14, 12, 3, 4, 9, 89, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 61, 58, 58, 49, 43, + 26, 54, 47, 28, 61, 50, 52, 47, 42, 39, 42, + 30, 22, 8, 13, 70, 74, 98, 69, 24, 16, 11, 2, + 5, 0, 71, 73, 86, 64, 39, 26, 20, 17, 7, 64, + 73, 74, 81, 86, 16, 8, 4, 0, 67, 72, 76, 83, + 101, 91, 74, 73, 70, 72, 79, 89, 91, 107, 74, + 33, 17, 11, 3, 64, 74, 80, 83, 90, 62, 90, 81, + 75, 66, 70, 1, 4, 5, 72, 3, 7, 2, 78, 71, 69, + 14, 5, 20, 16, 28, 28, 19, 17, 22, 19, 15, 67, + 9, 68, 98, 91, 88, 84, 81, 81, 80, 81, 68, 75, + 74, 73, 0, 75, 82, 95, 88, 96, 76, 71, 70, 69, + 68, 65, 66, 0, 64, 67, 68, 3, 66, 76, 50, 41, + 44, 43, 41, 46, 42, 40, 34, 34, 36, 30, 12, + 15, 8, 34, 32, 29, 21, 23, 19, 6, 1, 66, 76, + 80, 92, 73, 103, 62, 62, 62, 62, 62, 62, 61, + 58, 54, 51, 47, 42, 34, 17, 64, 52, 50, 22, + 55, 61, 49, 43, 46, 41, 36, 41, 34, 33, 19, + 20, 10, 75, 27, 17, 1, 111, 102, 105, 79, 84, + 82, 5, 73, 73, 68, 0, 19, 7, 25, 68, 62, 62, + 58, 53, 38, 35, 7, 70, 91, 73, 41, 29, 20, 9, + 9, 2, 64, 68, 87, 99, 90, 82, 84, 79, 68, 74, + 79, 68, 65, 64, 0, 4, 8, 3, 62, 62, 57, 50, + 46, 40, 26, 13, 74 }, + + { + + 62, + 8, 75, 62, 8, 75, 111, 91, 14, 10, 9, 18, 49, + 52, 54, 14, 97, 4, 70, 5, 1, 8, 70, 4, 74, 15, + 47, 81, 99, 109, 101, 92, 67, 70, 5, 1, 82, + 85, 6, 13, 67, 77, 89, 5, 70, 83, 89, 5, 74, + 88, 7, 71, 79, 88, 0, 71, 71, 77, 68, 5, 22, + 0, 0, 0, 77, 88, 97, 68, 15, 0, 52, 8, 69, + 117, 94, 82, 70, 95, 111, 109, 113, 84, 111, + 102, 110, 113, 108, 113, 108, 7, 66, 75, 96, + 68, 88, 84, 110, 73, 93, 87, 115, 10, 79, 72, + 112, 89, 80, 75, 68, 3, 65, 77, 5, 2, 65, 5, + 64, 64, 2, 76, 13, 8, 23, 19, 19, 18, 17, 17, + 29, 12, 10, 29, 21, 69, 100, 91, 93, 83, 87, + 82, 75, 80, 79, 79, 83, 70, 72, 71, 69, 97, + 91, 95, 72, 73, 68, 69, 9, 2, 1, 65, 15, 0, 1, + 0, 1, 78, 50, 46, 34, 38, 46, 37, 21, 39, 31, + 19, 25, 16, 5, 1, 64, 19, 12, 16, 15, 12, 9, + 16, 9, 13, 11, 3, 3, 8, 89, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 59, 56, 56, 46, 
41, 23, + 53, 45, 27, 59, 48, 50, 45, 40, 37, 40, 28, + 20, 8, 11, 71, 74, 98, 69, 25, 16, 11, 3, 5, + 0, 70, 73, 85, 64, 39, 26, 21, 17, 8, 0, 72, + 73, 80, 85, 17, 9, 5, 1, 66, 71, 76, 82, 100, + 90, 72, 72, 69, 71, 78, 88, 90, 106, 73, 33, + 18, 12, 3, 0, 73, 79, 82, 89, 62, 89, 80, 74, + 66, 70, 1, 5, 6, 72, 3, 8, 3, 78, 71, 69, 14, + 5, 19, 16, 27, 28, 19, 16, 21, 19, 15, 67, 8, + 69, 97, 90, 87, 84, 80, 81, 79, 80, 68, 75, + 74, 72, 0, 75, 82, 95, 87, 96, 76, 71, 70, 68, + 68, 65, 65, 0, 64, 67, 67, 2, 66, 76, 49, 41, + 44, 43, 40, 45, 41, 39, 34, 33, 35, 30, 12, + 14, 7, 33, 31, 27, 19, 22, 18, 6, 1, 66, 75, + 79, 92, 74, 102, 62, 62, 62, 62, 62, 62, 59, + 56, 52, 49, 45, 40, 32, 16, 65, 50, 49, 21, + 53, 59, 47, 41, 44, 39, 34, 39, 32, 31, 18, + 18, 8, 76, 25, 15, 64, 110, 100, 103, 78, 83, + 81, 7, 72, 72, 68, 1, 21, 8, 27, 67, 62, 62, + 56, 50, 36, 32, 5, 72, 92, 73, 41, 29, 20, 9, + 10, 2, 64, 68, 86, 98, 89, 81, 83, 77, 67, 73, + 78, 67, 64, 0, 0, 5, 8, 3, 62, 61, 56, 49, 44, + 38, 24, 11, 74 }, + + { + + 62, + 8, 75, 62, 8, 75, 109, 89, 14, 10, 8, 16, 47, + 50, 54, 14, 94, 3, 69, 5, 1, 7, 70, 3, 75, 15, + 45, 83, 100, 109, 98, 91, 67, 69, 5, 1, 82, + 84, 6, 13, 67, 77, 89, 5, 71, 82, 89, 5, 74, + 87, 6, 71, 79, 88, 1, 70, 71, 76, 67, 5, 22, + 0, 0, 0, 77, 88, 97, 67, 14, 0, 52, 8, 69, + 116, 93, 82, 69, 93, 110, 107, 112, 83, 110, + 100, 108, 112, 107, 112, 108, 8, 66, 74, 95, + 68, 88, 83, 108, 73, 93, 86, 114, 10, 78, 71, + 111, 88, 80, 75, 67, 3, 65, 76, 6, 3, 66, 4, + 64, 64, 2, 76, 13, 7, 22, 19, 19, 18, 17, 16, + 27, 12, 9, 27, 20, 70, 99, 90, 92, 82, 86, 81, + 74, 79, 78, 78, 82, 70, 72, 70, 70, 97, 90, + 95, 72, 73, 68, 69, 8, 3, 1, 65, 13, 1, 1, 64, + 1, 79, 48, 45, 33, 37, 45, 36, 20, 38, 31, 18, + 24, 16, 5, 1, 65, 18, 12, 15, 15, 11, 8, 15, + 8, 11, 10, 2, 2, 7, 89, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 57, 54, 53, 44, 39, 21, 51, + 44, 25, 56, 46, 48, 43, 37, 35, 37, 26, 17, 7, + 10, 73, 75, 98, 69, 25, 17, 11, 3, 5, 0, 70, + 73, 85, 0, 
40, 26, 21, 17, 9, 0, 71, 73, 79, + 84, 18, 10, 5, 1, 65, 71, 75, 82, 99, 90, 71, + 72, 69, 71, 77, 87, 90, 105, 73, 34, 18, 12, + 3, 1, 72, 78, 81, 88, 62, 89, 80, 74, 66, 70, + 1, 5, 6, 71, 3, 8, 3, 78, 71, 69, 13, 6, 18, + 15, 26, 27, 19, 15, 20, 18, 15, 68, 7, 69, 96, + 90, 86, 83, 80, 80, 79, 79, 68, 75, 74, 72, + 64, 75, 82, 94, 86, 96, 76, 70, 70, 68, 68, + 65, 64, 0, 64, 67, 66, 2, 67, 77, 48, 41, 43, + 42, 39, 44, 40, 38, 33, 32, 34, 29, 11, 13, 6, + 31, 29, 26, 17, 21, 17, 5, 0, 67, 75, 79, 92, + 74, 101, 62, 62, 62, 62, 62, 60, 57, 53, 50, + 47, 43, 38, 30, 14, 66, 48, 47, 20, 51, 57, + 45, 39, 42, 37, 32, 37, 30, 28, 16, 16, 6, 77, + 23, 13, 65, 109, 99, 102, 78, 82, 80, 9, 71, + 71, 68, 2, 22, 10, 28, 67, 62, 60, 53, 47, 33, + 29, 2, 74, 93, 73, 41, 29, 20, 9, 10, 2, 64, + 68, 86, 97, 88, 80, 82, 76, 66, 73, 77, 66, 0, + 1, 1, 5, 9, 3, 60, 59, 54, 47, 42, 36, 22, 9, + 75 }, + + { + + 62, + 8, 76, 62, 8, 76, 107, 88, 15, 10, 8, 15, 46, + 49, 54, 14, 92, 3, 69, 6, 1, 6, 70, 2, 76, 14, + 44, 84, 101, 110, 95, 90, 67, 69, 6, 1, 81, + 83, 7, 12, 66, 76, 88, 4, 71, 82, 89, 5, 73, + 87, 6, 71, 78, 88, 1, 70, 71, 76, 67, 5, 22, + 0, 0, 0, 76, 88, 97, 67, 14, 64, 52, 8, 69, + 115, 93, 82, 69, 92, 108, 105, 110, 82, 108, + 99, 106, 111, 106, 111, 107, 9, 66, 73, 95, + 67, 87, 83, 106, 73, 92, 86, 113, 10, 78, 71, + 109, 88, 79, 74, 67, 3, 65, 75, 6, 3, 66, 4, + 64, 64, 2, 76, 13, 7, 22, 19, 18, 18, 17, 16, + 25, 12, 9, 25, 19, 71, 99, 89, 92, 82, 85, 81, + 73, 79, 78, 78, 80, 70, 72, 70, 71, 96, 89, + 95, 72, 73, 67, 68, 7, 3, 1, 65, 12, 2, 0, 65, + 1, 80, 47, 44, 33, 37, 44, 36, 20, 37, 30, 17, + 23, 16, 5, 1, 65, 17, 11, 14, 14, 11, 7, 14, + 8, 9, 9, 2, 1, 6, 89, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 54, 52, 51, 41, 36, 18, 49, 42, + 24, 54, 43, 45, 40, 35, 32, 35, 23, 15, 6, 8, + 74, 76, 98, 68, 26, 17, 12, 3, 5, 0, 70, 73, + 84, 0, 40, 27, 21, 17, 10, 1, 71, 72, 79, 83, + 19, 10, 6, 1, 64, 70, 75, 81, 98, 89, 70, 71, + 68, 71, 77, 87, 89, 103, 72, 34, 18, 
12, 4, 1, + 72, 77, 81, 87, 62, 88, 79, 73, 66, 70, 1, 5, + 6, 71, 4, 8, 4, 78, 71, 70, 13, 6, 17, 14, 25, + 26, 19, 14, 19, 18, 15, 68, 6, 70, 95, 89, 85, + 83, 79, 80, 78, 79, 68, 74, 73, 71, 64, 75, + 82, 94, 85, 96, 76, 70, 70, 67, 68, 65, 0, 64, + 64, 67, 66, 1, 67, 78, 47, 40, 43, 41, 38, 44, + 40, 37, 32, 32, 33, 28, 10, 12, 5, 30, 28, 24, + 15, 19, 15, 4, 0, 67, 75, 79, 91, 75, 100, 62, + 62, 62, 62, 62, 58, 55, 51, 48, 44, 41, 35, + 28, 13, 66, 47, 46, 18, 49, 54, 43, 37, 40, + 35, 30, 34, 28, 26, 14, 14, 5, 79, 21, 11, 67, + 108, 98, 101, 77, 81, 79, 10, 70, 70, 68, 3, + 24, 11, 30, 66, 61, 59, 51, 44, 30, 27, 64, + 76, 95, 72, 41, 29, 20, 9, 10, 2, 64, 68, 85, + 96, 88, 79, 81, 75, 66, 72, 76, 66, 0, 1, 1, + 6, 9, 3, 59, 58, 52, 45, 41, 35, 19, 8, 76 }, + + { + + 62, + 8, 76, 62, 8, 76, 106, 86, 15, 10, 7, 13, 44, + 48, 54, 14, 90, 3, 68, 7, 1, 5, 70, 1, 77, 14, + 42, 86, 102, 110, 92, 89, 67, 68, 7, 1, 81, + 82, 7, 12, 66, 76, 87, 4, 72, 82, 89, 5, 73, + 86, 6, 72, 78, 88, 2, 70, 71, 76, 66, 5, 22, + 0, 0, 0, 76, 89, 97, 66, 13, 64, 52, 8, 69, + 114, 92, 82, 68, 91, 106, 103, 109, 81, 106, + 98, 104, 110, 106, 110, 106, 9, 66, 72, 94, + 67, 87, 82, 104, 73, 92, 85, 112, 10, 78, 70, + 108, 87, 78, 74, 67, 3, 65, 75, 7, 4, 67, 4, + 64, 64, 2, 76, 13, 6, 21, 19, 17, 18, 17, 16, + 23, 12, 8, 23, 18, 72, 98, 88, 91, 81, 85, 80, + 72, 78, 77, 77, 79, 70, 72, 70, 72, 96, 88, + 95, 72, 73, 66, 68, 6, 4, 1, 65, 11, 3, 0, 66, + 1, 81, 45, 43, 32, 36, 43, 35, 19, 36, 30, 15, + 22, 15, 5, 1, 66, 16, 10, 13, 13, 10, 6, 13, + 7, 7, 8, 1, 0, 5, 89, 62, 62, 61, 62, 62, 62, + 62, 62, 61, 52, 50, 48, 39, 34, 16, 47, 40, + 22, 52, 41, 43, 38, 33, 30, 32, 21, 12, 5, 6, + 75, 77, 98, 68, 26, 17, 12, 3, 5, 0, 70, 73, + 84, 0, 40, 27, 21, 17, 11, 1, 70, 72, 78, 83, + 20, 11, 6, 1, 64, 70, 74, 81, 97, 88, 69, 70, + 68, 71, 76, 86, 88, 102, 72, 34, 18, 12, 4, 2, + 71, 77, 80, 86, 62, 88, 79, 73, 66, 70, 1, 5, + 6, 71, 4, 8, 4, 78, 71, 70, 12, 7, 16, 13, 24, + 25, 19, 12, 18, 
18, 15, 69, 5, 70, 95, 88, 84, + 82, 79, 79, 78, 78, 69, 74, 73, 71, 65, 75, + 82, 93, 84, 96, 76, 70, 70, 66, 68, 65, 1, 64, + 65, 67, 65, 0, 67, 79, 46, 40, 42, 40, 37, 43, + 39, 36, 31, 31, 32, 27, 9, 11, 4, 28, 27, 23, + 13, 18, 14, 3, 64, 68, 75, 79, 91, 75, 100, + 62, 62, 62, 62, 62, 56, 53, 48, 46, 42, 39, + 33, 26, 11, 67, 45, 44, 17, 47, 52, 41, 35, + 37, 33, 28, 32, 26, 23, 12, 12, 3, 80, 19, 9, + 68, 107, 97, 100, 76, 80, 78, 12, 69, 69, 68, + 4, 25, 13, 31, 65, 59, 57, 48, 41, 27, 24, 67, + 78, 96, 72, 41, 29, 20, 9, 10, 2, 64, 68, 85, + 95, 87, 78, 81, 74, 65, 72, 75, 65, 1, 2, 1, + 6, 9, 3, 58, 56, 50, 43, 39, 33, 17, 6, 77 }, + + { + + 62, + 8, 76, 62, 8, 76, 104, 85, 15, 10, 7, 12, 43, + 46, 54, 14, 87, 2, 67, 7, 1, 5, 69, 0, 78, 13, + 40, 88, 103, 111, 89, 87, 67, 67, 7, 1, 81, + 81, 7, 11, 66, 76, 87, 4, 72, 81, 89, 5, 73, + 85, 5, 72, 78, 88, 2, 69, 70, 75, 66, 5, 22, + 0, 0, 0, 75, 89, 97, 66, 12, 64, 52, 8, 69, + 113, 91, 82, 68, 89, 105, 101, 107, 80, 105, + 96, 102, 109, 105, 109, 106, 10, 66, 71, 93, + 67, 86, 81, 102, 73, 92, 85, 110, 11, 77, 70, + 107, 87, 78, 74, 66, 4, 64, 74, 7, 4, 67, 3, + 64, 64, 2, 76, 13, 6, 20, 19, 17, 18, 17, 15, + 21, 12, 8, 22, 17, 73, 97, 87, 90, 80, 84, 79, + 71, 77, 77, 76, 78, 70, 72, 69, 73, 95, 87, + 95, 71, 73, 66, 68, 5, 5, 1, 65, 9, 4, 64, 67, + 1, 81, 44, 42, 32, 35, 42, 34, 18, 36, 30, 14, + 21, 15, 5, 1, 66, 16, 10, 12, 13, 9, 5, 13, 6, + 5, 7, 0, 64, 4, 89, 61, 62, 59, 62, 61, 60, + 60, 60, 59, 50, 48, 46, 36, 32, 13, 45, 39, + 20, 49, 39, 41, 36, 30, 28, 29, 19, 10, 4, 5, + 77, 77, 98, 68, 26, 18, 12, 3, 5, 0, 70, 73, + 83, 1, 41, 27, 21, 17, 12, 2, 69, 72, 77, 82, + 21, 12, 6, 2, 0, 69, 74, 80, 96, 88, 67, 70, + 68, 70, 75, 85, 88, 101, 71, 35, 19, 12, 4, 3, + 70, 76, 79, 85, 62, 88, 79, 72, 66, 70, 1, 5, + 6, 70, 4, 9, 5, 78, 71, 70, 11, 7, 15, 12, 23, + 25, 19, 11, 17, 17, 15, 70, 4, 71, 94, 88, 83, + 82, 78, 79, 77, 77, 69, 74, 73, 71, 66, 75, + 82, 93, 83, 96, 76, 69, 70, 66, 68, 65, 2, 64, + 
65, 67, 64, 0, 68, 79, 45, 40, 42, 40, 36, 42, + 38, 35, 31, 30, 31, 26, 8, 10, 3, 27, 25, 21, + 11, 17, 13, 2, 65, 68, 75, 78, 91, 76, 99, 62, + 62, 62, 62, 60, 54, 51, 46, 44, 40, 37, 31, + 24, 10, 68, 43, 42, 16, 45, 50, 39, 33, 35, + 31, 26, 30, 24, 21, 10, 10, 1, 81, 17, 7, 70, + 106, 95, 99, 76, 79, 77, 14, 68, 68, 68, 5, + 27, 14, 33, 65, 58, 55, 46, 38, 25, 21, 69, + 80, 97, 72, 41, 29, 20, 9, 10, 2, 64, 68, 84, + 94, 86, 77, 80, 73, 64, 71, 74, 64, 2, 3, 2, + 7, 10, 3, 56, 55, 49, 42, 37, 31, 15, 4, 78 }, + + { + + 61, + 8, 76, 61, 8, 76, 102, 83, 16, 10, 6, 10, 41, + 45, 54, 14, 85, 2, 66, 8, 1, 4, 69, 64, 79, + 13, 38, 89, 104, 111, 86, 86, 67, 66, 8, 1, + 80, 80, 8, 11, 66, 75, 86, 3, 73, 81, 89, 5, + 73, 85, 5, 72, 78, 88, 3, 69, 70, 75, 65, 5, + 22, 0, 0, 0, 75, 89, 97, 65, 11, 64, 52, 8, + 69, 112, 90, 82, 67, 88, 103, 99, 106, 79, + 103, 95, 100, 108, 104, 108, 105, 11, 66, 70, + 92, 67, 86, 81, 100, 73, 91, 84, 109, 11, 77, + 69, 105, 86, 77, 74, 66, 4, 64, 73, 8, 5, 68, + 3, 64, 64, 2, 76, 13, 5, 20, 19, 16, 18, 17, + 15, 19, 12, 7, 20, 16, 74, 96, 86, 89, 79, 83, + 79, 70, 77, 76, 75, 77, 70, 72, 69, 74, 95, + 86, 95, 71, 73, 65, 68, 4, 6, 1, 65, 8, 5, 64, + 68, 1, 82, 42, 41, 31, 34, 41, 33, 18, 35, 29, + 13, 20, 15, 5, 1, 67, 15, 9, 11, 12, 8, 4, 12, + 6, 3, 6, 0, 65, 3, 89, 60, 61, 58, 62, 59, 58, + 58, 58, 56, 47, 46, 43, 34, 29, 11, 43, 37, + 19, 47, 37, 38, 33, 28, 25, 27, 17, 7, 3, 3, + 78, 78, 98, 68, 27, 18, 12, 3, 5, 0, 70, 73, + 83, 1, 41, 27, 21, 17, 13, 2, 69, 71, 77, 81, + 22, 12, 7, 2, 1, 69, 73, 80, 95, 87, 66, 69, + 67, 70, 74, 84, 87, 100, 71, 35, 19, 12, 4, 4, + 70, 75, 78, 84, 62, 87, 78, 72, 66, 70, 1, 5, + 6, 70, 4, 9, 5, 78, 71, 70, 11, 8, 14, 11, 22, + 24, 19, 10, 16, 17, 15, 70, 3, 71, 93, 87, 82, + 81, 78, 78, 77, 76, 69, 74, 73, 70, 66, 75, + 82, 92, 82, 96, 76, 69, 70, 65, 68, 65, 3, 64, + 65, 67, 0, 64, 68, 80, 44, 39, 41, 39, 35, 41, + 37, 34, 30, 29, 30, 25, 7, 9, 2, 25, 24, 20, + 9, 15, 12, 1, 65, 69, 75, 78, 91, 
76, 98, 62, + 62, 61, 61, 57, 52, 49, 43, 42, 38, 35, 28, + 22, 8, 69, 41, 41, 14, 43, 48, 37, 31, 33, 29, + 24, 28, 22, 18, 8, 8, 64, 82, 15, 5, 71, 105, + 94, 98, 75, 78, 76, 15, 67, 67, 68, 6, 28, 16, + 34, 64, 56, 54, 43, 35, 22, 18, 72, 82, 98, + 72, 41, 29, 20, 9, 10, 2, 64, 68, 84, 93, 85, + 76, 79, 72, 64, 71, 73, 64, 2, 3, 2, 7, 10, 3, + 55, 53, 47, 40, 35, 29, 13, 2, 79 }, + + { + + 60, + 8, 76, 60, 8, 76, 100, 82, 16, 10, 6, 9, 40, + 44, 54, 14, 83, 2, 65, 9, 1, 3, 69, 65, 80, + 12, 36, 91, 105, 112, 83, 85, 67, 65, 9, 1, + 80, 79, 8, 10, 66, 75, 85, 3, 73, 81, 89, 5, + 73, 84, 5, 72, 78, 88, 3, 69, 70, 75, 65, 5, + 22, 0, 0, 0, 74, 89, 97, 65, 10, 64, 52, 8, + 69, 111, 89, 82, 67, 87, 101, 97, 104, 78, + 101, 94, 98, 107, 103, 107, 104, 12, 66, 69, + 91, 67, 85, 80, 98, 73, 91, 84, 108, 11, 77, + 69, 104, 86, 76, 74, 66, 4, 64, 72, 8, 5, 68, + 3, 64, 64, 2, 76, 13, 5, 19, 19, 15, 18, 17, + 15, 17, 12, 7, 18, 15, 75, 95, 85, 88, 78, 82, + 78, 69, 76, 76, 74, 76, 70, 72, 69, 75, 94, + 85, 95, 71, 73, 64, 68, 3, 7, 1, 65, 7, 6, 65, + 69, 1, 83, 41, 40, 31, 33, 40, 32, 17, 34, 29, + 12, 19, 15, 5, 1, 67, 14, 8, 10, 11, 7, 3, 11, + 5, 1, 5, 64, 66, 2, 89, 58, 60, 56, 60, 57, + 56, 56, 56, 54, 45, 44, 41, 31, 27, 8, 41, 35, + 17, 45, 35, 36, 31, 26, 23, 24, 15, 5, 2, 1, + 79, 79, 98, 68, 27, 18, 12, 3, 5, 0, 70, 73, + 82, 1, 41, 27, 21, 17, 14, 3, 68, 71, 76, 80, + 23, 13, 7, 2, 2, 68, 73, 79, 94, 86, 65, 68, + 67, 70, 73, 83, 86, 99, 70, 35, 19, 12, 4, 5, + 69, 74, 77, 83, 62, 87, 78, 71, 66, 70, 1, 5, + 6, 70, 4, 9, 6, 78, 71, 70, 10, 8, 13, 10, 21, + 23, 19, 9, 15, 17, 15, 71, 2, 72, 92, 86, 81, + 81, 77, 78, 76, 75, 69, 74, 73, 70, 67, 75, + 82, 92, 81, 96, 76, 69, 70, 64, 68, 65, 4, 64, + 65, 67, 1, 65, 68, 81, 43, 39, 41, 38, 34, 40, + 36, 33, 29, 28, 29, 24, 6, 8, 1, 24, 23, 18, + 7, 14, 11, 0, 66, 69, 75, 78, 91, 77, 97, 62, + 62, 59, 59, 54, 50, 47, 41, 40, 36, 33, 26, + 20, 7, 70, 39, 39, 13, 41, 46, 35, 29, 31, 27, + 22, 26, 20, 16, 6, 6, 66, 83, 
13, 3, 73, 104, + 93, 97, 74, 77, 75, 17, 66, 66, 68, 7, 30, 17, + 36, 0, 55, 52, 41, 32, 19, 15, 75, 84, 99, 72, + 41, 29, 20, 9, 10, 2, 64, 68, 83, 92, 84, 75, + 78, 71, 0, 70, 72, 0, 3, 4, 2, 8, 10, 3, 54, + 52, 45, 38, 33, 27, 11, 0, 80 }, + + { + + 58, + 7, 77, 58, 7, 77, 99, 81, 16, 10, 5, 7, 38, + 42, 53, 14, 81, 1, 65, 9, 0, 2, 69, 67, 82, + 11, 34, 93, 106, 113, 81, 84, 68, 65, 9, 0, + 80, 78, 8, 9, 66, 75, 85, 2, 74, 81, 90, 5, + 73, 84, 4, 73, 78, 88, 3, 69, 70, 75, 65, 4, + 22, 0, 0, 0, 74, 90, 97, 65, 9, 65, 52, 7, 69, + 110, 89, 82, 67, 86, 100, 96, 103, 77, 100, + 93, 97, 106, 103, 106, 104, 12, 66, 69, 91, + 67, 85, 80, 97, 73, 91, 84, 107, 11, 77, 69, + 103, 86, 76, 74, 66, 4, 64, 72, 8, 5, 69, 2, + 64, 65, 2, 76, 12, 4, 18, 19, 14, 17, 17, 14, + 15, 11, 6, 16, 14, 76, 95, 85, 88, 78, 82, 78, + 68, 76, 76, 74, 75, 71, 72, 69, 77, 94, 85, + 95, 71, 74, 64, 68, 2, 7, 1, 65, 5, 6, 66, 70, + 1, 84, 39, 39, 30, 32, 39, 31, 16, 33, 28, 10, + 18, 14, 4, 1, 68, 13, 7, 9, 10, 6, 2, 10, 4, + 64, 3, 65, 68, 0, 89, 56, 58, 54, 58, 55, 53, + 53, 53, 51, 42, 41, 38, 28, 24, 5, 39, 33, 15, + 42, 32, 33, 28, 23, 20, 21, 12, 2, 1, 64, 81, + 80, 99, 68, 27, 18, 12, 3, 5, 64, 70, 73, 82, + 1, 41, 27, 21, 17, 15, 3, 68, 71, 76, 80, 23, + 13, 7, 2, 2, 68, 73, 79, 93, 86, 64, 68, 67, + 70, 73, 83, 86, 98, 70, 35, 19, 12, 4, 5, 69, + 74, 77, 83, 62, 87, 78, 71, 66, 70, 1, 5, 6, + 70, 4, 9, 6, 78, 71, 71, 9, 8, 12, 9, 20, 22, + 18, 7, 13, 16, 14, 72, 0, 73, 92, 86, 80, 81, + 77, 78, 76, 75, 70, 74, 73, 70, 68, 75, 82, + 92, 81, 97, 76, 69, 70, 64, 69, 65, 4, 65, 66, + 67, 1, 66, 69, 82, 42, 38, 40, 37, 32, 39, 35, + 32, 28, 27, 28, 23, 5, 6, 64, 22, 21, 16, 5, + 12, 9, 64, 67, 70, 75, 78, 91, 78, 97, 62, 61, + 57, 56, 51, 47, 44, 38, 37, 33, 30, 23, 17, 5, + 71, 37, 37, 11, 39, 43, 32, 26, 28, 24, 20, + 23, 17, 13, 4, 3, 68, 85, 11, 1, 75, 103, 92, + 96, 74, 77, 75, 18, 66, 66, 68, 7, 31, 18, 37, + 0, 53, 50, 38, 28, 16, 12, 78, 87, 101, 72, + 41, 28, 19, 9, 10, 2, 65, 
68, 83, 92, 84, 75, + 78, 70, 0, 70, 72, 0, 3, 4, 2, 8, 10, 2, 52, + 50, 43, 36, 31, 25, 8, 65, 81 }, + + { + + 57, + 7, 77, 57, 7, 77, 97, 79, 17, 11, 5, 6, 37, + 41, 53, 14, 78, 1, 64, 10, 0, 2, 68, 68, 83, + 11, 33, 94, 107, 113, 78, 82, 68, 64, 10, 0, + 79, 76, 9, 9, 65, 74, 84, 2, 74, 80, 90, 5, + 72, 83, 4, 73, 77, 88, 4, 68, 69, 74, 64, 4, + 22, 0, 0, 0, 73, 90, 97, 64, 9, 65, 52, 7, 69, + 108, 88, 82, 66, 84, 98, 94, 101, 75, 98, 91, + 95, 104, 102, 105, 103, 13, 65, 68, 90, 66, + 84, 79, 95, 72, 90, 83, 105, 12, 76, 68, 101, + 85, 75, 73, 65, 5, 0, 71, 9, 6, 69, 2, 0, 65, + 2, 75, 12, 4, 18, 19, 14, 17, 17, 14, 14, 11, + 6, 15, 13, 76, 94, 84, 87, 77, 81, 77, 67, 75, + 75, 73, 73, 71, 72, 68, 78, 93, 84, 95, 70, + 74, 0, 67, 2, 8, 1, 65, 4, 7, 66, 71, 1, 84, + 38, 39, 30, 32, 39, 31, 16, 33, 28, 9, 18, 14, + 4, 1, 68, 13, 7, 9, 10, 6, 1, 10, 4, 65, 2, + 65, 69, 64, 89, 55, 57, 53, 57, 54, 51, 51, + 51, 49, 40, 39, 36, 26, 22, 3, 38, 32, 14, 40, + 30, 31, 26, 21, 18, 19, 10, 0, 1, 65, 82, 80, + 99, 67, 28, 19, 13, 4, 5, 64, 69, 72, 81, 2, + 42, 28, 22, 17, 16, 4, 67, 70, 75, 79, 24, 14, + 8, 3, 3, 67, 72, 78, 91, 85, 1, 67, 66, 69, + 72, 82, 85, 96, 69, 36, 20, 13, 5, 6, 68, 73, + 76, 82, 62, 86, 77, 70, 66, 69, 1, 6, 7, 69, + 5, 10, 7, 77, 71, 71, 9, 9, 12, 9, 19, 22, 18, + 6, 12, 16, 14, 72, 64, 73, 91, 85, 79, 80, 76, + 77, 75, 74, 70, 73, 72, 69, 68, 74, 81, 91, + 80, 97, 76, 68, 70, 0, 69, 65, 5, 65, 66, 66, + 2, 66, 69, 82, 42, 38, 40, 37, 31, 39, 35, 32, + 28, 27, 28, 23, 5, 5, 65, 21, 20, 15, 4, 11, + 8, 64, 67, 70, 74, 77, 90, 78, 96, 60, 59, 55, + 54, 49, 45, 42, 36, 35, 31, 28, 21, 15, 4, 71, + 36, 36, 10, 38, 41, 30, 24, 26, 22, 18, 21, + 15, 11, 3, 1, 69, 86, 10, 0, 76, 101, 90, 94, + 73, 76, 74, 20, 65, 65, 68, 8, 33, 20, 39, 1, + 52, 49, 36, 25, 14, 10, 80, 89, 102, 71, 42, + 28, 19, 9, 11, 2, 65, 68, 82, 91, 83, 74, 77, + 68, 1, 69, 71, 1, 4, 5, 3, 9, 11, 2, 51, 49, + 42, 35, 30, 24, 6, 66, 81 }, + + { + + 56, + 7, 77, 56, 7, 77, 95, 78, 17, 
11, 5, 5, 36, + 40, 53, 14, 76, 1, 0, 11, 0, 1, 68, 69, 84, + 10, 31, 96, 108, 114, 75, 81, 68, 0, 11, 0, + 79, 75, 9, 8, 65, 74, 83, 2, 74, 80, 90, 5, + 72, 82, 4, 73, 77, 88, 4, 68, 69, 74, 64, 4, + 22, 0, 0, 0, 72, 90, 97, 64, 8, 65, 52, 7, 69, + 107, 87, 82, 66, 83, 96, 92, 100, 74, 96, 90, + 93, 103, 101, 104, 102, 14, 65, 67, 89, 66, + 84, 78, 93, 72, 90, 83, 104, 12, 76, 68, 100, + 85, 74, 73, 65, 5, 0, 70, 9, 6, 70, 2, 0, 65, + 2, 75, 12, 4, 17, 19, 13, 17, 17, 14, 12, 11, + 6, 13, 12, 77, 93, 83, 86, 76, 80, 76, 66, 74, + 75, 72, 72, 71, 72, 68, 79, 93, 83, 95, 70, + 74, 1, 67, 1, 9, 1, 65, 3, 8, 67, 72, 1, 85, + 36, 38, 29, 31, 38, 30, 15, 32, 28, 8, 17, 14, + 4, 1, 68, 12, 6, 8, 9, 5, 0, 9, 3, 67, 1, 66, + 70, 65, 89, 53, 56, 51, 55, 52, 49, 49, 49, + 46, 38, 37, 33, 23, 20, 0, 36, 30, 12, 38, 28, + 29, 24, 19, 16, 16, 8, 65, 0, 67, 83, 81, 99, + 67, 28, 19, 13, 4, 5, 64, 69, 72, 80, 2, 42, + 28, 22, 17, 17, 4, 66, 70, 74, 78, 25, 15, 8, + 3, 4, 67, 72, 77, 90, 84, 2, 66, 66, 69, 71, + 81, 84, 95, 69, 36, 20, 13, 5, 7, 67, 72, 75, + 81, 62, 86, 77, 70, 66, 69, 1, 6, 7, 69, 5, + 10, 8, 77, 71, 71, 8, 9, 11, 8, 18, 21, 18, 5, + 11, 16, 14, 73, 65, 74, 90, 84, 78, 80, 76, + 77, 74, 73, 70, 73, 72, 69, 69, 74, 81, 91, + 79, 97, 76, 68, 70, 1, 69, 65, 6, 65, 66, 66, + 3, 67, 69, 83, 41, 38, 40, 36, 30, 38, 34, 31, + 27, 26, 27, 22, 4, 4, 66, 19, 19, 13, 2, 10, + 7, 65, 68, 70, 74, 77, 90, 79, 95, 58, 57, 53, + 52, 46, 43, 40, 33, 33, 29, 26, 19, 13, 3, 72, + 34, 34, 9, 36, 39, 28, 22, 24, 20, 16, 19, 13, + 9, 1, 64, 71, 87, 8, 65, 78, 100, 89, 93, 72, + 75, 73, 22, 64, 64, 68, 9, 34, 21, 40, 2, 51, + 47, 33, 22, 11, 7, 83, 91, 103, 71, 42, 28, + 19, 9, 11, 2, 65, 68, 81, 90, 82, 73, 76, 67, + 2, 68, 70, 2, 5, 6, 3, 10, 11, 2, 50, 47, 40, + 33, 28, 22, 4, 68, 82 }, + + { + + 55, + 7, 77, 55, 7, 77, 93, 76, 18, 11, 4, 3, 34, + 39, 53, 14, 74, 1, 1, 12, 0, 0, 68, 70, 85, + 10, 29, 97, 109, 114, 72, 80, 68, 1, 12, 0, + 78, 74, 10, 8, 65, 73, 82, 1, 75, 80, 90, 5, 
+ 72, 82, 4, 73, 77, 88, 5, 68, 69, 74, 0, 4, + 22, 0, 0, 0, 72, 90, 97, 0, 7, 65, 52, 7, 69, + 106, 86, 82, 65, 82, 94, 90, 98, 73, 94, 89, + 91, 102, 100, 103, 101, 15, 65, 66, 88, 66, + 83, 78, 91, 72, 89, 82, 103, 12, 76, 67, 98, + 84, 73, 73, 65, 5, 0, 69, 10, 7, 70, 2, 0, 65, + 2, 75, 12, 3, 17, 19, 12, 17, 17, 14, 10, 11, + 5, 11, 11, 78, 92, 82, 85, 75, 79, 76, 65, 74, + 74, 71, 71, 71, 72, 68, 80, 92, 82, 95, 70, + 74, 2, 67, 0, 10, 1, 65, 2, 9, 67, 73, 1, 86, + 35, 37, 29, 30, 37, 29, 15, 31, 27, 7, 16, 14, + 4, 1, 69, 11, 5, 7, 8, 4, 64, 8, 3, 69, 0, 66, + 71, 66, 89, 52, 54, 50, 53, 50, 47, 47, 47, + 44, 35, 35, 31, 21, 17, 65, 34, 28, 11, 36, + 26, 26, 21, 17, 13, 14, 6, 68, 64, 69, 84, 82, + 99, 67, 29, 19, 13, 4, 5, 64, 69, 72, 80, 2, + 42, 28, 22, 17, 18, 5, 66, 69, 74, 77, 26, 15, + 9, 3, 5, 66, 71, 77, 89, 83, 3, 65, 65, 69, + 70, 80, 83, 94, 68, 36, 20, 13, 5, 8, 67, 71, + 74, 80, 62, 85, 76, 69, 66, 69, 1, 6, 7, 69, + 5, 10, 8, 77, 71, 71, 8, 10, 10, 7, 17, 20, + 18, 4, 10, 16, 14, 73, 66, 74, 89, 83, 77, 79, + 75, 76, 74, 72, 70, 73, 72, 68, 69, 74, 81, + 90, 78, 97, 76, 68, 70, 2, 69, 65, 7, 65, 66, + 66, 4, 68, 69, 84, 40, 37, 39, 35, 29, 37, 33, + 30, 26, 25, 26, 21, 3, 3, 67, 18, 18, 12, 0, + 8, 6, 66, 68, 71, 74, 77, 90, 79, 94, 56, 55, + 51, 50, 43, 41, 38, 31, 31, 27, 24, 16, 11, 1, + 73, 32, 33, 7, 34, 37, 26, 20, 22, 18, 14, 17, + 11, 6, 64, 66, 73, 88, 6, 67, 79, 99, 88, 92, + 71, 74, 72, 23, 0, 0, 68, 10, 36, 23, 42, 3, + 49, 46, 31, 19, 8, 4, 86, 93, 104, 71, 42, 28, + 19, 9, 11, 2, 65, 68, 81, 89, 81, 72, 75, 66, + 2, 68, 69, 2, 5, 6, 3, 10, 11, 2, 49, 46, 38, + 31, 26, 20, 2, 70, 83 }, + + { + + 53, + 7, 77, 53, 7, 77, 92, 75, 18, 11, 4, 2, 33, + 37, 53, 14, 71, 0, 2, 12, 0, 64, 68, 71, 86, + 9, 27, 99, 110, 115, 69, 79, 68, 2, 12, 0, 78, + 73, 10, 7, 65, 73, 82, 1, 75, 79, 90, 5, 72, + 81, 3, 74, 77, 88, 5, 67, 69, 73, 0, 4, 22, 0, + 0, 0, 71, 91, 97, 0, 6, 65, 52, 7, 69, 105, + 85, 82, 65, 80, 93, 88, 97, 72, 93, 87, 89, + 101, 
100, 102, 101, 15, 65, 65, 87, 66, 83, + 77, 89, 72, 89, 82, 102, 12, 75, 67, 97, 84, + 73, 73, 64, 5, 0, 69, 10, 7, 71, 1, 0, 65, 2, + 75, 12, 3, 16, 19, 12, 17, 17, 13, 8, 11, 5, + 9, 10, 79, 91, 81, 84, 74, 79, 75, 64, 73, 74, + 70, 70, 71, 72, 67, 81, 92, 81, 95, 70, 74, 2, + 67, 64, 11, 1, 65, 0, 10, 68, 74, 1, 87, 33, + 36, 28, 29, 36, 28, 14, 30, 27, 5, 15, 13, 4, + 1, 69, 10, 5, 6, 8, 3, 65, 7, 2, 71, 64, 67, + 72, 67, 89, 50, 53, 48, 51, 48, 45, 44, 45, + 41, 33, 33, 28, 18, 15, 68, 32, 27, 9, 33, 24, + 24, 19, 14, 11, 11, 4, 70, 65, 70, 86, 83, 99, + 67, 29, 20, 13, 4, 5, 64, 69, 72, 79, 3, 43, + 28, 22, 17, 19, 5, 65, 69, 73, 77, 27, 16, 9, + 3, 5, 66, 71, 76, 88, 83, 4, 65, 65, 69, 69, + 79, 83, 93, 68, 37, 20, 13, 5, 9, 66, 71, 73, + 79, 62, 85, 76, 69, 66, 69, 1, 6, 7, 68, 5, + 10, 9, 77, 71, 71, 7, 10, 9, 6, 16, 19, 18, 2, + 9, 15, 14, 74, 67, 75, 89, 83, 76, 79, 75, 76, + 73, 71, 71, 73, 72, 68, 70, 74, 81, 90, 77, + 97, 76, 67, 70, 2, 69, 65, 8, 65, 67, 66, 5, + 68, 70, 85, 39, 37, 39, 34, 28, 36, 32, 29, + 25, 24, 25, 20, 2, 2, 68, 16, 16, 10, 65, 7, + 5, 67, 69, 71, 74, 77, 90, 80, 94, 53, 52, 49, + 47, 40, 39, 36, 28, 29, 25, 22, 14, 9, 0, 74, + 30, 31, 6, 32, 35, 24, 18, 19, 16, 12, 15, 9, + 4, 66, 68, 75, 89, 4, 69, 81, 98, 87, 91, 71, + 73, 71, 25, 1, 1, 68, 11, 37, 24, 43, 3, 48, + 44, 28, 16, 5, 1, 89, 95, 105, 71, 42, 28, 19, + 9, 11, 2, 65, 68, 80, 88, 80, 71, 75, 65, 3, + 67, 68, 3, 6, 7, 4, 11, 12, 2, 47, 44, 36, 29, + 24, 18, 0, 72, 84 }, + + { + + 52, + 7, 77, 52, 7, 77, 90, 73, 18, 11, 3, 0, 31, + 36, 53, 14, 69, 0, 3, 13, 0, 64, 67, 72, 87, + 9, 25, 101, 111, 115, 66, 77, 68, 3, 13, 0, + 78, 72, 10, 7, 65, 73, 81, 1, 76, 79, 90, 5, + 72, 80, 3, 74, 77, 88, 6, 67, 68, 73, 1, 4, + 22, 0, 0, 0, 71, 91, 97, 1, 5, 65, 52, 7, 69, + 104, 84, 82, 64, 79, 91, 86, 95, 71, 91, 86, + 87, 100, 99, 101, 100, 16, 65, 64, 86, 66, 82, + 76, 87, 72, 89, 81, 100, 13, 75, 66, 96, 83, + 72, 73, 64, 6, 1, 68, 11, 8, 71, 1, 0, 65, 2, + 75, 12, 2, 15, 19, 
11, 17, 17, 13, 6, 11, 4, + 8, 9, 80, 90, 80, 83, 73, 78, 74, 0, 72, 73, + 69, 69, 71, 72, 67, 82, 91, 80, 95, 69, 74, 3, + 67, 65, 12, 1, 65, 64, 11, 68, 75, 1, 87, 32, + 35, 28, 28, 35, 27, 13, 30, 27, 4, 14, 13, 4, + 1, 70, 10, 4, 5, 7, 2, 66, 7, 1, 73, 65, 68, + 73, 68, 89, 48, 52, 46, 49, 47, 43, 42, 43, + 39, 31, 31, 26, 16, 13, 70, 30, 25, 7, 31, 22, + 22, 17, 12, 9, 8, 2, 73, 66, 72, 87, 83, 99, + 67, 29, 20, 13, 4, 5, 64, 69, 72, 79, 3, 43, + 28, 22, 17, 20, 6, 64, 69, 72, 76, 28, 17, 9, + 4, 6, 65, 70, 76, 87, 82, 6, 64, 65, 68, 68, + 78, 82, 92, 67, 37, 21, 13, 5, 10, 65, 70, 72, + 78, 62, 85, 76, 68, 66, 69, 1, 6, 7, 68, 5, + 11, 9, 77, 71, 71, 6, 11, 8, 5, 15, 19, 18, 1, + 8, 15, 14, 75, 68, 75, 88, 82, 75, 78, 74, 75, + 73, 70, 71, 73, 72, 68, 71, 74, 81, 89, 76, + 97, 76, 67, 70, 3, 69, 65, 9, 65, 67, 66, 6, + 69, 70, 85, 38, 37, 38, 34, 27, 35, 31, 28, + 25, 23, 24, 19, 1, 1, 69, 15, 15, 9, 67, 6, 4, + 68, 70, 72, 74, 76, 90, 80, 93, 51, 50, 47, + 45, 38, 37, 34, 26, 27, 23, 20, 12, 7, 65, 75, + 28, 29, 5, 30, 33, 22, 16, 17, 14, 10, 13, 7, + 1, 68, 70, 77, 90, 2, 71, 82, 97, 85, 90, 70, + 72, 70, 27, 2, 2, 68, 12, 39, 26, 45, 4, 46, + 42, 26, 13, 3, 65, 91, 97, 106, 71, 42, 28, + 19, 9, 11, 2, 65, 68, 80, 87, 79, 70, 74, 64, + 4, 67, 67, 4, 7, 8, 4, 11, 12, 2, 46, 43, 35, + 28, 22, 16, 65, 74, 85 }, + + { + + 51, + 7, 78, 51, 7, 78, 88, 72, 19, 11, 3, 64, 30, + 35, 53, 14, 67, 0, 3, 14, 0, 65, 67, 73, 88, + 8, 24, 102, 112, 116, 0, 76, 68, 3, 14, 0, 77, + 71, 11, 6, 64, 72, 80, 0, 76, 79, 90, 5, 71, + 80, 3, 74, 76, 88, 6, 67, 68, 73, 1, 4, 22, 0, + 0, 0, 70, 91, 97, 1, 5, 66, 52, 7, 69, 103, + 84, 82, 64, 78, 89, 84, 94, 70, 89, 85, 85, + 99, 98, 100, 99, 17, 65, 0, 86, 65, 82, 76, + 85, 72, 88, 81, 99, 13, 75, 66, 94, 83, 71, + 72, 64, 6, 1, 67, 11, 8, 72, 1, 0, 65, 2, 75, + 12, 2, 15, 19, 10, 17, 17, 13, 4, 11, 4, 6, 8, + 81, 90, 79, 83, 73, 77, 74, 1, 72, 73, 69, 67, + 71, 72, 67, 83, 91, 79, 95, 69, 74, 4, 66, 66, + 12, 1, 65, 65, 12, 69, 76, 
1, 88, 30, 34, 27, + 28, 34, 27, 13, 29, 26, 3, 13, 13, 4, 1, 70, + 9, 3, 4, 6, 2, 67, 6, 1, 75, 66, 68, 74, 69, + 89, 47, 50, 45, 47, 45, 41, 40, 41, 36, 28, + 29, 23, 13, 10, 73, 28, 23, 6, 29, 19, 19, 14, + 10, 6, 6, 64, 75, 67, 74, 88, 84, 99, 66, 30, + 20, 14, 4, 5, 64, 69, 72, 78, 3, 43, 29, 22, + 17, 21, 6, 64, 68, 72, 75, 29, 17, 10, 4, 7, + 65, 70, 75, 86, 81, 7, 0, 64, 68, 68, 78, 81, + 90, 67, 37, 21, 13, 6, 10, 65, 69, 72, 77, 62, + 84, 75, 68, 66, 69, 1, 6, 7, 68, 6, 11, 10, + 77, 71, 72, 6, 11, 7, 4, 14, 18, 18, 0, 7, 15, + 14, 75, 69, 76, 87, 81, 74, 78, 74, 75, 72, + 70, 71, 72, 71, 67, 71, 74, 81, 89, 75, 97, + 76, 67, 70, 4, 69, 65, 10, 66, 67, 66, 6, 70, + 70, 86, 37, 36, 38, 33, 26, 35, 31, 27, 24, + 23, 23, 18, 0, 0, 70, 13, 14, 7, 69, 4, 2, 69, + 70, 72, 74, 76, 89, 81, 92, 49, 48, 45, 43, + 35, 35, 32, 23, 25, 20, 18, 9, 5, 66, 75, 27, + 28, 3, 28, 30, 20, 14, 15, 12, 8, 10, 5, 64, + 70, 72, 78, 92, 0, 73, 84, 96, 84, 89, 69, 71, + 69, 28, 3, 3, 68, 13, 40, 27, 46, 5, 45, 41, + 23, 10, 0, 67, 94, 99, 108, 70, 42, 28, 19, 9, + 11, 2, 65, 68, 79, 86, 79, 69, 73, 0, 4, 66, + 66, 4, 7, 8, 4, 12, 12, 2, 45, 41, 33, 26, 21, + 15, 68, 75, 86 }, + + { + + 50, + 7, 78, 50, 7, 78, 86, 70, 19, 11, 2, 66, 28, + 33, 53, 14, 64, 64, 4, 14, 0, 66, 67, 74, 89, + 8, 22, 104, 113, 116, 3, 75, 68, 4, 14, 0, 77, + 70, 11, 6, 64, 72, 80, 0, 77, 78, 90, 5, 71, + 79, 2, 74, 76, 88, 7, 66, 68, 72, 2, 4, 22, 0, + 0, 0, 70, 91, 97, 2, 4, 66, 52, 7, 69, 102, + 83, 82, 0, 76, 88, 82, 92, 69, 88, 83, 83, 98, + 97, 99, 99, 18, 65, 1, 85, 65, 81, 75, 83, 72, + 88, 80, 98, 13, 74, 65, 93, 82, 71, 72, 0, 6, + 1, 66, 12, 9, 72, 0, 0, 65, 2, 75, 12, 1, 14, + 19, 10, 17, 17, 12, 2, 11, 3, 4, 7, 82, 89, + 78, 82, 72, 76, 73, 2, 71, 72, 68, 66, 71, 72, + 66, 84, 90, 78, 95, 69, 74, 4, 66, 67, 13, 1, + 65, 67, 13, 69, 77, 1, 89, 29, 33, 27, 27, 33, + 26, 12, 28, 26, 2, 12, 13, 4, 1, 71, 8, 3, 3, + 6, 1, 68, 5, 0, 77, 67, 69, 75, 70, 89, 45, + 49, 43, 45, 43, 39, 37, 39, 34, 26, 
27, 21, + 11, 8, 75, 26, 22, 4, 26, 17, 17, 12, 7, 4, 3, + 66, 78, 68, 75, 90, 85, 99, 66, 30, 21, 14, 4, + 5, 64, 69, 72, 78, 4, 44, 29, 22, 17, 22, 7, + 0, 68, 71, 74, 30, 18, 10, 4, 8, 64, 69, 75, + 85, 81, 8, 0, 64, 68, 67, 77, 81, 89, 66, 38, + 21, 13, 6, 11, 64, 68, 71, 76, 62, 84, 75, 67, + 66, 69, 1, 6, 7, 67, 6, 11, 10, 77, 71, 72, 5, + 12, 6, 3, 13, 17, 18, 64, 6, 14, 14, 76, 70, + 76, 86, 81, 73, 77, 73, 74, 72, 69, 71, 72, + 71, 67, 72, 74, 81, 88, 74, 97, 76, 66, 70, 4, + 69, 65, 11, 66, 67, 66, 7, 70, 71, 87, 36, 36, + 37, 32, 25, 34, 30, 26, 23, 22, 22, 17, 64, + 64, 71, 12, 12, 6, 71, 3, 1, 70, 71, 73, 74, + 76, 89, 81, 91, 47, 46, 43, 40, 32, 33, 30, + 21, 23, 18, 16, 7, 3, 68, 76, 25, 26, 2, 26, + 28, 18, 12, 13, 10, 6, 8, 3, 67, 72, 74, 80, + 93, 65, 75, 85, 95, 83, 88, 69, 70, 68, 30, 4, + 4, 68, 14, 42, 29, 48, 5, 43, 39, 21, 7, 66, + 70, 97, 101, 109, 70, 42, 28, 19, 9, 11, 2, + 65, 68, 79, 85, 78, 68, 72, 1, 5, 66, 65, 5, + 8, 9, 5, 12, 13, 2, 43, 40, 31, 24, 19, 13, + 70, 77, 87 }, + + { + + 48, + 6, 78, 48, 6, 78, 85, 69, 19, 11, 2, 67, 27, + 32, 53, 14, 1, 64, 5, 15, 0, 67, 67, 75, 91, + 7, 20, 106, 114, 117, 5, 74, 68, 5, 15, 0, 77, + 69, 11, 5, 64, 72, 79, 64, 77, 78, 91, 5, 71, + 79, 2, 75, 76, 88, 7, 66, 68, 72, 2, 4, 22, 0, + 0, 0, 69, 92, 97, 2, 3, 66, 52, 7, 69, 101, + 82, 82, 0, 75, 86, 80, 91, 68, 86, 82, 82, 97, + 97, 98, 98, 18, 65, 2, 84, 65, 81, 75, 82, 72, + 88, 80, 97, 13, 74, 65, 92, 82, 70, 72, 0, 6, + 1, 66, 12, 9, 73, 0, 0, 65, 2, 75, 12, 1, 13, + 19, 9, 17, 17, 12, 0, 11, 3, 2, 6, 83, 88, 77, + 81, 71, 76, 73, 3, 71, 72, 67, 65, 71, 72, 66, + 86, 90, 77, 95, 69, 75, 5, 66, 68, 14, 1, 65, + 68, 14, 70, 78, 1, 90, 27, 32, 26, 26, 32, 25, + 11, 27, 25, 0, 11, 12, 4, 1, 71, 7, 2, 2, 5, + 0, 69, 4, 64, 79, 69, 70, 76, 71, 89, 43, 47, + 41, 43, 41, 37, 35, 37, 31, 23, 25, 18, 8, 5, + 78, 24, 20, 2, 24, 15, 14, 9, 5, 1, 0, 68, 80, + 69, 77, 91, 86, 100, 66, 30, 21, 14, 4, 5, 64, + 69, 72, 77, 4, 44, 29, 22, 17, 23, 7, 0, 68, + 
71, 74, 31, 18, 10, 4, 8, 64, 69, 74, 84, 80, + 9, 1, 64, 68, 66, 76, 80, 88, 66, 38, 21, 13, + 6, 12, 64, 68, 70, 76, 62, 84, 75, 67, 66, 69, + 1, 6, 7, 67, 6, 11, 11, 77, 71, 72, 4, 12, 5, + 2, 12, 16, 18, 66, 4, 14, 14, 77, 71, 77, 86, + 80, 72, 77, 73, 74, 71, 68, 72, 72, 71, 67, + 73, 74, 81, 88, 74, 98, 76, 66, 70, 5, 69, 65, + 11, 66, 68, 66, 8, 71, 71, 88, 35, 35, 37, 31, + 23, 33, 29, 25, 22, 21, 21, 16, 65, 65, 72, + 10, 11, 4, 73, 1, 0, 71, 72, 73, 74, 76, 89, + 82, 91, 44, 43, 41, 38, 29, 30, 27, 18, 21, + 16, 14, 4, 1, 69, 77, 23, 24, 0, 24, 26, 15, + 9, 10, 7, 4, 6, 0, 69, 74, 77, 82, 94, 67, 77, + 87, 94, 82, 87, 68, 69, 68, 31, 5, 4, 68, 14, + 43, 30, 49, 6, 42, 37, 18, 4, 69, 73, 100, + 103, 110, 70, 42, 28, 19, 9, 11, 2, 65, 68, + 78, 85, 77, 67, 72, 2, 5, 65, 65, 5, 8, 9, 5, + 13, 13, 1, 42, 38, 29, 22, 17, 11, 72, 79, 88 }, + + { + + 47, + 6, 78, 47, 6, 78, 83, 68, 20, 11, 2, 68, 26, + 31, 53, 14, 3, 64, 6, 16, 0, 67, 66, 76, 92, + 6, 18, 107, 115, 118, 8, 72, 68, 6, 16, 0, 76, + 68, 12, 4, 64, 71, 78, 64, 77, 78, 91, 5, 71, + 78, 2, 75, 76, 88, 7, 66, 67, 72, 2, 4, 22, 0, + 0, 0, 68, 92, 97, 2, 2, 66, 52, 7, 69, 100, + 81, 82, 0, 74, 84, 78, 89, 66, 84, 81, 80, 96, + 96, 97, 97, 19, 64, 3, 83, 65, 80, 74, 80, 72, + 87, 80, 95, 14, 74, 65, 90, 82, 69, 72, 0, 7, + 2, 65, 12, 9, 73, 0, 1, 65, 2, 74, 12, 1, 13, + 19, 8, 17, 17, 12, 65, 11, 3, 1, 5, 83, 87, + 76, 80, 70, 75, 72, 4, 70, 72, 66, 64, 71, 72, + 66, 87, 89, 76, 95, 68, 75, 6, 66, 69, 15, 1, + 65, 69, 15, 71, 79, 1, 90, 26, 31, 26, 25, 31, + 24, 11, 27, 25, 64, 10, 12, 4, 1, 71, 7, 1, 1, + 4, 64, 70, 4, 64, 80, 70, 70, 77, 72, 89, 42, + 46, 40, 42, 40, 35, 33, 35, 29, 21, 23, 16, 5, + 3, 81, 23, 18, 1, 22, 13, 12, 7, 3, 64, 65, + 70, 82, 69, 79, 92, 86, 100, 66, 31, 21, 14, + 5, 5, 64, 68, 72, 76, 4, 44, 29, 23, 17, 24, + 8, 1, 67, 70, 73, 32, 19, 11, 5, 9, 0, 69, 73, + 83, 79, 11, 2, 0, 67, 65, 75, 79, 87, 65, 38, + 22, 14, 6, 13, 0, 67, 69, 75, 62, 83, 74, 66, + 66, 69, 1, 7, 8, 67, 6, 
12, 12, 77, 71, 72, 4, + 12, 4, 2, 11, 16, 18, 67, 3, 14, 14, 77, 72, + 78, 85, 79, 71, 77, 72, 74, 70, 67, 72, 72, + 71, 66, 73, 74, 81, 88, 73, 98, 76, 66, 70, 6, + 69, 65, 12, 66, 68, 66, 9, 72, 71, 88, 34, 35, + 37, 31, 22, 32, 28, 24, 22, 20, 20, 16, 65, + 66, 73, 9, 10, 2, 75, 0, 64, 71, 72, 73, 73, + 75, 89, 83, 90, 42, 41, 39, 36, 27, 28, 25, + 16, 19, 14, 12, 2, 64, 70, 78, 21, 23, 64, 22, + 24, 13, 7, 8, 5, 2, 4, 65, 71, 75, 79, 84, 95, + 69, 79, 89, 93, 80, 85, 67, 68, 67, 33, 6, 5, + 68, 15, 45, 31, 51, 7, 41, 36, 16, 1, 71, 76, + 102, 105, 111, 70, 42, 28, 19, 9, 12, 2, 65, + 68, 77, 84, 76, 66, 71, 4, 6, 64, 64, 6, 9, + 10, 5, 14, 13, 1, 41, 37, 28, 21, 15, 9, 74, + 81, 88 }, + + { + + 46, + 6, 78, 46, 6, 78, 81, 66, 20, 11, 1, 70, 24, + 29, 53, 14, 6, 65, 7, 16, 0, 68, 66, 77, 93, + 6, 16, 109, 116, 118, 11, 71, 68, 7, 16, 0, + 76, 67, 12, 4, 64, 71, 78, 64, 78, 77, 91, 5, + 71, 77, 1, 75, 76, 88, 8, 65, 67, 71, 3, 4, + 22, 0, 0, 0, 68, 92, 97, 3, 1, 66, 52, 7, 69, + 99, 80, 82, 1, 72, 83, 76, 88, 65, 83, 79, 78, + 95, 95, 96, 97, 20, 64, 4, 82, 65, 80, 73, 78, + 72, 87, 79, 94, 14, 73, 64, 89, 81, 69, 72, 1, + 7, 2, 64, 13, 10, 74, 64, 1, 65, 2, 74, 12, 0, + 12, 19, 8, 17, 17, 11, 67, 11, 2, 64, 4, 84, + 86, 75, 79, 69, 74, 71, 5, 69, 71, 65, 0, 71, + 72, 65, 88, 89, 75, 95, 68, 75, 6, 66, 70, 16, + 1, 65, 71, 16, 71, 80, 1, 91, 24, 30, 25, 24, + 30, 23, 10, 26, 25, 65, 9, 12, 4, 1, 72, 6, 1, + 0, 4, 65, 71, 3, 65, 82, 71, 71, 78, 73, 89, + 40, 45, 38, 40, 38, 33, 30, 33, 26, 19, 21, + 13, 3, 1, 83, 21, 17, 64, 19, 11, 10, 5, 0, + 66, 68, 72, 85, 70, 80, 94, 87, 100, 66, 31, + 22, 14, 5, 5, 64, 68, 72, 76, 5, 45, 29, 23, + 17, 25, 8, 2, 67, 69, 72, 33, 20, 11, 5, 10, + 0, 68, 73, 82, 79, 12, 2, 0, 67, 64, 74, 79, + 86, 65, 39, 22, 14, 6, 14, 1, 66, 68, 74, 62, + 83, 74, 66, 66, 69, 1, 7, 8, 66, 6, 12, 12, + 77, 71, 72, 3, 13, 3, 1, 10, 15, 18, 68, 2, + 13, 14, 78, 73, 78, 84, 79, 70, 76, 72, 73, + 70, 66, 72, 72, 71, 66, 74, 74, 81, 87, 72, + 98, 
76, 65, 70, 6, 69, 65, 13, 66, 68, 66, 10, + 72, 72, 89, 33, 35, 36, 30, 21, 31, 27, 23, + 21, 19, 19, 15, 66, 67, 74, 7, 8, 1, 77, 64, + 65, 72, 73, 74, 73, 75, 89, 83, 89, 40, 39, + 37, 33, 24, 26, 23, 13, 17, 12, 10, 0, 66, 72, + 79, 19, 21, 65, 20, 22, 11, 5, 6, 3, 0, 2, 67, + 74, 77, 81, 86, 96, 71, 81, 90, 92, 79, 84, + 67, 67, 66, 35, 7, 6, 68, 16, 46, 33, 52, 7, + 39, 34, 13, 65, 74, 79, 105, 107, 112, 70, 42, + 28, 19, 9, 12, 2, 65, 68, 77, 83, 75, 65, 70, + 5, 7, 64, 0, 7, 10, 11, 6, 14, 14, 1, 39, 35, + 26, 19, 13, 7, 76, 83, 89 }, + + { + + 45, + 6, 79, 45, 6, 79, 79, 65, 21, 11, 1, 71, 23, + 28, 53, 14, 8, 65, 7, 17, 0, 69, 66, 78, 94, + 5, 15, 110, 117, 119, 14, 70, 68, 7, 17, 0, + 75, 66, 13, 3, 0, 70, 77, 65, 78, 77, 91, 5, + 70, 77, 1, 75, 75, 88, 8, 65, 67, 71, 3, 4, + 22, 0, 0, 0, 67, 92, 97, 3, 1, 67, 52, 7, 69, + 98, 80, 82, 1, 71, 81, 74, 86, 64, 81, 78, 76, + 94, 94, 95, 96, 21, 64, 5, 82, 64, 79, 73, 76, + 72, 86, 79, 93, 14, 73, 64, 87, 81, 68, 71, 1, + 7, 2, 0, 13, 10, 74, 64, 1, 65, 2, 74, 12, 0, + 12, 19, 7, 17, 17, 11, 69, 11, 2, 66, 3, 85, + 86, 74, 79, 69, 73, 71, 6, 69, 71, 65, 2, 71, + 72, 65, 89, 88, 74, 95, 68, 75, 7, 65, 71, 16, + 1, 65, 72, 17, 72, 81, 1, 92, 23, 29, 25, 24, + 29, 23, 10, 25, 24, 66, 8, 12, 4, 1, 72, 5, 0, + 64, 3, 65, 72, 2, 65, 84, 72, 71, 79, 74, 89, + 39, 43, 37, 38, 36, 31, 28, 31, 24, 16, 19, + 11, 0, 65, 86, 19, 15, 65, 17, 8, 7, 2, 65, + 69, 70, 75, 87, 71, 82, 95, 88, 100, 65, 32, + 22, 15, 5, 5, 64, 68, 72, 75, 5, 45, 30, 23, + 17, 26, 9, 2, 66, 69, 71, 34, 20, 12, 5, 11, + 1, 68, 72, 81, 78, 13, 3, 1, 67, 64, 74, 78, + 84, 64, 39, 22, 14, 7, 14, 1, 65, 68, 73, 62, + 82, 73, 65, 66, 69, 1, 7, 8, 66, 7, 12, 13, + 77, 71, 73, 3, 13, 2, 0, 9, 14, 18, 69, 1, 13, + 14, 78, 74, 79, 83, 78, 69, 76, 71, 73, 69, + 66, 72, 71, 70, 65, 74, 74, 81, 87, 71, 98, + 76, 65, 70, 7, 69, 65, 14, 67, 68, 66, 10, 73, + 72, 90, 32, 34, 36, 29, 20, 31, 27, 22, 20, + 19, 18, 14, 67, 68, 75, 6, 7, 64, 79, 66, 67, + 73, 73, 74, 
73, 75, 88, 84, 88, 38, 37, 35, + 31, 21, 24, 21, 11, 15, 9, 8, 66, 68, 73, 79, + 18, 20, 67, 18, 19, 9, 3, 4, 1, 65, 64, 69, + 76, 79, 83, 87, 98, 73, 83, 92, 91, 78, 83, + 66, 66, 65, 36, 8, 7, 68, 17, 48, 34, 54, 8, + 38, 33, 11, 68, 77, 81, 108, 109, 114, 69, 42, + 28, 19, 9, 12, 2, 65, 68, 76, 82, 75, 64, 69, + 6, 7, 0, 1, 7, 10, 11, 6, 15, 14, 1, 38, 34, + 24, 17, 12, 6, 79, 84, 90 }, + + { + + 43, + 6, 79, 43, 6, 79, 78, 0, 21, 11, 0, 73, 21, + 27, 53, 14, 10, 65, 8, 18, 0, 70, 66, 79, 95, + 5, 13, 112, 118, 119, 17, 69, 68, 8, 18, 0, + 75, 65, 13, 3, 0, 70, 76, 65, 79, 77, 91, 5, + 70, 76, 1, 76, 75, 88, 9, 65, 67, 71, 4, 4, + 22, 0, 0, 0, 67, 93, 97, 4, 0, 67, 52, 7, 69, + 97, 79, 82, 2, 70, 79, 72, 85, 0, 79, 77, 74, + 93, 94, 94, 95, 21, 64, 6, 81, 64, 79, 72, 74, + 72, 86, 78, 92, 14, 73, 0, 86, 80, 67, 71, 1, + 7, 2, 0, 14, 11, 75, 64, 1, 65, 2, 74, 12, 64, + 11, 19, 6, 17, 17, 11, 71, 11, 1, 68, 2, 86, + 85, 73, 78, 68, 73, 70, 7, 68, 70, 64, 3, 71, + 72, 65, 90, 88, 73, 95, 68, 75, 8, 65, 72, 17, + 1, 65, 73, 18, 72, 82, 1, 93, 21, 28, 24, 23, + 28, 22, 9, 24, 24, 68, 7, 11, 4, 1, 73, 4, 64, + 65, 2, 66, 73, 1, 66, 86, 73, 72, 80, 75, 89, + 37, 42, 35, 36, 34, 29, 26, 29, 21, 14, 17, 8, + 65, 67, 88, 17, 13, 67, 15, 6, 5, 0, 67, 71, + 73, 77, 90, 72, 84, 96, 89, 100, 65, 32, 22, + 15, 5, 5, 64, 68, 72, 75, 5, 45, 30, 23, 17, + 27, 9, 3, 66, 68, 71, 35, 21, 12, 5, 11, 1, + 67, 72, 80, 77, 14, 4, 1, 67, 0, 73, 77, 83, + 64, 39, 22, 14, 7, 15, 2, 65, 67, 72, 62, 82, + 73, 65, 66, 69, 1, 7, 8, 66, 7, 12, 13, 77, + 71, 73, 2, 14, 1, 64, 8, 13, 18, 71, 0, 13, + 14, 79, 75, 79, 83, 77, 68, 75, 71, 72, 69, + 65, 73, 71, 70, 65, 75, 74, 81, 86, 70, 98, + 76, 65, 70, 8, 69, 65, 15, 67, 69, 66, 11, 74, + 72, 91, 31, 34, 35, 28, 19, 30, 26, 21, 19, + 18, 17, 13, 68, 69, 76, 4, 6, 65, 81, 67, 68, + 74, 74, 75, 73, 75, 88, 84, 88, 35, 34, 33, + 29, 18, 22, 19, 8, 13, 7, 6, 68, 70, 75, 80, + 16, 18, 68, 16, 17, 7, 1, 1, 64, 67, 66, 71, + 79, 81, 85, 89, 99, 75, 85, 
93, 90, 77, 82, + 65, 65, 64, 38, 9, 8, 68, 18, 49, 36, 55, 9, + 36, 31, 8, 71, 80, 84, 111, 111, 115, 69, 42, + 28, 19, 9, 12, 2, 65, 68, 76, 81, 74, 0, 69, + 7, 8, 0, 2, 8, 11, 12, 6, 15, 14, 1, 37, 32, + 22, 15, 10, 4, 81, 86, 91 }, + + { + + 42, + 6, 79, 42, 6, 79, 76, 1, 21, 11, 0, 74, 20, + 25, 53, 14, 13, 66, 9, 18, 0, 70, 65, 80, 96, + 4, 11, 114, 119, 120, 20, 67, 68, 9, 18, 0, + 75, 64, 13, 2, 0, 70, 76, 65, 79, 76, 91, 5, + 70, 75, 0, 76, 75, 88, 9, 64, 66, 70, 4, 4, + 22, 0, 0, 0, 66, 93, 97, 4, 64, 67, 52, 7, 69, + 96, 78, 82, 2, 68, 78, 70, 83, 1, 78, 75, 72, + 92, 93, 93, 95, 22, 64, 7, 80, 64, 78, 71, 72, + 72, 86, 78, 90, 15, 72, 0, 85, 80, 67, 71, 2, + 8, 3, 1, 14, 11, 75, 65, 1, 65, 2, 74, 12, 64, + 10, 19, 6, 17, 17, 10, 73, 11, 1, 69, 1, 87, + 84, 72, 77, 67, 72, 69, 8, 67, 70, 0, 4, 71, + 72, 64, 91, 87, 72, 95, 67, 75, 8, 65, 73, 18, + 1, 65, 75, 19, 73, 83, 1, 93, 20, 27, 24, 22, + 27, 21, 8, 24, 24, 69, 6, 11, 4, 1, 73, 4, 64, + 66, 2, 67, 74, 1, 67, 88, 74, 73, 81, 76, 89, + 35, 41, 33, 34, 33, 27, 23, 27, 19, 12, 15, 6, + 68, 69, 91, 15, 12, 69, 12, 4, 3, 65, 70, 73, + 76, 79, 92, 73, 85, 98, 89, 100, 65, 32, 23, + 15, 5, 5, 64, 68, 72, 74, 6, 46, 30, 23, 17, + 28, 10, 4, 66, 67, 70, 36, 22, 12, 6, 12, 2, + 67, 71, 79, 77, 16, 4, 1, 66, 1, 72, 77, 82, + 0, 40, 23, 14, 7, 16, 3, 64, 66, 71, 62, 82, + 73, 64, 66, 69, 1, 7, 8, 65, 7, 13, 14, 77, + 71, 73, 1, 14, 0, 65, 7, 13, 18, 72, 64, 12, + 14, 80, 76, 80, 82, 77, 67, 75, 70, 72, 68, + 64, 73, 71, 70, 65, 76, 74, 81, 86, 69, 98, + 76, 64, 70, 8, 69, 65, 16, 67, 69, 66, 12, 74, + 73, 91, 30, 34, 35, 28, 18, 29, 25, 20, 19, + 17, 16, 12, 69, 70, 77, 3, 4, 67, 83, 68, 69, + 75, 75, 75, 73, 74, 88, 85, 87, 33, 32, 31, + 26, 16, 20, 17, 6, 11, 5, 4, 70, 72, 76, 81, + 14, 16, 69, 14, 15, 5, 64, 64, 66, 69, 68, 73, + 81, 83, 87, 91, 100, 77, 87, 95, 89, 75, 81, + 65, 64, 0, 40, 10, 9, 68, 19, 51, 37, 57, 9, + 35, 29, 6, 74, 82, 87, 113, 113, 116, 69, 42, + 28, 19, 9, 12, 2, 65, 68, 75, 80, 73, 
1, 68, + 8, 9, 1, 3, 9, 12, 13, 7, 16, 15, 1, 35, 31, + 21, 14, 8, 2, 83, 88, 92 }, + + { + + 41, + 6, 79, 41, 6, 79, 74, 3, 22, 11, 64, 76, 18, + 24, 53, 14, 15, 66, 10, 19, 0, 71, 65, 81, 97, + 4, 9, 115, 120, 120, 23, 66, 68, 10, 19, 0, + 74, 0, 14, 2, 0, 69, 75, 66, 80, 76, 91, 5, + 70, 75, 0, 76, 75, 88, 10, 64, 66, 70, 5, 4, + 22, 0, 0, 0, 66, 93, 97, 5, 65, 67, 52, 7, 69, + 95, 77, 82, 3, 67, 76, 68, 82, 2, 76, 74, 70, + 91, 92, 92, 94, 23, 64, 8, 79, 64, 78, 71, 70, + 72, 85, 77, 89, 15, 72, 1, 83, 79, 66, 71, 2, + 8, 3, 2, 15, 12, 76, 65, 1, 65, 2, 74, 12, 65, + 10, 19, 5, 17, 17, 10, 75, 11, 0, 71, 0, 88, + 83, 71, 76, 66, 71, 69, 9, 67, 69, 1, 5, 71, + 72, 64, 92, 87, 71, 95, 67, 75, 9, 65, 74, 19, + 1, 65, 76, 20, 73, 84, 1, 94, 18, 26, 23, 21, + 26, 20, 8, 23, 23, 70, 5, 11, 4, 1, 74, 3, 65, + 67, 1, 68, 75, 0, 67, 90, 75, 73, 82, 77, 89, + 34, 39, 32, 32, 31, 25, 21, 25, 16, 9, 13, 3, + 70, 72, 93, 13, 10, 70, 10, 2, 0, 68, 72, 76, + 78, 81, 95, 74, 87, 99, 90, 100, 65, 33, 23, + 15, 5, 5, 64, 68, 72, 74, 6, 46, 30, 23, 17, + 29, 10, 4, 65, 67, 69, 37, 22, 13, 6, 13, 2, + 66, 71, 78, 76, 17, 5, 2, 66, 2, 71, 76, 81, + 0, 40, 23, 14, 7, 17, 3, 0, 65, 70, 62, 81, + 72, 64, 66, 69, 1, 7, 8, 65, 7, 13, 14, 77, + 71, 73, 1, 15, 64, 66, 6, 12, 18, 73, 65, 12, + 14, 80, 77, 80, 81, 76, 66, 74, 70, 71, 68, 0, + 73, 71, 70, 64, 76, 74, 81, 85, 68, 98, 76, + 64, 70, 9, 69, 65, 17, 67, 69, 66, 13, 75, 73, + 92, 29, 33, 34, 27, 17, 28, 24, 19, 18, 16, + 15, 11, 70, 71, 78, 1, 3, 68, 85, 70, 70, 76, + 75, 76, 73, 74, 88, 85, 86, 31, 30, 29, 24, + 13, 18, 15, 3, 9, 3, 2, 73, 74, 78, 82, 12, + 15, 71, 12, 13, 3, 66, 66, 68, 71, 70, 75, 84, + 85, 89, 93, 101, 79, 89, 96, 88, 74, 80, 64, + 0, 1, 41, 11, 10, 68, 20, 52, 39, 58, 10, 33, + 28, 3, 77, 85, 90, 116, 115, 117, 69, 42, 28, + 19, 9, 12, 2, 65, 68, 75, 79, 72, 2, 67, 9, 9, + 1, 4, 9, 12, 13, 7, 16, 15, 1, 34, 29, 19, 12, + 6, 0, 85, 90, 93 }, + + { + + 40, + 6, 79, 40, 6, 79, 72, 4, 22, 11, 64, 77, 17, + 23, 
53, 14, 17, 66, 11, 20, 0, 72, 65, 82, 98, + 3, 7, 117, 121, 121, 26, 65, 68, 11, 20, 0, + 74, 1, 14, 1, 0, 69, 74, 66, 80, 76, 91, 5, + 70, 74, 0, 76, 75, 88, 10, 64, 66, 70, 5, 4, + 22, 0, 0, 0, 65, 93, 97, 5, 66, 67, 52, 7, 69, + 94, 76, 82, 3, 66, 74, 66, 80, 3, 74, 73, 68, + 90, 91, 91, 93, 24, 64, 9, 78, 64, 77, 70, 68, + 72, 85, 77, 88, 15, 72, 1, 82, 79, 65, 71, 2, + 8, 3, 3, 15, 12, 76, 65, 1, 65, 2, 74, 12, 65, + 9, 19, 4, 17, 17, 10, 77, 11, 0, 73, 64, 89, + 82, 70, 75, 65, 70, 68, 10, 66, 69, 2, 6, 71, + 72, 64, 93, 86, 70, 95, 67, 75, 10, 65, 75, + 20, 1, 65, 77, 21, 74, 85, 1, 95, 17, 25, 23, + 20, 25, 19, 7, 22, 23, 71, 4, 11, 4, 1, 74, 2, + 66, 68, 0, 69, 76, 64, 68, 92, 76, 74, 83, 78, + 89, 32, 38, 30, 30, 29, 23, 19, 23, 14, 7, 11, + 1, 73, 74, 96, 11, 8, 72, 8, 0, 65, 70, 74, + 78, 81, 83, 97, 75, 89, 100, 91, 100, 65, 33, + 23, 15, 5, 5, 64, 68, 72, 73, 6, 46, 30, 23, + 17, 30, 11, 5, 65, 66, 68, 38, 23, 13, 6, 14, + 3, 66, 70, 77, 75, 18, 6, 2, 66, 3, 70, 75, + 80, 1, 40, 23, 14, 7, 18, 4, 1, 64, 69, 62, + 81, 72, 0, 66, 69, 1, 7, 8, 65, 7, 13, 15, 77, + 71, 73, 0, 15, 65, 67, 5, 11, 18, 74, 66, 12, + 14, 81, 78, 81, 80, 75, 65, 74, 69, 71, 67, 1, + 73, 71, 70, 64, 77, 74, 81, 85, 67, 98, 76, + 64, 70, 10, 69, 65, 18, 67, 69, 66, 14, 76, + 73, 93, 28, 33, 34, 26, 16, 27, 23, 18, 17, + 15, 14, 10, 71, 72, 79, 0, 2, 70, 87, 71, 71, + 77, 76, 76, 73, 74, 88, 86, 85, 29, 28, 27, + 22, 10, 16, 13, 1, 7, 1, 0, 75, 76, 79, 83, + 10, 13, 72, 10, 11, 1, 68, 68, 70, 73, 72, 77, + 86, 87, 91, 95, 102, 81, 91, 98, 87, 73, 79, + 0, 1, 2, 43, 12, 11, 68, 21, 54, 40, 60, 11, + 32, 26, 1, 80, 88, 93, 119, 117, 118, 69, 42, + 28, 19, 9, 12, 2, 65, 68, 74, 78, 71, 3, 66, + 10, 10, 2, 5, 10, 13, 14, 7, 17, 15, 1, 33, + 28, 17, 10, 4, 65, 87, 92, 94 }, + + { + + 38, + 5, 80, 38, 5, 80, 71, 5, 22, 11, 65, 79, 15, + 21, 52, 14, 19, 67, 11, 20, 64, 73, 65, 84, + 100, 2, 5, 119, 122, 122, 28, 64, 69, 11, 20, + 64, 74, 2, 14, 0, 0, 69, 74, 67, 81, 76, 92, + 5, 70, 74, 
64, 77, 75, 88, 10, 64, 66, 70, 5, + 3, 22, 0, 0, 0, 65, 94, 97, 5, 67, 68, 52, 6, + 69, 93, 76, 82, 3, 65, 73, 65, 79, 4, 73, 72, + 67, 89, 91, 90, 93, 24, 64, 9, 78, 64, 77, 70, + 67, 72, 85, 77, 87, 15, 72, 1, 81, 79, 65, 71, + 2, 8, 3, 3, 15, 12, 77, 66, 1, 66, 2, 74, 11, + 66, 8, 19, 3, 16, 17, 9, 79, 10, 64, 75, 65, + 90, 82, 70, 75, 65, 70, 68, 11, 66, 69, 2, 7, + 72, 72, 64, 95, 86, 70, 95, 67, 76, 10, 65, + 76, 20, 1, 65, 79, 21, 75, 86, 1, 96, 15, 24, + 22, 19, 24, 18, 6, 21, 22, 73, 3, 10, 3, 1, + 75, 1, 67, 69, 64, 70, 77, 65, 69, 94, 78, 75, + 85, 80, 89, 30, 36, 28, 28, 27, 20, 16, 20, + 11, 4, 8, 65, 76, 77, 99, 9, 6, 74, 5, 66, 68, + 73, 77, 81, 84, 86, 100, 76, 91, 102, 92, 101, + 65, 33, 23, 15, 5, 5, 65, 68, 72, 73, 6, 46, + 30, 23, 17, 31, 11, 5, 65, 66, 68, 38, 23, 13, + 6, 14, 3, 66, 70, 76, 75, 19, 6, 2, 66, 3, 70, + 75, 79, 1, 40, 23, 14, 7, 18, 4, 1, 64, 69, + 62, 81, 72, 0, 66, 69, 1, 7, 8, 65, 7, 13, 15, + 77, 71, 74, 64, 15, 66, 68, 4, 10, 17, 76, 68, + 11, 13, 82, 80, 82, 80, 75, 64, 74, 69, 71, + 67, 1, 74, 71, 70, 64, 78, 74, 81, 85, 67, 99, + 76, 64, 70, 10, 70, 65, 18, 68, 70, 66, 14, + 77, 74, 94, 27, 32, 33, 25, 14, 26, 22, 17, + 16, 14, 13, 9, 72, 74, 81, 65, 0, 72, 89, 73, + 73, 78, 77, 77, 73, 74, 88, 87, 85, 26, 25, + 25, 19, 7, 13, 10, 65, 4, 65, 66, 78, 79, 81, + 84, 8, 11, 74, 8, 8, 65, 71, 71, 73, 75, 75, + 80, 89, 89, 94, 97, 104, 83, 93, 100, 86, 72, + 78, 0, 1, 2, 44, 12, 11, 68, 21, 55, 41, 61, + 11, 30, 24, 65, 84, 91, 96, 122, 120, 120, 69, + 42, 27, 18, 9, 12, 2, 66, 68, 74, 78, 71, 3, + 66, 11, 10, 2, 5, 10, 13, 14, 7, 17, 15, 0, + 31, 26, 15, 8, 2, 67, 90, 94, 95 }, + + { + + 37, + 5, 80, 37, 5, 80, 69, 7, 23, 12, 65, 80, 14, + 20, 52, 14, 22, 67, 12, 21, 64, 73, 64, 85, + 101, 2, 4, 120, 123, 122, 31, 1, 69, 12, 21, + 64, 73, 4, 15, 0, 1, 68, 73, 67, 81, 75, 92, + 5, 69, 73, 64, 77, 74, 88, 11, 0, 65, 69, 6, + 3, 22, 0, 0, 0, 64, 94, 97, 6, 67, 68, 52, 6, + 69, 91, 75, 82, 4, 0, 71, 0, 77, 6, 71, 70, + 65, 87, 
90, 89, 92, 25, 0, 10, 77, 0, 76, 69, + 65, 71, 84, 76, 85, 16, 71, 2, 79, 78, 64, 70, + 3, 9, 4, 4, 16, 13, 77, 66, 2, 66, 2, 73, 11, + 66, 8, 19, 3, 16, 17, 9, 80, 10, 64, 76, 66, + 90, 81, 69, 74, 64, 69, 67, 12, 65, 68, 3, 9, + 72, 72, 0, 96, 85, 69, 95, 66, 76, 11, 64, 76, + 21, 1, 65, 80, 22, 75, 87, 1, 96, 14, 24, 22, + 19, 24, 18, 6, 21, 22, 74, 3, 10, 3, 1, 75, 1, + 67, 69, 64, 70, 78, 65, 69, 95, 79, 75, 86, + 81, 89, 29, 35, 27, 27, 26, 18, 14, 18, 9, 2, + 6, 67, 78, 79, 101, 8, 5, 75, 3, 68, 70, 75, + 79, 83, 86, 88, 102, 76, 92, 103, 92, 101, 64, + 34, 24, 16, 6, 5, 65, 67, 71, 72, 7, 47, 31, + 24, 17, 32, 12, 6, 64, 65, 67, 39, 24, 14, 7, + 15, 4, 65, 69, 74, 74, 21, 7, 3, 65, 4, 69, + 74, 77, 2, 41, 24, 15, 8, 19, 5, 2, 0, 68, 62, + 80, 71, 1, 66, 68, 1, 8, 9, 64, 8, 14, 16, 76, + 71, 74, 64, 16, 66, 68, 3, 10, 17, 77, 69, 11, + 13, 82, 81, 82, 79, 74, 0, 73, 68, 70, 66, 2, + 74, 70, 69, 0, 78, 73, 80, 84, 66, 99, 76, 0, + 70, 11, 70, 65, 19, 68, 70, 65, 15, 77, 74, + 94, 27, 32, 33, 25, 13, 26, 22, 17, 16, 14, + 13, 9, 72, 75, 82, 66, 64, 73, 90, 74, 74, 78, + 77, 77, 72, 73, 87, 87, 84, 24, 23, 23, 17, 5, + 11, 8, 67, 2, 67, 68, 80, 81, 82, 84, 7, 10, + 75, 7, 6, 67, 73, 73, 75, 77, 77, 82, 91, 90, + 96, 98, 105, 84, 94, 101, 84, 70, 76, 1, 2, 3, + 46, 13, 12, 68, 22, 57, 43, 62, 12, 29, 23, + 67, 87, 93, 98, 124, 122, 121, 68, 43, 27, 18, + 9, 13, 2, 66, 68, 73, 77, 70, 4, 65, 13, 11, + 3, 6, 11, 14, 15, 8, 18, 16, 0, 30, 25, 14, 7, + 1, 68, 92, 95, 95 }, + + { + + 36, + 5, 80, 36, 5, 80, 67, 8, 23, 12, 65, 81, 13, + 19, 52, 14, 24, 67, 13, 22, 64, 74, 64, 86, + 102, 1, 2, 122, 124, 123, 34, 2, 69, 13, 22, + 64, 73, 5, 15, 64, 1, 68, 72, 67, 81, 75, 92, + 5, 69, 72, 64, 77, 74, 88, 11, 0, 65, 69, 6, + 3, 22, 0, 0, 0, 0, 94, 97, 6, 68, 68, 52, 6, + 69, 90, 74, 82, 4, 1, 69, 2, 76, 7, 69, 69, 0, + 86, 89, 88, 91, 26, 0, 11, 76, 0, 76, 68, 0, + 71, 84, 76, 84, 16, 71, 2, 78, 78, 0, 70, 3, + 9, 4, 5, 16, 13, 78, 66, 2, 66, 2, 73, 11, 66, + 7, 19, 2, 
16, 17, 9, 82, 10, 64, 78, 67, 91, + 80, 68, 73, 0, 68, 66, 13, 64, 68, 4, 10, 72, + 72, 0, 97, 85, 68, 95, 66, 76, 12, 64, 77, 22, + 1, 65, 81, 23, 76, 88, 1, 97, 12, 23, 21, 18, + 23, 17, 5, 20, 22, 75, 2, 10, 3, 1, 75, 0, 68, + 70, 65, 71, 79, 66, 70, 97, 80, 76, 87, 82, + 89, 27, 34, 25, 25, 24, 16, 12, 16, 6, 0, 4, + 70, 81, 81, 104, 6, 3, 77, 1, 70, 72, 77, 81, + 85, 89, 90, 104, 77, 94, 104, 93, 101, 64, 34, + 24, 16, 6, 5, 65, 67, 71, 71, 7, 47, 31, 24, + 17, 33, 12, 7, 64, 64, 66, 40, 25, 14, 7, 16, + 4, 65, 68, 73, 73, 22, 8, 3, 65, 5, 68, 73, + 76, 2, 41, 24, 15, 8, 20, 6, 3, 1, 67, 62, 80, + 71, 1, 66, 68, 1, 8, 9, 64, 8, 14, 17, 76, 71, + 74, 65, 16, 67, 69, 2, 9, 17, 78, 70, 11, 13, + 83, 82, 83, 78, 73, 1, 73, 68, 70, 65, 3, 74, + 70, 69, 0, 79, 73, 80, 84, 65, 99, 76, 0, 70, + 12, 70, 65, 20, 68, 70, 65, 16, 78, 74, 95, + 26, 32, 33, 24, 12, 25, 21, 16, 15, 13, 12, 8, + 73, 76, 83, 68, 65, 75, 92, 75, 75, 79, 78, + 77, 72, 73, 87, 88, 83, 22, 21, 21, 15, 2, 9, + 6, 70, 0, 69, 70, 82, 83, 83, 85, 5, 8, 76, 5, + 4, 69, 75, 75, 77, 79, 79, 84, 93, 92, 98, + 100, 106, 86, 96, 103, 83, 69, 75, 2, 3, 4, + 48, 14, 13, 68, 23, 58, 44, 62, 13, 28, 21, + 70, 90, 96, 101, 126, 124, 122, 68, 43, 27, + 18, 9, 13, 2, 66, 68, 72, 76, 69, 5, 64, 14, + 12, 4, 7, 12, 15, 16, 8, 19, 16, 0, 29, 23, + 12, 5, 64, 70, 94, 97, 96 }, + + { + + 35, + 5, 80, 35, 5, 80, 65, 10, 24, 12, 66, 83, 11, + 18, 52, 14, 26, 67, 14, 23, 64, 75, 64, 87, + 103, 1, 0, 123, 125, 123, 37, 3, 69, 14, 23, + 64, 72, 6, 16, 64, 1, 67, 71, 68, 82, 75, 92, + 5, 69, 72, 64, 77, 74, 88, 12, 0, 65, 69, 7, + 3, 22, 0, 0, 0, 0, 94, 97, 7, 69, 68, 52, 6, + 69, 89, 73, 82, 5, 2, 67, 4, 74, 8, 67, 68, 2, + 85, 88, 87, 90, 27, 0, 12, 75, 0, 75, 68, 2, + 71, 83, 75, 83, 16, 71, 3, 76, 77, 1, 70, 3, + 9, 4, 6, 17, 14, 78, 66, 2, 66, 2, 73, 11, 67, + 7, 19, 1, 16, 17, 9, 84, 10, 65, 80, 68, 92, + 79, 67, 72, 1, 67, 66, 14, 64, 67, 5, 11, 72, + 72, 0, 98, 84, 67, 95, 66, 76, 13, 64, 78, 23, + 1, 65, 82, 24, 
76, 89, 1, 98, 11, 22, 21, 17, + 22, 16, 5, 19, 21, 76, 1, 10, 3, 1, 76, 64, + 69, 71, 66, 72, 80, 67, 70, 99, 81, 76, 88, + 83, 89, 26, 32, 24, 23, 22, 14, 10, 14, 4, 66, + 2, 72, 83, 84, 106, 4, 1, 78, 64, 72, 75, 80, + 83, 88, 91, 92, 107, 78, 96, 105, 94, 101, 64, + 35, 24, 16, 6, 5, 65, 67, 71, 71, 7, 47, 31, + 24, 17, 34, 13, 7, 0, 64, 65, 41, 25, 15, 7, + 17, 5, 64, 68, 72, 72, 23, 9, 4, 65, 6, 67, + 72, 75, 3, 41, 24, 15, 8, 21, 6, 4, 2, 66, 62, + 79, 70, 2, 66, 68, 1, 8, 9, 64, 8, 14, 17, 76, + 71, 74, 65, 17, 68, 70, 1, 8, 17, 79, 71, 11, + 13, 83, 83, 83, 77, 72, 2, 72, 67, 69, 65, 4, + 74, 70, 69, 1, 79, 73, 80, 83, 64, 99, 76, 0, + 70, 13, 70, 65, 21, 68, 70, 65, 17, 79, 74, + 96, 25, 31, 32, 23, 11, 24, 20, 15, 14, 12, + 11, 7, 74, 77, 84, 69, 66, 76, 94, 77, 76, 80, + 78, 78, 72, 73, 87, 88, 82, 20, 19, 19, 13, + 64, 7, 4, 72, 65, 71, 72, 85, 85, 85, 86, 3, + 7, 78, 3, 2, 71, 77, 77, 79, 81, 81, 86, 96, + 94, 100, 102, 107, 88, 98, 104, 82, 68, 74, 3, + 4, 5, 49, 15, 14, 68, 24, 60, 46, 62, 14, 26, + 20, 72, 93, 99, 104, 126, 126, 123, 68, 43, + 27, 18, 9, 13, 2, 66, 68, 72, 75, 68, 6, 0, + 15, 12, 4, 8, 12, 15, 16, 8, 19, 16, 0, 28, + 22, 10, 3, 66, 72, 96, 99, 97 }, + + { + + 33, + 5, 80, 33, 5, 80, 64, 11, 24, 12, 66, 84, 10, + 16, 52, 14, 29, 68, 15, 23, 64, 76, 64, 88, + 104, 0, 65, 125, 126, 124, 40, 4, 69, 15, 23, + 64, 72, 7, 16, 65, 1, 67, 71, 68, 82, 74, 92, + 5, 69, 71, 65, 78, 74, 88, 12, 1, 65, 68, 7, + 3, 22, 0, 0, 0, 1, 95, 97, 7, 70, 68, 52, 6, + 69, 88, 72, 82, 5, 4, 66, 6, 73, 9, 66, 66, 4, + 84, 88, 86, 90, 27, 0, 13, 74, 0, 75, 67, 4, + 71, 83, 75, 82, 16, 70, 3, 75, 77, 1, 70, 4, + 9, 4, 6, 17, 14, 79, 67, 2, 66, 2, 73, 11, 67, + 6, 19, 1, 16, 17, 8, 86, 10, 65, 82, 69, 93, + 78, 66, 71, 2, 67, 65, 15, 0, 67, 6, 12, 72, + 72, 1, 99, 84, 66, 95, 66, 76, 13, 64, 79, 24, + 1, 65, 84, 25, 77, 90, 1, 99, 9, 21, 20, 16, + 21, 15, 4, 18, 21, 78, 0, 9, 3, 1, 76, 65, 69, + 72, 66, 73, 81, 68, 71, 101, 82, 77, 89, 84, + 89, 24, 31, 22, 21, 
20, 12, 7, 12, 1, 68, 0, + 75, 86, 86, 109, 2, 0, 80, 67, 74, 77, 82, 86, + 90, 94, 94, 109, 79, 97, 107, 95, 101, 64, 35, + 25, 16, 6, 5, 65, 67, 71, 70, 8, 48, 31, 24, + 17, 35, 13, 8, 0, 0, 65, 42, 26, 15, 7, 17, 5, + 64, 67, 71, 72, 24, 9, 4, 65, 7, 66, 72, 74, + 3, 42, 24, 15, 8, 22, 7, 4, 3, 65, 62, 79, 70, + 2, 66, 68, 1, 8, 9, 0, 8, 14, 18, 76, 71, 74, + 66, 17, 69, 71, 0, 7, 17, 81, 72, 10, 13, 84, + 84, 84, 77, 72, 3, 72, 67, 69, 64, 5, 75, 70, + 69, 1, 80, 73, 80, 83, 0, 99, 76, 1, 70, 13, + 70, 65, 22, 68, 71, 65, 18, 79, 75, 97, 24, + 31, 32, 22, 10, 23, 19, 14, 13, 11, 10, 6, 75, + 78, 85, 71, 68, 78, 96, 78, 77, 81, 79, 78, + 72, 73, 87, 89, 82, 17, 16, 17, 10, 67, 5, 2, + 75, 67, 73, 74, 87, 87, 86, 87, 1, 5, 79, 1, + 0, 73, 79, 80, 81, 83, 83, 88, 98, 96, 102, + 104, 108, 90, 100, 106, 81, 67, 73, 3, 5, 6, + 51, 16, 15, 68, 25, 61, 47, 62, 14, 25, 18, + 75, 96, 102, 107, 126, 126, 124, 68, 43, 27, + 18, 9, 13, 2, 66, 68, 71, 74, 67, 7, 0, 16, + 13, 5, 9, 13, 16, 17, 9, 20, 17, 0, 26, 20, 8, + 1, 68, 74, 98, 101, 98 }, + + { + + 32, + 5, 80, 32, 5, 80, 1, 13, 24, 12, 67, 86, 8, + 15, 52, 14, 31, 68, 16, 24, 64, 76, 0, 89, + 105, 0, 67, 126, 126, 124, 43, 6, 69, 16, 24, + 64, 72, 8, 16, 65, 1, 67, 70, 68, 83, 74, 92, + 5, 69, 70, 65, 78, 74, 88, 13, 1, 64, 68, 8, + 3, 22, 0, 0, 0, 1, 95, 97, 8, 71, 68, 52, 6, + 69, 87, 71, 82, 6, 5, 64, 8, 71, 10, 64, 65, + 6, 83, 87, 85, 89, 28, 0, 14, 73, 0, 74, 66, + 6, 71, 83, 74, 80, 17, 70, 4, 74, 76, 2, 70, + 4, 10, 5, 7, 18, 15, 79, 67, 2, 66, 2, 73, 11, + 68, 5, 19, 0, 16, 17, 8, 88, 10, 66, 83, 70, + 94, 77, 65, 70, 3, 66, 64, 16, 1, 66, 7, 13, + 72, 72, 1, 100, 83, 65, 95, 65, 76, 14, 64, + 80, 25, 1, 65, 85, 26, 77, 91, 1, 99, 8, 20, + 20, 15, 20, 14, 3, 18, 21, 79, 64, 9, 3, 1, + 77, 65, 70, 73, 67, 74, 82, 68, 72, 103, 83, + 78, 90, 85, 89, 22, 30, 20, 19, 19, 10, 5, 10, + 64, 70, 65, 77, 88, 88, 111, 0, 65, 82, 69, + 76, 79, 84, 88, 92, 97, 96, 112, 80, 99, 108, + 95, 101, 64, 35, 25, 16, 6, 5, 65, 67, 
71, 70, + 8, 48, 31, 24, 17, 36, 14, 9, 0, 1, 64, 43, + 27, 15, 8, 18, 6, 0, 67, 70, 71, 26, 10, 4, + 64, 8, 65, 71, 73, 4, 42, 25, 15, 8, 23, 8, 5, + 4, 64, 62, 79, 70, 3, 66, 68, 1, 8, 9, 0, 8, + 15, 18, 76, 71, 74, 67, 18, 70, 72, 64, 7, 17, + 82, 73, 10, 13, 85, 85, 84, 76, 71, 4, 71, 66, + 68, 64, 6, 75, 70, 69, 1, 81, 73, 80, 82, 1, + 99, 76, 1, 70, 14, 70, 65, 23, 68, 71, 65, 19, + 80, 75, 97, 23, 31, 31, 22, 9, 22, 18, 13, 13, + 10, 9, 5, 76, 79, 86, 72, 69, 79, 98, 79, 78, + 82, 80, 79, 72, 72, 87, 89, 81, 15, 14, 15, 8, + 69, 3, 0, 77, 69, 75, 76, 89, 89, 88, 88, 64, + 3, 80, 64, 65, 75, 81, 82, 83, 85, 85, 90, + 101, 98, 104, 106, 109, 92, 102, 107, 80, 65, + 72, 4, 6, 7, 53, 17, 16, 68, 26, 62, 49, 62, + 15, 23, 16, 77, 99, 104, 110, 126, 126, 125, + 68, 43, 27, 18, 9, 13, 2, 66, 68, 71, 73, 66, + 8, 1, 17, 14, 5, 10, 14, 17, 18, 9, 20, 17, 0, + 25, 19, 7, 0, 70, 76, 100, 103, 99 }, + + { + + 31, + 5, 81, 31, 5, 81, 3, 14, 25, 12, 67, 87, 7, + 14, 52, 14, 33, 68, 16, 25, 64, 77, 0, 90, + 106, 64, 68, 126, 126, 125, 46, 7, 69, 16, 25, + 64, 71, 9, 17, 66, 2, 66, 69, 69, 83, 74, 92, + 5, 68, 70, 65, 78, 73, 88, 13, 1, 64, 68, 8, + 3, 22, 0, 0, 0, 2, 95, 97, 8, 71, 69, 52, 6, + 69, 86, 71, 82, 6, 6, 1, 10, 70, 11, 1, 64, 8, + 82, 86, 84, 88, 29, 0, 15, 73, 1, 74, 66, 8, + 71, 82, 74, 79, 17, 70, 4, 72, 76, 3, 69, 4, + 10, 5, 8, 18, 15, 80, 67, 2, 66, 2, 73, 11, + 68, 5, 19, 64, 16, 17, 8, 90, 10, 66, 85, 71, + 95, 77, 64, 70, 3, 65, 64, 17, 1, 66, 7, 15, + 72, 72, 1, 101, 83, 64, 95, 65, 76, 15, 0, 81, + 25, 1, 65, 86, 27, 78, 92, 1, 100, 6, 19, 19, + 15, 19, 14, 3, 17, 20, 80, 65, 9, 3, 1, 77, + 66, 71, 74, 68, 74, 83, 69, 72, 105, 84, 78, + 91, 86, 89, 21, 28, 19, 17, 17, 8, 3, 8, 67, + 73, 67, 80, 91, 91, 114, 65, 67, 83, 71, 79, + 82, 87, 90, 95, 99, 99, 114, 81, 101, 109, 96, + 101, 0, 36, 25, 17, 6, 5, 65, 67, 71, 69, 8, + 48, 32, 24, 17, 37, 14, 9, 1, 1, 0, 44, 27, + 16, 8, 19, 6, 0, 66, 69, 70, 27, 11, 5, 64, 8, + 65, 70, 71, 4, 42, 25, 15, 9, 23, 
8, 6, 4, 0, + 62, 78, 69, 3, 66, 68, 1, 8, 9, 0, 9, 15, 19, + 76, 71, 75, 67, 18, 71, 73, 65, 6, 17, 83, 74, + 10, 13, 85, 86, 85, 75, 70, 5, 71, 66, 68, 0, + 6, 75, 69, 68, 2, 81, 73, 80, 82, 2, 99, 76, + 1, 70, 15, 70, 65, 24, 69, 71, 65, 19, 81, 75, + 98, 22, 30, 31, 21, 8, 22, 18, 12, 12, 10, 8, + 4, 77, 80, 87, 74, 70, 81, 100, 81, 80, 83, + 80, 79, 72, 72, 86, 90, 80, 13, 12, 13, 6, 72, + 1, 65, 80, 71, 78, 78, 92, 91, 89, 88, 65, 2, + 82, 66, 68, 77, 83, 84, 85, 87, 88, 92, 103, + 100, 106, 107, 111, 94, 104, 109, 79, 64, 71, + 5, 7, 8, 54, 18, 17, 68, 27, 62, 50, 62, 16, + 22, 15, 80, 102, 107, 112, 126, 126, 126, 67, + 43, 27, 18, 9, 13, 2, 66, 68, 70, 72, 66, 9, + 2, 18, 14, 6, 11, 14, 17, 18, 9, 21, 17, 0, + 24, 17, 5, 65, 71, 77, 103, 104, 100 }, + + { + + 30, + 5, 81, 30, 5, 81, 5, 16, 25, 12, 68, 89, 5, + 12, 52, 14, 36, 69, 17, 25, 64, 78, 0, 91, + 107, 64, 70, 126, 126, 125, 49, 8, 69, 17, 25, + 64, 71, 10, 17, 66, 2, 66, 69, 69, 84, 73, 92, + 5, 68, 69, 66, 78, 73, 88, 14, 2, 64, 67, 9, + 3, 22, 0, 0, 0, 2, 95, 97, 9, 72, 69, 52, 6, + 69, 85, 70, 82, 7, 8, 2, 12, 68, 12, 2, 1, 10, + 81, 85, 83, 88, 30, 0, 16, 72, 1, 73, 65, 10, + 71, 82, 73, 78, 17, 69, 5, 71, 75, 3, 69, 5, + 10, 5, 9, 19, 16, 80, 68, 2, 66, 2, 73, 11, + 69, 4, 19, 64, 16, 17, 7, 92, 10, 67, 87, 72, + 96, 76, 0, 69, 4, 64, 0, 18, 2, 65, 8, 16, 72, + 72, 2, 102, 82, 0, 95, 65, 76, 15, 0, 82, 26, + 1, 65, 88, 28, 78, 93, 1, 101, 5, 18, 19, 14, + 18, 13, 2, 16, 20, 81, 66, 9, 3, 1, 78, 67, + 71, 75, 68, 75, 84, 70, 73, 107, 85, 79, 92, + 87, 89, 19, 27, 17, 15, 15, 6, 0, 6, 69, 75, + 69, 82, 93, 93, 116, 67, 68, 85, 74, 81, 84, + 89, 93, 97, 102, 101, 117, 82, 102, 111, 97, + 101, 0, 36, 26, 17, 6, 5, 65, 67, 71, 69, 9, + 49, 32, 24, 17, 38, 15, 10, 1, 2, 1, 45, 28, + 16, 8, 20, 7, 1, 66, 68, 70, 28, 11, 5, 64, 9, + 64, 70, 70, 5, 43, 25, 15, 9, 24, 9, 7, 5, 1, + 62, 78, 69, 4, 66, 68, 1, 8, 9, 1, 9, 15, 19, + 76, 71, 75, 68, 19, 72, 74, 66, 5, 17, 84, 75, + 9, 13, 86, 87, 85, 74, 70, 
6, 70, 65, 67, 0, + 7, 75, 69, 68, 2, 82, 73, 80, 81, 3, 99, 76, + 2, 70, 15, 70, 65, 25, 69, 71, 65, 20, 81, 76, + 99, 21, 30, 30, 20, 7, 21, 17, 11, 11, 9, 7, + 3, 78, 81, 88, 75, 72, 82, 102, 82, 81, 84, + 81, 80, 72, 72, 86, 90, 79, 11, 10, 11, 3, 75, + 64, 67, 82, 73, 80, 80, 94, 93, 91, 89, 67, 0, + 83, 68, 70, 79, 85, 86, 87, 89, 90, 94, 106, + 102, 108, 109, 112, 96, 106, 110, 78, 0, 70, + 5, 8, 9, 56, 19, 18, 68, 28, 62, 52, 62, 16, + 20, 13, 82, 105, 110, 115, 126, 126, 126, 67, + 43, 27, 18, 9, 13, 2, 66, 68, 70, 71, 65, 10, + 3, 19, 15, 6, 12, 15, 18, 19, 10, 21, 18, 0, + 22, 16, 3, 67, 73, 79, 105, 106, 101 }, + + { + + 28, + 4, 81, 28, 4, 81, 6, 17, 25, 12, 68, 90, 4, + 11, 52, 14, 38, 69, 18, 26, 64, 79, 0, 92, + 109, 65, 72, 126, 126, 126, 51, 9, 69, 18, 26, + 64, 71, 11, 17, 67, 2, 66, 68, 70, 84, 73, 93, + 5, 68, 69, 66, 79, 73, 88, 14, 2, 64, 67, 9, + 3, 22, 0, 0, 0, 3, 96, 97, 9, 73, 69, 52, 6, + 69, 84, 69, 82, 7, 9, 4, 14, 67, 13, 4, 2, 11, + 80, 85, 82, 87, 30, 0, 17, 71, 1, 73, 65, 11, + 71, 82, 73, 77, 17, 69, 5, 70, 75, 4, 69, 5, + 10, 5, 9, 19, 16, 81, 68, 2, 66, 2, 73, 11, + 69, 3, 19, 65, 16, 17, 7, 94, 10, 67, 89, 73, + 97, 75, 1, 68, 5, 64, 0, 19, 2, 65, 9, 17, 72, + 72, 2, 104, 82, 1, 95, 65, 77, 16, 0, 83, 27, + 1, 65, 89, 29, 79, 94, 1, 102, 3, 17, 18, 13, + 17, 12, 1, 15, 19, 83, 67, 8, 3, 1, 78, 68, + 72, 76, 69, 76, 85, 71, 74, 109, 87, 80, 93, + 88, 89, 17, 25, 15, 13, 13, 4, 65, 4, 72, 78, + 71, 85, 96, 96, 119, 69, 70, 87, 76, 83, 87, + 92, 95, 100, 105, 103, 119, 83, 104, 112, 98, + 102, 0, 36, 26, 17, 6, 5, 65, 67, 71, 68, 9, + 49, 32, 24, 17, 39, 15, 10, 1, 2, 1, 46, 28, + 16, 8, 20, 7, 1, 65, 67, 69, 29, 12, 5, 64, + 10, 0, 69, 69, 5, 43, 25, 15, 9, 25, 9, 7, 6, + 1, 62, 78, 69, 4, 66, 68, 1, 8, 9, 1, 9, 15, + 20, 76, 71, 75, 69, 19, 73, 75, 67, 4, 17, 86, + 77, 9, 13, 87, 88, 86, 74, 69, 7, 70, 65, 67, + 1, 8, 76, 69, 68, 2, 83, 73, 80, 81, 3, 100, + 76, 2, 70, 16, 70, 65, 25, 69, 72, 65, 21, 82, + 76, 100, 20, 29, 30, 
19, 5, 20, 16, 10, 10, 8, + 6, 2, 79, 82, 89, 77, 73, 84, 104, 84, 82, 85, + 82, 80, 72, 72, 86, 91, 79, 8, 7, 9, 1, 78, + 67, 70, 85, 75, 82, 82, 97, 95, 92, 90, 69, + 65, 85, 70, 72, 82, 88, 89, 90, 91, 92, 97, + 108, 104, 111, 111, 113, 98, 108, 112, 77, 1, + 69, 6, 9, 9, 57, 20, 18, 68, 28, 62, 53, 62, + 17, 19, 11, 85, 108, 113, 118, 126, 126, 126, + 67, 43, 27, 18, 9, 13, 2, 66, 68, 69, 71, 64, + 11, 3, 20, 15, 7, 12, 15, 18, 19, 10, 22, 18, + 64, 21, 14, 1, 69, 75, 81, 107, 108, 102 }, + + { + + 27, + 4, 81, 27, 4, 81, 8, 18, 26, 12, 68, 91, 3, + 10, 52, 14, 40, 69, 19, 27, 64, 79, 1, 93, + 110, 66, 74, 126, 126, 126, 54, 11, 69, 19, + 27, 64, 70, 12, 18, 68, 2, 65, 67, 70, 84, 73, + 93, 5, 68, 68, 66, 79, 73, 88, 14, 2, 0, 67, + 9, 3, 22, 0, 0, 0, 4, 96, 97, 9, 74, 69, 52, + 6, 69, 83, 68, 82, 7, 10, 6, 16, 65, 15, 6, 3, + 13, 79, 84, 81, 86, 31, 1, 18, 70, 1, 72, 64, + 13, 71, 81, 73, 75, 18, 69, 5, 68, 75, 5, 69, + 5, 11, 6, 10, 19, 16, 81, 68, 3, 66, 2, 72, + 11, 69, 3, 19, 66, 16, 17, 7, 96, 10, 67, 90, + 74, 97, 74, 2, 67, 6, 0, 1, 20, 3, 65, 10, 18, + 72, 72, 2, 105, 81, 2, 95, 64, 77, 17, 0, 84, + 28, 1, 65, 90, 30, 80, 95, 1, 102, 2, 16, 18, + 12, 16, 11, 1, 15, 19, 84, 68, 8, 3, 1, 78, + 68, 73, 77, 70, 77, 86, 71, 74, 110, 88, 80, + 94, 89, 89, 16, 24, 14, 12, 12, 2, 67, 2, 74, + 80, 73, 87, 99, 98, 122, 70, 72, 88, 78, 85, + 89, 94, 97, 102, 107, 105, 121, 83, 106, 113, + 98, 102, 0, 37, 26, 17, 7, 5, 65, 66, 71, 67, + 9, 49, 32, 25, 17, 40, 16, 11, 2, 3, 2, 47, + 29, 17, 9, 21, 8, 1, 64, 66, 68, 31, 13, 6, 0, + 11, 1, 68, 68, 6, 43, 26, 16, 9, 26, 10, 8, 7, + 2, 62, 77, 68, 5, 66, 68, 1, 9, 10, 1, 9, 16, + 21, 76, 71, 75, 69, 19, 74, 75, 68, 4, 17, 87, + 78, 9, 13, 87, 89, 87, 73, 68, 8, 70, 64, 67, + 2, 9, 76, 69, 68, 3, 83, 73, 80, 81, 4, 100, + 76, 2, 70, 17, 70, 65, 26, 69, 72, 65, 22, 83, + 76, 100, 19, 29, 30, 19, 4, 19, 15, 9, 10, 7, + 5, 2, 79, 83, 90, 78, 74, 86, 106, 85, 83, 85, + 82, 80, 71, 71, 86, 92, 78, 6, 5, 7, 64, 80, + 69, 72, 
87, 77, 84, 84, 99, 97, 93, 91, 71, + 66, 86, 72, 74, 84, 90, 91, 92, 93, 94, 99, + 110, 105, 113, 113, 114, 100, 110, 114, 76, 3, + 67, 7, 10, 10, 59, 21, 19, 68, 29, 62, 54, 62, + 18, 18, 10, 87, 111, 115, 121, 126, 126, 126, + 67, 43, 27, 18, 9, 14, 2, 66, 68, 68, 70, 0, + 12, 4, 22, 16, 8, 13, 16, 19, 20, 10, 23, 18, + 64, 20, 13, 0, 70, 77, 83, 109, 110, 102 }, + + { + + 26, + 4, 81, 26, 4, 81, 10, 20, 26, 12, 69, 93, 1, + 8, 52, 14, 43, 70, 20, 27, 64, 80, 1, 94, 111, + 66, 76, 126, 126, 126, 57, 12, 69, 20, 27, 64, + 70, 13, 18, 68, 2, 65, 67, 70, 85, 72, 93, 5, + 68, 67, 67, 79, 73, 88, 15, 3, 0, 66, 10, 3, + 22, 0, 0, 0, 4, 96, 97, 10, 75, 69, 52, 6, 69, + 82, 67, 82, 8, 12, 7, 18, 64, 16, 7, 5, 15, + 78, 83, 80, 86, 32, 1, 19, 69, 1, 72, 0, 15, + 71, 81, 72, 74, 18, 68, 6, 67, 74, 5, 69, 6, + 11, 6, 11, 20, 17, 82, 69, 3, 66, 2, 72, 11, + 70, 2, 19, 66, 16, 17, 6, 98, 10, 68, 92, 75, + 98, 73, 3, 66, 7, 1, 2, 21, 4, 64, 11, 19, 72, + 72, 3, 106, 81, 3, 95, 64, 77, 17, 0, 85, 29, + 1, 65, 92, 31, 80, 96, 1, 103, 0, 15, 17, 11, + 15, 10, 0, 14, 19, 85, 69, 8, 3, 1, 79, 69, + 73, 78, 70, 78, 87, 72, 75, 112, 89, 81, 95, + 90, 89, 14, 23, 12, 10, 10, 0, 70, 0, 77, 82, + 75, 90, 101, 100, 124, 72, 73, 90, 81, 87, 91, + 96, 100, 104, 110, 107, 124, 84, 107, 115, 99, + 102, 0, 37, 27, 17, 7, 5, 65, 66, 71, 67, 10, + 50, 32, 25, 17, 41, 16, 12, 2, 4, 3, 48, 30, + 17, 9, 22, 8, 2, 64, 65, 68, 32, 13, 6, 0, 12, + 2, 68, 67, 6, 44, 26, 16, 9, 27, 11, 9, 8, 3, + 62, 77, 68, 5, 66, 68, 1, 9, 10, 2, 9, 16, 21, + 76, 71, 75, 70, 20, 75, 76, 69, 3, 17, 88, 79, + 8, 13, 88, 90, 87, 72, 68, 9, 69, 64, 66, 2, + 10, 76, 69, 68, 3, 84, 73, 80, 80, 5, 100, 76, + 3, 70, 17, 70, 65, 27, 69, 72, 65, 23, 83, 77, + 101, 18, 29, 29, 18, 3, 18, 14, 8, 9, 6, 4, 1, + 80, 84, 91, 80, 76, 87, 108, 86, 84, 86, 83, + 81, 71, 71, 86, 92, 77, 4, 3, 5, 67, 83, 71, + 74, 90, 79, 86, 86, 101, 99, 95, 92, 73, 68, + 87, 74, 76, 86, 92, 93, 94, 95, 96, 101, 113, + 107, 115, 115, 115, 102, 112, 
115, 75, 4, 66, + 7, 11, 11, 61, 22, 20, 68, 30, 62, 56, 62, 18, + 16, 8, 90, 114, 118, 124, 126, 126, 126, 67, + 43, 27, 18, 9, 14, 2, 66, 68, 68, 69, 1, 13, + 5, 23, 17, 8, 14, 17, 20, 21, 11, 23, 19, 64, + 18, 11, 65, 72, 79, 85, 111, 112, 103 }, + + { + + 25, + 4, 82, 25, 4, 82, 12, 21, 27, 12, 69, 94, 0, + 7, 52, 14, 45, 70, 20, 28, 64, 81, 1, 95, 112, + 67, 77, 126, 126, 126, 60, 13, 69, 20, 28, 64, + 69, 14, 19, 69, 3, 64, 66, 71, 85, 72, 93, 5, + 67, 67, 67, 79, 72, 88, 15, 3, 0, 66, 10, 3, + 22, 0, 0, 0, 5, 96, 97, 10, 75, 70, 52, 6, 69, + 81, 67, 82, 8, 13, 9, 20, 1, 17, 9, 6, 17, 77, + 82, 79, 85, 33, 1, 20, 69, 2, 71, 0, 17, 71, + 80, 72, 73, 18, 68, 6, 65, 74, 6, 68, 6, 11, + 6, 12, 20, 17, 82, 69, 3, 66, 2, 72, 11, 70, + 2, 19, 67, 16, 17, 6, 100, 10, 68, 94, 76, 99, + 73, 4, 66, 7, 2, 2, 22, 4, 64, 11, 21, 72, 72, + 3, 107, 80, 4, 95, 64, 77, 18, 1, 86, 29, 1, + 65, 93, 32, 81, 97, 1, 104, 64, 14, 17, 11, + 14, 10, 0, 13, 18, 86, 70, 8, 3, 1, 79, 70, + 74, 79, 71, 78, 88, 73, 75, 114, 90, 81, 96, + 91, 89, 13, 21, 11, 8, 8, 65, 72, 65, 79, 85, + 77, 92, 104, 103, 126, 74, 75, 91, 83, 90, 94, + 99, 102, 107, 112, 110, 126, 85, 109, 116, + 100, 102, 1, 38, 27, 18, 7, 5, 65, 66, 71, 66, + 10, 50, 33, 25, 17, 42, 17, 12, 3, 4, 4, 49, + 30, 18, 9, 23, 9, 2, 0, 64, 67, 33, 14, 7, 0, + 12, 2, 67, 65, 7, 44, 26, 16, 10, 27, 11, 10, + 8, 4, 62, 76, 67, 6, 66, 68, 1, 9, 10, 2, 10, + 16, 22, 76, 71, 76, 70, 20, 76, 77, 70, 2, 17, + 89, 80, 8, 13, 88, 91, 88, 71, 67, 10, 69, 0, + 66, 3, 10, 76, 68, 67, 4, 84, 73, 80, 80, 6, + 100, 76, 3, 70, 18, 70, 65, 28, 70, 72, 65, + 23, 84, 77, 102, 17, 28, 29, 17, 2, 18, 14, 7, + 8, 6, 3, 0, 81, 85, 92, 81, 77, 89, 110, 88, + 86, 87, 83, 81, 71, 71, 85, 93, 76, 2, 1, 3, + 69, 86, 73, 76, 92, 81, 89, 88, 104, 101, 96, + 92, 74, 69, 89, 76, 79, 88, 94, 95, 96, 97, + 99, 103, 115, 109, 117, 116, 117, 104, 114, + 117, 74, 5, 65, 8, 12, 12, 62, 23, 21, 68, 31, + 62, 57, 62, 19, 15, 7, 92, 117, 121, 126, 126, + 126, 126, 66, 43, 
27, 18, 9, 14, 2, 66, 68, + 67, 68, 1, 14, 6, 24, 17, 9, 15, 17, 20, 21, + 11, 24, 19, 64, 17, 10, 67, 74, 80, 86, 114, + 113, 104 }, + + { + + 23, + 4, 82, 23, 4, 82, 13, 23, 27, 12, 70, 96, 65, + 6, 52, 14, 47, 70, 21, 29, 64, 82, 1, 96, 113, + 67, 79, 126, 126, 126, 62, 14, 69, 21, 29, 64, + 69, 15, 19, 69, 3, 64, 65, 71, 86, 72, 93, 5, + 67, 66, 67, 80, 72, 88, 16, 3, 0, 66, 11, 3, + 22, 0, 0, 0, 5, 97, 97, 11, 76, 70, 52, 6, 69, + 80, 66, 82, 9, 14, 11, 22, 2, 18, 11, 7, 19, + 76, 82, 78, 84, 33, 1, 21, 68, 2, 71, 1, 19, + 71, 80, 71, 72, 18, 68, 7, 64, 73, 7, 68, 6, + 11, 6, 12, 21, 18, 83, 69, 3, 66, 2, 72, 11, + 71, 1, 19, 68, 16, 17, 6, 102, 10, 69, 96, 77, + 100, 72, 5, 65, 8, 2, 3, 23, 5, 0, 12, 22, 72, + 72, 3, 108, 80, 5, 95, 64, 77, 19, 1, 87, 30, + 1, 65, 94, 33, 81, 98, 1, 105, 66, 13, 16, 10, + 13, 9, 64, 12, 18, 88, 71, 7, 3, 1, 80, 71, + 75, 80, 72, 79, 89, 74, 76, 116, 91, 82, 97, + 92, 89, 11, 20, 9, 6, 6, 67, 74, 67, 82, 87, + 79, 95, 106, 105, 126, 76, 77, 93, 85, 92, 96, + 101, 104, 109, 115, 112, 126, 86, 111, 117, + 101, 102, 1, 38, 27, 18, 7, 5, 65, 66, 71, 66, + 10, 50, 33, 25, 17, 43, 17, 13, 3, 5, 4, 50, + 31, 18, 9, 23, 9, 3, 0, 0, 66, 34, 15, 7, 0, + 13, 3, 66, 64, 7, 44, 26, 16, 10, 28, 12, 10, + 9, 5, 62, 76, 67, 6, 66, 68, 1, 9, 10, 2, 10, + 16, 22, 76, 71, 76, 71, 21, 77, 78, 71, 1, 17, + 91, 81, 8, 13, 89, 92, 88, 71, 66, 11, 68, 0, + 65, 3, 11, 77, 68, 67, 4, 85, 73, 80, 79, 7, + 100, 76, 3, 70, 19, 70, 65, 29, 70, 73, 65, + 24, 85, 77, 103, 16, 28, 28, 16, 1, 17, 13, 6, + 7, 5, 2, 64, 82, 86, 93, 83, 78, 90, 112, 89, + 87, 88, 84, 82, 71, 71, 85, 93, 76, 64, 65, 1, + 71, 89, 75, 78, 95, 83, 91, 90, 106, 103, 98, + 93, 76, 71, 90, 78, 81, 90, 96, 98, 98, 99, + 101, 105, 118, 111, 119, 118, 118, 106, 116, + 118, 73, 6, 64, 9, 13, 13, 62, 24, 22, 68, 32, + 62, 59, 62, 20, 13, 5, 95, 120, 124, 126, 126, + 126, 126, 66, 43, 27, 18, 9, 14, 2, 66, 68, + 67, 67, 2, 15, 6, 25, 18, 9, 16, 18, 21, 22, + 11, 24, 19, 64, 16, 8, 69, 76, 82, 
88, 116, + 115, 105 }, + + { + + 22, + 4, 82, 22, 4, 82, 15, 24, 27, 12, 70, 97, 66, + 4, 52, 14, 50, 71, 22, 29, 64, 82, 2, 97, 114, + 68, 81, 126, 126, 126, 62, 16, 69, 22, 29, 64, + 69, 16, 19, 70, 3, 64, 65, 71, 86, 71, 93, 5, + 67, 65, 68, 80, 72, 88, 16, 4, 1, 65, 11, 3, + 22, 0, 0, 0, 6, 97, 97, 11, 77, 70, 52, 6, 69, + 79, 65, 82, 9, 16, 12, 24, 4, 19, 12, 9, 21, + 75, 81, 77, 84, 34, 1, 22, 67, 2, 70, 2, 21, + 71, 80, 71, 70, 19, 67, 7, 0, 73, 7, 68, 7, + 12, 7, 13, 21, 18, 83, 70, 3, 66, 2, 72, 11, + 71, 0, 19, 68, 16, 17, 5, 104, 10, 69, 97, 78, + 101, 71, 6, 64, 9, 3, 4, 24, 6, 0, 13, 23, 72, + 72, 4, 109, 79, 6, 95, 0, 77, 19, 1, 88, 31, + 1, 65, 96, 34, 82, 99, 1, 105, 67, 12, 16, 9, + 12, 8, 65, 12, 18, 89, 72, 7, 3, 1, 80, 71, + 75, 81, 72, 80, 90, 74, 77, 118, 92, 83, 98, + 93, 89, 9, 19, 7, 4, 5, 69, 77, 69, 84, 89, + 81, 97, 109, 107, 126, 78, 78, 95, 88, 94, 98, + 103, 107, 111, 118, 114, 126, 87, 112, 119, + 101, 102, 1, 38, 28, 18, 7, 5, 65, 66, 71, 65, + 11, 51, 33, 25, 17, 44, 18, 14, 3, 6, 5, 51, + 32, 18, 10, 24, 10, 3, 1, 1, 66, 36, 15, 7, 1, + 14, 4, 66, 0, 8, 45, 27, 16, 10, 29, 13, 11, + 10, 6, 62, 76, 67, 7, 66, 68, 1, 9, 10, 3, 10, + 17, 23, 76, 71, 76, 72, 21, 78, 79, 72, 1, 17, + 92, 82, 7, 13, 90, 93, 89, 70, 66, 12, 68, 1, + 65, 4, 12, 77, 68, 67, 4, 86, 73, 80, 79, 8, + 100, 76, 4, 70, 19, 70, 65, 30, 70, 73, 65, + 25, 85, 78, 103, 15, 28, 28, 16, 0, 16, 12, 5, + 7, 4, 1, 65, 83, 87, 94, 84, 80, 92, 114, 90, + 88, 89, 85, 82, 71, 70, 85, 94, 75, 66, 67, + 64, 74, 91, 77, 80, 97, 85, 93, 92, 108, 105, + 99, 94, 78, 73, 91, 80, 83, 92, 98, 100, 100, + 101, 103, 107, 120, 113, 121, 120, 119, 108, + 118, 120, 72, 8, 0, 9, 14, 14, 62, 25, 23, 68, + 33, 62, 60, 62, 20, 12, 3, 97, 123, 126, 126, + 126, 126, 126, 66, 43, 27, 18, 9, 14, 2, 66, + 68, 66, 66, 3, 16, 7, 26, 19, 10, 17, 19, 22, + 23, 12, 25, 20, 64, 14, 7, 70, 77, 84, 90, + 118, 117, 106 }, + + { + + 21, + 4, 82, 21, 4, 82, 17, 26, 28, 12, 71, 99, 68, + 3, 52, 14, 52, 71, 
23, 30, 64, 83, 2, 98, 115, + 68, 83, 126, 126, 126, 62, 17, 69, 23, 30, 64, + 68, 17, 20, 70, 3, 0, 64, 72, 87, 71, 93, 5, + 67, 65, 68, 80, 72, 88, 17, 4, 1, 65, 12, 3, + 22, 0, 0, 0, 6, 97, 97, 12, 78, 70, 52, 6, 69, + 78, 64, 82, 10, 17, 14, 26, 5, 20, 14, 10, 23, + 74, 80, 76, 83, 35, 1, 23, 66, 2, 70, 2, 23, + 71, 79, 70, 69, 19, 67, 8, 2, 72, 8, 68, 7, + 12, 7, 14, 22, 19, 84, 70, 3, 66, 2, 72, 11, + 72, 0, 19, 69, 16, 17, 5, 106, 10, 70, 99, 79, + 102, 70, 7, 0, 10, 4, 4, 25, 6, 1, 14, 24, 72, + 72, 4, 110, 79, 7, 95, 0, 77, 20, 1, 89, 32, + 1, 65, 97, 35, 82, 100, 1, 106, 69, 11, 15, 8, + 11, 7, 65, 11, 17, 90, 73, 7, 3, 1, 81, 72, + 76, 82, 73, 81, 91, 75, 77, 120, 93, 83, 99, + 94, 89, 8, 17, 6, 2, 3, 71, 79, 71, 87, 92, + 83, 100, 111, 110, 126, 80, 80, 96, 90, 96, + 101, 106, 109, 114, 120, 116, 126, 88, 114, + 120, 102, 102, 1, 39, 28, 18, 7, 5, 65, 66, + 71, 65, 11, 51, 33, 25, 17, 45, 18, 14, 4, 6, + 6, 52, 32, 19, 10, 25, 10, 4, 1, 2, 65, 37, + 16, 8, 1, 15, 5, 65, 1, 8, 45, 27, 16, 10, 30, + 13, 12, 11, 7, 62, 75, 66, 7, 66, 68, 1, 9, + 10, 3, 10, 17, 23, 76, 71, 76, 72, 22, 79, 80, + 73, 0, 17, 93, 83, 7, 13, 90, 94, 89, 69, 65, + 13, 67, 1, 64, 4, 13, 77, 68, 67, 5, 86, 73, + 80, 78, 9, 100, 76, 4, 70, 20, 70, 65, 31, 70, + 73, 65, 26, 86, 78, 104, 14, 27, 27, 15, 64, + 15, 11, 4, 6, 3, 0, 66, 84, 88, 95, 86, 81, + 93, 116, 92, 89, 90, 85, 83, 71, 70, 85, 94, + 74, 68, 69, 66, 76, 94, 79, 82, 100, 87, 95, + 94, 111, 107, 101, 95, 80, 74, 93, 82, 85, 94, + 100, 102, 102, 103, 105, 109, 123, 115, 123, + 122, 120, 110, 120, 121, 71, 9, 1, 10, 15, 15, + 62, 26, 24, 68, 34, 62, 62, 62, 21, 10, 2, + 100, 126, 126, 126, 126, 126, 126, 66, 43, 27, + 18, 9, 14, 2, 66, 68, 66, 65, 4, 17, 8, 27, + 19, 10, 18, 19, 22, 23, 12, 25, 20, 64, 13, 5, + 72, 79, 86, 92, 120, 119, 107 }, + + { + + 20, + 4, 82, 20, 4, 82, 19, 27, 28, 12, 71, 100, 69, + 2, 52, 14, 54, 71, 24, 31, 64, 84, 2, 99, 116, + 69, 85, 126, 126, 126, 62, 18, 69, 24, 31, 64, + 68, 18, 20, 71, 3, 
0, 0, 72, 87, 71, 93, 5, + 67, 64, 68, 80, 72, 88, 17, 4, 1, 65, 12, 3, + 22, 0, 0, 0, 7, 97, 97, 12, 79, 70, 52, 6, 69, + 77, 0, 82, 10, 18, 16, 28, 7, 21, 16, 11, 25, + 73, 79, 75, 82, 36, 1, 24, 65, 2, 69, 3, 25, + 71, 79, 70, 68, 19, 67, 8, 3, 72, 9, 68, 7, + 12, 7, 15, 22, 19, 84, 70, 3, 66, 2, 72, 11, + 72, 64, 19, 70, 16, 17, 5, 108, 10, 70, 101, + 80, 103, 69, 8, 1, 11, 5, 5, 26, 7, 1, 15, 25, + 72, 72, 4, 111, 78, 8, 95, 0, 77, 21, 1, 90, + 33, 1, 65, 98, 36, 83, 101, 1, 107, 70, 10, + 15, 7, 10, 6, 66, 10, 17, 91, 74, 7, 3, 1, 81, + 73, 77, 83, 74, 82, 92, 76, 78, 122, 94, 84, + 100, 95, 89, 6, 16, 4, 0, 1, 73, 81, 73, 89, + 94, 85, 102, 114, 112, 126, 82, 82, 98, 92, + 98, 103, 108, 111, 116, 123, 118, 126, 89, + 116, 121, 103, 102, 1, 39, 28, 18, 7, 5, 65, + 66, 71, 64, 11, 51, 33, 25, 17, 46, 19, 15, 4, + 7, 7, 53, 33, 19, 10, 26, 11, 4, 2, 3, 64, 38, + 17, 8, 1, 16, 6, 64, 2, 9, 45, 27, 16, 10, 31, + 14, 13, 12, 8, 62, 75, 66, 8, 66, 68, 1, 9, + 10, 3, 10, 17, 24, 76, 71, 76, 73, 22, 80, 81, + 74, 64, 17, 94, 84, 7, 13, 91, 95, 90, 68, 64, + 14, 67, 2, 64, 5, 14, 77, 68, 67, 5, 87, 73, + 80, 78, 10, 100, 76, 4, 70, 21, 70, 65, 32, + 70, 73, 65, 27, 87, 78, 105, 13, 27, 27, 14, + 65, 14, 10, 3, 5, 2, 64, 67, 85, 89, 96, 87, + 82, 95, 118, 93, 90, 91, 86, 83, 71, 70, 85, + 95, 73, 70, 71, 68, 78, 97, 81, 84, 102, 89, + 97, 96, 113, 109, 102, 96, 82, 76, 94, 84, 87, + 96, 102, 104, 104, 105, 107, 111, 125, 117, + 125, 124, 121, 112, 122, 123, 70, 10, 2, 11, + 16, 16, 62, 27, 25, 68, 35, 62, 62, 62, 22, 9, + 0, 102, 126, 126, 126, 126, 126, 126, 66, 43, + 27, 18, 9, 14, 2, 66, 68, 65, 64, 5, 18, 9, + 28, 20, 11, 19, 20, 23, 24, 12, 26, 20, 64, + 12, 4, 74, 81, 88, 94, 122, 121, 108 }, + + { + + 18, + 3, 83, 18, 3, 83, 20, 28, 28, 12, 72, 102, 71, + 0, 51, 14, 56, 72, 24, 31, 65, 85, 2, 101, + 118, 70, 87, 126, 126, 126, 62, 19, 70, 24, + 31, 65, 68, 19, 20, 72, 3, 0, 0, 73, 88, 71, + 94, 5, 67, 64, 69, 81, 72, 88, 17, 4, 1, 65, + 12, 2, 22, 0, 0, 0, 7, 
98, 97, 12, 80, 71, 52, + 5, 69, 76, 0, 82, 10, 19, 17, 29, 8, 22, 17, + 12, 26, 72, 79, 74, 82, 36, 1, 24, 65, 2, 69, + 3, 26, 71, 79, 70, 67, 19, 67, 8, 4, 72, 9, + 68, 7, 12, 7, 15, 22, 19, 85, 71, 3, 67, 2, + 72, 10, 73, 65, 19, 71, 15, 17, 4, 110, 9, 71, + 103, 81, 104, 69, 8, 1, 11, 5, 5, 27, 7, 1, + 15, 26, 73, 72, 4, 113, 78, 8, 95, 0, 78, 21, + 1, 91, 33, 1, 65, 100, 36, 84, 102, 1, 108, + 72, 9, 14, 6, 9, 5, 67, 9, 16, 93, 75, 6, 2, + 1, 82, 74, 78, 84, 75, 83, 93, 77, 79, 124, + 96, 85, 102, 97, 89, 4, 14, 2, 65, 64, 76, 84, + 76, 92, 97, 88, 105, 117, 115, 126, 84, 84, + 100, 95, 101, 106, 111, 114, 119, 126, 121, + 126, 90, 118, 123, 104, 103, 1, 39, 28, 18, 7, + 5, 66, 66, 71, 64, 11, 51, 33, 25, 17, 47, 19, + 15, 4, 7, 7, 53, 33, 19, 10, 26, 11, 4, 2, 4, + 64, 39, 17, 8, 1, 16, 6, 64, 3, 9, 45, 27, 16, + 10, 31, 14, 13, 12, 8, 62, 75, 66, 8, 66, 68, + 1, 9, 10, 3, 10, 17, 24, 76, 71, 77, 74, 22, + 81, 82, 75, 65, 16, 96, 86, 6, 12, 92, 97, 91, + 68, 64, 15, 67, 2, 64, 5, 14, 78, 68, 67, 5, + 88, 73, 80, 78, 10, 101, 76, 4, 70, 21, 71, + 65, 32, 71, 74, 65, 27, 88, 79, 106, 12, 26, + 26, 13, 67, 13, 9, 2, 4, 1, 65, 68, 86, 91, + 98, 89, 84, 97, 120, 95, 92, 92, 87, 84, 71, + 70, 85, 96, 73, 73, 74, 70, 81, 100, 84, 87, + 105, 92, 100, 99, 116, 112, 104, 97, 84, 78, + 96, 86, 90, 99, 105, 107, 107, 107, 110, 114, + 126, 119, 126, 126, 123, 114, 124, 125, 69, + 11, 3, 11, 16, 16, 62, 27, 25, 68, 35, 62, 62, + 62, 22, 7, 65, 105, 126, 126, 126, 126, 126, + 126, 66, 43, 26, 17, 9, 14, 2, 67, 68, 65, 64, + 5, 18, 9, 29, 20, 11, 19, 20, 23, 24, 12, 26, + 20, 65, 10, 2, 76, 83, 90, 96, 125, 123, 109 }, + + { + + 17, + 3, 83, 17, 3, 83, 22, 30, 29, 13, 72, 103, 72, + 64, 51, 14, 59, 72, 25, 32, 65, 85, 3, 102, + 119, 70, 88, 126, 126, 126, 62, 21, 70, 25, + 32, 65, 67, 21, 21, 72, 4, 1, 1, 73, 88, 70, + 94, 5, 66, 0, 69, 81, 71, 88, 18, 5, 2, 64, + 13, 2, 22, 0, 0, 0, 8, 98, 97, 13, 80, 71, 52, + 5, 69, 74, 1, 82, 11, 21, 19, 31, 10, 24, 19, + 14, 28, 70, 78, 
73, 81, 37, 2, 25, 64, 3, 68, + 4, 28, 70, 78, 69, 65, 20, 66, 9, 6, 71, 10, + 67, 8, 13, 8, 16, 23, 20, 85, 71, 4, 67, 2, + 71, 10, 73, 65, 19, 71, 15, 17, 4, 111, 9, 71, + 104, 82, 104, 68, 9, 2, 12, 6, 6, 28, 8, 2, + 16, 28, 73, 72, 5, 114, 77, 9, 95, 1, 78, 22, + 2, 91, 34, 1, 65, 101, 37, 84, 103, 1, 108, + 73, 9, 14, 6, 9, 5, 67, 9, 16, 94, 75, 6, 2, + 1, 82, 74, 78, 84, 75, 83, 94, 77, 79, 125, + 97, 85, 103, 98, 89, 3, 13, 1, 66, 65, 78, 86, + 78, 94, 99, 90, 107, 119, 117, 126, 85, 85, + 101, 97, 103, 108, 113, 116, 121, 126, 123, + 126, 90, 119, 124, 104, 103, 2, 40, 29, 19, 8, + 5, 66, 65, 70, 0, 12, 52, 34, 26, 17, 48, 20, + 16, 5, 8, 8, 54, 34, 20, 11, 27, 12, 5, 3, 6, + 0, 41, 18, 9, 2, 17, 7, 0, 5, 10, 46, 28, 17, + 11, 32, 15, 14, 13, 9, 62, 74, 65, 9, 66, 67, + 1, 10, 11, 4, 11, 18, 25, 75, 71, 77, 74, 23, + 81, 82, 76, 65, 16, 97, 87, 6, 12, 92, 98, 91, + 67, 0, 16, 66, 3, 0, 6, 15, 78, 67, 66, 6, 88, + 72, 79, 77, 11, 101, 76, 5, 70, 22, 71, 65, + 33, 71, 74, 64, 28, 88, 79, 106, 12, 26, 26, + 13, 68, 13, 9, 2, 4, 1, 65, 68, 86, 92, 99, + 90, 85, 98, 121, 96, 93, 92, 87, 84, 70, 69, + 84, 96, 72, 75, 76, 72, 83, 102, 86, 89, 107, + 94, 102, 101, 118, 114, 105, 97, 85, 79, 97, + 87, 92, 101, 107, 109, 109, 109, 112, 116, + 126, 120, 126, 126, 124, 115, 125, 126, 67, + 13, 5, 12, 17, 17, 62, 28, 26, 68, 36, 62, 62, + 62, 23, 6, 66, 107, 126, 126, 126, 126, 126, + 126, 65, 44, 26, 17, 9, 15, 2, 67, 68, 64, 0, + 6, 19, 10, 31, 21, 12, 20, 21, 24, 25, 13, 27, + 21, 65, 9, 1, 77, 84, 91, 97, 126, 124, 109 }, + + { + + 16, + 3, 83, 16, 3, 83, 24, 31, 29, 13, 72, 104, 73, + 65, 51, 14, 61, 72, 26, 33, 65, 86, 3, 103, + 120, 71, 90, 126, 126, 126, 62, 22, 70, 26, + 33, 65, 67, 22, 21, 73, 4, 1, 2, 73, 88, 70, + 94, 5, 66, 1, 69, 81, 71, 88, 18, 5, 2, 64, + 13, 2, 22, 0, 0, 0, 9, 98, 97, 13, 81, 71, 52, + 5, 69, 73, 2, 82, 11, 22, 21, 33, 11, 25, 21, + 15, 30, 69, 77, 72, 80, 38, 2, 26, 0, 3, 68, + 5, 30, 70, 78, 69, 64, 20, 66, 9, 7, 71, 11, + 67, 8, 13, 
8, 17, 23, 20, 86, 71, 4, 67, 2, + 71, 10, 73, 66, 19, 72, 15, 17, 4, 113, 9, 71, + 106, 83, 105, 67, 10, 3, 13, 7, 7, 29, 9, 2, + 17, 29, 73, 72, 5, 115, 77, 10, 95, 1, 78, 23, + 2, 92, 35, 1, 65, 102, 38, 85, 104, 1, 109, + 75, 8, 13, 5, 8, 4, 68, 8, 16, 95, 76, 6, 2, + 1, 82, 75, 79, 85, 76, 84, 95, 78, 80, 126, + 98, 86, 104, 99, 89, 1, 12, 64, 68, 67, 80, + 88, 80, 97, 101, 92, 110, 122, 119, 126, 87, + 87, 103, 99, 105, 110, 115, 118, 123, 126, + 125, 126, 91, 121, 125, 105, 103, 2, 40, 29, + 19, 8, 5, 66, 65, 70, 1, 12, 52, 34, 26, 17, + 49, 20, 17, 5, 9, 9, 55, 35, 20, 11, 28, 12, + 5, 4, 7, 1, 42, 19, 9, 2, 18, 8, 1, 6, 10, 46, + 28, 17, 11, 33, 16, 15, 14, 10, 62, 74, 65, 9, + 66, 67, 1, 10, 11, 4, 11, 18, 26, 75, 71, 77, + 75, 23, 82, 83, 77, 66, 16, 98, 88, 6, 12, 93, + 99, 92, 66, 1, 17, 66, 3, 0, 7, 16, 78, 67, + 66, 6, 89, 72, 79, 77, 12, 101, 76, 5, 70, 23, + 71, 65, 34, 71, 74, 64, 29, 89, 79, 107, 11, + 26, 26, 12, 69, 12, 8, 1, 3, 0, 66, 69, 87, + 93, 100, 92, 86, 100, 123, 97, 94, 93, 88, 84, + 70, 69, 84, 97, 71, 77, 78, 74, 85, 105, 88, + 91, 110, 96, 104, 103, 120, 116, 106, 98, 87, + 81, 98, 89, 94, 103, 109, 111, 111, 111, 114, + 118, 126, 122, 126, 126, 125, 117, 126, 126, + 66, 14, 6, 13, 18, 18, 62, 29, 27, 68, 37, 62, + 62, 62, 24, 5, 68, 110, 126, 126, 126, 126, + 126, 126, 65, 44, 26, 17, 9, 15, 2, 67, 68, 0, + 1, 7, 20, 11, 32, 22, 13, 21, 22, 25, 26, 13, + 28, 21, 65, 8, 64, 79, 86, 93, 99, 126, 126, + 110 }, + + { + + 15, + 3, 83, 15, 3, 83, 26, 33, 30, 13, 73, 106, 75, + 66, 51, 14, 62, 72, 27, 34, 65, 87, 3, 104, + 121, 71, 92, 126, 126, 126, 62, 23, 70, 27, + 34, 65, 66, 23, 22, 73, 4, 2, 3, 74, 89, 70, + 94, 5, 66, 1, 69, 81, 71, 88, 19, 5, 2, 64, + 14, 2, 22, 0, 0, 0, 9, 98, 97, 14, 82, 71, 52, + 5, 69, 72, 3, 82, 12, 23, 23, 35, 13, 26, 23, + 16, 32, 68, 76, 71, 79, 39, 2, 27, 1, 3, 67, + 5, 32, 70, 77, 68, 0, 20, 66, 10, 9, 70, 12, + 67, 8, 13, 8, 18, 24, 21, 86, 71, 4, 67, 2, + 71, 10, 74, 66, 19, 73, 15, 17, 4, 115, 9, 72, 
+ 108, 84, 106, 66, 11, 4, 14, 8, 7, 30, 9, 3, + 18, 30, 73, 72, 5, 116, 76, 11, 95, 1, 78, 24, + 2, 93, 36, 1, 65, 103, 39, 85, 105, 1, 110, + 76, 7, 13, 4, 7, 3, 68, 7, 15, 96, 77, 6, 2, + 1, 83, 76, 80, 86, 77, 85, 96, 79, 80, 126, + 99, 86, 105, 100, 89, 0, 10, 65, 70, 69, 82, + 90, 82, 99, 104, 94, 112, 124, 122, 126, 89, + 89, 104, 101, 107, 113, 118, 120, 126, 126, + 126, 126, 92, 123, 126, 106, 103, 2, 41, 29, + 19, 8, 5, 66, 65, 70, 1, 12, 52, 34, 26, 17, + 50, 21, 17, 6, 9, 10, 56, 35, 21, 11, 29, 13, + 6, 4, 8, 2, 43, 20, 10, 2, 19, 9, 2, 7, 11, + 46, 28, 17, 11, 34, 16, 16, 15, 11, 62, 73, + 64, 10, 66, 67, 1, 10, 11, 4, 11, 18, 26, 75, + 71, 77, 75, 24, 83, 84, 78, 67, 16, 99, 89, 6, + 12, 93, 100, 92, 65, 2, 18, 65, 4, 1, 7, 17, + 78, 67, 66, 7, 89, 72, 79, 76, 13, 101, 76, 5, + 70, 24, 71, 65, 35, 71, 74, 64, 30, 90, 79, + 108, 10, 25, 25, 11, 70, 11, 7, 0, 2, 64, 67, + 70, 88, 94, 101, 93, 87, 101, 125, 99, 95, 94, + 88, 85, 70, 69, 84, 97, 70, 79, 80, 76, 87, + 108, 90, 93, 112, 98, 106, 105, 123, 118, 108, + 99, 89, 82, 100, 91, 96, 105, 111, 113, 113, + 113, 116, 120, 126, 124, 126, 126, 126, 119, + 126, 126, 65, 15, 7, 14, 19, 19, 62, 30, 28, + 68, 38, 62, 62, 62, 25, 3, 69, 112, 126, 126, + 126, 126, 126, 126, 65, 44, 26, 17, 9, 15, 2, + 67, 68, 0, 2, 8, 21, 12, 33, 22, 13, 22, 22, + 25, 26, 13, 28, 21, 65, 7, 65, 81, 88, 95, + 101, 126, 126, 111 }, + + }, + + }; + +#endif diff --git a/decoder/ih264d_compute_bs.c b/decoder/ih264d_compute_bs.c new file mode 100755 index 0000000..4a6750a --- /dev/null +++ b/decoder/ih264d_compute_bs.c @@ -0,0 +1,2394 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_structs.h" +#include "ih264d_defs.h" +#include "ih264d_deblocking.h" +#include "string.h" +#include "ih264d_debug.h" +#include "ih264d_tables.h" + +UWORD16 ih264d_update_csbp_8x8(UWORD16 u2_luma_csbp) +{ + UWORD16 u2_mod_csbp; + + u2_mod_csbp = u2_luma_csbp; + + if(u2_mod_csbp & 0x0033) + { + u2_mod_csbp |= 0x0033; + } + + if(u2_mod_csbp & 0x00CC) + { + u2_mod_csbp |= 0x00CC; + } + + if(u2_mod_csbp & 0x3300) + { + u2_mod_csbp |= 0x3300; + } + + if(u2_mod_csbp & 0xCC00) + { + u2_mod_csbp |= 0xCC00; + } + + return u2_mod_csbp; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_fill_bs2_horz_vert */ +/* */ +/* Description : This function fills boundray strength (=2) for all horz */ +/* and vert edges of current mb based on coded sub block */ +/* pattern of current, top and left mb */ +/* Inputs : */ +/* pu4_bs : Base pointer of BS table which gets updated */ +/* u4_left_mb_csbp : left mb's coded sub block pattern */ +/* u4_top_mb_csbp : top mb's coded sub block pattern */ +/* u4_cur_mb_csbp : current mb's coded sub block pattern */ +/* */ +/* Globals : <Does it use any global variables?> */ +/* Processing : */ +/* */ +/* csbp for each 4x4 block in a mb is bit packet in reverse */ +/* raster scan order for 
each mb as shown below: */ +/* 15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0. */ +/* */ +/* BS=2 for a 4x4 edge if any of adjacent blocks forming edge */ +/* are coded. Keeping this in mind, bs=2 for all horz and vert */ +/* edges can be derived using a lookup table for each edge */ +/* after "ORing" the csbp values as follows: */ +/* (C means current Mb, T means top mb and L means left mb) */ +/* */ +/* All Horz edges: */ +/* 15C|14C|13C|12C|11C|10C|9C|8C|7C|6C|5C|4C|3C |2C |1C |0C */ +/* (or with) 11C|10C| 9C| 8C| 7C|6C |5C|4C|3C|2C|1C|0C|15T|14T|13T|12T */ +/* -----BS[3]-----|----BS[2]----|---BS[1]---|----BS[0]-----| */ +/* */ +/* All Vert edges: */ +/* 15C|14C|13C|12C|11C|10C|9C| 8C|7C|6C|5C|4C|3C |2C |1C |0C */ +/* (or with) 14C|13C|12C|15L|10C| 9C|8C|11L|6C|5C|4C|7L|2C |1C |0C |3L */ +/* Do 4x4 transpose of resulting pattern to get vertBS[4]-BS[7] */ +/* */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 16 10 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +#define CSBP_LEFT_BLOCK_MASK 0x1111 +#define CSBP_RIGHT_BLOCK_MASK 0x8888 + +void ih264d_fill_bs2_horz_vert(UWORD32 *pu4_bs, /* Base pointer of BS table */ + WORD32 u4_left_mb_csbp, /* csbp of left mb */ + WORD32 u4_top_mb_csbp, /* csbp of top mb */ + WORD32 u4_cur_mb_csbp, /* csbp of current mb */ + const UWORD32 *pu4_packed_bs2, const UWORD16 *pu2_4x4_v2h_reorder) +{ + /*************************************************************************/ + /*u4_nbr_horz_csbp=11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C|15T|14T|13T|12T */ + /*************************************************************************/ + UWORD32 u4_nbr_horz_csbp = (u4_cur_mb_csbp << 4) | (u4_top_mb_csbp >> 12); + UWORD32 u4_horz_bs2_dec = u4_cur_mb_csbp | 
u4_nbr_horz_csbp; + + /*************************************************************************/ + /*u4_left_mb_masked_csbp = 15L|0|0|0|11L|0|0|0|7L|0|0|0|3L|0|0|0 */ + /*************************************************************************/ + UWORD32 u4_left_mb_masked_csbp = u4_left_mb_csbp & CSBP_RIGHT_BLOCK_MASK; + + /*************************************************************************/ + /*u4_cur_mb_masked_csbp =14C|13C|12C|x|10C|9C|8C|x|6C|5C|4C|x|2C|1C|0C|x */ + /*************************************************************************/ + UWORD32 u4_cur_mb_masked_csbp = (u4_cur_mb_csbp << 1) + & (~CSBP_LEFT_BLOCK_MASK); + + /*************************************************************************/ + /*u4_nbr_vert_csbp=14C|13C|12C|15L|10C|9C|8C|11L|6C|5C|4C|7L|2C|1C|0C|3L */ + /*************************************************************************/ + UWORD32 u4_nbr_vert_csbp = (u4_cur_mb_masked_csbp) + | (u4_left_mb_masked_csbp >> 3); + + UWORD32 u4_vert_bs2_dec = u4_cur_mb_csbp | u4_nbr_vert_csbp; + + UWORD32 u4_reordered_vert_bs2_dec, u4_temp; + + PROFILE_DISABLE_BOUNDARY_STRENGTH() + + /*************************************************************************/ + /* Fill horz edges (0,1,2,3) boundary strengths 2 using look up table */ + /*************************************************************************/ + pu4_bs[0] = pu4_packed_bs2[u4_horz_bs2_dec & 0xF]; + pu4_bs[1] = pu4_packed_bs2[(u4_horz_bs2_dec >> 4) & 0xF]; + pu4_bs[2] = pu4_packed_bs2[(u4_horz_bs2_dec >> 8) & 0xF]; + pu4_bs[3] = pu4_packed_bs2[(u4_horz_bs2_dec >> 12) & 0xF]; + + /*************************************************************************/ + /* Do 4x4 tranpose of u4_vert_bs2_dec by using look up table for reorder */ + /*************************************************************************/ + u4_reordered_vert_bs2_dec = pu2_4x4_v2h_reorder[u4_vert_bs2_dec & 0xF]; + u4_temp = pu2_4x4_v2h_reorder[(u4_vert_bs2_dec >> 4) & 0xF]; + u4_reordered_vert_bs2_dec |= (u4_temp 
<< 1); + u4_temp = pu2_4x4_v2h_reorder[(u4_vert_bs2_dec >> 8) & 0xF]; + u4_reordered_vert_bs2_dec |= (u4_temp << 2); + u4_temp = pu2_4x4_v2h_reorder[(u4_vert_bs2_dec >> 12) & 0xF]; + u4_reordered_vert_bs2_dec |= (u4_temp << 3); + + /*************************************************************************/ + /* Fill vert edges (4,5,6,7) boundary strengths 2 using look up table */ + /*************************************************************************/ + pu4_bs[4] = pu4_packed_bs2[u4_reordered_vert_bs2_dec & 0xF]; + pu4_bs[5] = pu4_packed_bs2[(u4_reordered_vert_bs2_dec >> 4) & 0xF]; + pu4_bs[6] = pu4_packed_bs2[(u4_reordered_vert_bs2_dec >> 8) & 0xF]; + pu4_bs[7] = pu4_packed_bs2[(u4_reordered_vert_bs2_dec >> 12) & 0xF]; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_fill_bs1_16x16mb_pslice */ +/* */ +/* Description : This function fills boundray strength (=1) for those */ +/* horz and vert mb edges of 16x16mb which are set to 0 by */ +/* ih264d_fill_bs2_horz_vert. This function is used for p slices */ +/* */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : <Does it use any global variables?> */ +/* Processing : If any motion vector component of adjacent 4x4 blocks */ +/* differs by more than 1 integer pel or if reference */ +/* pictures are different, Bs is set to 1. 
*/ +/* */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 16 10 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +void ih264d_fill_bs1_16x16mb_pslice(mv_pred_t *ps_cur_mv_pred, + mv_pred_t *ps_top_mv_pred, + void **ppv_map_ref_idx_to_poc, + UWORD32 *pu4_bs_table, /* pointer to the BsTable array */ + mv_pred_t *ps_leftmost_mv_pred, + neighbouradd_t *ps_left_addr, + void **u4_pic_addrress, /* picture address for BS calc */ + WORD32 i4_ver_mvlimit) +{ + WORD16 i2_q_mv0, i2_q_mv1; + WORD16 i2_p_mv0, i2_p_mv1; + void *pv_cur_pic_addr0, *pv_cur_pic_addr1; + void *pv_nbr_pic_addr0, *pv_nbr_pic_addr1; + void **ppv_map_ref_idx_to_poc_l0; //,*ppv_map_ref_idx_to_poc_l1; + UWORD32 i; + UWORD32 u4_bs_horz = pu4_bs_table[0]; + UWORD32 u4_bs_vert = pu4_bs_table[4]; + + PROFILE_DISABLE_BOUNDARY_STRENGTH() + + ppv_map_ref_idx_to_poc_l0 = ppv_map_ref_idx_to_poc; + + i2_q_mv0 = ps_cur_mv_pred->i2_mv[0]; + i2_q_mv1 = ps_cur_mv_pred->i2_mv[1]; + pv_cur_pic_addr0 = ppv_map_ref_idx_to_poc_l0[ps_cur_mv_pred->i1_ref_frame[0]]; + pv_cur_pic_addr1 = 0; + + /*********************************/ + /* Computing Bs for the top edge */ + /*********************************/ + for(i = 0; i < 4; i++, ps_top_mv_pred++) + { + UWORD32 u4_idx = 24 - (i << 3); + + /*********************************/ + /* check if Bs is already set */ + /*********************************/ + if(!((u4_bs_horz >> u4_idx) & 0xf)) + { + /************************************************************/ + /* If Bs is not set, use left edge and current edge mvs and */ + /* reference pictures addresses to evaluate Bs==1 */ + /************************************************************/ + UWORD32 u4_bs_temp1; + UWORD32 u4_bs; + + 
/*********************************************************/ + /* If any motion vector component differs by more than 1 */ + /* integer pel or if reference pictures are different Bs */ + /* is set to 1. Note that this condition shall be met for*/ + /* both (fwd-fwd,bwd-bwd) and (fwd-bwd,bwd-fwd) direction*/ + /*********************************************************/ + i2_p_mv0 = ps_top_mv_pred->i2_mv[0]; + i2_p_mv1 = ps_top_mv_pred->i2_mv[1]; + pv_nbr_pic_addr0 = u4_pic_addrress[i & 2]; + pv_nbr_pic_addr1 = u4_pic_addrress[1 + (i & 2)]; + + u4_bs_temp1 = ((ABS((i2_p_mv0 - i2_q_mv0)) >= 4) || + (ABS((i2_p_mv1 - i2_q_mv1)) >= i4_ver_mvlimit)); + + u4_bs = ((pv_cur_pic_addr0 != pv_nbr_pic_addr0) + || (pv_cur_pic_addr1 != pv_nbr_pic_addr1) + || u4_bs_temp1); + + u4_bs_horz |= (u4_bs << u4_idx); + } + } + pu4_bs_table[0] = u4_bs_horz; + + /***********************************/ + /* Computing Bs for the left edge */ + /***********************************/ + for(i = 0; i < 4; i++, ps_leftmost_mv_pred += 4) + { + UWORD32 u4_idx = 24 - (i << 3); + + /*********************************/ + /* check if Bs is already set */ + /*********************************/ + if(!((u4_bs_vert >> u4_idx) & 0xf)) + { + /****************************************************/ + /* If Bs is not set, evalaute conditions for Bs=1 */ + /****************************************************/ + UWORD32 u4_bs_temp1; + UWORD32 u4_bs; + /*********************************************************/ + /* If any motion vector component differs by more than 1 */ + /* integer pel or if reference pictures are different Bs */ + /* is set to 1. 
Note that this condition shall be met for*/ + /* both (fwd-fwd,bwd-bwd) and (fwd-bwd,bwd-fwd) direction*/ + /*********************************************************/ + + i2_p_mv0 = ps_leftmost_mv_pred->i2_mv[0]; + i2_p_mv1 = ps_leftmost_mv_pred->i2_mv[1]; + pv_nbr_pic_addr0 = ps_left_addr->u4_add[i & 2]; + pv_nbr_pic_addr1 = ps_left_addr->u4_add[1 + (i & 2)]; + + u4_bs_temp1 = + ((ABS((i2_p_mv0 - i2_q_mv0)) + >= 4) + | (ABS((i2_p_mv1 - i2_q_mv1)) + >= i4_ver_mvlimit)); + + u4_bs = ((pv_cur_pic_addr0 != pv_nbr_pic_addr0) + || (pv_cur_pic_addr1 != pv_nbr_pic_addr1) + || u4_bs_temp1); + + u4_bs_vert |= (u4_bs << u4_idx); + } + } + pu4_bs_table[4] = u4_bs_vert; + + return; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_fill_bs1_non16x16mb_pslice */ +/* */ +/* Description : This function fills boundray strength (=1) for those */ +/* horz and vert edges of non16x16mb which are set to 0 by */ +/* ih264d_fill_bs2_horz_vert. This function is used for p slices */ +/* */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : <Does it use any global variables?> */ +/* Processing : If any motion vector component of adjacent 4x4 blocks */ +/* differs by more than 1 integer pel or if reference */ +/* pictures are different, Bs is set to 1. 
*/
/*                                                                           */
/*  Inputs        : Current/top/leftmost MV predictors, list-0 ref-idx to    */
/*                  picture-address map, stored picture addresses of the     */
/*                  left and top neighbours, vertical MV limit               */
/*  Outputs       : Bs=1 is OR-ed into those byte lanes of pu4_bs_table[0..3]*/
/*                  (horz edges) and pu4_bs_table[4..7] (vert edges) whose    */
/*                  low nibble was still 0                                    */
/*  Returns       : None                                                     */
/*                                                                           */
/*  Issues        : None recorded                                            */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
/*         16 10 2008   Jay             Draft                                */
/*                                                                           */
/*****************************************************************************/
void ih264d_fill_bs1_non16x16mb_pslice(mv_pred_t *ps_cur_mv_pred,
                                       mv_pred_t *ps_top_mv_pred,
                                       void **ppv_map_ref_idx_to_poc,
                                       UWORD32 *pu4_bs_table, /* pointer to the BsTable array */
                                       mv_pred_t *ps_leftmost_mv_pred,
                                       neighbouradd_t *ps_left_addr,
                                       void **u4_pic_addrress,
                                       WORD32 i4_ver_mvlimit)
{
    void **ppv_l0_pics;
    /* This variant only looks up list 0; the current block's second      */
    /* picture address is always compared as 0                            */
    void *pv_cur_l1_pic = 0;
    UWORD32 u4_row;

    PROFILE_DISABLE_BOUNDARY_STRENGTH()

    ppv_l0_pics = ppv_map_ref_idx_to_poc;

    u4_row = 0;
    while(u4_row < 4)
    {
        /* One pass per row of 4x4 blocks: completes horz edge u4_row and  */
        /* contributes one byte lane to each of the four vert edge words   */
        const UWORD32 u4_vert_shift = 24 - (u4_row << 3);
        UWORD32 u4_horz_bs = pu4_bs_table[u4_row];
        mv_pred_t *ps_left = ps_leftmost_mv_pred + (u4_row << 2);
        WORD32 i4_col;

        for(i4_col = 0; i4_col < 4; i4_col++)
        {
            const UWORD32 u4_horz_shift = 24 - (i4_col << 3);

            /* Vertical edge between this block and its left neighbour,   */
            /* evaluated only if no Bs has been assigned yet              */
            if(0 == ((pu4_bs_table[i4_col + 4] >> u4_vert_shift) & 0xf))
            {
                const WORD32 i4_dx = ps_left->i2_mv[0]
                                - ps_cur_mv_pred->i2_mv[0];
                const WORD32 i4_dy = ps_left->i2_mv[1]
                                - ps_cur_mv_pred->i2_mv[1];
                void *pv_cur_l0_pic =
                                ppv_l0_pics[ps_cur_mv_pred->i1_ref_frame[0]];
                void *pv_nbr_l0_pic, *pv_nbr_l1_pic;
                UWORD32 u4_vert_bs;

                if(0 == i4_col)
                {
                    /* Left neighbour is in the previous MB: use the      */
                    /* picture addresses saved for it                     */
                    pv_nbr_l0_pic = ps_left_addr->u4_add[u4_row & 2];
                    pv_nbr_l1_pic = ps_left_addr->u4_add[1 + (u4_row & 2)];
                }
                else
                {
                    pv_nbr_l0_pic = ppv_l0_pics[ps_left->i1_ref_frame[0]];
                    pv_nbr_l1_pic = 0;
                }

                /* Bs = 1 when the pictures differ or an MV component     */
                /* differs by at least the limit                          */
                u4_vert_bs = (pv_nbr_l0_pic != pv_cur_l0_pic)
                                || (pv_nbr_l1_pic != pv_cur_l1_pic)
                                || (ABS(i4_dx) >= 4)
                                || (ABS(i4_dy) >= i4_ver_mvlimit);

                pu4_bs_table[i4_col + 4] |= (u4_vert_bs << u4_vert_shift);
            }

            /* Horizontal edge between this block and its top neighbour   */
            if(0 == ((u4_horz_bs >> u4_horz_shift) & 0xf))
            {
                const WORD32 i4_dx = ps_top_mv_pred->i2_mv[0]
                                - ps_cur_mv_pred->i2_mv[0];
                const WORD32 i4_dy = ps_top_mv_pred->i2_mv[1]
                                - ps_cur_mv_pred->i2_mv[1];
                void *pv_cur_l0_pic =
                                ppv_l0_pics[ps_cur_mv_pred->i1_ref_frame[0]];
                void *pv_nbr_l0_pic, *pv_nbr_l1_pic;
                UWORD32 u4_bs;

                if(0 == u4_row)
                {
                    /* Top neighbour is in the MB above: use the picture  */
                    /* addresses saved for it                             */
                    pv_nbr_l0_pic = u4_pic_addrress[i4_col & 2];
                    pv_nbr_l1_pic = u4_pic_addrress[1 + (i4_col & 2)];
                }
                else
                {
                    pv_nbr_l0_pic =
                                    ppv_l0_pics[ps_top_mv_pred->i1_ref_frame[0]];
                    pv_nbr_l1_pic = 0;
                }

                u4_bs = (pv_nbr_l0_pic != pv_cur_l0_pic)
                                || (pv_nbr_l1_pic != pv_cur_l1_pic)
                                || (ABS(i4_dx) >= 4)
                                || (ABS(i4_dy) >= i4_ver_mvlimit);

                u4_horz_bs |= (u4_bs << u4_horz_shift);
            }

            /* The block just handled is the left neighbour of the next   */
            ps_left = ps_cur_mv_pred;
            ps_top_mv_pred++;
            ps_cur_mv_pred++;
        }

        pu4_bs_table[u4_row] = u4_horz_bs;

        /* From the second row on, the top neighbours are the previous    */
        /* row of the current MB                                          */
        u4_row++;
        ps_top_mv_pred = ps_cur_mv_pred - 4;
    }
}

/*****************************************************************************/
/*                                                                           */
/*  Function Name : ih264d_fill_bs1_16x16mb_bslice                           */
/*                                                                           */
/*  Description   : This function fills boundary strength (=1) for those     */
/*                  horz and vert mb edges of 16x16mb which are set to 0 by  */
/*                  ih264d_fill_bs2_horz_vert. This function is used for b   */
/*                  slices                                                   */
/*                                                                           */
/*  Inputs        : <What inputs does the function take?>                    */
/*  Globals       : <Does it use any global variables?>                      */
/*  Processing    : If any motion vector component of adjacent 4x4 blocks    */
/*                  differs by more than 1 integer pel or if reference       */
/*                  pictures are different, Bs is set to 1.
*/
/*                                                                           */
/*  Outputs       : Bs=1 is OR-ed into those byte lanes of pu4_bs_table[0]   */
/*                  (top mb edge) and pu4_bs_table[4] (left mb edge) whose   */
/*                  low nibble was still 0                                   */
/*  Returns       : None                                                     */
/*                                                                           */
/*  Issues        : None recorded                                            */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
/*         16 10 2008   Jay             Draft                                */
/*                                                                           */
/*****************************************************************************/
void ih264d_fill_bs1_16x16mb_bslice(mv_pred_t *ps_cur_mv_pred,
                                    mv_pred_t *ps_top_mv_pred,
                                    void **ppv_map_ref_idx_to_poc,
                                    UWORD32 *pu4_bs_table, /* pointer to the BsTable array */
                                    mv_pred_t *ps_leftmost_mv_pred,
                                    neighbouradd_t *ps_left_addr,
                                    void **u4_pic_addrress,
                                    WORD32 i4_ver_mvlimit)
{
    UWORD32 u4_top_bs = pu4_bs_table[0];
    UWORD32 u4_left_bs = pu4_bs_table[4];
    void **ppv_l0_pics, **ppv_l1_pics;
    void *pv_q_l0_pic, *pv_q_l1_pic;
    WORD32 i4_q_l0_x, i4_q_l0_y, i4_q_l1_x, i4_q_l1_y;
    UWORD32 u4_blk;

    PROFILE_DISABLE_BOUNDARY_STRENGTH()

    ppv_l0_pics = ppv_map_ref_idx_to_poc;
    ppv_l1_pics = ppv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;

    /* A 16x16 MB has a single set of motion data: read it once */
    i4_q_l0_x = ps_cur_mv_pred->i2_mv[0];
    i4_q_l0_y = ps_cur_mv_pred->i2_mv[1];
    i4_q_l1_x = ps_cur_mv_pred->i2_mv[2];
    i4_q_l1_y = ps_cur_mv_pred->i2_mv[3];
    pv_q_l0_pic = ppv_l0_pics[ps_cur_mv_pred->i1_ref_frame[0]];
    pv_q_l1_pic = ppv_l1_pics[ps_cur_mv_pred->i1_ref_frame[1]];

    /*-------------------------- top mb edge -------------------------------*/
    for(u4_blk = 0; u4_blk < 4; u4_blk++)
    {
        const UWORD32 u4_shift = 24 - (u4_blk << 3);
        const mv_pred_t *ps_p = ps_top_mv_pred + u4_blk;
        void *pv_p_l0_pic, *pv_p_l1_pic;
        UWORD32 u4_same_match, u4_cross_match;

        /* Nothing to do when a Bs has already been assigned */
        if((u4_top_bs >> u4_shift) & 0xf)
            continue;

        pv_p_l0_pic = u4_pic_addrress[u4_blk & 2];
        pv_p_l1_pic = u4_pic_addrress[1 + (u4_blk & 2)];

        /* Neighbour (l0,l1) matches current (l0,l1): same pictures and   */
        /* every MV component within the limit                            */
        u4_same_match = (pv_q_l0_pic == pv_p_l0_pic)
                        && (pv_q_l1_pic == pv_p_l1_pic)
                        && (ABS((ps_p->i2_mv[0] - i4_q_l0_x)) < 4)
                        && (ABS((ps_p->i2_mv[1] - i4_q_l0_y)) < i4_ver_mvlimit)
                        && (ABS((ps_p->i2_mv[2] - i4_q_l1_x)) < 4)
                        && (ABS((ps_p->i2_mv[3] - i4_q_l1_y)) < i4_ver_mvlimit);

        /* Neighbour (l0,l1) matches current (l1,l0) */
        u4_cross_match = (pv_q_l0_pic == pv_p_l1_pic)
                        && (pv_q_l1_pic == pv_p_l0_pic)
                        && (ABS((ps_p->i2_mv[0] - i4_q_l1_x)) < 4)
                        && (ABS((ps_p->i2_mv[1] - i4_q_l1_y)) < i4_ver_mvlimit)
                        && (ABS((ps_p->i2_mv[2] - i4_q_l0_x)) < 4)
                        && (ABS((ps_p->i2_mv[3] - i4_q_l0_y)) < i4_ver_mvlimit);

        /* Bs = 1 only when neither pairing matches */
        if(!(u4_same_match || u4_cross_match))
            u4_top_bs |= ((UWORD32)1 << u4_shift);
    }
    pu4_bs_table[0] = u4_top_bs;

    /*-------------------------- left mb edge ------------------------------*/
    for(u4_blk = 0; u4_blk < 4; u4_blk++)
    {
        const UWORD32 u4_shift = 24 - (u4_blk << 3);
        /* Left neighbours are four MV entries apart (one per block row) */
        const mv_pred_t *ps_p = ps_leftmost_mv_pred + (u4_blk << 2);
        void *pv_p_l0_pic, *pv_p_l1_pic;
        UWORD32 u4_same_match, u4_cross_match;

        if((u4_left_bs >> u4_shift) & 0xf)
            continue;

        pv_p_l0_pic = ps_left_addr->u4_add[u4_blk & 2];
        pv_p_l1_pic = ps_left_addr->u4_add[1 + (u4_blk & 2)];

        u4_same_match = (pv_q_l0_pic == pv_p_l0_pic)
                        && (pv_q_l1_pic == pv_p_l1_pic)
                        && (ABS((ps_p->i2_mv[0] - i4_q_l0_x)) < 4)
                        && (ABS((ps_p->i2_mv[1] - i4_q_l0_y)) < i4_ver_mvlimit)
                        && (ABS((ps_p->i2_mv[2] - i4_q_l1_x)) < 4)
                        && (ABS((ps_p->i2_mv[3] - i4_q_l1_y)) < i4_ver_mvlimit);

        u4_cross_match = (pv_q_l0_pic == pv_p_l1_pic)
                        && (pv_q_l1_pic == pv_p_l0_pic)
                        && (ABS((ps_p->i2_mv[0] - i4_q_l1_x)) < 4)
                        && (ABS((ps_p->i2_mv[1] - i4_q_l1_y)) < i4_ver_mvlimit)
                        && (ABS((ps_p->i2_mv[2] - i4_q_l0_x)) < 4)
                        && (ABS((ps_p->i2_mv[3] - i4_q_l0_y)) < i4_ver_mvlimit);

        if(!(u4_same_match || u4_cross_match))
            u4_left_bs |= ((UWORD32)1 << u4_shift);
    }
    pu4_bs_table[4] = u4_left_bs;
}

/*****************************************************************************/
/*                                                                           */
/*  Function Name : ih264d_fill_bs1_non16x16mb_bslice                        */
/*                                                                           */
/*  Description   : This function fills boundary strength (=1) for those     */
/*                  horz and vert edges of non16x16mb which are set to 0 by  */
/*                  ih264d_fill_bs2_horz_vert.
This function is used for b slices */
/*                                                                           */
/*  Inputs        : Current/top/leftmost MV predictors, ref-idx to picture   */
/*                  address map (list 0, list 1 at POC_LIST_L0_TO_L1_DIFF),  */
/*                  stored picture addresses of the left and top neighbours, */
/*                  vertical MV limit                                        */
/*  Globals       : None                                                     */
/*  Processing    : If any motion vector component of adjacent 4x4 blocks    */
/*                  differs by more than 1 integer pel or if reference       */
/*                  pictures are different, Bs is set to 1.                  */
/*                                                                           */
/*  Outputs       : Bs=1 is OR-ed into those byte lanes of pu4_bs_table[0..3]*/
/*                  (horz edges) and pu4_bs_table[4..7] (vert edges) whose    */
/*                  low nibble was still 0                                    */
/*  Returns       : None                                                     */
/*                                                                           */
/*  Issues        : None recorded                                            */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
/*         16 10 2008   Jay             Draft                                */
/*                                                                           */
/*****************************************************************************/
void ih264d_fill_bs1_non16x16mb_bslice(mv_pred_t *ps_cur_mv_pred,
                                       mv_pred_t *ps_top_mv_pred,
                                       void **ppv_map_ref_idx_to_poc,
                                       UWORD32 *pu4_bs_table, /* pointer to the BsTable array */
                                       mv_pred_t *ps_leftmost_mv_pred,
                                       neighbouradd_t *ps_left_addr,
                                       void **u4_pic_addrress,
                                       WORD32 i4_ver_mvlimit)
{
    void **ppv_l0_pics = ppv_map_ref_idx_to_poc;
    void **ppv_l1_pics = ppv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;
    UWORD32 u4_row;

    PROFILE_DISABLE_BOUNDARY_STRENGTH()

    /* One pass per row of 4x4 blocks. From the second row on, the top     */
    /* neighbours are the previous row of the current MB                   */
    for(u4_row = 0; u4_row < 4; u4_row++, ps_top_mv_pred = ps_cur_mv_pred - 4)
    {
        const UWORD32 u4_vert_shift = 24 - (u4_row << 3);
        UWORD32 u4_horz_bs = pu4_bs_table[u4_row];
        mv_pred_t *ps_left = ps_leftmost_mv_pred + (u4_row << 2);
        WORD32 i4_col;

        for(i4_col = 0; i4_col < 4; i4_col++)
        {
            const UWORD32 u4_horz_shift = 24 - (i4_col << 3);
            const mv_pred_t *ps_q = ps_cur_mv_pred;

            /* Vertical edge between this block and its left neighbour,   */
            /* evaluated only if no Bs has been assigned yet              */
            if(0 == ((pu4_bs_table[i4_col + 4] >> u4_vert_shift) & 0xf))
            {
                const mv_pred_t *ps_p = ps_left;
                void *pv_q_l0_pic = ppv_l0_pics[ps_q->i1_ref_frame[0]];
                void *pv_q_l1_pic = ppv_l1_pics[ps_q->i1_ref_frame[1]];
                void *pv_p_l0_pic, *pv_p_l1_pic;
                UWORD32 u4_same_match, u4_cross_match, u4_vert_bs;

                if(0 == i4_col)
                {
                    /* Left neighbour is in the previous MB: use the      */
                    /* picture addresses saved for it                     */
                    pv_p_l0_pic = ps_left_addr->u4_add[u4_row & 2];
                    pv_p_l1_pic = ps_left_addr->u4_add[1 + (u4_row & 2)];
                }
                else
                {
                    pv_p_l0_pic = ppv_l0_pics[ps_p->i1_ref_frame[0]];
                    pv_p_l1_pic = ppv_l1_pics[ps_p->i1_ref_frame[1]];
                }

                /* Neighbour (l0,l1) matches current (l0,l1) */
                u4_same_match = (pv_p_l0_pic == pv_q_l0_pic)
                                && (pv_p_l1_pic == pv_q_l1_pic)
                                && (ABS((ps_p->i2_mv[0] - ps_q->i2_mv[0])) < 4)
                                && (ABS((ps_p->i2_mv[1] - ps_q->i2_mv[1]))
                                                < i4_ver_mvlimit)
                                && (ABS((ps_p->i2_mv[2] - ps_q->i2_mv[2])) < 4)
                                && (ABS((ps_p->i2_mv[3] - ps_q->i2_mv[3]))
                                                < i4_ver_mvlimit);

                /* Neighbour (l0,l1) matches current (l1,l0) */
                u4_cross_match = (pv_p_l0_pic == pv_q_l1_pic)
                                && (pv_p_l1_pic == pv_q_l0_pic)
                                && (ABS((ps_p->i2_mv[0] - ps_q->i2_mv[2])) < 4)
                                && (ABS((ps_p->i2_mv[1] - ps_q->i2_mv[3]))
                                                < i4_ver_mvlimit)
                                && (ABS((ps_p->i2_mv[2] - ps_q->i2_mv[0])) < 4)
                                && (ABS((ps_p->i2_mv[3] - ps_q->i2_mv[1]))
                                                < i4_ver_mvlimit);

                /* Bs = 1 only when neither pairing matches */
                u4_vert_bs = !(u4_same_match || u4_cross_match);

                pu4_bs_table[i4_col + 4] |= (u4_vert_bs << u4_vert_shift);
            }

            /* Horizontal edge between this block and its top neighbour   */
            if(0 == ((u4_horz_bs >> u4_horz_shift) & 0xf))
            {
                const mv_pred_t *ps_p = ps_top_mv_pred;
                void *pv_q_l0_pic = ppv_l0_pics[ps_q->i1_ref_frame[0]];
                void *pv_q_l1_pic = ppv_l1_pics[ps_q->i1_ref_frame[1]];
                void *pv_p_l0_pic, *pv_p_l1_pic;
                UWORD32 u4_same_match, u4_cross_match, u4_bs;

                if(0 == u4_row)
                {
                    /* Top neighbour is in the MB above: use the picture  */
                    /* addresses saved for it                             */
                    pv_p_l0_pic = u4_pic_addrress[i4_col & 2];
                    pv_p_l1_pic = u4_pic_addrress[1 + (i4_col & 2)];
                }
                else
                {
                    pv_p_l0_pic = ppv_l0_pics[ps_p->i1_ref_frame[0]];
                    pv_p_l1_pic = ppv_l1_pics[ps_p->i1_ref_frame[1]];
                }

                u4_same_match = (pv_p_l0_pic == pv_q_l0_pic)
                                && (pv_p_l1_pic == pv_q_l1_pic)
                                && (ABS((ps_p->i2_mv[0] - ps_q->i2_mv[0])) < 4)
                                && (ABS((ps_p->i2_mv[1] - ps_q->i2_mv[1]))
                                                < i4_ver_mvlimit)
                                && (ABS((ps_p->i2_mv[2] - ps_q->i2_mv[2])) < 4)
                                && (ABS((ps_p->i2_mv[3] - ps_q->i2_mv[3]))
                                                < i4_ver_mvlimit);

                u4_cross_match = (pv_p_l0_pic == pv_q_l1_pic)
                                && (pv_p_l1_pic == pv_q_l0_pic)
                                && (ABS((ps_p->i2_mv[0] - ps_q->i2_mv[2])) < 4)
                                && (ABS((ps_p->i2_mv[1] - ps_q->i2_mv[3]))
                                                < i4_ver_mvlimit)
                                && (ABS((ps_p->i2_mv[2] - ps_q->i2_mv[0])) < 4)
                                && (ABS((ps_p->i2_mv[3] - ps_q->i2_mv[1]))
                                                < i4_ver_mvlimit);

                u4_bs = !(u4_same_match || u4_cross_match);

                u4_horz_bs |= (u4_bs << u4_horz_shift);
            }

            /* The block just handled is the left neighbour of the next   */
            ps_left = ps_cur_mv_pred;
            ps_top_mv_pred++;
            ps_cur_mv_pred++;
        }

        pu4_bs_table[u4_row] = u4_horz_bs;
    }
}

/*****************************************************************************/
/*                                                                           */
/*  Function Name : ih264d_fill_bs_xtra_left_edge_cur_fld                    */
/*                                                                           */
/*  Description   : This function fills boundary strength (= 2 or 1) for     */
/*                  xtra left mb edge when cur mb is field and left mb is    */
/*                  frame.
*/ +/* Inputs : */ +/* */ +/* Globals : <Does it use any global variables?> */ +/* Processing : */ +/* */ +/* */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 16 10 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +void ih264d_fill_bs_xtra_left_edge_cur_fld(UWORD32 *pu4_bs, /* Base pointer of BS table */ + WORD32 u4_left_mb_t_csbp, /* left mbpair's top csbp */ + WORD32 u4_left_mb_b_csbp, /* left mbpair's bottom csbp*/ + WORD32 u4_cur_mb_csbp, /* csbp of current mb */ + UWORD32 u4_cur_mb_top /* is top or bottom mb */ + + ) +{ + const UWORD32 *pu4_packed_bs = (const UWORD32 *)gau4_ih264d_packed_bs2; + UWORD32 u4_cur, u4_left, u4_or; + UNUSED(u4_cur_mb_top); + + PROFILE_DISABLE_BOUNDARY_STRENGTH() + + u4_left_mb_t_csbp = ((u4_left_mb_t_csbp & 0x0008) >> 3) + + ((u4_left_mb_t_csbp & 0x0080) >> 6) + + ((u4_left_mb_t_csbp & 0x0800) >> 9) + + ((u4_left_mb_t_csbp & 0x8000) >> 12); + + u4_left_mb_b_csbp = ((u4_left_mb_b_csbp & 0x0008) << 1) + + ((u4_left_mb_b_csbp & 0x0080) >> 2) + + ((u4_left_mb_b_csbp & 0x0800) >> 5) + + ((u4_left_mb_b_csbp & 0x8000) >> 8); + + /*********************************************************************/ + /* u4_cur = 0|0|0|0|0|0|0|0|12C|12C|8C|8C|4C|4C|0C|0C */ + /*********************************************************************/ + u4_cur = (u4_cur_mb_csbp & 0x0001) + ((u4_cur_mb_csbp & 0x0001) << 1) + + ((u4_cur_mb_csbp & 0x0010) >> 2) + + ((u4_cur_mb_csbp & 0x0010) >> 1) + + ((u4_cur_mb_csbp & 0x0100) >> 4) + + ((u4_cur_mb_csbp & 0x0100) >> 3) + + ((u4_cur_mb_csbp & 0x1000) >> 6) + + ((u4_cur_mb_csbp & 0x1000) >> 5); + + /*********************************************************************/ + /* u4_left 
=0|0|0|0|0|0|0|0|15Lb|11Lb|7Lb|3Lb|15Lt|11Lt|7Lt|3Lt */ + /*********************************************************************/ + u4_left = u4_left_mb_t_csbp + u4_left_mb_b_csbp; + + u4_or = (u4_cur | u4_left); + /*********************************************************************/ + /* Fill vert edges (4,9) boundary strengths using look up table */ + /*********************************************************************/ + pu4_packed_bs += 16; + pu4_bs[4] = pu4_packed_bs[u4_or & 0xF]; + pu4_bs[9] = pu4_packed_bs[(u4_or >> 4)]; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_fill_bs_xtra_left_edge_cur_frm */ +/* */ +/* Description : This function fills boundray strength (= 2 or 1) for */ +/* xtra left mb edge when cur mb is frame and left mb is */ +/* field. */ +/* Inputs : */ +/* */ +/* Globals : <Does it use any global variables?> */ +/* Processing : */ +/* */ +/* */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 16 10 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +void ih264d_fill_bs_xtra_left_edge_cur_frm(UWORD32 *pu4_bs, /* Base pointer of BS table */ + WORD32 u4_left_mb_t_csbp, /* left mbpair's top csbp */ + WORD32 u4_left_mb_b_csbp, /* left mbpair's bottom csbp*/ + WORD32 u4_cur_mb_csbp, /* csbp of current mb */ + UWORD32 u4_cur_mb_bot /* is top or bottom mb */ + + ) +{ + const UWORD32 *pu4_packed_bs = (const UWORD32 *)gau4_ih264d_packed_bs2; + UWORD32 u4_cur, u4_left, u4_or; + UWORD32 u4_right_shift = (u4_cur_mb_bot << 3); + + PROFILE_DISABLE_BOUNDARY_STRENGTH() + + u4_left_mb_t_csbp >>= u4_right_shift; + u4_left_mb_b_csbp >>= u4_right_shift; + + u4_left_mb_t_csbp = ((u4_left_mb_t_csbp & 0x08) 
>> 3) + + ((u4_left_mb_t_csbp & 0x08) >> 2) + + ((u4_left_mb_t_csbp & 0x80) >> 5) + + ((u4_left_mb_t_csbp & 0x80) >> 4); + + u4_left_mb_b_csbp = ((u4_left_mb_b_csbp & 0x08) << 1) + + ((u4_left_mb_b_csbp & 0x08) << 2) + + ((u4_left_mb_b_csbp & 0x80) >> 1) + + ((u4_left_mb_b_csbp & 0x80)); + + u4_cur = ((u4_cur_mb_csbp & 0x0001)) + ((u4_cur_mb_csbp & 0x0010) >> 3) + + ((u4_cur_mb_csbp & 0x0100) >> 6) + + ((u4_cur_mb_csbp & 0x1000) >> 9); + + u4_cur += (u4_cur << 4); + + u4_left = u4_left_mb_t_csbp + u4_left_mb_b_csbp; + + u4_or = (u4_cur | u4_left); + /*********************************************************************/ + /* Fill vert edges (4,9) boundary strengths using look up table */ + /*********************************************************************/ + pu4_packed_bs += 16; + pu4_bs[4] = pu4_packed_bs[u4_or & 0xF]; + pu4_bs[9] = pu4_packed_bs[(u4_or >> 4)]; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_fill_bs_xtra_top_edge */ +/* */ +/* Description : This function fills boundray strength (= 2 or 1) for */ +/* xtra top mb edge when cur mb is top mb of frame mb pair */ +/* and top mbpair is field coded. 
*/
/*  Inputs        : BS table, csbp of the top mbpair's top and bottom mbs,   */
/*                  csbp of the current mb                                   */
/*                                                                           */
/*  Globals       : gau4_ih264d_packed_bs2                                   */
/*  Processing    : ORs the low nibble of the current csbp with bits 12..15  */
/*                  of each top csbp and looks up the packed Bs              */
/*                                                                           */
/*  Outputs       : pu4_bs[8] (from the top mbpair's top mb) and pu4_bs[0]   */
/*                  (from the top mbpair's bottom mb)                        */
/*  Returns       : None                                                     */
/*                                                                           */
/*  Issues        : None recorded                                            */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
/*         16 10 2008   Jay             Draft                                */
/*                                                                           */
/*****************************************************************************/
void ih264d_fill_bs_xtra_top_edge(UWORD32 *pu4_bs, /* Base pointer of BS table */
                                  WORD32 u4_topmb_t_csbp, /* top mbpair's top csbp   */
                                  WORD32 u4_topmb_b_csbp, /* top mbpair's bottom csbp*/
                                  WORD32 u4_cur_mb_csbp /* csbp of current mb */

                                  )
{
    /* Lookups use the table entries starting at index 16 */
    const UWORD32 *pu4_bs_lut = ((const UWORD32 *)gau4_ih264d_packed_bs2) + 16;
    const WORD32 i4_cur_bits = u4_cur_mb_csbp & 0xf;
    const WORD32 i4_top_t_bits = u4_topmb_t_csbp >> 12;
    const WORD32 i4_top_b_bits = u4_topmb_b_csbp >> 12;

    /*********************************************************************/
    /* Fill edges 8 and 0 boundary strengths using look up table         */
    /* NOTE(review): the original comment called these "vert edges";     */
    /* entry 0 is used as a horizontal edge elsewhere in this file -     */
    /* confirm                                                           */
    /*********************************************************************/
    pu4_bs[8] = pu4_bs_lut[(UWORD32)(i4_cur_bits | i4_top_t_bits)];
    pu4_bs[0] = pu4_bs_lut[(UWORD32)(i4_cur_bits | i4_top_b_bits)];
}

/*****************************************************************************/
/*                                                                           */
/*  Function Name : ih264d_compute_bs_non_mbaff                              */
/*                                                                           */
/*  Description   : This function computes the pointers of left,top & current*/
/*                : Nnz, MvPred & deblk_mb_t and supplies to FillBs function for*/
/*                : Boundary Strength Calculation                            */
/*  Inputs        : <What inputs does the function take?>                    */
/*  Processing    : This functions calls deblock MB in the MB increment order*/
/*                                                                           */
/*  Outputs       : Produces the Boundary Strength for Current Mb            */
/*  Returns       : None                                                     */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/* ITTIAM */ +/*****************************************************************************/ + +void ih264d_compute_bs_non_mbaff(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + const UWORD16 u2_mbxn_mb) +{ + /* Mvpred and Nnz for top and Courrent */ + mv_pred_t *ps_cur_mv_pred, *ps_top_mv_pred = NULL, *ps_left_mv_pred; + /* deblk_mb_t Params */ + deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */ + deblkmb_neighbour_t *ps_deblk_top_mb; + + /* Reference Index to POC mapping*/ + void ** apv_map_ref_idx_to_poc; + UWORD32 u4_leftmbtype; + + UWORD16 u2_left_csbp, u2_top_csbp, u2_cur_csbp; + + /* Set of flags */ + UWORD32 u4_cur_mb_intra, u1_top_mb_typ, u4_cur_mb_fld; + UWORD32 u1_cur_mb_type; + UWORD32 * pu4_bs_table; + + /* Neighbour availability */ + /* Initialization */ + const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx; + const UWORD32 u2_mby = ps_cur_mb_info->u2_mby; + const UWORD32 u1_pingpong = u2_mbx & 0x01; + + PROFILE_DISABLE_BOUNDARY_STRENGTH() + + ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + u2_mbx; + + + /* Pointer assignment for Current DeblkMB, Current Mv Pred */ + ps_cur_mb_params = ps_dec->ps_deblk_mbn + u2_mbxn_mb; + ps_cur_mv_pred = ps_dec->ps_mv_cur + (u2_mbxn_mb << 4); + + apv_map_ref_idx_to_poc = ps_dec->ppv_map_ref_idx_to_poc + 1; + u1_cur_mb_type = ps_cur_mb_params->u1_mb_type; + u1_top_mb_typ = ps_deblk_top_mb->u1_mb_type; + ps_deblk_top_mb->u1_mb_type = u1_cur_mb_type; + + { + UWORD8 mb_qp_temp; + + ps_cur_mb_params->u1_topmb_qp = ps_deblk_top_mb->u1_mb_qp; + ps_deblk_top_mb->u1_mb_qp = ps_cur_mb_params->u1_mb_qp; + + ps_cur_mb_params->u1_left_mb_qp = ps_dec->deblk_left_mb[1].u1_mb_qp; + ps_dec->deblk_left_mb[1].u1_mb_qp = ps_cur_mb_params->u1_mb_qp; + + } + + /* if no deblocking required for current Mb then continue */ + /* Check next Mbs in Mb group */ + if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING) + { + void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc + + POC_LIST_L0_TO_L1_DIFF; + { + 
/* Store Parameter for Top MvPred refernce frame Address */ + + void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress; + WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame; + WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame; + + /* Store Left addresses for Next Mb */ + void ** ppv_left_mv_pred_addr = + ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add; + WORD8 * p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame; + + + ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]]; + ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]]; + + ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]]; + ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]]; + ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]]; + ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]]; + + ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]]; + ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]]; + //} + /* Storing the leftMbtype for next Mb */ + ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type; + } + + return; + } + + /* Flag for extra left Edge */ + ps_cur_mb_params->u1_single_call = 1; + + /* Update the Left deblk_mb_t and Left MvPred Parameters */ + if(!u2_mbx) + { + u4_leftmbtype = 0; + + /* Initialize the ps_left_mv_pred with Junk but Valid Location */ + /* to avoid invalid memory access */ + /* this is read only pointer */ + ps_left_mv_pred = ps_dec->ps_mv_cur + 3; + } + else + { + u4_leftmbtype = ps_dec->deblk_left_mb[1].u1_mb_type; + + /* Come to Left Most Edge of the MB */ + ps_left_mv_pred = (u2_mbxn_mb) ? 
+ ps_dec->ps_mv_cur + ((u2_mbxn_mb - 1) << 4) + 3 : + ps_dec->ps_mv_left + 3; + } + + if(!u2_mby) + u1_top_mb_typ = 0; + + /* MvPred Pointer Calculation */ + /* CHANGED CODE */ + ps_top_mv_pred = ps_cur_mv_pred - (ps_dec->u2_frm_wd_in_mbs << 4) + 12; + + u4_cur_mb_intra = u1_cur_mb_type & D_INTRA_MB; + u4_cur_mb_fld = !!(u1_cur_mb_type & D_FLD_MB); + /* Compute BS function */ + pu4_bs_table = ps_cur_mb_params->u4_bs_table; + + u2_cur_csbp = ps_cur_mb_info->ps_curmb->u2_luma_csbp; + u2_left_csbp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp; + u2_top_csbp = ps_cur_mb_info->ps_top_mb->u2_luma_csbp; + /* Compute BS function */ + if(ps_dec->ps_cur_sps->u1_profile_idc == HIGH_PROFILE_IDC) + { + if(ps_cur_mb_info->u1_tran_form8x8 == 1) + { + u2_cur_csbp = ih264d_update_csbp_8x8( + ps_cur_mb_info->ps_curmb->u2_luma_csbp); + } + + if(ps_cur_mb_info->ps_left_mb->u1_tran_form8x8 == 1) + { + u2_left_csbp = ih264d_update_csbp_8x8( + ps_cur_mb_info->ps_left_mb->u2_luma_csbp); + } + + if(ps_cur_mb_info->ps_top_mb->u1_tran_form8x8 == 1) + { + u2_top_csbp = ih264d_update_csbp_8x8( + ps_cur_mb_info->ps_top_mb->u2_luma_csbp); + } + } + if(u4_cur_mb_intra) + { + + pu4_bs_table[4] = 0x04040404; + pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404; + pu4_bs_table[1] = 0x03030303; + pu4_bs_table[2] = 0x03030303; + pu4_bs_table[3] = 0x03030303; + pu4_bs_table[5] = 0x03030303; + pu4_bs_table[6] = 0x03030303; + pu4_bs_table[7] = 0x03030303; + } + else + { + UWORD32 u4_is_non16x16 = !!(u1_cur_mb_type & D_PRED_NON_16x16); + UWORD32 u4_is_b = ps_dec->u1_B; + + ih264d_fill_bs2_horz_vert( + pu4_bs_table, u2_left_csbp, u2_top_csbp, u2_cur_csbp, + (const UWORD32 *)(gau4_ih264d_packed_bs2), + (const UWORD16 *)(gau2_ih264d_4x4_v2h_reorder)); + + if(u4_leftmbtype & D_INTRA_MB) + pu4_bs_table[4] = 0x04040404; + + if(u1_top_mb_typ & D_INTRA_MB) + pu4_bs_table[0] = u4_cur_mb_fld ? 
0x03030303 : 0x04040404; + + ps_dec->pf_fill_bs1[u4_is_b][u4_is_non16x16]( + ps_cur_mv_pred, ps_top_mv_pred, apv_map_ref_idx_to_poc, + pu4_bs_table, ps_left_mv_pred, + &(ps_dec->ps_left_mvpred_addr[u1_pingpong][1]), + ps_cur_mb_info->ps_top_mb->u4_pic_addrress, + (4 >> u4_cur_mb_fld)); + } + + { + void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc + + POC_LIST_L0_TO_L1_DIFF; + { + /* Store Parameter for Top MvPred refernce frame Address */ + + void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress; + WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame; + WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame; + + /* Store Left addresses for Next Mb */ + void ** ppv_left_mv_pred_addr = + ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add; + WORD8 * p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame; + + ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]]; + ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]]; + + ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]]; + ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]]; + ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]]; + ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]]; + + ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]]; + ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]]; + + /* Storing the leftMbtype for next Mb */ + ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type; + + } + } + + /* For transform 8x8 disable deblocking of the intrernal edges of a 8x8 block */ + if(ps_cur_mb_info->u1_tran_form8x8) + { + pu4_bs_table[1] = 0; + pu4_bs_table[3] = 0; + pu4_bs_table[5] = 0; + pu4_bs_table[7] = 0; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_compute_bs_mbaff */ +/* */ +/* Description : This function computes the pointers of left,top & current*/ +/* : Nnz, MvPred & 
deblk_mb_t and supplies to FillBs function for*/ +/* : Boundary Strength Calculation */ +/* Inputs : ps_dec - decoder state; ps_cur_mb_info - info of the current MB; u2_mbxn_mb - index of the current MB pair in ps_deblk_mbn and ps_mv_cur */ +/* Processing : Fills u4_bs_table of the current MB, then stores its MB type and reference picture addresses as left/top neighbour data */ +/* */ +/* Outputs : Produces the Boundary Strength for Current Mb */ +/* Returns : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* ITTIAM */ +/*****************************************************************************/ + +void ih264d_compute_bs_mbaff(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + const UWORD16 u2_mbxn_mb) +{ + /* Mvpred and Nnz for top and Current */ + mv_pred_t *ps_cur_mv_pred, *ps_top_mv_pred = NULL, *ps_left_mv_pred; + /* deblk_mb_t Params */ + deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */ + neighbouradd_t * ps_left_ngbr; + deblkmb_neighbour_t *ps_deblk_top_mb; + /* Reference Index to POC mapping*/ + void ** apv_map_ref_idx_to_poc; + + UWORD32 u4_leftmbtype; + + + UWORD16 u2_left_csbp, u2_top_csbp, u2_cur_csbp; + + /* Set of flags */ + UWORD32 u4_cur_mb_intra, u4_cur_mb_fld, u4_top_mb_fld, u1_top_mb_typ, u4_left_mb_fld; + UWORD32 u1_cur_mb_type; + UWORD32 * pu4_bs_table; + const UWORD32 u4_bot_mb = (1 - ps_cur_mb_info->u1_topmb); + /* Initialization */ + const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx; + const UWORD32 u2_mby = ps_cur_mb_info->u2_mby; + /* Load From u1_pingpong and Store in !u1_pingpong */ + const UWORD32 u1_pingpong = u2_mbx & 0x01; + + PROFILE_DISABLE_BOUNDARY_STRENGTH() + + ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + (u2_mbx << 1); + + + /************************************************/ + /* Initialize the left Mb type */ + /* Left MvPred */ + /************************************************/ + + if(!u2_mbx) + { + /************************************************************/ + /* Initialize the ps_left_mv_pred with Junk but Valid Location */ + /* to avoid invalid memory access */ + /* this is read only 
pointer */ + /************************************************************/ + ps_left_mv_pred = ps_dec->ps_mv_cur + 16; + } + else + { + /* Come to Left Most Edge of the MB */ + ps_left_mv_pred = (u2_mbxn_mb) ? + ps_dec->ps_mv_cur + ((u2_mbxn_mb - 1) << 5) + 3 : + ps_dec->ps_mv_left + 3; + + ps_left_mv_pred += (u4_bot_mb << 4); + } + + u4_leftmbtype = ps_dec->deblk_left_mb[u4_bot_mb].u1_mb_type; + + ps_left_ngbr = &(ps_dec->ps_left_mvpred_addr[u1_pingpong][u4_bot_mb]); + + /************************************************/ + /* Pointer Assignment for Current Mb Parameters */ + /* Pointer Assignment for Current MvPred */ + /************************************************/ + ps_cur_mb_params = ps_dec->ps_deblk_mbn + (u2_mbxn_mb << 1) + u4_bot_mb; + u1_cur_mb_type = ps_cur_mb_params->u1_mb_type; + + ps_cur_mv_pred = ps_dec->ps_mv_cur + (u2_mbxn_mb << 5); + ps_cur_mv_pred += (u4_bot_mb << 4); + + /********************************************/ + /* Pointer Assignment for Top Mb Parameters */ + /* Pointer Assignment for Top MvPred and */ + /* Pointer Assignment for Top Nnz */ + /********************************************/ + + /* Step back (u2_frm_wd_in_mbs << 5) entries from the current MvPred and select entry 12 */ + ps_top_mv_pred = ps_cur_mv_pred - (ps_dec->u2_frm_wd_in_mbs << 5) + 12; + + u4_cur_mb_fld = !!(u1_cur_mb_type & D_FLD_MB); + u4_left_mb_fld = !!(ps_dec->deblk_left_mb[0].u1_mb_type & D_FLD_MB); + + if(u4_left_mb_fld != u4_cur_mb_fld) + { + /* Left and current MB differ in the D_FLD_MB flag: flag the extra left Edge */ + ps_cur_mb_params->u1_single_call = 0; + + if(u4_bot_mb) + { + ps_left_ngbr--; + ps_left_mv_pred -= 16; + } + } + else + ps_cur_mb_params->u1_single_call = 1; + + apv_map_ref_idx_to_poc = ps_dec->ppv_map_ref_idx_to_poc + 1; + if(u4_cur_mb_fld) + { + if(u4_bot_mb) + { + apv_map_ref_idx_to_poc += BOT_LIST_FLD_L0; + } + else + { + apv_map_ref_idx_to_poc += TOP_LIST_FLD_L0; + } + } + + /**********************************************************/ + /* if filtering is disabled for current Mb: store neighbour data, return */ + 
/**********************************************************/ + if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING) + { + void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc + + POC_LIST_L0_TO_L1_DIFF; + + { + /* Store Parameter for Top MvPred reference frame Address */ + + void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress; + void ** ppv_left_mv_pred_addr = + ps_dec->ps_left_mvpred_addr[!u1_pingpong][u4_bot_mb].u4_add; + WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame; + WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame; + WORD8 * p1_refLeft0 = (ps_cur_mv_pred + 3)->i1_ref_frame; + ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]]; + ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]]; + ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]]; + ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]]; + ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]]; + ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]]; + ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refLeft0[0]]; + ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refLeft0[1]]; + } + if(u4_bot_mb) + { + /* Bottom MB: store both MB types of this pair as the left MB types */ + ps_dec->deblk_left_mb[0].u1_mb_type = + (ps_cur_mb_params - 1)->u1_mb_type; + ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type; + + } + ps_deblk_top_mb[u4_bot_mb].u1_mb_type = u1_cur_mb_type; + return; + } + + if(u2_mby) + { + u1_top_mb_typ = ps_deblk_top_mb[1].u1_mb_type; + u4_top_mb_fld = !!(u1_top_mb_typ & D_FLD_MB); + + if(!u4_bot_mb) + { + if(u4_top_mb_fld & u4_cur_mb_fld) + u1_top_mb_typ = ps_deblk_top_mb[0].u1_mb_type; + else + { + ps_top_mv_pred += 16; + } + } + } + else + { + u4_top_mb_fld = u4_cur_mb_fld; + u1_top_mb_typ = 0; + } + + if(u4_bot_mb & !u4_cur_mb_fld) + { + u1_top_mb_typ = ps_deblk_top_mb[0].u1_mb_type; + u4_top_mb_fld = u4_cur_mb_fld; + ps_top_mv_pred = ps_cur_mv_pred - 4; + } + + pu4_bs_table = 
ps_cur_mb_params->u4_bs_table; + u4_cur_mb_intra = u1_cur_mb_type & D_INTRA_MB; + + u2_cur_csbp = ps_cur_mb_info->ps_curmb->u2_luma_csbp; + u2_left_csbp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp; + u2_top_csbp = ps_cur_mb_info->ps_top_mb->u2_luma_csbp; + /* High profile: adjust the csbp of MBs that use the 8x8 transform */ + if(ps_dec->ps_cur_sps->u1_profile_idc == HIGH_PROFILE_IDC) + { + + if(ps_cur_mb_info->u1_tran_form8x8 == 1) + { + u2_cur_csbp = ih264d_update_csbp_8x8( + ps_cur_mb_info->ps_curmb->u2_luma_csbp); + } + + if(ps_cur_mb_info->ps_left_mb->u1_tran_form8x8 == 1) + { + u2_left_csbp = ih264d_update_csbp_8x8( + ps_cur_mb_info->ps_left_mb->u2_luma_csbp); + } + + if(ps_cur_mb_info->ps_top_mb->u1_tran_form8x8 == 1) + { + u2_top_csbp = ih264d_update_csbp_8x8( + ps_cur_mb_info->ps_top_mb->u2_luma_csbp); + } + } + if(u4_cur_mb_intra) + { + + pu4_bs_table[4] = 0x04040404; + if((0 == u4_cur_mb_fld) && (0 == u4_top_mb_fld)) + { + pu4_bs_table[0] = 0x04040404; + } + else + { + pu4_bs_table[0] = 0x03030303; + } + + pu4_bs_table[1] = 0x03030303; + pu4_bs_table[2] = 0x03030303; + pu4_bs_table[3] = 0x03030303; + pu4_bs_table[5] = 0x03030303; + pu4_bs_table[6] = 0x03030303; + pu4_bs_table[7] = 0x03030303; + + /*********************************************************************/ + /* Fill Bs of extra top and left edge (entries 8 and 9) unconditionally to avoid checks */ + /*********************************************************************/ + pu4_bs_table[8] = 0x03030303; + pu4_bs_table[9] = 0x04040404; + } + else + { + UWORD32 u4_is_non16x16 = !!(u1_cur_mb_type & D_PRED_NON_16x16); + UWORD32 u4_is_b = ps_dec->u1_B; + + ih264d_fill_bs2_horz_vert( + pu4_bs_table, u2_left_csbp, u2_top_csbp, u2_cur_csbp, + (const UWORD32 *)(gau4_ih264d_packed_bs2), + (const UWORD16 *)(gau2_ih264d_4x4_v2h_reorder)); + + if(u4_leftmbtype & D_INTRA_MB) + pu4_bs_table[4] = 0x04040404; + + if(u1_top_mb_typ & D_INTRA_MB) + pu4_bs_table[0] = u4_cur_mb_fld ? 
0x03030303 : 0x04040404; + else if(u4_cur_mb_fld != u4_top_mb_fld) + { + /****************************************************/ + /* Mixed mode top edge: halve each Bs byte and add 1, so a byte of 2 stays 2 and a byte of 0 becomes 1 */ + /****************************************************/ + pu4_bs_table[0] = (pu4_bs_table[0] >> 1) + 0x01010101; + } + + { + /* Call to Compute Boundary Strength for Extra Left Edge (only when current and left MB differ in the field flag) */ + if(u2_mbx + && !(ps_cur_mb_params->u1_deblocking_mode + & MB_DISABLE_LEFT_EDGE)) + { + if(u4_cur_mb_fld != u4_left_mb_fld) + { + UWORD32 u4_left_mb_t_csbp = + ps_cur_mb_info->ps_left_mb[0].u2_luma_csbp; + UWORD32 u4_left_mb_b_csbp = + ps_cur_mb_info->ps_left_mb[1].u2_luma_csbp; + if(1 == ps_cur_mb_info->ps_left_mb[0].u1_tran_form8x8) + { + u4_left_mb_t_csbp = (UWORD32)ih264d_update_csbp_8x8( + (UWORD16)u4_left_mb_t_csbp); + } + + if(1 == ps_cur_mb_info->ps_left_mb[1].u1_tran_form8x8) + { + u4_left_mb_b_csbp = (UWORD32)ih264d_update_csbp_8x8( + (UWORD16)u4_left_mb_b_csbp); + } + ps_dec->pf_fill_bs_xtra_left_edge[u4_cur_mb_fld]( + pu4_bs_table, u4_left_mb_t_csbp, + u4_left_mb_b_csbp, u2_cur_csbp, u4_bot_mb); + + if(ps_dec->deblk_left_mb[0].u1_mb_type & D_INTRA_MB) + pu4_bs_table[4] = 0x04040404; + + if(ps_dec->deblk_left_mb[1].u1_mb_type & D_INTRA_MB) + pu4_bs_table[9] = 0x04040404; + + } + } + /* Call to Compute Boundary Strength for Extra Top Edge (only for a top frame MB below a field MB) */ + if(u2_mby + && !(ps_cur_mb_params->u1_deblocking_mode + & MB_DISABLE_TOP_EDGE)) + { + if((((!u4_bot_mb) & (!u4_cur_mb_fld)) && u4_top_mb_fld)) + { + UWORD32 u4_topmb_t_csbp = + ps_cur_mb_info->ps_top_mb[-1].u2_luma_csbp; + UWORD32 u4_topmb_b_csbp = + ps_cur_mb_info->ps_top_mb[0].u2_luma_csbp; + if(1 == ps_cur_mb_info->ps_top_mb[-1].u1_tran_form8x8) + { + u4_topmb_t_csbp = (UWORD32)ih264d_update_csbp_8x8( + (UWORD16)u4_topmb_t_csbp); + } + + if(1 == ps_cur_mb_info->ps_top_mb[0].u1_tran_form8x8) + { + u4_topmb_b_csbp = (UWORD32)ih264d_update_csbp_8x8( + (UWORD16)u4_topmb_b_csbp); + } + ih264d_fill_bs_xtra_top_edge(pu4_bs_table, u4_topmb_t_csbp, + 
u4_topmb_b_csbp, u2_cur_csbp); + + if(ps_deblk_top_mb[0].u1_mb_type & D_INTRA_MB) + pu4_bs_table[8] = 0x03030303; + + if(ps_deblk_top_mb[1].u1_mb_type & D_INTRA_MB) + pu4_bs_table[0] = 0x03030303; + } + } + } + + ps_dec->pf_fill_bs1[u4_is_b][u4_is_non16x16]( + ps_cur_mv_pred, ps_top_mv_pred, apv_map_ref_idx_to_poc, + pu4_bs_table, ps_left_mv_pred, ps_left_ngbr, + ps_cur_mb_info->ps_top_mb->u4_pic_addrress, + (4 >> u4_cur_mb_fld)); + } + + { + void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc + + POC_LIST_L0_TO_L1_DIFF; + + { + /* Store Parameter for Top MvPred reference frame Address */ + void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress; + void ** ppv_left_mv_pred_addr = + ps_dec->ps_left_mvpred_addr[!u1_pingpong][u4_bot_mb].u4_add; + WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame; + WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame; + WORD8 * p1_refLeft0 = (ps_cur_mv_pred + 3)->i1_ref_frame; + ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]]; + ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]]; + ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]]; + ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]]; + ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]]; + ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]]; + ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refLeft0[0]]; + ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refLeft0[1]]; + } + if(u4_bot_mb) + { + /* Bottom MB: store both MB types of this pair as the left MB types */ + ps_dec->deblk_left_mb[0].u1_mb_type = + (ps_cur_mb_params - 1)->u1_mb_type; + ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type; + + } + ps_deblk_top_mb[u4_bot_mb].u1_mb_type = u1_cur_mb_type; + } + /* For transform 8x8 disable deblocking of the internal edges of a 8x8 block */ + if(ps_cur_mb_info->u1_tran_form8x8) + { + pu4_bs_table[1] = 0; + pu4_bs_table[3] = 0; + pu4_bs_table[5] = 0; + pu4_bs_table[7] = 0; + } 
+ +} + + + +/*! + ************************************************************************** + * \if Function name : ih264d_fill_bs_for_mb \endif + * + * \brief + * Determines the boundary strength (Bs), for the complete MB. Bs is + * determined for each block boundary between two neighbouring 4x4 + * luma blocks, then packed in a UWORD32, first Bs placed in MSB and + * so on. Such packed Bs values for all 8 edges are kept in an array. + * + * \return + * None. The packed boundary strengths (MSB -> LSB Bs0|Bs1|Bs2|Bs3) are written to ui_bs_table + * + ************************************************************************** + */ + +void ih264d_fill_bs_for_mb(deblk_mb_t * ps_cur_mb_params, + deblk_mb_t * ps_top_mb_params, + deblk_mb_t * ps_left_mb_params, + mv_pred_t *ps_cur_mv_pred, + mv_pred_t *ps_top_mv_pred, + UWORD8 *puc_cur_nnz, + UWORD8 *puc_top_nnz, + void **ppv_map_ref_idx_to_poc, + UWORD32 ui_mbAff, + UWORD32 ui_bs_table[], /* pointer to the BsTable array */ + mv_pred_t *ps_leftmost_mv_pred, + neighbouradd_t *ps_left_addr, + neighbouradd_t *ps_top_add) +{ + UWORD32 u4_bs_horz = 0; + UWORD8 edge, u1_top_intra = 0, u1_left_intra = 0; + mv_pred_t *ps_left_mv_pred; + WORD16 i2_cur_mv0, i2_cur_mv1, i16_curMv2, i16_curMv3; + WORD16 i2_left_mv0, i2_left_mv1, i2_left_mv2, i2_left_mv3; + WORD16 i2_top_mv0, i2_top_mv1, i16_topMv2, i16_topMv3; + WORD8 i1_cur_ref0, i1_cur_ref1, i1_left_ref0, i1_left_ref1, i1_top_ref0, i1_top_ref1; + UWORD8 uc_cur_nnz, uc_left_nnz, uc_top_nnz, u1_mb_type, uc_Bslice; + void **ppv_map_ref_idx_to_poc_l0, **ppv_map_ref_idx_to_poc_l1; + UWORD8 uc_temp; + UWORD8 uc_cur_mb_fld, uc_top_mb_fld; + UWORD32 c_mv_limit; + + u1_mb_type = ps_cur_mb_params->u1_mb_type; + uc_Bslice = u1_mb_type & D_B_SLICE; + ppv_map_ref_idx_to_poc_l0 = ppv_map_ref_idx_to_poc; + ppv_map_ref_idx_to_poc_l1 = ppv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF; + + ps_top_mb_params = ps_top_mb_params ? 
ps_top_mb_params : ps_cur_mb_params; + u1_top_intra = ps_top_mb_params->u1_mb_type & D_INTRA_MB; + u1_left_intra = ps_left_mb_params->u1_mb_type & D_INTRA_MB; + + ui_bs_table[4] = 0x04040404; //Default for INTRA MB Boundary edges (left MB edge, entry 4). + uc_cur_mb_fld = (ps_cur_mb_params->u1_mb_type & D_FLD_MB) >> 7; + uc_top_mb_fld = (ps_top_mb_params->u1_mb_type & D_FLD_MB) >> 7; + + c_mv_limit = 4 >> uc_cur_mb_fld; + if((0 == uc_cur_mb_fld) && (0 == uc_top_mb_fld)) + { + ui_bs_table[0] = 0x04040404; + } + else + { + ui_bs_table[0] = 0x03030303; + } + + for(edge = 0; edge < 4; + edge++, ps_top_mv_pred = ps_cur_mv_pred - 4, puc_top_nnz = + puc_cur_nnz - 4) + { + //Each iteration of this loop fills the four BS values of one HORIZ edge and + //one BS value for each of the four VERT edges. + WORD8 i = 0; + UWORD8 uc_bs_horiz, uc_bs_vert; + UWORD32 ui_cnd; + void *ui_ref_pic_addr[4]; + UWORD8 uc_mixed_mode_edge; + + uc_mixed_mode_edge = 0; + + uc_temp = (ui_mbAff << 4) + 13; + + uc_cur_nnz = *(puc_cur_nnz - uc_temp); + ps_left_mv_pred = ps_leftmost_mv_pred + (edge << 2); + + for(i = 0; i < 4; i++, ps_top_mv_pred++, ps_cur_mv_pred++) + { + //Each iteration of this inner loop computes a HORIZ + //and a VERT BS value for a 4x4 block + + uc_left_nnz = uc_cur_nnz; + uc_cur_nnz = *puc_cur_nnz++; + uc_top_nnz = *puc_top_nnz++; + + //VERT edge is assigned BS values first + ui_cnd = !(uc_left_nnz || uc_cur_nnz); + uc_bs_vert = 2; + + if(ui_cnd) + { + i2_left_mv0 = ps_left_mv_pred->i2_mv[0]; + i2_left_mv1 = ps_left_mv_pred->i2_mv[1]; + i2_left_mv2 = ps_left_mv_pred->i2_mv[2]; + i2_left_mv3 = ps_left_mv_pred->i2_mv[3]; + + i2_cur_mv0 = ps_cur_mv_pred->i2_mv[0]; + i2_cur_mv1 = ps_cur_mv_pred->i2_mv[1]; + i16_curMv2 = ps_cur_mv_pred->i2_mv[2]; + i16_curMv3 = ps_cur_mv_pred->i2_mv[3]; + i1_cur_ref0 = ps_cur_mv_pred->i1_ref_frame[0]; + i1_cur_ref1 = ps_cur_mv_pred->i1_ref_frame[1]; + ui_ref_pic_addr[2] = ppv_map_ref_idx_to_poc_l0[i1_cur_ref0]; + ui_ref_pic_addr[3] = 
ppv_map_ref_idx_to_poc_l1[i1_cur_ref1]; + + if(i) + { + i1_left_ref0 = ps_left_mv_pred->i1_ref_frame[0]; + i1_left_ref1 = ps_left_mv_pred->i1_ref_frame[1]; + ui_ref_pic_addr[0] = ppv_map_ref_idx_to_poc_l0[i1_left_ref0]; + ui_ref_pic_addr[1] = ppv_map_ref_idx_to_poc_l1[i1_left_ref1]; + } + else + { + ui_ref_pic_addr[0] = ps_left_addr->u4_add[edge & 2]; + ui_ref_pic_addr[1] = ps_left_addr->u4_add[1 + (edge & 2)]; + } + if(!uc_Bslice) + { + uc_bs_vert = + (ui_ref_pic_addr[0] != ui_ref_pic_addr[2]) + | (ABS((i2_left_mv0 + - i2_cur_mv0)) + >= 4) + | (ABS((i2_left_mv1 + - i2_cur_mv1)) + >= (UWORD8)c_mv_limit); + } + else + { + UWORD8 uc_bs_temp1, uc_bs_temp2; + + uc_bs_vert = 1; + + uc_bs_temp1 = + ((ABS((i2_left_mv0 - i2_cur_mv0)) + >= 4) + | (ABS((i2_left_mv1 + - i2_cur_mv1)) + >= (UWORD8)c_mv_limit) + | (ABS((i2_left_mv2 + - i16_curMv2)) + >= 4) + | (ABS((i2_left_mv3 + - i16_curMv3)) + >= (UWORD8)c_mv_limit)); + + uc_bs_temp2 = + ((ABS((i2_left_mv0 - i16_curMv2)) + >= 4) + | (ABS((i2_left_mv1 + - i16_curMv3)) + >= (UWORD8)c_mv_limit) + | (ABS((i2_left_mv2 + - i2_cur_mv0)) + >= 4) + | (ABS((i2_left_mv3 + - i2_cur_mv1)) + >= (UWORD8)c_mv_limit)); + + uc_bs_vert = + (((ui_ref_pic_addr[0] != ui_ref_pic_addr[2]) + || (ui_ref_pic_addr[1] + != ui_ref_pic_addr[3])) + || (uc_bs_temp1)) + && (((ui_ref_pic_addr[0] + != ui_ref_pic_addr[3]) + || (ui_ref_pic_addr[1] + != ui_ref_pic_addr[2])) + || (uc_bs_temp2)); + + } + } + //Force BS 4 on the left MB edge (i == 0) when the left MB is intra, + //then shift the value into the packed word of VERT edge i + uc_bs_vert = (!i && u1_left_intra) ? 4 : uc_bs_vert; + ui_bs_table[i + 4] = (ui_bs_table[i + 4] << 8) | uc_bs_vert; + + //HORIZ edge is assigned BS values next + ui_cnd = !(uc_top_nnz || uc_cur_nnz); + uc_bs_horiz = 2; + + if(ui_cnd) + { + uc_mixed_mode_edge = + (0 == edge) ? 
(uc_top_mb_fld != uc_cur_mb_fld) : 0; + ui_cnd = 1 - uc_mixed_mode_edge; + uc_bs_horiz = uc_mixed_mode_edge; + } + + if(ui_cnd) + { + i2_cur_mv0 = ps_cur_mv_pred->i2_mv[0]; + i2_cur_mv1 = ps_cur_mv_pred->i2_mv[1]; + i16_curMv2 = ps_cur_mv_pred->i2_mv[2]; + i16_curMv3 = ps_cur_mv_pred->i2_mv[3]; + i1_cur_ref0 = ps_cur_mv_pred->i1_ref_frame[0]; + i1_cur_ref1 = ps_cur_mv_pred->i1_ref_frame[1]; + + i2_top_mv0 = ps_top_mv_pred->i2_mv[0]; + i2_top_mv1 = ps_top_mv_pred->i2_mv[1]; + i16_topMv2 = ps_top_mv_pred->i2_mv[2]; + i16_topMv3 = ps_top_mv_pred->i2_mv[3]; + ui_ref_pic_addr[2] = ppv_map_ref_idx_to_poc_l0[i1_cur_ref0]; + ui_ref_pic_addr[3] = ppv_map_ref_idx_to_poc_l1[i1_cur_ref1]; + if(edge) + { + i1_top_ref0 = ps_top_mv_pred->i1_ref_frame[0]; + i1_top_ref1 = ps_top_mv_pred->i1_ref_frame[1]; + ui_ref_pic_addr[0] = ppv_map_ref_idx_to_poc_l0[i1_top_ref0]; + ui_ref_pic_addr[1] = ppv_map_ref_idx_to_poc_l1[i1_top_ref1]; + } + else + { + ui_ref_pic_addr[0] = ps_top_add->u4_add[i & 2]; + ui_ref_pic_addr[1] = ps_top_add->u4_add[1 + (i & 2)]; + } + if(!uc_Bslice) + { + uc_bs_horiz = + (ui_ref_pic_addr[0] != ui_ref_pic_addr[2]) + | (ABS((i2_top_mv0 + - i2_cur_mv0)) + >= 4) + | (ABS((i2_top_mv1 + - i2_cur_mv1)) + >= (UWORD8)c_mv_limit); + } + else + { + UWORD8 uc_bs_temp1, uc_bs_temp2; + + uc_bs_horiz = 1; + + uc_bs_temp1 = + ((ABS((i2_top_mv0 - i2_cur_mv0)) + >= 4) + | (ABS((i2_top_mv1 + - i2_cur_mv1)) + >= (UWORD8)c_mv_limit) + | (ABS((i16_topMv2 + - i16_curMv2)) + >= 4) + | (ABS((i16_topMv3 + - i16_curMv3)) + >= (UWORD8)c_mv_limit)); + + uc_bs_temp2 = + ((ABS((i2_top_mv0 - i16_curMv2)) + >= 4) + | (ABS((i2_top_mv1 + - i16_curMv3)) + >= (UWORD8)c_mv_limit) + | (ABS((i16_topMv2 + - i2_cur_mv0)) + >= 4) + | (ABS((i16_topMv3 + - i2_cur_mv1)) + >= (UWORD8)c_mv_limit)); + + uc_bs_horiz = + (((ui_ref_pic_addr[0] != ui_ref_pic_addr[2]) + || (ui_ref_pic_addr[1] + != ui_ref_pic_addr[3])) + || (uc_bs_temp1)) + && (((ui_ref_pic_addr[0] + != ui_ref_pic_addr[3]) + || (ui_ref_pic_addr[1] + 
!= ui_ref_pic_addr[2])) + || (uc_bs_temp2)); + + } + } + ps_left_mv_pred = ps_cur_mv_pred; + u4_bs_horz = (u4_bs_horz << 8) + uc_bs_horiz; + } + //Fill the HORIZ BS, only if valid i.e., + //if it is a non-edge OR it is an edge, which is not yet filled + if(edge || (!edge && !u1_top_intra)) + ui_bs_table[edge] = u4_bs_horz; + } +} + +/*! + ************************************************************************** + * \if Function name : ih264d_fill_bs_for_extra_left_edge \endif + * + * \brief + * Fills the boundary strength (Bs) for the left edges: u4_bs_table[4] and u4_bs_table[9]. + * + * \return + * None. Each word packs the Bs values MSB -> LSB Bs0|Bs1|Bs2|Bs3 + * + ************************************************************************** + */ +void ih264d_fill_bs_for_extra_left_edge(deblk_mb_t *ps_cur_deblk_mb, + deblk_mb_t *ps_leftDeblkMb, + UWORD8* puc_cur_nnz, + UWORD8 uc_botMb) +{ + /* Clear the u1_single_call flag of current MB*/ + /* for mixed mode edge*/ + ps_cur_deblk_mb->u1_single_call = 0; + + if(ps_cur_deblk_mb->u1_mb_type & D_INTRA_MB) + { + ps_cur_deblk_mb->u4_bs_table[4] = 0x04040404; + ps_cur_deblk_mb->u4_bs_table[9] = 0x04040404; + } + else if((ps_leftDeblkMb->u1_mb_type & D_INTRA_MB) + && ((ps_leftDeblkMb + 1)->u1_mb_type & D_INTRA_MB)) + { + ps_cur_deblk_mb->u4_bs_table[4] = 0x04040404; + ps_cur_deblk_mb->u4_bs_table[9] = 0x04040404; + } + else + { + /* Get strengths of left MB edge */ + UWORD32 u4_bs; + UWORD8 uc_Bs; + WORD32 i; + UWORD32 ui_curMbFld; + UWORD8 *puc_left_nnz; + UWORD32 ui_bs_left_edge[2]; + + ui_curMbFld = (ps_cur_deblk_mb->u1_mb_type & D_FLD_MB) >> 7; + + puc_left_nnz = puc_cur_nnz - 29; + if((ui_curMbFld == 0) && uc_botMb) + { + puc_left_nnz -= 8; + } + else if(ui_curMbFld && uc_botMb) + { + puc_left_nnz -= 16; + } + + if(ui_curMbFld) + { + if(ps_leftDeblkMb->u1_mb_type & D_INTRA_MB) + { + ui_bs_left_edge[0] = 0x04040404; + puc_left_nnz += 16; + puc_cur_nnz += 8; + } + else + { + u4_bs = 0; + for(i = 4; i > 0; i--) + { + uc_Bs = 
((*puc_cur_nnz || *puc_left_nnz)) ? 2 : 1; + u4_bs = (u4_bs << 8) | uc_Bs; + puc_left_nnz += 4; + if(i & 0x01) + puc_cur_nnz += 4; + } + ui_bs_left_edge[0] = u4_bs; + } + + if((ps_leftDeblkMb + 1)->u1_mb_type & D_INTRA_MB) + { + ui_bs_left_edge[1] = 0x04040404; + } + else + { + u4_bs = 0; + for(i = 4; i > 0; i--) + { + uc_Bs = ((*puc_cur_nnz || *puc_left_nnz)) ? 2 : 1; + u4_bs = (u4_bs << 8) | uc_Bs; + puc_left_nnz += 4; + if(i & 0x01) + puc_cur_nnz += 4; + } + ui_bs_left_edge[1] = u4_bs; + } + } + else + { + UWORD8 *puc_curNnzB, *puc_leftNnzB; + puc_curNnzB = puc_cur_nnz; + puc_leftNnzB = puc_left_nnz + 16; + if(ps_leftDeblkMb->u1_mb_type & D_INTRA_MB) + { + ui_bs_left_edge[0] = 0x04040404; + } + else + { + u4_bs = 0; + for(i = 4; i > 0; i--, puc_cur_nnz += 4) + { + uc_Bs = ((*puc_cur_nnz || *puc_left_nnz)) ? 2 : 1; + u4_bs = (u4_bs << 8) | uc_Bs; + if(i & 0x01) + puc_left_nnz += 4; + } + ui_bs_left_edge[0] = u4_bs; + } + + if((ps_leftDeblkMb + 1)->u1_mb_type & D_INTRA_MB) + { + ui_bs_left_edge[1] = 0x04040404; + } + else + { + u4_bs = 0; + for(i = 4; i > 0; i--, puc_curNnzB += 4) + { + uc_Bs = ((*puc_curNnzB || *puc_leftNnzB)) ? 2 : 1; + u4_bs = (u4_bs << 8) | uc_Bs; + if(i & 0x01) + puc_leftNnzB += 4; + } + ui_bs_left_edge[1] = u4_bs; + } + } + /* Copy the values into entries 4 and 9 of the current MB Bs table */ + ps_cur_deblk_mb->u4_bs_table[4] = ui_bs_left_edge[0]; + ps_cur_deblk_mb->u4_bs_table[9] = ui_bs_left_edge[1]; + } + +} + +/*! + ************************************************************************** + * \if Function name : ih264d_fill_bs_for_extra_top_edge \endif + * + * \brief + * Fills the boundary strength (Bs), for the top extra edge. 
Edge 0 is written to u4_bs_table[8], edge 1 to u4_bs_table[0]. + * + * \return + * None. Each word packs the Bs values MSB -> LSB Bs0|Bs1|Bs2|Bs3 + * + ************************************************************************** + */ +void ih264d_fill_bs_for_extra_top_edge(deblk_mb_t *ps_cur_mb_params, + UWORD8 u1_Edge0_mb_typ, + UWORD8 u1_Edge1_mb_typ, + UWORD8 *pu1_curNnz, + UWORD8 *pu1_topNnz) +{ + UWORD32 u4_bs; + UWORD8 uc_Bs; + WORD32 i; + UWORD8 *pu1_cur_nnz_tmp; + UWORD8 *pu1_top_nnz_tmp; + UWORD8 u1_top_edge; + UWORD8 u1_top_mb_type; + for(u1_top_edge = 0; u1_top_edge < 2; u1_top_edge++) + { + u1_top_mb_type = u1_top_edge ? u1_Edge1_mb_typ : u1_Edge0_mb_typ; + pu1_cur_nnz_tmp = pu1_curNnz; + pu1_top_nnz_tmp = pu1_topNnz + (u1_top_edge << 2); + + if((ps_cur_mb_params->u1_mb_type & D_INTRA_MB) + + (u1_top_mb_type & D_INTRA_MB)) + { + u4_bs = 0x03030303; + } + else + { + u4_bs = 0; + for(i = 4; i > 0; i--, pu1_cur_nnz_tmp += 1, pu1_top_nnz_tmp += 1) + { + uc_Bs = ((*pu1_cur_nnz_tmp || *pu1_top_nnz_tmp)) ? 2 : 1; + u4_bs = (u4_bs << 8) | uc_Bs; + } + } + if(u1_top_edge) + ps_cur_mb_params->u4_bs_table[0] = u4_bs; + else + ps_cur_mb_params->u4_bs_table[8] = u4_bs; + } +} + + +void ih264d_fill_bs_mbedge_4(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + const UWORD16 u2_mbxn_mb) +{ + + /* deblk_mb_t Params */ + deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */ + deblkmb_neighbour_t *ps_deblk_top_mb; + UWORD32 * pu4_bs_table; + UWORD8 u1_cur_mb_type; + + /* Neighbour availability */ + /* Initialization */ + const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx; + const UWORD32 u2_mby = ps_cur_mb_info->u2_mby; + const UWORD32 u1_pingpong = u2_mbx & 0x01; + ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + u2_mbx; + + + /* Pointer assignment for Current DeblkMB */ + ps_cur_mb_params = ps_dec->ps_deblk_mbn + u2_mbxn_mb; + + u1_cur_mb_type = ps_cur_mb_params->u1_mb_type; + + ps_deblk_top_mb->u1_mb_type = u1_cur_mb_type; + + { + UWORD8 mb_qp_temp; + + ps_cur_mb_params->u1_topmb_qp = 
ps_deblk_top_mb->u1_mb_qp; + ps_deblk_top_mb->u1_mb_qp = ps_cur_mb_params->u1_mb_qp; + + ps_cur_mb_params->u1_left_mb_qp = ps_dec->deblk_left_mb[1].u1_mb_qp; + ps_dec->deblk_left_mb[1].u1_mb_qp = ps_cur_mb_params->u1_mb_qp; + + } + + ps_cur_mb_params->u1_single_call = 1; + + ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type; + /* if filtering is disabled for the current Mb, return without filling Bs */ + /* the top/left Mb type and qp have already been stored above */ + if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING) + { + /* nothing more to store here */ + return; + } + + /* Bs 4 on the left (entry 4) and top (entry 0) MB edges, 0 on the internal edges */ + pu4_bs_table = ps_cur_mb_params->u4_bs_table; + + pu4_bs_table[4] = 0x04040404; + pu4_bs_table[0] = 0x04040404; + pu4_bs_table[1] = 0; + pu4_bs_table[2] = 0; + pu4_bs_table[3] = 0; + pu4_bs_table[5] = 0; + pu4_bs_table[6] = 0; + pu4_bs_table[7] = 0; + +} + +void ih264d_fill_bs_mbedge_2(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + const UWORD16 u2_mbxn_mb) +{ + + /* deblk_mb_t Params */ + deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */ + deblkmb_neighbour_t *ps_deblk_top_mb; + UWORD32 * pu4_bs_table; + UWORD8 u1_cur_mb_type; + + /* Neighbour availability */ + /* Initialization */ + const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx; + const UWORD32 u2_mby = ps_cur_mb_info->u2_mby; + const UWORD32 u1_pingpong = u2_mbx & 0x01; + ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + u2_mbx; + + + /* Pointer assignment for Current DeblkMB */ + ps_cur_mb_params = ps_dec->ps_deblk_mbn + u2_mbxn_mb; + + u1_cur_mb_type = ps_cur_mb_params->u1_mb_type; + + ps_deblk_top_mb->u1_mb_type = u1_cur_mb_type; + + { + UWORD8 mb_qp_temp; + + ps_cur_mb_params->u1_topmb_qp = ps_deblk_top_mb->u1_mb_qp; + ps_deblk_top_mb->u1_mb_qp = ps_cur_mb_params->u1_mb_qp; + + ps_cur_mb_params->u1_left_mb_qp = ps_dec->deblk_left_mb[1].u1_mb_qp; + ps_dec->deblk_left_mb[1].u1_mb_qp = ps_cur_mb_params->u1_mb_qp; + + } + + ps_cur_mb_params->u1_single_call = 1; + + 
ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type; + /* if filtering is disabled for the current Mb, return without filling Bs */ + /* the left Mb type has already been stored by the statement above */ + if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING) + { + /* nothing more to store here */ + return; + } + + /* Top (entry 0) and left (entry 4) MB edges get Bs 2 for all four blocks if any selected csbp bit is set, else 0; internal edges get 0 */ + pu4_bs_table = ps_cur_mb_params->u4_bs_table; + + { + UWORD32 top_mb_csbp, left_mb_csbp, cur_mb_csbp; + UWORD32 top_edge, left_edge; + + top_mb_csbp = ps_cur_mb_info->ps_top_mb->u2_luma_csbp; + left_mb_csbp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp; + cur_mb_csbp = ps_cur_mb_info->ps_curmb->u2_luma_csbp; + + top_mb_csbp = top_mb_csbp >> 12; + top_edge = top_mb_csbp | (cur_mb_csbp & 0xf); + + if(top_edge) + pu4_bs_table[0] = 0x02020202; + else + pu4_bs_table[0] = 0; + + cur_mb_csbp = cur_mb_csbp & CSBP_LEFT_BLOCK_MASK; + left_mb_csbp = left_mb_csbp & CSBP_RIGHT_BLOCK_MASK; + + left_edge = cur_mb_csbp | left_mb_csbp; + + if(left_edge) + pu4_bs_table[4] = 0x02020202; + else + pu4_bs_table[4] = 0; + + pu4_bs_table[1] = 0; + pu4_bs_table[2] = 0; + pu4_bs_table[3] = 0; + pu4_bs_table[5] = 0; + pu4_bs_table[6] = 0; + pu4_bs_table[7] = 0; + } + +} diff --git a/decoder/ih264d_deblocking.c b/decoder/ih264d_deblocking.c new file mode 100755 index 0000000..ad4ce08 --- /dev/null +++ b/decoder/ih264d_deblocking.c @@ -0,0 +1,2134 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +#include <string.h> + +#include "ih264_typedefs.h" +#include "iv.h" +#include "ivd.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_debug.h" +#include "ih264d_defs.h" +#include "ih264d_defs.h" +#include "ih264d_structs.h" +#include "ih264d_deblocking.h" +#include "ih264d_mb_utils.h" +#include "ih264d_error_handler.h" +#include "ih264d_utils.h" + + +#include "ih264d_defs.h" +#include "ih264d_format_conv.h" +#include "ih264d_deblocking.h" +#include "ih264d_tables.h" +//extern UWORD8 *g_dest_y, *g_dest_uv; + +/*! + ************************************************************************* + * \file ih264d_deblocking.c + * + * \brief + * Decoder specific deblocking routines + * + * \author AI + ************************************************************************* + */ + +/*! + ************************************************************************** + * \if Function name : HorizonPad \endif + * + * \brief + * Does the Horizontal padding on a whole pic. + * + * \return + * None + ************************************************************************** + */ + +/*! + ************************************************************************** + * \if Function name : FilterBoundaryLeft \endif + * + * \brief + * Filters MacroBlock Left Boundary edges. 
+ * + * \return + * None + ************************************************************************** + */ +void ih264d_filter_boundary_left_nonmbaff(dec_struct_t *ps_dec, + tfr_ctxt_t * ps_tfr_cxt, + WORD8 i1_cb_qp_idx_ofst, + WORD8 i1_cr_qp_idx_ofst, + deblk_mb_t * ps_cur_mb, + UWORD16 i4_strd_y, + UWORD16 i4_strd_uv, + deblk_mb_t * ps_left_mb, + UWORD32 pu4_bs_tab[], + UWORD8 u1_cur_fld) +{ + UWORD8 *pu1_y, *pu1_u, *pu1_v; + WORD32 uc_tmp, qp_avg; + WORD32 alpha_u = 0, beta_u = 0, alpha_v = 0, beta_v = 0; + WORD32 alpha_y = 0, beta_y = 0; + + WORD32 idx_b_u, idx_a_u, idx_b_v, idx_a_v; + WORD32 idx_b_y, idx_a_y; + + UWORD32 u4_bs_val; + + UWORD8 *pu1_cliptab_u, *pu1_cliptab_v, *pu1_cliptab_y; + + UWORD8 u1_double_cl = !ps_cur_mb->u1_single_call; + WORD32 ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset; + WORD32 ofst_b = ps_cur_mb->i1_slice_beta_offset; + + PROFILE_DISABLE_DEBLK() + + pu1_y = ps_tfr_cxt->pu1_mb_y; + pu1_u = ps_tfr_cxt->pu1_mb_u; + pu1_v = ps_tfr_cxt->pu1_mb_v; + + /* LUMA values */ + /* Deblock rounding change */ + qp_avg = + (UWORD8)((ps_cur_mb->u1_left_mb_qp + ps_cur_mb->u1_mb_qp + 1) + >> 1); + + idx_a_y = qp_avg + ofst_a; + alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y]; + idx_b_y = qp_avg + ofst_b; + beta_y = gau1_ih264d_beta_table[12 + idx_b_y]; + + /* Chroma cb values */ + { + UWORD8 u1_mb_qp1, u1_mb_qp2; + u1_mb_qp1 = (ps_cur_mb->u1_left_mb_qp + i1_cb_qp_idx_ofst); + u1_mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst); + qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + u1_mb_qp1] + + gau1_ih264d_qp_scale_cr[12 + u1_mb_qp2] + 1) >> 1); + } + idx_a_u = qp_avg + ofst_a; + alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u]; + idx_b_u = qp_avg + ofst_b; + beta_u = gau1_ih264d_beta_table[12 + idx_b_u]; + /* Chroma cr values */ + { + UWORD8 u1_mb_qp1, u1_mb_qp2; + u1_mb_qp1 = (ps_cur_mb->u1_left_mb_qp + i1_cr_qp_idx_ofst); + u1_mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst); + qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + u1_mb_qp1] + + 
gau1_ih264d_qp_scale_cr[12 + u1_mb_qp2] + 1) >> 1); + } + idx_a_v = qp_avg + ofst_a; + alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v]; + idx_b_v = qp_avg + ofst_b; + beta_v = gau1_ih264d_beta_table[12 + idx_b_v]; + + if(u1_double_cl == 0) + { + u4_bs_val = pu4_bs_tab[4]; + + if(0x04040404 == u4_bs_val) + { + ps_dec->pf_deblk_luma_vert_bs4(pu1_y, i4_strd_y, alpha_y, beta_y); + ps_dec->pf_deblk_chroma_vert_bs4(pu1_u, i4_strd_uv, alpha_u, + beta_u, alpha_v, beta_v); + } + else + { + if(u4_bs_val) + { + + pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + + idx_a_y]; + pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + + idx_a_u]; + pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + + idx_a_v]; + ps_dec->pf_deblk_luma_vert_bslt4(pu1_y, i4_strd_y, alpha_y, + beta_y, u4_bs_val, + pu1_cliptab_y); + ps_dec->pf_deblk_chroma_vert_bslt4(pu1_u, i4_strd_uv, alpha_u, + beta_u, alpha_v, beta_v, + u4_bs_val, pu1_cliptab_u, + pu1_cliptab_v); + + } + } + + } + else + { + + i4_strd_y <<= (!u1_cur_fld); + u4_bs_val = pu4_bs_tab[4]; + i4_strd_uv <<= (!u1_cur_fld); + + if(0x04040404 == u4_bs_val) + { + + ps_dec->pf_deblk_luma_vert_bs4_mbaff(pu1_y, i4_strd_y, alpha_y, + beta_y); + ps_dec->pf_deblk_chroma_vert_bs4_mbaff(pu1_u, i4_strd_uv, alpha_u, + beta_u, alpha_v, beta_v); + + } + else + { + if(u4_bs_val) + { + + pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + + idx_a_y]; + pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + + idx_a_u]; + pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + + idx_a_v]; + + ps_dec->pf_deblk_luma_vert_bslt4_mbaff(pu1_y, i4_strd_y, + alpha_y, beta_y, + u4_bs_val, + pu1_cliptab_y); + ps_dec->pf_deblk_chroma_vert_bslt4_mbaff(pu1_u, i4_strd_uv, + alpha_u, beta_u, + alpha_v, beta_v, + u4_bs_val, + pu1_cliptab_u, + pu1_cliptab_v); + } + } + + { + + UWORD16 u2_shift = (i4_strd_y >> 1) << (u1_cur_fld ? 4 : 0); + pu1_y += u2_shift; + u2_shift = (i4_strd_uv >> 1) << (u1_cur_fld ? 
3 : 0); + pu1_u += u2_shift; + pu1_v += u2_shift; + } + + qp_avg = (((ps_left_mb + 1)->u1_mb_qp + ps_cur_mb->u1_mb_qp + 1) >> 1); + + idx_a_y = qp_avg + ofst_a; + alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y]; + idx_b_y = qp_avg + ofst_b; + beta_y = gau1_ih264d_beta_table[12 + idx_b_y]; + u4_bs_val = pu4_bs_tab[9]; + + { + UWORD8 u1_mb_qp1, u1_mb_qp2; + u1_mb_qp1 = ((ps_left_mb + 1)->u1_mb_qp + i1_cb_qp_idx_ofst); + u1_mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst); + qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + u1_mb_qp1] + + gau1_ih264d_qp_scale_cr[12 + u1_mb_qp2] + 1) >> 1); + } + idx_a_u = qp_avg + ofst_a; + alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u]; + idx_b_u = qp_avg + ofst_b; + beta_u = gau1_ih264d_beta_table[12 + idx_b_u]; + u4_bs_val = pu4_bs_tab[9]; + { + UWORD8 u1_mb_qp1, u1_mb_qp2; + u1_mb_qp1 = ((ps_left_mb + 1)->u1_mb_qp + i1_cr_qp_idx_ofst); + u1_mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst); + qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + u1_mb_qp1] + + gau1_ih264d_qp_scale_cr[12 + u1_mb_qp2] + 1) >> 1); + } + idx_a_v = qp_avg + ofst_a; + alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v]; + idx_b_v = qp_avg + ofst_b; + beta_v = gau1_ih264d_beta_table[12 + idx_b_v]; + + if(0x04040404 == u4_bs_val) + { + ps_dec->pf_deblk_luma_vert_bs4_mbaff(pu1_y, i4_strd_y, alpha_y, + beta_y); + ps_dec->pf_deblk_chroma_vert_bs4_mbaff(pu1_u, i4_strd_uv, alpha_u, + beta_u, alpha_v, beta_v); + + } + else + { + if(u4_bs_val) + { + + pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + + idx_a_y]; + pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + + idx_a_u]; + pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + + idx_a_v]; + + ps_dec->pf_deblk_luma_vert_bslt4_mbaff(pu1_y, i4_strd_y, + alpha_y, beta_y, + u4_bs_val, + pu1_cliptab_y); + ps_dec->pf_deblk_chroma_vert_bslt4_mbaff(pu1_u, i4_strd_uv, + alpha_u, beta_u, + alpha_v, beta_v, + u4_bs_val, + pu1_cliptab_u, + pu1_cliptab_v); + + } + } + } + +} + +/*! 
+ ************************************************************************** + * \if Function name : FilterBoundaryTop \endif + * + * \brief + * Filters MacroBlock Top Boundary egdes. + * + * \return + * None + ************************************************************************** + */ + +void ih264d_filter_boundary_top_nonmbaff(dec_struct_t *ps_dec, + tfr_ctxt_t * ps_tfr_cxt, + WORD8 i1_cb_qp_idx_ofst, + WORD8 i1_cr_qp_idx_ofst, + deblk_mb_t * ps_cur_mb, + UWORD16 i4_strd_y, + UWORD16 i4_strd_uv, + deblk_mb_t * ps_top_mb, + UWORD32 u4_bs) +{ + UWORD8 *pu1_y, *pu1_u; + WORD32 alpha_u = 0, beta_u = 0, alpha_v = 0, beta_v = 0; + WORD32 alpha_y = 0, beta_y = 0; + WORD32 qp_avg; + WORD32 uc_QPav_Y; + WORD32 idx_b_u, idx_a_u, idx_b_v, idx_a_v; + WORD32 idx_b_y, idx_a_y; + UWORD16 uc_tmp; + + UWORD8 *pu1_cliptab_u, *pu1_cliptab_v, *pu1_cliptab_y; + WORD32 ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset; + WORD32 ofst_b = ps_cur_mb->i1_slice_beta_offset; + + UNUSED(ps_top_mb); + /* LUMA values */ + /* Deblock rounding change */ + uc_tmp = ((ps_cur_mb->u1_topmb_qp + ps_cur_mb->u1_mb_qp + 1) >> 1); + uc_QPav_Y = (UWORD8)uc_tmp; + idx_a_y = uc_QPav_Y + ofst_a; + alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y]; + idx_b_y = uc_QPav_Y + ofst_b; + beta_y = gau1_ih264d_beta_table[12 + idx_b_y]; + pu1_y = ps_tfr_cxt->pu1_mb_y; + + /* CHROMA cb values */ + { + UWORD8 u1_mb_qp1, u1_mb_qp2; + u1_mb_qp1 = (ps_cur_mb->u1_topmb_qp + i1_cb_qp_idx_ofst); + u1_mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst); + qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + u1_mb_qp1] + + gau1_ih264d_qp_scale_cr[12 + u1_mb_qp2] + 1) >> 1); + } + + idx_a_u = qp_avg + ofst_a; + alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u]; + idx_b_u = qp_avg + ofst_b; + beta_u = gau1_ih264d_beta_table[12 + idx_b_u]; + /* CHROMA cr values */ + { + UWORD8 u1_mb_qp1, u1_mb_qp2; + u1_mb_qp1 = (ps_cur_mb->u1_topmb_qp + i1_cr_qp_idx_ofst); + u1_mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst); + qp_avg = 
(UWORD8)((gau1_ih264d_qp_scale_cr[12 + u1_mb_qp1] + + gau1_ih264d_qp_scale_cr[12 + u1_mb_qp2] + 1) >> 1); + } + + idx_a_v = qp_avg + ofst_a; + alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v]; + idx_b_v = qp_avg + ofst_b; + beta_v = gau1_ih264d_beta_table[12 + idx_b_v]; + pu1_u = ps_tfr_cxt->pu1_mb_u; + + if(u4_bs == 0x04040404) + { + /* Code specific to the assembly module */ + + ps_dec->pf_deblk_luma_horz_bs4(pu1_y, i4_strd_y, alpha_y, beta_y); + ps_dec->pf_deblk_chroma_horz_bs4(pu1_u, i4_strd_uv, alpha_u, beta_u, + alpha_v, beta_v); + } + else + { + if(u4_bs) + { + + pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y]; + pu1_cliptab_u = + (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u]; + pu1_cliptab_v = + (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v]; + + ps_dec->pf_deblk_luma_horz_bslt4(pu1_y, i4_strd_y, alpha_y, beta_y, + u4_bs, pu1_cliptab_y); + ps_dec->pf_deblk_chroma_horz_bslt4(pu1_u, i4_strd_uv, alpha_u, + beta_u, alpha_v, beta_v, + u4_bs, pu1_cliptab_u, + pu1_cliptab_v); + + } + } + +} + +void ih264d_deblock_mb_nonmbaff(dec_struct_t *ps_dec, + tfr_ctxt_t * ps_tfr_cxt, + WORD8 i1_cb_qp_idx_ofst, + WORD8 i1_cr_qp_idx_ofst, + deblk_mb_t * ps_cur_mb, + WORD32 i4_strd_y, + WORD32 i4_strd_uv, + deblk_mb_t * ps_top_mb, + deblk_mb_t * ps_left_mb) +{ + UWORD8 *pu1_y, *pu1_u; + UWORD32 u4_bs; + + WORD32 alpha, beta, alpha_u, beta_u, alpha_v, beta_v; + + UWORD8 *pu1_cliptab_u; + UWORD8 *pu1_cliptab_v; + UWORD8 *pu1_cliptab_y; + + UWORD32 * pu4_bs_tab = ps_cur_mb->u4_bs_table; + WORD32 idx_a_y, idx_a_u, idx_a_v; + + PROFILE_DISABLE_DEBLK() + /* Return from here to switch off deblocking */ + + /*---------------------------------------------------------------------*/ + /* Filter wrt Left edge */ + /* except */ + /* - Left Egde is Picture Boundary */ + /* - Left Egde is part of Slice Boundary and Deblocking */ + /* parameters of slice disable Filtering of Slice Boundary Edges*/ + /*---------------------------------------------------------------------*/ + 
if(ps_left_mb) + ih264d_filter_boundary_left_nonmbaff(ps_dec, ps_tfr_cxt, + i1_cb_qp_idx_ofst, + i1_cr_qp_idx_ofst, ps_cur_mb, + i4_strd_y, i4_strd_uv, ps_left_mb, + pu4_bs_tab, 0); + + /*--------------------------------------------------------------------*/ + /* Filter wrt Other Vertical Edges */ + /*--------------------------------------------------------------------*/ + { + WORD32 ofst_a, ofst_b, idx_b_y, idx_b_u, + idx_b_v; + WORD32 qp_avg, qp_avg_u, qp_avg_v; + ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset; + ofst_b = ps_cur_mb->i1_slice_beta_offset; + + qp_avg = ps_cur_mb->u1_mb_qp; + + idx_a_y = qp_avg + ofst_a; + alpha = gau1_ih264d_alpha_table[12 + idx_a_y]; + idx_b_y = qp_avg + ofst_b; + beta = gau1_ih264d_beta_table[12 + idx_b_y]; + + /* CHROMA values */ + /* CHROMA Cb values */ + qp_avg_u = (qp_avg + i1_cb_qp_idx_ofst); + qp_avg_u = gau1_ih264d_qp_scale_cr[12 + qp_avg_u]; + idx_a_u = qp_avg_u + ofst_a; + alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u]; + idx_b_u = qp_avg_u + ofst_b; + beta_u = gau1_ih264d_beta_table[12 + idx_b_u]; + /* CHROMA Cr values */ + qp_avg_v = (qp_avg + i1_cr_qp_idx_ofst); + qp_avg_v = gau1_ih264d_qp_scale_cr[12 + qp_avg_v]; + idx_a_v = qp_avg_v + ofst_a; + alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v]; + idx_b_v = qp_avg_v + ofst_b; + beta_v = gau1_ih264d_beta_table[12 + idx_b_v]; + } + + pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y]; //this for Luma + pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u]; //this for chroma + pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v]; //this for chroma + + //edge=1 + + + u4_bs = pu4_bs_tab[5]; + pu1_y = ps_tfr_cxt->pu1_mb_y; + pu1_u = ps_tfr_cxt->pu1_mb_u; + + if(u4_bs) + { + + ps_dec->pf_deblk_luma_vert_bslt4(pu1_y + 4, i4_strd_y, alpha, beta, + u4_bs, pu1_cliptab_y); + + } + //edge=2 + + u4_bs = pu4_bs_tab[6]; + if(u4_bs) + { + ps_dec->pf_deblk_luma_vert_bslt4(pu1_y + 8, i4_strd_y, alpha, beta, + u4_bs, pu1_cliptab_y); + 
ps_dec->pf_deblk_chroma_vert_bslt4(pu1_u + 4 * YUV420SP_FACTOR, + i4_strd_uv, alpha_u, beta_u, + alpha_v, beta_v, u4_bs, + pu1_cliptab_u, pu1_cliptab_v); + + } + //edge=3 + + u4_bs = pu4_bs_tab[7]; + if(u4_bs) + { + ps_dec->pf_deblk_luma_vert_bslt4(pu1_y + 12, i4_strd_y, alpha, beta, + u4_bs, pu1_cliptab_y); + + } + + /*--------------------------------------------------------------------*/ + /* Filter wrt Top edge */ + /* except */ + /* - Top Egde is Picture Boundary */ + /* - Top Egde is part of Slice Boundary and Deblocking */ + /* parameters of slice disable Filtering of Slice Boundary Edges*/ + /*--------------------------------------------------------------------*/ + if(ps_top_mb) + { + /** if top MB and MB AFF and cur MB is frame and top is field then */ + /* one extra top edge needs to be deblocked */ + + ih264d_filter_boundary_top_nonmbaff(ps_dec, ps_tfr_cxt, + i1_cb_qp_idx_ofst, + i1_cr_qp_idx_ofst, ps_cur_mb, + i4_strd_y, i4_strd_uv, ps_top_mb, + pu4_bs_tab[0]); + + } + + /*--------------------------------------------------------------------*/ + /* Filter wrt Other Horizontal Edges */ + /*--------------------------------------------------------------------*/ + + //edge1 + u4_bs = pu4_bs_tab[1]; + + if(u4_bs) + { + ps_dec->pf_deblk_luma_horz_bslt4(pu1_y + (i4_strd_y << 2), i4_strd_y, + alpha, beta, u4_bs, pu1_cliptab_y); + + } + //edge2 + u4_bs = pu4_bs_tab[2]; + + if(u4_bs) + { + + ps_dec->pf_deblk_luma_horz_bslt4(pu1_y + (i4_strd_y << 3), i4_strd_y, + alpha, beta, u4_bs, pu1_cliptab_y); + ps_dec->pf_deblk_chroma_horz_bslt4(pu1_u + (i4_strd_uv << 2), + i4_strd_uv, alpha_u, beta_u, + alpha_v, beta_v, u4_bs, + pu1_cliptab_u, pu1_cliptab_v); + + } + //edge3 + u4_bs = pu4_bs_tab[3]; + if(u4_bs) + { + ps_dec->pf_deblk_luma_horz_bslt4( + (pu1_y + (i4_strd_y << 3) + (i4_strd_y << 2)), + i4_strd_y, alpha, beta, u4_bs, pu1_cliptab_y); + + } + +} + +/************************************************************************** + * + * Function Name : 
ih264d_init_deblk_tfr_ctxt + * + * Description : This function is called once per deblockpicture call + * This sets up the transfer address contexts + * + * Revision History: + * + * DD MM YYYY Author(s) Changes (Describe the changes made) + * 14 06 2005 SWRN Draft + **************************************************************************/ +void ih264d_init_deblk_tfr_ctxt(dec_struct_t * ps_dec, + pad_mgr_t *ps_pad_mgr, + tfr_ctxt_t *ps_tfr_cxt, + UWORD16 u2_image_wd_mb, + UWORD8 u1_mbaff) +{ + + UWORD32 i4_wd_y; + UWORD32 i4_wd_uv; + UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; /*< Field u4_flag */ + UNUSED(u2_image_wd_mb); + ps_tfr_cxt->pu1_src_y = ps_dec->s_cur_pic.pu1_buf1 - 4; + ps_tfr_cxt->pu1_src_u = ps_dec->s_cur_pic.pu1_buf2 - 4; + ps_tfr_cxt->pu1_src_v = ps_dec->s_cur_pic.pu1_buf3 - 4; + ps_tfr_cxt->pu1_dest_y = ps_tfr_cxt->pu1_src_y; + ps_tfr_cxt->pu1_dest_u = ps_tfr_cxt->pu1_src_u; + ps_tfr_cxt->pu1_dest_v = ps_tfr_cxt->pu1_src_v; + + i4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag; + i4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag; + ps_tfr_cxt->u4_y_inc = ((i4_wd_y << u1_mbaff) * 16 + - (ps_dec->u2_frm_wd_in_mbs << 4)); + + ps_tfr_cxt->u4_uv_inc = (i4_wd_uv << u1_mbaff) * 8 + - (ps_dec->u2_frm_wd_in_mbs << 4); + + /* padding related initialisations */ + if(ps_dec->ps_cur_slice->u1_nal_ref_idc) + { + ps_pad_mgr->u1_vert_pad_top = !(ps_dec->ps_cur_slice->u1_field_pic_flag + && ps_dec->ps_cur_slice->u1_bottom_field_flag); + ps_pad_mgr->u1_vert_pad_bot = + ((!ps_dec->ps_cur_slice->u1_field_pic_flag) + || ps_dec->ps_cur_slice->u1_bottom_field_flag); + ps_pad_mgr->u1_horz_pad = 1; + } + else + { + ps_pad_mgr->u1_horz_pad = 0; + ps_pad_mgr->u1_vert_pad_top = 0; + ps_pad_mgr->u1_vert_pad_bot = 0; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_deblock_picture_mbaff */ +/* */ +/* Description : This function carries out deblocking on a whole picture */ 
+/* with MBAFF */ +/* */ +/* Inputs : <What inputs does the function take?> */ +/* Processing : This functions calls deblock MB in the MB increment order*/ +/* */ +/* Outputs : Produces the deblocked picture */ +/* Returns : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 17 02 2005 NS Creation */ +/* 14 06 2005 SWRN clean-up */ +/*****************************************************************************/ + +void ih264d_deblock_picture_mbaff(dec_struct_t * ps_dec) +{ + WORD16 i2_mb_x, i2_mb_y; + deblk_mb_t *ps_cur_mb; + deblk_mb_t *ps_top_mb; + deblk_mb_t *ps_left_mb; + + UWORD8 u1_vert_pad_top = 1; + UWORD8 u1_cur_fld, u1_top_fld, u1_left_fld; + UWORD8 u1_first_row; + + UWORD8 * pu1_deb_y, *pu1_deb_u, *pu1_deb_v; + UWORD8 u1_deb_mode, u1_extra_top_edge; + WORD32 i4_wd_y, i4_wd_uv; + + UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; /*< Field u4_flag */ + UWORD8 u1_bottom_field_flag = ps_dec->ps_cur_slice->u1_bottom_field_flag; /*< Bottom field u4_flag*/ + + /**************************************************/ + /* one time loads from ps_dec which will be used */ + /* frequently throughout the deblocking procedure */ + /**************************************************/ + pad_mgr_t * ps_pad_mgr = &ps_dec->s_pad_mgr; + tfr_ctxt_t s_tfr_ctxt; + tfr_ctxt_t * ps_tfr_cxt = &s_tfr_ctxt; + + UWORD16 u2_image_wd_mb = ps_dec->u2_frm_wd_in_mbs; + UWORD16 u2_image_ht_mb = ps_dec->u2_frm_ht_in_mbs; + UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + WORD8 i1_cb_qp_idx_ofst = ps_dec->ps_cur_pps->i1_chroma_qp_index_offset; + WORD8 i1_cr_qp_idx_ofst = + ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset; + + /* Set up Parameter for DMA transfer */ + ih264d_init_deblk_tfr_ctxt(ps_dec, ps_pad_mgr, ps_tfr_cxt, u2_image_wd_mb, + u1_mbaff); + + /* Pic level Initialisations */ + i2_mb_y = u2_image_ht_mb; + i2_mb_x = 0; + u1_extra_top_edge = 0; + + u1_first_row = 1; + + i4_wd_y = 
ps_dec->u2_frm_wd_y << u1_field_pic_flag; + i4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag; + /* Initial filling of the buffers with deblocking data */ + + pu1_deb_y = ps_tfr_cxt->pu1_src_y + 4; + pu1_deb_u = ps_tfr_cxt->pu1_src_u + 4; + pu1_deb_v = ps_tfr_cxt->pu1_src_v + 4; + ps_cur_mb = ps_dec->ps_deblk_pic; + + if(ps_dec->u4_app_disable_deblk_frm == 0) + { + if(ps_dec->u4_mb_level_deblk == 0 || ps_dec->u4_num_cores >= 3) + { + + while(i2_mb_y > 0) + { + do + { + + u1_deb_mode = ps_cur_mb->u1_deblocking_mode; + if(!(u1_deb_mode & MB_DISABLE_FILTERING)) + { + ps_tfr_cxt->pu1_mb_y = pu1_deb_y; + ps_tfr_cxt->pu1_mb_u = pu1_deb_u; + ps_tfr_cxt->pu1_mb_v = pu1_deb_v; + + u1_cur_fld = (ps_cur_mb->u1_mb_type & D_FLD_MB) >> 7; + u1_cur_fld &= 1; + if(i2_mb_x) + { + ps_left_mb = ps_cur_mb - 2; + } + else + { + ps_left_mb = NULL; + } + if(!u1_first_row) + { + ps_top_mb = ps_cur_mb - (u2_image_wd_mb << 1) + 1; + u1_top_fld = (ps_top_mb->u1_mb_type & D_FLD_MB) + >> 7; + } + else + { + ps_top_mb = NULL; + u1_top_fld = 0; + } + + if((!u1_first_row) & u1_top_fld & u1_cur_fld) + ps_top_mb--; + + /********************************************************/ + /* if top MB and MB AFF and cur MB is frame and top is */ + /* field, then one extra top edge needs to be deblocked */ + /********************************************************/ + u1_extra_top_edge = (!u1_cur_fld) & u1_top_fld; + + if(u1_deb_mode & MB_DISABLE_LEFT_EDGE) + ps_left_mb = NULL; + if(u1_deb_mode & MB_DISABLE_TOP_EDGE) + ps_top_mb = NULL; + + ih264d_deblock_mb_mbaff(ps_dec, ps_tfr_cxt, + i1_cb_qp_idx_ofst, + i1_cr_qp_idx_ofst, ps_cur_mb, + i4_wd_y, i4_wd_uv, ps_top_mb, + ps_left_mb, u1_cur_fld, + u1_extra_top_edge); + } + + ps_cur_mb++; + + u1_deb_mode = ps_cur_mb->u1_deblocking_mode; + if(!(u1_deb_mode & MB_DISABLE_FILTERING)) + { + ps_tfr_cxt->pu1_mb_y = pu1_deb_y; + ps_tfr_cxt->pu1_mb_u = pu1_deb_u; + ps_tfr_cxt->pu1_mb_v = pu1_deb_v; + + u1_cur_fld = (ps_cur_mb->u1_mb_type & D_FLD_MB) >> 7; + u1_cur_fld 
&= 1; + if(i2_mb_x) + { + ps_left_mb = ps_cur_mb - 2; + u1_left_fld = (ps_left_mb->u1_mb_type & D_FLD_MB) + >> 7; + } + else + { + ps_left_mb = NULL; + u1_left_fld = u1_cur_fld; + } + if(!u1_first_row) + { + ps_top_mb = ps_cur_mb - (u2_image_wd_mb << 1); + } + else + { + ps_top_mb = NULL; + } + + { + UWORD8 u1_row_shift_y = 0, u1_row_shift_uv = 0; + if(!u1_cur_fld) + { + ps_top_mb = ps_cur_mb - 1; + u1_top_fld = (ps_top_mb->u1_mb_type & D_FLD_MB) + >> 7; + u1_row_shift_y = 4; + u1_row_shift_uv = 3; + } + ps_tfr_cxt->pu1_mb_y += i4_wd_y << u1_row_shift_y; + ps_tfr_cxt->pu1_mb_u += + (i4_wd_uv << u1_row_shift_uv); + ps_tfr_cxt->pu1_mb_v += i4_wd_uv << u1_row_shift_uv; + } + + /* point to A if top else A+1 */ + if(u1_left_fld ^ u1_cur_fld) + ps_left_mb--; + + /********************************************************/ + /* if top MB and MB AFF and cur MB is frame and top is */ + /* field, then one extra top edge needs to be deblocked */ + /********************************************************/ + u1_extra_top_edge = 0; + + if(u1_deb_mode & MB_DISABLE_LEFT_EDGE) + ps_left_mb = NULL; + if(u1_deb_mode & MB_DISABLE_TOP_EDGE) + ps_top_mb = NULL; + + ih264d_deblock_mb_mbaff(ps_dec, ps_tfr_cxt, + i1_cb_qp_idx_ofst, + i1_cr_qp_idx_ofst, ps_cur_mb, + i4_wd_y, i4_wd_uv, ps_top_mb, + ps_left_mb, u1_cur_fld, + u1_extra_top_edge); + } + + ps_cur_mb++; + i2_mb_x++; + + pu1_deb_y += 16; + pu1_deb_u += 8 * YUV420SP_FACTOR; + pu1_deb_v += 8; + + } + while(u2_image_wd_mb > i2_mb_x); + + pu1_deb_y += ps_tfr_cxt->u4_y_inc; + pu1_deb_u += ps_tfr_cxt->u4_uv_inc; + pu1_deb_v += ps_tfr_cxt->u4_uv_inc; + + i2_mb_x = 0; + i2_mb_y -= 2; + + u1_first_row = 0; + + } + } + + } + //Padd the Picture + //Horizontal Padd + + if(ps_pad_mgr->u1_horz_pad) + { + UWORD32 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; + ps_dec->pf_pad_left_luma(ps_tfr_cxt->pu1_src_y + 4, + ps_dec->u2_frm_wd_y << u1_field_pic_flag, + ps_dec->u2_pic_ht >> u1_field_pic_flag, + PAD_LEN_Y_H); + 
ps_dec->pf_pad_right_luma( + ps_tfr_cxt->pu1_src_y + 4 + + (ps_dec->u2_frm_wd_in_mbs << 4), + ps_dec->u2_frm_wd_y << u1_field_pic_flag, + ps_dec->u2_pic_ht >> u1_field_pic_flag, PAD_LEN_Y_H); + + ps_dec->pf_pad_left_chroma(ps_tfr_cxt->pu1_src_u + 4, + ps_dec->u2_frm_wd_uv << u1_field_pic_flag, + (ps_dec->u2_pic_ht / 2) >> u1_field_pic_flag, + PAD_LEN_UV_H * YUV420SP_FACTOR); + ps_dec->pf_pad_right_chroma( + ps_tfr_cxt->pu1_src_u + 4 + + (ps_dec->u2_frm_wd_in_mbs << 4), + ps_dec->u2_frm_wd_uv << u1_field_pic_flag, + (ps_dec->u2_pic_ht / 2) >> u1_field_pic_flag, + PAD_LEN_UV_H * YUV420SP_FACTOR); + + } + +//Vertical Padd Top + if(ps_pad_mgr->u1_vert_pad_top) + { + ps_dec->pf_pad_top(ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H, + ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y, + ps_pad_mgr->u1_pad_len_y_v); + ps_dec->pf_pad_top( + ps_dec->ps_cur_pic->pu1_buf2 + - PAD_LEN_UV_H * YUV420SP_FACTOR, + ps_dec->u2_frm_wd_uv, ps_dec->u2_frm_wd_uv, + ps_pad_mgr->u1_pad_len_cr_v); + ps_pad_mgr->u1_vert_pad_top = 0; + } + +//Vertical Padd Bottom + if(ps_pad_mgr->u1_vert_pad_bot) + { + + UWORD8 *pu1_buf; + pu1_buf = ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H; + pu1_buf += ps_dec->u2_pic_ht * ps_dec->u2_frm_wd_y; + ps_dec->pf_pad_bottom(pu1_buf, ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y, + ps_pad_mgr->u1_pad_len_y_v); + pu1_buf = ps_dec->ps_cur_pic->pu1_buf2 - PAD_LEN_UV_H * YUV420SP_FACTOR; + pu1_buf += (ps_dec->u2_pic_ht >> 1) * ps_dec->u2_frm_wd_uv; + + ps_dec->pf_pad_bottom(pu1_buf, ps_dec->u2_frm_wd_uv, + ps_dec->u2_frm_wd_uv, + ps_pad_mgr->u1_pad_len_cr_v); + + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_deblock_picture_non_mbaff */ +/* */ +/* Description : This function carries out deblocking on a whole picture */ +/* without MBAFF */ +/* */ +/* Inputs : <What inputs does the function take?> */ +/* Processing : This functions calls deblock MB in the MB increment order*/ +/* */ +/* Outputs : Produces the 
deblocked picture */ +/* Returns : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 17 02 2005 NS Creation */ +/* 14 06 2005 SWRN clean-up */ +/*****************************************************************************/ + +void ih264d_deblock_picture_non_mbaff(dec_struct_t * ps_dec) +{ + WORD16 i2_mb_x, i2_mb_y; + deblk_mb_t *ps_cur_mb; + deblk_mb_t *ps_top_mb; + deblk_mb_t *ps_left_mb; + + UWORD8 u1_vert_pad_top = 1; + UWORD8 u1_first_row; + + UWORD8 u1_deb_mode; + WORD32 i4_wd_y, i4_wd_uv; + + UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; /*< Field u4_flag */ + UWORD8 u1_bottom_field_flag = ps_dec->ps_cur_slice->u1_bottom_field_flag; /*< Bottom field u4_flag */ + + /**************************************************/ + /* one time loads from ps_dec which will be used */ + /* frequently throughout the deblocking procedure */ + /**************************************************/ + pad_mgr_t * ps_pad_mgr = &ps_dec->s_pad_mgr; + tfr_ctxt_t s_tfr_ctxt; + tfr_ctxt_t * ps_tfr_cxt = &s_tfr_ctxt; // = &ps_dec->s_tran_addrecon; + + UWORD16 u2_image_wd_mb = ps_dec->u2_frm_wd_in_mbs; + UWORD16 u2_image_ht_mb = ps_dec->u2_frm_ht_in_mbs; + WORD8 i1_cb_qp_idx_ofst = ps_dec->ps_cur_pps->i1_chroma_qp_index_offset; + WORD8 i1_cr_qp_idx_ofst = + ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset; + + /* Set up Parameter for DMA transfer */ + ih264d_init_deblk_tfr_ctxt(ps_dec, ps_pad_mgr, ps_tfr_cxt, u2_image_wd_mb, + 0); + + /* Pic level Initialisations */ + i2_mb_y = u2_image_ht_mb; + i2_mb_x = 0; + + u1_first_row = 1; + + i4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag; + i4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag; + /* Initial filling of the buffers with deblocking data */ + + ps_tfr_cxt->pu1_mb_y = ps_tfr_cxt->pu1_src_y + 4; + ps_tfr_cxt->pu1_mb_u = ps_tfr_cxt->pu1_src_u + 4; + ps_tfr_cxt->pu1_mb_v = ps_tfr_cxt->pu1_src_v + 4; + ps_cur_mb = ps_dec->ps_deblk_pic; + + 
if(ps_dec->u4_app_disable_deblk_frm == 0) + { + if((ps_dec->u4_mb_level_deblk == 0) && (ps_dec->u4_num_cores != 3)) + { + + while(i2_mb_y > 0) + { + do + { + + u1_deb_mode = ps_cur_mb->u1_deblocking_mode; + if(!(u1_deb_mode & MB_DISABLE_FILTERING)) + { + if(i2_mb_x) + { + ps_left_mb = ps_cur_mb - 1; + } + else + { + ps_left_mb = NULL; + } + if(!u1_first_row) + { + ps_top_mb = ps_cur_mb - (u2_image_wd_mb); + } + else + { + ps_top_mb = NULL; + } + + if(u1_deb_mode & MB_DISABLE_LEFT_EDGE) + ps_left_mb = NULL; + if(u1_deb_mode & MB_DISABLE_TOP_EDGE) + ps_top_mb = NULL; + + ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt, + i1_cb_qp_idx_ofst, + i1_cr_qp_idx_ofst, ps_cur_mb, + i4_wd_y, i4_wd_uv, ps_top_mb, + ps_left_mb); + } + + ps_cur_mb++; + i2_mb_x++; + + ps_tfr_cxt->pu1_mb_y += 16; + ps_tfr_cxt->pu1_mb_u += 8 * YUV420SP_FACTOR; + ps_tfr_cxt->pu1_mb_v += 8; + + } + while(i2_mb_x < u2_image_wd_mb); + + ps_tfr_cxt->pu1_mb_y += ps_tfr_cxt->u4_y_inc; + ps_tfr_cxt->pu1_mb_u += ps_tfr_cxt->u4_uv_inc; + ps_tfr_cxt->pu1_mb_v += ps_tfr_cxt->u4_uv_inc; + + i2_mb_x = 0; + i2_mb_y--; + u1_first_row = 0; + + } + } + + } + + //Padd the Picture + //Horizontal Padd + if(ps_pad_mgr->u1_horz_pad) + { + UWORD32 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; + ps_dec->pf_pad_left_luma(ps_tfr_cxt->pu1_src_y + 4, + ps_dec->u2_frm_wd_y << u1_field_pic_flag, + ps_dec->u2_pic_ht >> u1_field_pic_flag, + PAD_LEN_Y_H); + ps_dec->pf_pad_right_luma( + ps_tfr_cxt->pu1_src_y + 4 + + (ps_dec->u2_frm_wd_in_mbs << 4), + ps_dec->u2_frm_wd_y << u1_field_pic_flag, + ps_dec->u2_pic_ht >> u1_field_pic_flag, PAD_LEN_Y_H); + + ps_dec->pf_pad_left_chroma(ps_tfr_cxt->pu1_src_u + 4, + ps_dec->u2_frm_wd_uv << u1_field_pic_flag, + (ps_dec->u2_pic_ht / 2) >> u1_field_pic_flag, + PAD_LEN_UV_H * YUV420SP_FACTOR); + ps_dec->pf_pad_right_chroma( + ps_tfr_cxt->pu1_src_u + 4 + + (ps_dec->u2_frm_wd_in_mbs << 4), + ps_dec->u2_frm_wd_uv << u1_field_pic_flag, + (ps_dec->u2_pic_ht / 2) >> u1_field_pic_flag, + 
PAD_LEN_UV_H * YUV420SP_FACTOR); + + } + +//Vertical Padd Top + if(ps_pad_mgr->u1_vert_pad_top) + { + ps_dec->pf_pad_top(ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H, + ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y, + ps_pad_mgr->u1_pad_len_y_v); + ps_dec->pf_pad_top( + ps_dec->ps_cur_pic->pu1_buf2 + - PAD_LEN_UV_H * YUV420SP_FACTOR, + ps_dec->u2_frm_wd_uv, ps_dec->u2_frm_wd_uv, + ps_pad_mgr->u1_pad_len_cr_v); + ps_pad_mgr->u1_vert_pad_top = 0; + } + +//Vertical Padd Bottom + if(ps_pad_mgr->u1_vert_pad_bot) + { + + UWORD8 *pu1_buf; + pu1_buf = ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H; + pu1_buf += ps_dec->u2_pic_ht * ps_dec->u2_frm_wd_y; + ps_dec->pf_pad_bottom(pu1_buf, ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y, + ps_pad_mgr->u1_pad_len_y_v); + pu1_buf = ps_dec->ps_cur_pic->pu1_buf2 - PAD_LEN_UV_H * YUV420SP_FACTOR; + pu1_buf += (ps_dec->u2_pic_ht >> 1) * ps_dec->u2_frm_wd_uv; + + ps_dec->pf_pad_bottom(pu1_buf, ps_dec->u2_frm_wd_uv, + ps_dec->u2_frm_wd_uv, + ps_pad_mgr->u1_pad_len_cr_v); + + } +} + +void ih264d_deblock_picture_progressive(dec_struct_t * ps_dec) +{ + WORD16 i2_mb_x, i2_mb_y; + + deblk_mb_t *ps_cur_mb; + deblk_mb_t *ps_top_mb; + deblk_mb_t *ps_left_mb; + + UWORD8 u1_vert_pad_top = 1; + UWORD8 u1_mbs_next, u1_first_row; + UWORD8 u1_deb_mode; + WORD32 i4_wd_y, i4_wd_uv; + + + /**************************************************/ + /* one time loads from ps_dec which will be used */ + /* frequently throughout the deblocking procedure */ + /**************************************************/ + pad_mgr_t * ps_pad_mgr = &ps_dec->s_pad_mgr; + + tfr_ctxt_t s_tfr_ctxt; + tfr_ctxt_t * ps_tfr_cxt = &s_tfr_ctxt; // = &ps_dec->s_tran_addrecon; + UWORD16 u2_image_wd_mb = ps_dec->u2_frm_wd_in_mbs; + UWORD16 u2_image_ht_mb = ps_dec->u2_frm_ht_in_mbs; + UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + + WORD8 i1_cb_qp_idx_ofst = ps_dec->ps_cur_pps->i1_chroma_qp_index_offset; + WORD8 i1_cr_qp_idx_ofst = + ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset; + 
+ /* Set up Parameter for deblocking */ + ih264d_init_deblk_tfr_ctxt(ps_dec, ps_pad_mgr, ps_tfr_cxt, u2_image_wd_mb, + 0); + + /* Pic level Initialisations */ + i2_mb_y = u2_image_ht_mb; + i2_mb_x = 0; + + u1_first_row = 1; + + i4_wd_y = ps_dec->u2_frm_wd_y; + i4_wd_uv = ps_dec->u2_frm_wd_uv; + /* Initial filling of the buffers with deblocking data */ + + ps_tfr_cxt->pu1_mb_y = ps_tfr_cxt->pu1_src_y + 4; + ps_tfr_cxt->pu1_mb_u = ps_tfr_cxt->pu1_src_u + 4; + ps_tfr_cxt->pu1_mb_v = ps_tfr_cxt->pu1_src_v + 4; + ps_cur_mb = ps_dec->ps_deblk_pic; + + if(ps_dec->u4_app_disable_deblk_frm == 0) + { + + if((ps_dec->u4_mb_level_deblk == 0) && (ps_dec->u4_num_cores != 3)) + { + + while(i2_mb_y > 0) + { + + u1_deb_mode = ps_cur_mb->u1_deblocking_mode; + if(!(u1_deb_mode & MB_DISABLE_FILTERING)) + { + + if(i2_mb_x) + { + ps_left_mb = ps_cur_mb - 1; + + } + else + { + ps_left_mb = NULL; + + } + if(!u1_first_row) + { + ps_top_mb = ps_cur_mb - (u2_image_wd_mb); + } + else + { + ps_top_mb = NULL; + } + + if(u1_deb_mode & MB_DISABLE_LEFT_EDGE) + ps_left_mb = NULL; + if(u1_deb_mode & MB_DISABLE_TOP_EDGE) + ps_top_mb = NULL; + + ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt, + i1_cb_qp_idx_ofst, + i1_cr_qp_idx_ofst, ps_cur_mb, + i4_wd_y, i4_wd_uv, ps_top_mb, + ps_left_mb); + } + + ps_cur_mb++; + i2_mb_x++; + u1_mbs_next = u2_image_wd_mb - i2_mb_x; + + ps_tfr_cxt->pu1_mb_y += 16; + ps_tfr_cxt->pu1_mb_u += 8 * YUV420SP_FACTOR; + ps_tfr_cxt->pu1_mb_v += 8; + + if(!u1_mbs_next) + { + ps_tfr_cxt->pu1_mb_y += ps_tfr_cxt->u4_y_inc; + ps_tfr_cxt->pu1_mb_u += ps_tfr_cxt->u4_uv_inc; + ps_tfr_cxt->pu1_mb_v += ps_tfr_cxt->u4_uv_inc; + + i2_mb_x = 0; + i2_mb_y--; + u1_first_row = 0; + } + + } + } + + } + + //Padd the Picture + //Horizontal Padd + if(ps_pad_mgr->u1_horz_pad) + { + UWORD32 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; + ps_dec->pf_pad_left_luma(ps_tfr_cxt->pu1_src_y + 4, + ps_dec->u2_frm_wd_y << u1_field_pic_flag, + ps_dec->u2_pic_ht >> u1_field_pic_flag, + 
PAD_LEN_Y_H); + ps_dec->pf_pad_right_luma( + ps_tfr_cxt->pu1_src_y + 4 + + (ps_dec->u2_frm_wd_in_mbs << 4), + ps_dec->u2_frm_wd_y << u1_field_pic_flag, + ps_dec->u2_pic_ht >> u1_field_pic_flag, PAD_LEN_Y_H); + + ps_dec->pf_pad_left_chroma(ps_tfr_cxt->pu1_src_u + 4, + ps_dec->u2_frm_wd_uv << u1_field_pic_flag, + (ps_dec->u2_pic_ht / 2) >> u1_field_pic_flag, + PAD_LEN_UV_H * YUV420SP_FACTOR); + ps_dec->pf_pad_right_chroma( + ps_tfr_cxt->pu1_src_u + 4 + + (ps_dec->u2_frm_wd_in_mbs << 4), + ps_dec->u2_frm_wd_uv << u1_field_pic_flag, + (ps_dec->u2_pic_ht / 2) >> u1_field_pic_flag, + PAD_LEN_UV_H * YUV420SP_FACTOR); + + } + +//Vertical Padd Top + if(ps_pad_mgr->u1_vert_pad_top) + { + ps_dec->pf_pad_top(ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H, + ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y, + ps_pad_mgr->u1_pad_len_y_v); + ps_dec->pf_pad_top( + ps_dec->ps_cur_pic->pu1_buf2 + - PAD_LEN_UV_H * YUV420SP_FACTOR, + ps_dec->u2_frm_wd_uv, ps_dec->u2_frm_wd_uv, + ps_pad_mgr->u1_pad_len_cr_v); + + } + +//Vertical Padd Bottom + if(ps_pad_mgr->u1_vert_pad_bot) + { + + UWORD8 *pu1_buf; + pu1_buf = ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H; + pu1_buf += ps_dec->u2_pic_ht * ps_dec->u2_frm_wd_y; + ps_dec->pf_pad_bottom(pu1_buf, ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y, + ps_pad_mgr->u1_pad_len_y_v); + pu1_buf = ps_dec->ps_cur_pic->pu1_buf2 - PAD_LEN_UV_H * YUV420SP_FACTOR; + pu1_buf += (ps_dec->u2_pic_ht >> 1) * ps_dec->u2_frm_wd_uv; + + ps_dec->pf_pad_bottom(pu1_buf, ps_dec->u2_frm_wd_uv, + ps_dec->u2_frm_wd_uv, + ps_pad_mgr->u1_pad_len_cr_v); + + } +} + +/*! 
+ **************************************************************************
+ * \if Function name : ih264d_set_deblocking_parameters \endif
+ *
+ * \brief
+ *    Sets the deblocking parameters of the macroblock
+ *
+ * \param ps_cur_mb
+ *    Per-MB deblocking context that is filled in
+ * \param ps_slice
+ *    Slice parameters supplying the alpha/beta offsets and
+ *    u1_disable_dblk_filter_idc
+ * \param u1_mb_ngbr_availablity
+ *    Bit mask tested against LEFT_MB_AVAILABLE_MASK / TOP_MB_AVAILABLE_MASK
+ * \param u1_mb_field_decoding_flag
+ *    Stored in bit 7 of u1_mb_type (all other bits of u1_mb_type are cleared)
+ *
+ * \return
+ *    Always 0
+ *
+ * \note
+ *    Given the neighbour availability information, and the deblocking
+ *    parameters of the slice, this function will set the deblocking
+ *    mode of the macroblock.
+ **************************************************************************
+ */
+
+WORD8 ih264d_set_deblocking_parameters(deblk_mb_t * ps_cur_mb,
+                                       dec_slice_params_t * ps_slice,
+                                       UWORD8 u1_mb_ngbr_availablity,
+                                       UWORD8 u1_mb_field_decoding_flag)
+{
+    /*------------------------------------------------------------------*/
+    /* Set the deblocking parameters                                    */
+    /*------------------------------------------------------------------*/
+    ps_cur_mb->i1_slice_alpha_c0_offset = ps_slice->i1_slice_alpha_c0_offset;
+    ps_cur_mb->i1_slice_beta_offset = ps_slice->i1_slice_beta_offset;
+    ps_cur_mb->u1_mb_type = (u1_mb_field_decoding_flag << 7);
+
+    switch(ps_slice->u1_disable_dblk_filter_idc)
+    {
+        case DBLK_ENABLED:
+            ps_cur_mb->u1_deblocking_mode = MB_ENABLE_FILTERING;
+            break;
+        case DBLK_DISABLED:
+            ps_cur_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
+            break;
+        case SLICE_BOUNDARY_DBLK_DISABLED:
+        {
+            /* Filter the MB, but skip the edges whose neighbour is flagged */
+            /* as unavailable in the mask.                                  */
+            ps_cur_mb->u1_deblocking_mode = MB_ENABLE_FILTERING;
+            if(!(u1_mb_ngbr_availablity & LEFT_MB_AVAILABLE_MASK))
+                ps_cur_mb->u1_deblocking_mode |= MB_DISABLE_LEFT_EDGE;
+            if(!(u1_mb_ngbr_availablity & TOP_MB_AVAILABLE_MASK))
+                ps_cur_mb->u1_deblocking_mode |= MB_DISABLE_TOP_EDGE;
+            break;
+        }
+        default:
+            /* Any other idc leaves u1_deblocking_mode untouched, exactly  */
+            /* as before. NOTE(review): presumably the slice header parser */
+            /* range-checks the idc - confirm.                             */
+            break;
+    }
+
+    return (0);
+}
+
+/* Copies the bottom pixel row of the MB at position nmb_index in the      */
+/* reconstruction buffer into the "current" intra prediction line buffers  */
+/* (16 bytes of luma and 16 bytes read from the U plane pointer), then     */
+/* moves the current-line write pointers to the slot of MB column          */
+/* (u2_mbx + 1).                                                           */
+static void ih264d_save_mb_last_row(dec_struct_t *ps_dec,
+                                    dec_mb_info_t *ps_cur_mb_info,
+                                    UWORD32 nmb_index)
+{
+    UWORD8 *pu1_mb_last_row;
+    UWORD8 u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
+    /* Field MBs use twice the frame stride */
+    UWORD32 u4_recWidth = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag;
+    UWORD32 u4_recwidth_cr = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
+    WORD32 i4_next_mbx = ps_cur_mb_info->u2_mbx + 1;
+
+    pu1_mb_last_row = ps_dec->s_tran_addrecon.pu1_dest_y
+                    + (u4_recWidth * (MB_SIZE - 1));
+    pu1_mb_last_row += MB_SIZE * nmb_index;
+    MEMCPY_16BYTES(ps_dec->pu1_cur_y_intra_pred_line, pu1_mb_last_row);
+
+    pu1_mb_last_row = ps_dec->s_tran_addrecon.pu1_dest_u
+                    + (u4_recwidth_cr * (BLK8x8SIZE - 1));
+    pu1_mb_last_row += BLK8x8SIZE * nmb_index * YUV420SP_FACTOR;
+    MEMCPY_16BYTES(ps_dec->pu1_cur_u_intra_pred_line, pu1_mb_last_row);
+
+    ps_dec->pu1_cur_y_intra_pred_line = ps_dec->pu1_cur_y_intra_pred_line_base
+                    + (MB_SIZE * i4_next_mbx);
+    ps_dec->pu1_cur_u_intra_pred_line = ps_dec->pu1_cur_u_intra_pred_line_base
+                    + (BLK8x8SIZE * i4_next_mbx) * YUV420SP_FACTOR;
+    ps_dec->pu1_cur_v_intra_pred_line = ps_dec->pu1_cur_v_intra_pred_line_base
+                    + (BLK8x8SIZE * i4_next_mbx);
+}
+
+/* End-of-row bookkeeping for the intra prediction line buffers: the       */
+/* buffer that was "current" (its base) becomes "previous", and the old    */
+/* "previous" buffer becomes both the new current pointer and the new      */
+/* current base. Net effect is identical to the original reset-then-swap.  */
+static void ih264d_swap_intra_pred_lines(dec_struct_t *ps_dec)
+{
+    UWORD8 *pu1_new_cur;
+
+    pu1_new_cur = ps_dec->pu1_prev_y_intra_pred_line;
+    ps_dec->pu1_prev_y_intra_pred_line = ps_dec->pu1_cur_y_intra_pred_line_base;
+    ps_dec->pu1_cur_y_intra_pred_line = pu1_new_cur;
+    ps_dec->pu1_cur_y_intra_pred_line_base = pu1_new_cur;
+
+    pu1_new_cur = ps_dec->pu1_prev_u_intra_pred_line;
+    ps_dec->pu1_prev_u_intra_pred_line = ps_dec->pu1_cur_u_intra_pred_line_base;
+    ps_dec->pu1_cur_u_intra_pred_line = pu1_new_cur;
+    ps_dec->pu1_cur_u_intra_pred_line_base = pu1_new_cur;
+
+    pu1_new_cur = ps_dec->pu1_prev_v_intra_pred_line;
+    ps_dec->pu1_prev_v_intra_pred_line = ps_dec->pu1_cur_v_intra_pred_line_base;
+    ps_dec->pu1_cur_v_intra_pred_line = pu1_new_cur;
+    ps_dec->pu1_cur_v_intra_pred_line_base = pu1_new_cur;
+}
+
+/* Saves the last row of the MB for intra prediction of the next MB row    */
+/* and, when the MB is the last one of its row, swaps the current and      */
+/* previous line buffers.                                                  */
+void ih264d_copy_intra_pred_line(dec_struct_t *ps_dec,
+                                 dec_mb_info_t *ps_cur_mb_info,
+                                 UWORD32 nmb_index)
+{
+    ih264d_save_mb_last_row(ps_dec, ps_cur_mb_info, nmb_index);
+
+    if(ps_cur_mb_info->u2_mbx == (ps_dec->u2_frm_wd_in_mbs - 1))
+    {
+        /*swap current and previous rows*/
+        ih264d_swap_intra_pred_lines(ps_dec);
+    }
+}
+
+/* Deblocks one MB of a non-MBAFF picture at MB position (i4_mb_x,         */
+/* i4_mb_y). Nothing is filtered when the application disabled deblocking  */
+/* or the MB carries MB_DISABLE_FILTERING. The left / top neighbour is     */
+/* passed as NULL at the picture border or when the MB's mode disables     */
+/* that edge.                                                              */
+static void ih264d_deblock_mb_level_filter(dec_struct_t *ps_dec,
+                                           deblk_mb_t *ps_cur_mb,
+                                           WORD32 i4_mb_x,
+                                           WORD32 i4_mb_y,
+                                           WORD32 i4_wd_y,
+                                           WORD32 i4_wd_uv)
+{
+    deblk_mb_t *ps_left_mb = NULL;
+    deblk_mb_t *ps_top_mb = NULL;
+    UWORD8 u1_deb_mode = ps_cur_mb->u1_deblocking_mode;
+
+    if(ps_dec->u4_app_disable_deblk_frm == 1)
+        u1_deb_mode = MB_DISABLE_FILTERING;
+
+    if(u1_deb_mode & MB_DISABLE_FILTERING)
+        return;
+
+    if(i4_mb_x && !(u1_deb_mode & MB_DISABLE_LEFT_EDGE))
+        ps_left_mb = ps_cur_mb - 1;
+
+    /* The MB above is one full MB row earlier in the deblk_mb_t array */
+    if(i4_mb_y && !(u1_deb_mode & MB_DISABLE_TOP_EDGE))
+        ps_top_mb = ps_cur_mb - ps_dec->u2_frm_wd_in_mbs;
+
+    ih264d_deblock_mb_nonmbaff(
+                    ps_dec, &ps_dec->s_tran_addrecon,
+                    ps_dec->ps_cur_pps->i1_chroma_qp_index_offset,
+                    ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset,
+                    ps_cur_mb, i4_wd_y, i4_wd_uv, ps_top_mb, ps_left_mb);
+}
+
+/* MB-level deblocking driver.                                             */
+/*  1. Saves the still unfiltered last row of the current MB for intra     */
+/*     prediction of the next MB row.                                      */
+/*  2. Deblocks the MB to the left of the current one (if any) and         */
+/*     advances the deblocking pointers in s_tran_addrecon by one MB.      */
+/*  3. If the current MB is the last one of its row, deblocks it too and   */
+/*     swaps the current/previous intra prediction line buffers.           */
+void ih264d_deblock_mb_level(dec_struct_t *ps_dec,
+                             dec_mb_info_t *ps_cur_mb_info,
+                             UWORD32 nmb_index)
+{
+    tfr_ctxt_t * ps_tfr_cxt = &ps_dec->s_tran_addrecon;
+    UWORD8 u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
+    WORD32 i4_wd_y = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag;
+    WORD32 i4_wd_uv = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
+    WORD16 i2_mb_y = ps_cur_mb_info->u2_mby;
+    deblk_mb_t *ps_deblk_cur_mb;
+
+    /*Copy the last row of every MB ,to be used for intra prediction of next row*/
+    ih264d_save_mb_last_row(ps_dec, ps_cur_mb_info, nmb_index);
+
+    if(ps_cur_mb_info->u2_mbx != 0)
+    {
+        /*Deblock the previous MB*/
+        if(ps_dec->u1_separate_parse == 1)
+        {
+            ps_deblk_cur_mb = ps_dec->ps_deblk_mbn_dec_thrd + nmb_index - 1;
+        }
+        else if(nmb_index == 0)
+        {
+            /*if first mb in Nmb ,pick up the context from previous Nmb data*/
+            ps_deblk_cur_mb = ps_dec->ps_deblk_mbn_prev
+                            + ps_dec->u4_num_mbs_prev_nmb - 1;
+        }
+        else
+        {
+            ps_deblk_cur_mb = ps_dec->ps_deblk_mbn + nmb_index - 1;
+        }
+
+        ih264d_deblock_mb_level_filter(ps_dec, ps_deblk_cur_mb,
+                                       ps_cur_mb_info->u2_mbx - 1, i2_mb_y,
+                                       i4_wd_y, i4_wd_uv);
+
+        /* Pointers advance whether or not the MB was actually filtered */
+        ps_tfr_cxt->pu1_mb_y += MB_SIZE;
+        ps_tfr_cxt->pu1_mb_u += (MB_SIZE >> 1) * YUV420SP_FACTOR;
+        ps_tfr_cxt->pu1_mb_v += (MB_SIZE >> 1);
+    }
+
+    if(ps_cur_mb_info->u2_mbx == (ps_dec->u2_frm_wd_in_mbs - 1))
+    {
+        /*Deblock the current (last of the row) MB*/
+        if(ps_dec->u1_separate_parse == 1)
+            ps_deblk_cur_mb = ps_dec->ps_deblk_mbn_dec_thrd + nmb_index;
+        else
+            ps_deblk_cur_mb = ps_dec->ps_deblk_mbn + nmb_index;
+
+        ih264d_deblock_mb_level_filter(ps_dec, ps_deblk_cur_mb,
+                                       ps_cur_mb_info->u2_mbx, i2_mb_y,
+                                       i4_wd_y, i4_wd_uv);
+
+        /*swap current and previous rows*/
+        ih264d_swap_intra_pred_lines(ps_dec);
+    }
+}
+
+/* Filters one left MB edge (luma and chroma) between ps_left_mb and       */
+/* ps_cur_mb with boundary strengths u4_bs_val. u1_mbaff_kernels selects   */
+/* the *_mbaff vertical kernels instead of the regular ones.               */
+/*                                                                         */
+/* All QP arithmetic is done in WORD32: (u1_mb_qp + chroma offset) may be  */
+/* negative, and the "12 +" bias of the tables is what absorbs that.       */
+/* Narrowing the sum to UWORD8 first (as the code used to) turns a         */
+/* negative index into 244..255 and reads far outside                      */
+/* gau1_ih264d_qp_scale_cr.                                                */
+static void ih264d_filter_left_edge_pass(dec_struct_t *ps_dec,
+                                         UWORD8 *pu1_y,
+                                         UWORD8 *pu1_u,
+                                         UWORD16 u2_strd_y,
+                                         UWORD16 u2_strd_uv,
+                                         WORD8 i1_cb_qp_idx_ofst,
+                                         WORD8 i1_cr_qp_idx_ofst,
+                                         deblk_mb_t *ps_cur_mb,
+                                         deblk_mb_t *ps_left_mb,
+                                         UWORD32 u4_bs_val,
+                                         UWORD8 u1_mbaff_kernels)
+{
+    WORD32 ofst_a, ofst_b, qp_p, qp_q, qp_avg;
+    WORD32 idx_a_y, idx_a_u, idx_a_v;
+    WORD32 alpha_y, beta_y, alpha_u, beta_u, alpha_v, beta_v;
+
+    /* Nothing to filter on this edge */
+    if(0 == u4_bs_val)
+        return;
+
+    ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset;
+    ofst_b = ps_cur_mb->i1_slice_beta_offset;
+    qp_p = ps_left_mb->u1_mb_qp;
+    qp_q = ps_cur_mb->u1_mb_qp;
+
+    /* LUMA values */
+    /* Deblock rounding change */
+    qp_avg = (qp_p + qp_q + 1) >> 1;
+    idx_a_y = qp_avg + ofst_a;
+    alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y];
+    beta_y = gau1_ih264d_beta_table[12 + qp_avg + ofst_b];
+
+    /* Chroma cb values */
+    qp_avg = (gau1_ih264d_qp_scale_cr[12 + qp_p + i1_cb_qp_idx_ofst]
+                    + gau1_ih264d_qp_scale_cr[12 + qp_q + i1_cb_qp_idx_ofst]
+                    + 1) >> 1;
+    idx_a_u = qp_avg + ofst_a;
+    alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u];
+    beta_u = gau1_ih264d_beta_table[12 + qp_avg + ofst_b];
+
+    /* Chroma cr values */
+    qp_avg = (gau1_ih264d_qp_scale_cr[12 + qp_p + i1_cr_qp_idx_ofst]
+                    + gau1_ih264d_qp_scale_cr[12 + qp_q + i1_cr_qp_idx_ofst]
+                    + 1) >> 1;
+    idx_a_v = qp_avg + ofst_a;
+    alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v];
+    beta_v = gau1_ih264d_beta_table[12 + qp_avg + ofst_b];
+
+    if(0x04040404 == u4_bs_val)
+    {
+        /* All four strengths are 4: strong filter, no clip table needed */
+        if(u1_mbaff_kernels)
+        {
+            ps_dec->pf_deblk_luma_vert_bs4_mbaff(pu1_y, u2_strd_y, alpha_y,
+                                                 beta_y);
+            ps_dec->pf_deblk_chroma_vert_bs4_mbaff(pu1_u, u2_strd_uv, alpha_u,
+                                                   beta_u, alpha_v, beta_v);
+        }
+        else
+        {
+            ps_dec->pf_deblk_luma_vert_bs4(pu1_y, u2_strd_y, alpha_y, beta_y);
+            ps_dec->pf_deblk_chroma_vert_bs4(pu1_u, u2_strd_uv, alpha_u,
+                                             beta_u, alpha_v, beta_v);
+        }
+    }
+    else
+    {
+        UWORD8 *pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
+        UWORD8 *pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
+        UWORD8 *pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
+
+        if(u1_mbaff_kernels)
+        {
+            ps_dec->pf_deblk_luma_vert_bslt4_mbaff(pu1_y, u2_strd_y, alpha_y,
+                                                   beta_y, u4_bs_val,
+                                                   pu1_cliptab_y);
+            ps_dec->pf_deblk_chroma_vert_bslt4_mbaff(pu1_u, u2_strd_uv,
+                                                     alpha_u, beta_u,
+                                                     alpha_v, beta_v,
+                                                     u4_bs_val,
+                                                     pu1_cliptab_u,
+                                                     pu1_cliptab_v);
+        }
+        else
+        {
+            ps_dec->pf_deblk_luma_vert_bslt4(pu1_y, u2_strd_y, alpha_y,
+                                             beta_y, u4_bs_val,
+                                             pu1_cliptab_y);
+            ps_dec->pf_deblk_chroma_vert_bslt4(pu1_u, u2_strd_uv, alpha_u,
+                                               beta_u, alpha_v, beta_v,
+                                               u4_bs_val, pu1_cliptab_u,
+                                               pu1_cliptab_v);
+        }
+    }
+}
+
+/* Filters the left edge of ps_cur_mb in an MBAFF picture.                 */
+/*  - ps_cur_mb->u1_single_call set: one pass against ps_left_mb with      */
+/*    pu4_bs_tab[4], using the regular vertical kernels.                   */
+/*  - otherwise: two passes with the *_mbaff kernels, the first against    */
+/*    ps_left_mb with pu4_bs_tab[4], the second against (ps_left_mb + 1)   */
+/*    with pu4_bs_tab[9] at an offset pixel position (see below).          */
+/* ps_left_mb must not be NULL.                                            */
+void ih264d_filter_boundary_left_mbaff(dec_struct_t *ps_dec,
+                                       tfr_ctxt_t * ps_tfr_cxt,
+                                       WORD8 i1_cb_qp_idx_ofst,
+                                       WORD8 i1_cr_qp_idx_ofst,
+                                       deblk_mb_t * ps_cur_mb,
+                                       UWORD16 i4_strd_y,
+                                       UWORD16 i4_strd_uv,
+                                       deblk_mb_t * ps_left_mb, /* Neighbouring MB parameters   */
+                                       UWORD32 pu4_bs_tab[], /* pointer to the BsTable array */
+                                       UWORD8 u1_cur_fld)
+{
+    UWORD8 *pu1_y, *pu1_u;
+    UWORD8 u1_double_cl = !ps_cur_mb->u1_single_call;
+    WORD32 i4_shift_y, i4_shift_uv;
+
+    PROFILE_DISABLE_DEBLK()
+
+    pu1_y = ps_tfr_cxt->pu1_mb_y;
+    pu1_u = ps_tfr_cxt->pu1_mb_u;
+
+    if(u1_double_cl == 0)
+    {
+        ih264d_filter_left_edge_pass(ps_dec, pu1_y, pu1_u, i4_strd_y,
+                                     i4_strd_uv, i1_cb_qp_idx_ofst,
+                                     i1_cr_qp_idx_ofst, ps_cur_mb, ps_left_mb,
+                                     pu4_bs_tab[4], 0);
+        return;
+    }
+
+    /* A frame MB is filtered with twice the incoming stride */
+    i4_strd_y <<= (!u1_cur_fld);
+    i4_strd_uv <<= (!u1_cur_fld);
+
+    ih264d_filter_left_edge_pass(ps_dec, pu1_y, pu1_u, i4_strd_y, i4_strd_uv,
+                                 i1_cb_qp_idx_ofst, i1_cr_qp_idx_ofst,
+                                 ps_cur_mb, ps_left_mb, pu4_bs_tab[4], 1);
+
+    /* Start of the second pass: half a (possibly doubled) stride further  */
+    /* for a frame MB, 16 luma / 8 chroma such rows further for a field    */
+    /* MB. Computed in WORD32: (stride >> 1) << 4 no longer fits in 16     */
+    /* bits once the stride reaches 4096.                                  */
+    i4_shift_y = (i4_strd_y >> 1) << (u1_cur_fld ? 4 : 0);
+    i4_shift_uv = (i4_strd_uv >> 1) << (u1_cur_fld ? 3 : 0);
+
+    ih264d_filter_left_edge_pass(ps_dec, pu1_y + i4_shift_y,
+                                 pu1_u + i4_shift_uv, i4_strd_y, i4_strd_uv,
+                                 i1_cb_qp_idx_ofst, i1_cr_qp_idx_ofst,
+                                 ps_cur_mb, ps_left_mb + 1, pu4_bs_tab[9], 1);
+}
+
+void
ih264d_filter_boundary_topmbaff(dec_struct_t *ps_dec, + tfr_ctxt_t * ps_tfr_cxt, + WORD8 i1_cb_qp_idx_ofst, + WORD8 i1_cr_qp_idx_ofst, + deblk_mb_t * ps_cur_mb, + UWORD16 i4_strd_y, + UWORD16 i4_strd_uv, + deblk_mb_t * ps_top_mb, + UWORD32 u4_bs) +{ + UWORD8 *pu1_y, *pu1_u; + WORD32 alpha_u = 0, beta_u = 0, alpha_v = 0, beta_v = 0; + WORD32 alpha_y = 0, beta_y = 0; + WORD32 qp_avg; + WORD32 uc_QPav_Y; + WORD32 idx_b_u, idx_a_u, idx_b_v, idx_a_v; + WORD32 idx_b_y, idx_a_y; + UWORD16 uc_tmp; + + UWORD8 *pu1_cliptab_u, *pu1_cliptab_v, *pu1_cliptab_y; + WORD32 ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset; + WORD32 ofst_b = ps_cur_mb->i1_slice_beta_offset; + + /* LUMA values */ + /* Deblock rounding change */ + uc_tmp = ((ps_top_mb->u1_mb_qp + ps_cur_mb->u1_mb_qp + 1) >> 1); + uc_QPav_Y = (UWORD8)uc_tmp; + idx_a_y = uc_QPav_Y + ofst_a; + alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y]; + idx_b_y = uc_QPav_Y + ofst_b; + beta_y = gau1_ih264d_beta_table[12 + idx_b_y]; + pu1_y = ps_tfr_cxt->pu1_mb_y; + + /* CHROMA cb values */ + { + UWORD8 u1_mb_qp1, u1_mb_qp2; + u1_mb_qp1 = (ps_top_mb->u1_mb_qp + i1_cb_qp_idx_ofst); + u1_mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst); + qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + u1_mb_qp1] + + gau1_ih264d_qp_scale_cr[12 + u1_mb_qp2] + 1) >> 1); + } + + idx_a_u = qp_avg + ofst_a; + alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u]; + idx_b_u = qp_avg + ofst_b; + beta_u = gau1_ih264d_beta_table[12 + idx_b_u]; + /* CHROMA cr values */ + { + UWORD8 u1_mb_qp1, u1_mb_qp2; + u1_mb_qp1 = (ps_top_mb->u1_mb_qp + i1_cr_qp_idx_ofst); + u1_mb_qp2 = (ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst); + qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + u1_mb_qp1] + + gau1_ih264d_qp_scale_cr[12 + u1_mb_qp2] + 1) >> 1); + } + + idx_a_v = qp_avg + ofst_a; + alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v]; + idx_b_v = qp_avg + ofst_b; + beta_v = gau1_ih264d_beta_table[12 + idx_b_v]; + pu1_u = ps_tfr_cxt->pu1_mb_u; + + if(u4_bs == 0x04040404) + { + /* Code 
specific to the assembly module */ + ps_dec->pf_deblk_luma_horz_bs4(pu1_y, i4_strd_y, alpha_y, beta_y); + ps_dec->pf_deblk_chroma_horz_bs4(pu1_u, i4_strd_uv, alpha_u, beta_u, + alpha_v, beta_v); + + } + else + { + if(u4_bs) + { + + pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y]; + pu1_cliptab_u = + (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u]; + pu1_cliptab_v = + (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v]; + + ps_dec->pf_deblk_luma_horz_bslt4(pu1_y, i4_strd_y, alpha_y, beta_y, + u4_bs, pu1_cliptab_y); + ps_dec->pf_deblk_chroma_horz_bslt4(pu1_u, i4_strd_uv, alpha_u, + beta_u, alpha_v, beta_v, + u4_bs, pu1_cliptab_u, + pu1_cliptab_v); + + } + } + +} + +void ih264d_deblock_mb_mbaff(dec_struct_t *ps_dec, + tfr_ctxt_t * ps_tfr_cxt, + WORD8 i1_cb_qp_idx_ofst, + WORD8 i1_cr_qp_idx_ofst, + deblk_mb_t * ps_cur_mb, + WORD32 i4_strd_y, + WORD32 i4_strd_uv, + deblk_mb_t * ps_top_mb, + deblk_mb_t * ps_left_mb, + UWORD8 u1_cur_fld, + UWORD8 u1_extra_top_edge) +{ + UWORD8 *pu1_y, *pu1_u; + UWORD32 u4_bs; +// WORD8 edge; + WORD32 alpha, beta, alpha_u, beta_u, alpha_v, beta_v; + + UWORD8 *pu1_cliptab_u; + UWORD8 *pu1_cliptab_v; + UWORD8 *pu1_cliptab_y; + + UWORD32 * pu4_bs_tab = ps_cur_mb->u4_bs_table; + WORD32 idx_a_y, idx_a_u, idx_a_v; + /* Return from here to switch off deblocking */ + PROFILE_DISABLE_DEBLK() + + i4_strd_y <<= u1_cur_fld; + i4_strd_uv <<= u1_cur_fld; + /*--------------------------------------------------------------------*/ + /* Filter wrt Left edge */ + /* except */ + /* - Left Egde is Picture Boundary */ + /* - Left Egde is part of Slice Boundary and Deblocking */ + /* parameters of slice disable Filtering of Slice Boundary Edges*/ + /*--------------------------------------------------------------------*/ + if(ps_left_mb) + ih264d_filter_boundary_left_mbaff(ps_dec, ps_tfr_cxt, i1_cb_qp_idx_ofst, + i1_cr_qp_idx_ofst, ps_cur_mb, + i4_strd_y, i4_strd_uv, ps_left_mb, + pu4_bs_tab, u1_cur_fld); + + 
/*--------------------------------------------------------------------*/ + /* Filter wrt Other Vertical Edges */ + /*--------------------------------------------------------------------*/ + { + WORD32 ofst_a, ofst_b, idx_b_y, idx_b_u, + idx_b_v; + WORD32 qp_avg, qp_avg_u, qp_avg_v; + ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset; + ofst_b = ps_cur_mb->i1_slice_beta_offset; + qp_avg = ps_cur_mb->u1_mb_qp; + idx_a_y = qp_avg + ofst_a; + alpha = gau1_ih264d_alpha_table[12 + idx_a_y]; + idx_b_y = qp_avg + ofst_b; + beta = gau1_ih264d_beta_table[12 + idx_b_y]; + + /* CHROMA Cb values */ + qp_avg_u = (qp_avg + i1_cb_qp_idx_ofst); + qp_avg_u = gau1_ih264d_qp_scale_cr[12 + qp_avg_u]; + idx_a_u = qp_avg_u + ofst_a; + alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u]; + idx_b_u = qp_avg_u + ofst_b; + beta_u = gau1_ih264d_beta_table[12 + idx_b_u]; + /* CHROMA Cr values */ + qp_avg_v = (qp_avg + i1_cr_qp_idx_ofst); + qp_avg_v = gau1_ih264d_qp_scale_cr[12 + qp_avg_v]; + idx_a_v = qp_avg_v + ofst_a; + alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v]; + idx_b_v = qp_avg_v + ofst_b; + beta_v = gau1_ih264d_beta_table[12 + idx_b_v]; + } + + //STARTL4_FILTER_VERT; + + pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y]; //this for Luma + pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u]; //this for chroma + pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v]; //this for chroma + + //edge=1 + + + u4_bs = pu4_bs_tab[5]; + pu1_y = ps_tfr_cxt->pu1_mb_y; + pu1_u = ps_tfr_cxt->pu1_mb_u; + + if(u4_bs) + { + + ps_dec->pf_deblk_luma_vert_bslt4(pu1_y + 4, i4_strd_y, alpha, beta, + u4_bs, pu1_cliptab_y); + + } + //edge=2 + + u4_bs = pu4_bs_tab[6]; + if(u4_bs) + { + + ps_dec->pf_deblk_luma_vert_bslt4(pu1_y + 8, i4_strd_y, alpha, beta, + u4_bs, pu1_cliptab_y); + ps_dec->pf_deblk_chroma_vert_bslt4(pu1_u + 4 * YUV420SP_FACTOR, + i4_strd_uv, alpha_u, beta_u, + alpha_v, beta_v, u4_bs, + pu1_cliptab_u, pu1_cliptab_v); + } + //edge=3 + + u4_bs = pu4_bs_tab[7]; + 
if(u4_bs) + { + + ps_dec->pf_deblk_luma_vert_bslt4(pu1_y + 12, i4_strd_y, alpha, beta, + u4_bs, pu1_cliptab_y); + + } + + /*--------------------------------------------------------------------*/ + /* Filter wrt Top edge */ + /* except */ + /* - Top Egde is Picture Boundary */ + /* - Top Egde is part of Slice Boundary and Deblocking */ + /* parameters of slice disable Filtering of Slice Boundary Edges*/ + /*--------------------------------------------------------------------*/ + if(ps_top_mb) + { + /** if top MB and MB AFF and cur MB is frame and top is field then */ + /* one extra top edge needs to be deblocked */ + if(u1_extra_top_edge) + { + ih264d_filter_boundary_topmbaff(ps_dec, ps_tfr_cxt, + i1_cb_qp_idx_ofst, + i1_cr_qp_idx_ofst, ps_cur_mb, + (UWORD16)(i4_strd_y << 1), + (UWORD16)(i4_strd_uv << 1), + ps_top_mb - 1, pu4_bs_tab[8]); + ps_tfr_cxt->pu1_mb_y += i4_strd_y; + ps_tfr_cxt->pu1_mb_u += i4_strd_uv; + ps_tfr_cxt->pu1_mb_v += i4_strd_uv; + + ih264d_filter_boundary_topmbaff(ps_dec, ps_tfr_cxt, + i1_cb_qp_idx_ofst, + i1_cr_qp_idx_ofst, ps_cur_mb, + (UWORD16)(i4_strd_y << 1), + (UWORD16)(i4_strd_uv << 1), + ps_top_mb, pu4_bs_tab[0]); + ps_tfr_cxt->pu1_mb_y -= i4_strd_y; + ps_tfr_cxt->pu1_mb_u -= i4_strd_uv; + ps_tfr_cxt->pu1_mb_v -= i4_strd_uv; + } + else + { + ih264d_filter_boundary_topmbaff(ps_dec, ps_tfr_cxt, + i1_cb_qp_idx_ofst, + i1_cr_qp_idx_ofst, ps_cur_mb, + i4_strd_y, i4_strd_uv, ps_top_mb, + pu4_bs_tab[0]); + } + } + + /*--------------------------------------------------------------------*/ + /* Filter wrt Other Horizontal Edges */ + /*--------------------------------------------------------------------*/ + + //edge1 + u4_bs = pu4_bs_tab[1]; + + if(u4_bs) + { + ps_dec->pf_deblk_luma_horz_bslt4(pu1_y + (i4_strd_y << 2), i4_strd_y, + alpha, beta, u4_bs, pu1_cliptab_y); + + } + //edge2 + u4_bs = pu4_bs_tab[2]; + + if(u4_bs) + { + + ps_dec->pf_deblk_luma_horz_bslt4(pu1_y + (i4_strd_y << 3), i4_strd_y, + alpha, beta, u4_bs, pu1_cliptab_y); + 
ps_dec->pf_deblk_chroma_horz_bslt4(pu1_u + (i4_strd_uv << 2), + i4_strd_uv, alpha_u, beta_u, + alpha_v, beta_v, u4_bs, + pu1_cliptab_u, pu1_cliptab_v); + + } + //edge3 + u4_bs = pu4_bs_tab[3]; + if(u4_bs) + { + + ps_dec->pf_deblk_luma_horz_bslt4( + (pu1_y + (i4_strd_y << 3) + (i4_strd_y << 2)), + i4_strd_y, alpha, beta, u4_bs, pu1_cliptab_y); + + } + +} + diff --git a/decoder/ih264d_deblocking.h b/decoder/ih264d_deblocking.h new file mode 100755 index 0000000..21601aa --- /dev/null +++ b/decoder/ih264d_deblocking.h @@ -0,0 +1,173 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef _IH264D_DEBLOCKING_H_ +#define _IH264D_DEBLOCKING_H_ +/*! 
+ **************************************************************************
+ * \file ih264d_deblocking.h
+ *
+ * \brief
+ * Declarations of deblocking functions
+ *
+ * \date
+ * 23/11/2002
+ *
+ * \author AI
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+/* Copies the slice alpha/beta offsets into the MB deblocking context, stores
+ * the field decoding flag in bit 7 of u1_mb_type and derives
+ * u1_deblocking_mode from the slice's u1_disable_dblk_filter_idc and the
+ * neighbour availability mask. Always returns 0. */
+WORD8 ih264d_set_deblocking_parameters(deblk_mb_t * ps_cur_deblk_mb,
+                                       dec_slice_params_t * ps_slice,
+                                       UWORD8 u1_mb_ngbr_availablity,
+                                       UWORD8 u1_mb_field_decoding_flag);
+/* NOTE(review): no definitions named FilterBoundaryLeft, FilterBoundaryTop or
+ * deblock_mb are visible in the part of ih264d_deblocking.c reviewed here.
+ * They look like stale prototypes of ih264d_filter_boundary_left_mbaff,
+ * ih264d_filter_boundary_topmbaff and ih264d_deblock_mb_mbaff (which take an
+ * additional leading dec_struct_t *ps_dec) - confirm before relying on them. */
+void FilterBoundaryLeft(tfr_ctxt_t * const ps_tfr_cxt,
+                        const WORD8 i1_cb_qp_idx_ofst,
+                        const WORD8 i1_cr_qp_idx_ofst,
+                        deblk_mb_t * const ps_cur_mb,
+                        UWORD16 u2_strd_y,
+                        UWORD16 u2_strd_uv,
+                        deblk_mb_t * const ps_left_mb,
+                        const UWORD32 pu4_bs_tab[],
+                        const UWORD8 u1_cur_fld);
+void FilterBoundaryTop(tfr_ctxt_t * const ps_tfr_cxt,
+                       const WORD8 i1_cb_qp_idx_ofst,
+                       const WORD8 i1_cr_qp_idx_ofst,
+                       deblk_mb_t * const ps_cur_mb,
+                       const UWORD16 u2_strd_y,
+                       const UWORD16 u2_strd_uv,
+                       deblk_mb_t * const ps_top_mb,
+                       const UWORD32 u4_bs);
+void deblock_mb(tfr_ctxt_t * const ps_tfr_cxt,
+                const WORD8 i1_cb_qp_idx_ofst,
+                const WORD8 i1_cr_qp_idx_ofst,
+                deblk_mb_t * const ps_cur_mb,
+                WORD32 i4_strd_y,
+                WORD32 i4_strd_uv,
+                deblk_mb_t * const ps_top_mb,
+                deblk_mb_t * const ps_left_mb,
+                const UWORD8 u1_cur_fld,
+                const UWORD8 u1_extra_top_edge);
+void ih264d_deblock_mb_mbaff(dec_struct_t *ps_dec,
+                             tfr_ctxt_t * const ps_tfr_cxt,
+                             const WORD8 i1_cb_qp_idx_ofst,
+                             const WORD8 i1_cr_qp_idx_ofst,
+                             deblk_mb_t * const ps_cur_mb,
+                             WORD32 i4_strd_y,
+                             WORD32 i4_strd_uv,
+                             deblk_mb_t * const ps_top_mb,
+                             deblk_mb_t * const ps_left_mb,
+                             const UWORD8 u1_cur_fld,
+                             const UWORD8 u1_extra_top_edge);
+/* ih264d_deblock_mb_mbaff (above) deblocks one MB of an MBAFF picture: left
+ * MB edge (when ps_left_mb != NULL), internal vertical edges, top MB edge
+ * (when ps_top_mb != NULL; two passes if u1_extra_top_edge), then internal
+ * horizontal edges. */
+void ih264d_deblock_picture_mbaff(dec_struct_t * const ps_dec);
+
+void ih264d_deblock_picture_non_mbaff(dec_struct_t * const ps_dec);
+
+void ih264d_deblock_picture_progressive(dec_struct_t * const ps_dec);
+/* Boundary strength computation; definitions are not part of the reviewed
+ * source, see the implementation files for details. */
+void ih264d_compute_bs_mbaff(dec_struct_t * ps_dec,
+                             dec_mb_info_t * ps_cur_mb_info,
+                             const UWORD16 u2_mbxn_mb);
+void ih264d_compute_bs_non_mbaff(dec_struct_t * ps_dec,
+                                 dec_mb_info_t * ps_cur_mb_info,
+                                 const UWORD16 u2_mbxn_mb);
+
+void ih264d_fill_bs_mbedge_2(dec_struct_t * ps_dec,
+                             dec_mb_info_t * ps_cur_mb_info,
+                             const UWORD16 u2_mbxn_mb);
+
+void ih264d_fill_bs_mbedge_4(dec_struct_t * ps_dec,
+                             dec_mb_info_t * ps_cur_mb_info,
+                             const UWORD16 u2_mbxn_mb);
+/* The four ih264d_fill_bs1_* prototypes below share one parameter list. */
+void ih264d_fill_bs1_16x16mb_pslice(mv_pred_t *ps_cur_mv_pred,
+                                    mv_pred_t *ps_top_mv_pred,
+                                    void **ppv_map_ref_idx_to_poc,
+                                    UWORD32 *pu4_bs_table,
+                                    mv_pred_t *ps_leftmost_mv_pred,
+                                    neighbouradd_t *ps_left_addr,
+                                    void **u4_pic_addrress,
+                                    WORD32 i4_ver_mvlimit);
+
+void ih264d_fill_bs1_non16x16mb_pslice(mv_pred_t *ps_cur_mv_pred,
+                                       mv_pred_t *ps_top_mv_pred,
+                                       void **ppv_map_ref_idx_to_poc,
+                                       UWORD32 *pu4_bs_table,
+                                       mv_pred_t *ps_leftmost_mv_pred,
+                                       neighbouradd_t *ps_left_addr,
+                                       void **u4_pic_addrress,
+                                       WORD32 i4_ver_mvlimit);
+
+void ih264d_fill_bs1_16x16mb_bslice(mv_pred_t *ps_cur_mv_pred,
+                                    mv_pred_t *ps_top_mv_pred,
+                                    void **ppv_map_ref_idx_to_poc,
+                                    UWORD32 *pu4_bs_table,
+                                    mv_pred_t *ps_leftmost_mv_pred,
+                                    neighbouradd_t *ps_left_addr,
+                                    void **u4_pic_addrress,
+                                    WORD32 i4_ver_mvlimit);
+
+void ih264d_fill_bs1_non16x16mb_bslice(mv_pred_t *ps_cur_mv_pred,
+                                       mv_pred_t *ps_top_mv_pred,
+                                       void **ppv_map_ref_idx_to_poc,
+                                       UWORD32 *pu4_bs_table,
+                                       mv_pred_t *ps_leftmost_mv_pred,
+                                       neighbouradd_t *ps_left_addr,
+                                       void **u4_pic_addrress,
+                                       WORD32 i4_ver_mvlimit);
+
+void ih264d_fill_bs_xtra_left_edge_cur_fld(UWORD32 *pu4_bs,
+                                           WORD32 u4_left_mb_t_csbp,
+                                           WORD32 u4_left_mb_b_csbp,
+                                           WORD32 u4_cur_mb_csbp,
+                                           UWORD32 u4_cur_mb_top);
+
+void ih264d_fill_bs_xtra_left_edge_cur_frm(UWORD32 *pu4_bs,
+                                           WORD32 u4_left_mb_t_csbp,
+                                           WORD32 u4_left_mb_b_csbp,
+                                           WORD32 u4_cur_mb_csbp,
+                                           UWORD32 u4_cur_mb_top);
+/* Deblocks one MB of a non-MBAFF picture; a NULL ps_top_mb / ps_left_mb is
+ * what callers pass when that MB edge must not be filtered. */
+void ih264d_deblock_mb_nonmbaff(dec_struct_t *ps_dec,
+                                tfr_ctxt_t * const ps_tfr_cxt,
+                                const WORD8 i1_cb_qp_idx_ofst,
+                                const WORD8 i1_cr_qp_idx_ofst,
+                                deblk_mb_t * const ps_cur_mb,
+                                WORD32 i4_strd_y,
+                                WORD32 i4_strd_uv,
+                                deblk_mb_t * const ps_top_mb,
+                                deblk_mb_t * const ps_left_mb);
+
+void ih264d_init_deblk_tfr_ctxt(dec_struct_t * ps_dec,
+                                pad_mgr_t *ps_pad_mgr,
+                                tfr_ctxt_t *ps_tfr_cxt,
+                                UWORD16 u2_image_wd_mb,
+                                UWORD8 u1_mbaff);
+/* MB-level deblocking driver: saves the last row of the current MB for intra
+ * prediction, deblocks the MB to its left, and at the last MB column also
+ * deblocks the current MB and swaps the current/previous intra prediction
+ * line buffers. */
+void ih264d_deblock_mb_level(dec_struct_t *ps_dec,
+                             dec_mb_info_t *ps_cur_mb_info,
+                             UWORD32 nmb_index);
+
+#endif /* _IH264D_DEBLOCKING_H_ */
diff --git a/decoder/ih264d_debug.c b/decoder/ih264d_debug.c
new file mode 100755
index 0000000..5650e20
--- /dev/null
+++ b/decoder/ih264d_debug.c
@@ -0,0 +1,40 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+ ************************************************************************** + * \file ih264d_debug.c + * + * \brief + * Contains routines that can be used in debugging + * + * \date + * 20/11/2002 + * + * \author AI + ************************************************************************** + */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_bitstrm.h" +#include "ih264d_debug.h" +#include "ih264d_defs.h" + diff --git a/decoder/ih264d_debug.h b/decoder/ih264d_debug.h new file mode 100755 index 0000000..787b697 --- /dev/null +++ b/decoder/ih264d_debug.h @@ -0,0 +1,135 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef _IH264D_DEBUG_H_ +#define _IH264D_DEBUG_H_ + +/*! + ************************************************************************** + * \file ih264d_debug.h + * + * \brief + * Contains declarations used for debugging + * + * \date + * 2/12/2002 + * + * \author AI + ************************************************************************** + */ +#ifdef DEBUG_DEC +#define H264_DEC_DEBUG_PRINT(...) 
do { printf("\n[H264_DEBUG] %s/%d:: ", __FUNCTION__, __LINE__); printf(__VA_ARGS__); } while(0) /* single statement: safe under unbraced if/else */
+#else //DEBUG_DEC
+#define H264_DEC_DEBUG_PRINT(...) do { } while(0) /* expands to one statement, like the debug variant */
+#endif //DEBUG_DEC
+#define STRENGTH_DEBLOCKING 0 //sanjeev
+#define DEBUG_RECONSTRUCT_LUMA 0
+#define DEBUG_RECONSTRUCT_CHROMA 0
+
+#define DEBUG_IDCT 0
+#define DEBUG_LUMA_IDCT 0
+#define DEBUG_REF_IDCT 0
+
+#define BIN_BIT_RATIO 0
+#define MB_PART_HIST 0
+
+#define MB_INTRA_PREDICTION 1
+
+#ifdef WIN32
+#define CHK_PURIFY 0
+#else
+#define CHK_PURIFY 0
+#endif
+
+#if MB_INTRA_PREDICTION
+#define MB_INTRA_CHROMA_PREDICTION_ON 1
+#define MB_INTRA_4x4_PREDICTION_ON 1
+#define MB_INTRA_16x16_PREDICTION_ON 1
+#endif
+
+#define TRACE 0
+#define DEBUG_CABAC 0
+#define DEBUG_ABS_MVD 0
+#define DEBUG_INTRA_PRED_MODES 0
+#define DEBUG_DEBLOCKING 0
+
+#define COPYTHECONTEXT(s,val)
+#define PRINT_TRACE
+#define PRINT_TRACE_CAB
+#define SWITCHOFFTRACE
+#define SWITCHONTRACE
+#define SWITCHOFFTRACECABAC
+#define SWITCHONTRACECABAC
+
+#define INC_BIN_COUNT(ps_cab_env)
+#define INC_DECISION_BINS(ps_cab_env)
+#define INC_BYPASS_BINS(ps_cab_env)
+#define INC_SYM_COUNT(ps_cab_env)
+#define PRINT_BIN_BIT_RATIO(ps_dec)
+#define RESET_BIN_COUNTS(ps_cab_env)
+
+
+#ifdef PROFILE_DIS_DEBLK
+#define PROFILE_DISABLE_DEBLK() return;
+#else
+#define PROFILE_DISABLE_DEBLK() ;
+#endif
+
+#ifdef PROFILE_DIS_IQ_IT_RECON
+#define PROFILE_DISABLE_IQ_IT_RECON() if (0)
+#define PROFILE_DISABLE_IQ_IT_RECON_RETURN() return;
+#else
+#define PROFILE_DISABLE_IQ_IT_RECON() ;
+#define PROFILE_DISABLE_IQ_IT_RECON_RETURN() ;
+#endif
+
+#ifdef PROFILE_DIS_INTRA_PRED
+#define PROFILE_DISABLE_INTRA_PRED() if (0)
+#else
+#define PROFILE_DISABLE_INTRA_PRED() ;
+#endif
+
+#ifdef PROFILE_DIS_UNPACK
+#define PROFILE_DISABLE_UNPACK_LUMA() return 0;
+#define PROFILE_DISABLE_UNPACK_CHROMA() return ;
+#else
+#define PROFILE_DISABLE_UNPACK_LUMA() ;
+#define PROFILE_DISABLE_UNPACK_CHROMA() ;
+#endif
+
+#ifdef PROFILE_DIS_INTER_PRED
+#define PROFILE_DISABLE_INTER_PRED() return;
+#else
+#define PROFILE_DISABLE_INTER_PRED() ;
+#endif
+
+#ifdef PROFILE_DIS_BOUNDARY_STRENGTH
+#define PROFILE_DISABLE_BOUNDARY_STRENGTH() return;
+#else
+#define PROFILE_DISABLE_BOUNDARY_STRENGTH() ;
+#endif
+
+#ifdef PROFILE_DIS_MB_PART_INFO
+#define PROFILE_DISABLE_MB_PART_INFO() return 0;
+#else
+#define PROFILE_DISABLE_MB_PART_INFO() ;
+#endif
+
+#endif /* _IH264D_DEBUG_H_ */
+
diff --git a/decoder/ih264d_defs.h b/decoder/ih264d_defs.h
new file mode 100755
index 0000000..3f8bc58
--- /dev/null
+++ b/decoder/ih264d_defs.h
@@ -0,0 +1,671 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore
+*/
+#ifndef _IH264D_DEFS_H_
+#define _IH264D_DEFS_H_
+
+/**
+ ************************************************************************
+ * \file ih264d_defs.h
+ *
+ * \brief
+ *    Type definitions used in the code
+ *
+ * \date
+ *    19/11/2002
+ *
+ * \author  Sriram Sethuraman
+ *
+ ************************************************************************
+ */
+#define H264_MAX_FRAME_WIDTH                3840
+#define H264_MAX_FRAME_HEIGHT               2160
+
+#define H264_MIN_FRAME_WIDTH                16
+#define H264_MIN_FRAME_HEIGHT               16
+
+#define IH264DEC_MAX_NAL_UNIT_SIZE          311040
+#define IH264DEC_NUM_ZEROS_IN_START_CODE    2
+#define H264DEC_MEM_ALLOC_SUCCESS           1
+#define H264DEC_MEM_ALLOC_FAILURE           0
+#define H264DEC_CREATE_FAILED               (NULL)
+
+#define H264_NO_BUF_TO_DISPLAY              -1
+#define H264_DISPLAY_BUF_FOUND              0
+#define IH264DEC_YUV420                     0
+#define IH264DEC_YUV422                     1
+#define IH264DEC_YUV422INTERLACED           2
+#define IH264DEC_RGB                        4 // Original Size
+/* Number of bytes needed to round x up to the next multiple of (1 << y) */
+#define FILL_POWEROF2(x,y)    ((size_t)(((x) & ((1<<(y))-1))?((1<<(y)) - ((x) & ((1<<(y))-1))): 0))
+#define ALIGN_POWEROF2(x,y)   ((x) = (x)+FILL_POWEROF2((size_t)(x),(y)))
+
+/** Bit manipulation macros */
+#define CHECKBIT(a,i) ((a) &  (1 << (i)))
+#define CLEARBIT(a,i) ((a) &= ~(1 << (i)))
+
+/** Macro to convert an integer to a boolean value */
+#define BOOLEAN(x) (!!(x))
+
+/** Arithmetic operations */
+#define MOD(x,y) ((x)%(y))
+#define DIV(x,y) ((x)/(y))
+#define MUL(x,y) ((x)*(y))
+#define SIGN_POW2_DIV(x, y) (((x) < 0) ? (-((-(x)) >> (y))) : ((x) >> (y)))
+
+#define MB_ENABLE_FILTERING           0x00
+#define MB_DISABLE_FILTERING          0x01
+#define MB_DISABLE_TOP_EDGE           0x02
+#define MB_DISABLE_LEFT_EDGE          0x04
+
+/** Maximum number of reference pics */
+#define MAX_REF_BUFS                  32
+#define MAX_DISP_BUFS_NEW             64
+#define MAX_FRAMES                    16
+#define MAX_MBS_IN_ROW                (720/16)
+#define INVALID_FRAME_NUM             0x0fffffff
+#define GAP_FRAME_NUM                 0x1fffffff
+#define MAX_PIC_SIZE                  622080 // 720 * 576 * 1.5
+/** macros for reference picture lists, refIdx to POC mapping */
+// 1 extra entry into reference picture lists for refIdx = -1.
+// this entry is always 0. this saves conditional checks in
+// FillBs modules.
+#define POC_LIST_L0_TO_L1_DIFF        (( 2*MAX_FRAMES) + 1)
+#define POC_LIST_L0_TO_L1_DIFF_1      ((MAX_FRAMES) + 1)
+
+#define FRM_LIST_L0             0                               /* offset 0 */
+#define FRM_LIST_L1             (1 * POC_LIST_L0_TO_L1_DIFF)    /* offset 33; parenthesized so the macro is safe inside larger expressions */
+#define TOP_LIST_FLD_L0         (2 * POC_LIST_L0_TO_L1_DIFF)    /* offset 66 */
+#define TOP_LIST_FLD_L1         (3 * POC_LIST_L0_TO_L1_DIFF)    /* offset 99 */
+#define BOT_LIST_FLD_L0         (4 * POC_LIST_L0_TO_L1_DIFF)    /* offset 132 */
+#define BOT_LIST_FLD_L1         (5 * POC_LIST_L0_TO_L1_DIFF)    /* offset 165 */
+#define TOTAL_LIST_ENTRIES      (6 * POC_LIST_L0_TO_L1_DIFF)    /* 198 entries in total */
+#define PAD_MV_BANK_ROW         64
+#define OFFSET_MV_BANK_ROW      ((PAD_MV_BANK_ROW)>>1)
+#define PAD_PUC_CURNNZ          32
+#define OFFSET_PUC_CURNNZ       (PAD_PUC_CURNNZ)
+#define PAD_MAP_IDX_POC         (1)
+#define OFFSET_MAP_IDX_POC      (1)
+
+#define OFFSET_MAP_IDX_POC      (1)
+
+#define NAL_REF_IDC(nal_first_byte)       (((nal_first_byte) >> 5) & 0x3)
+#define NAL_FORBIDDEN_BIT(nal_first_byte) ((nal_first_byte)>>7)
+#define NAL_UNIT_TYPE(nal_first_byte)     ((nal_first_byte) & 0x1F)
+
+#define INT_PIC_TYPE_I          (0x00)
+
+#define YIELD_CNT_THRESHOLD     8
+#define ENABLE_420P_UV_SHARING  1
+
+#define OK        0
+#define END       1
+#define NOT_OK    -1
+
+/* For 420SP */
+#define YUV420SP_FACTOR 2
+
+
+/**
+ ***************************************************************************
+ * Enum to hold various mem records being requested
+ ****************************************************************************
+ */
+enum
+{
+    /**
+     * Codec Object at API level
+     */
+    MEM_REC_IV_OBJ,
+
+    /**
+     * Codec context
+     */
+    MEM_REC_CODEC,
+
+    /**
+     * Bitstream buffer which holds emulation prevention removed bytes
+     */
+    MEM_REC_BITSBUF,
+
+    /**
+     * Buffer to hold coeff data
+     */
+    MEM_REC_COEFF_DATA,
+
+    /**
+     * Motion vector bank
+     */
+    MEM_REC_MVBANK,
+
+    /**
+     * Holds mem records passed to the codec.
+     */
+    MEM_REC_BACKUP,
+
+    /**
+     * Holds SPS
+     */
+    MEM_REC_SPS,
+
+    /**
+     * Holds PPS
+     */
+    MEM_REC_PPS,
+
+    /**
+     * Holds Slice Headers
+     */
+    MEM_REC_SLICE_HDR,
+
+    /**
+     * Holds thread handles
+     */
+    MEM_REC_THREAD_HANDLE,
+
+    /**
+     * Contains i4_status map indicating parse i4_status per MB basis
+     */
+    MEM_REC_PARSE_MAP,
+
+    /**
+     * Contains i4_status map indicating processing i4_status per MB basis
+     */
+    MEM_REC_PROC_MAP,
+
+    /**
+     * Contains slice number info for each MB
+     */
+
+    MEM_REC_SLICE_NUM_MAP,
+
+    /**
+     * Holds dpb manager context
+     */
+    MEM_REC_DPB_MGR,
+
+    /**
+     * Holds neighbors' info
+     */
+    MEM_REC_NEIGHBOR_INFO,
+
+    /**
+     * Holds neighbors' info
+     */
+    MEM_REC_PRED_INFO,
+
+
+    /**
+     * Holds inter pred information in packed format
+     */
+    MEM_REC_PRED_INFO_PKD,
+    /**
+     * Holds neighbors' info
+     */
+    MEM_REC_MB_INFO,
+
+    /**
+     * Holds deblock Mb info structure (frame level)
+     */
+    MEM_REC_DEBLK_MB_INFO,
+
+    /**
+     * Holds reference picture buffers in non-shared mode
+     */
+    MEM_REC_REF_PIC,
+
+    /**
+     * Holds some misc intermediate_buffers
+     */
+    MEM_REC_EXTRA_MEM,
+
+    /**
+     * Holds some misc intermediate_buffers
+     */
+    MEM_REC_INTERNAL_SCRATCH,
+
+    /**
+     * Holds some misc intermediate_buffers
+     */
+    MEM_REC_INTERNAL_PERSIST,
+
+    /* holds structures related to picture buffer manager*/
+    MEM_REC_PIC_BUF_MGR,
+
+    /*holds structure related to MV buffer manager*/
+    MEM_REC_MV_BUF_MGR,
+
+    /**
+     * Place holder to compute number of memory records.
+     */
+    MEM_REC_CNT
+/* Do not add anything below */
+};
+
+#ifdef DEBLOCK_THREAD
+#define H264_MUTEX_LOCK(lock)          ithread_mutex_lock(lock)
+#define H264_MUTEX_UNLOCK(lock)        ithread_mutex_unlock(lock)
+#else //DEBLOCK_THREAD
+#define H264_MUTEX_LOCK(lock)
+#define H264_MUTEX_UNLOCK(lock)
+#endif //DEBLOCK_THREAD - closed here so the definitions below exist in both configurations
+
+#define DEBUG_THREADS_PRINTF(...)
+#define DEBUG_PERF_PRINTF(...)
+
+/** Profile Types*/
+#define BASE_PROFILE_IDC    66
+#define MAIN_PROFILE_IDC    77
+#define HIGH_PROFILE_IDC    100
+#define MAIN_PROFILE        1
+
+#define MB_SIZE             16
+#define BLK8x8SIZE          8
+#define BLK_SIZE            4
+#define NUM_BLKS_PER_MB     24
+#define NUM_LUM_BLKS_PER_MB 16
+#define LUM_BLK             0
+#define CHROM_BLK           1
+#define NUM_PELS_IN_MB      64
+
+/* Level Types */
+#define H264_LEVEL_1_0     10
+#define H264_LEVEL_1_1     11
+#define H264_LEVEL_1_2     12
+#define H264_LEVEL_1_3     13
+#define H264_LEVEL_2_0     20
+#define H264_LEVEL_2_1     21
+#define H264_LEVEL_2_2     22
+#define H264_LEVEL_3_0     30
+#define H264_LEVEL_3_1     31
+#define H264_LEVEL_3_2     32
+#define H264_LEVEL_4_0     40
+#define H264_LEVEL_4_1     41
+#define H264_LEVEL_4_2     42
+#define H264_LEVEL_5_0     50
+#define H264_LEVEL_5_1     51
+
+#define MAX_MBS_LEVEL_51 36864
+#define MAX_MBS_LEVEL_50 22080
+#define MAX_MBS_LEVEL_42 8704
+#define MAX_MBS_LEVEL_41 8192
+#define MAX_MBS_LEVEL_40 8192
+#define MAX_MBS_LEVEL_32 5120
+#define MAX_MBS_LEVEL_31 3600
+#define MAX_MBS_LEVEL_30 1620
+#define MAX_MBS_LEVEL_22 1620
+#define MAX_MBS_LEVEL_21 792
+#define MAX_MBS_LEVEL_20 396
+#define MAX_MBS_LEVEL_13 396
+#define MAX_MBS_LEVEL_12 396
+#define MAX_MBS_LEVEL_11 396
+#define MAX_MBS_LEVEL_10 99
+
+
+/*
+ | Legend:
+ | LVL Level*10
+ | MPR Macroblk processing rate
+ | MMF Max Mbs/Frm
+ | MDK Max DpbSize (in kB)
+ | MDB max DpbSize (in bytes)
+ | MFS FrmSizeYUV (in bytes)
+ | MDP Max DBPics
+ | MDC Ceiling DBPics
+ | FPS Frame/Second
+ |
+ | LVL MPR   MMF  MDK    MDB     MFS    MDP  MDC  FPS
+ | 10  1485  99   148.5  152064  38016  4.00 4.00 15.00
+ | 11  3000  396  337.5  345600  152064 2.27 3.00 7.58
+ | 12  6000  396  891    912384  152064 6.00 6.00 15.15
+ | 13  11880 396  891    912384  152064 6.00 6.00 30.00
+ | 20  11880 396  891    912384  152064 6.00 6.00 30.00
+ | 21  19800 792  1782   1824768 304128 6.00 6.00 25.00
+ | 22  20250 1620 3037.5 3110400 622080 5.00 5.00 12.50
+ | 30  40500 1620 3037.5 3110400 622080 5.00 5.00 25.00
+ */
+#define MAX_REF_LEVEL_1_0 4
+#define MAX_REF_LEVEL_1_1 3
+#define MAX_REF_LEVEL_1_2 6
+#define MAX_REF_LEVEL_1_3 6
+#define MAX_REF_LEVEL_2_0 6
+#define MAX_REF_LEVEL_2_1 6
+#define MAX_REF_LEVEL_2_2 5
+#define MAX_REF_LEVEL_3_0 5
+#define H264_MAX_REF_PICS 16
+
+#define MIN_LEVEL_SUPPORTED 10
+#define MAX_LEVEL_SUPPORTED 64
+
+/** NAL Types */
+#define SLICE_NAL                       1
+#define SLICE_DATA_PARTITION_A_NAL      2
+#define SLICE_DATA_PARTITION_B_NAL      3
+#define SLICE_DATA_PARTITION_C_NAL      4
+#define IDR_SLICE_NAL                   5
+#define SEI_NAL                         6
+#define SEQ_PARAM_NAL                   7
+#define PIC_PARAM_NAL                   8
+#define ACCESS_UNIT_DELIMITER_RBSP      9
+#define END_OF_SEQ_RBSP                 10
+#define END_OF_STREAM_RBSP              11
+#define FILLER_DATA_NAL                 12
+
+/** Entropy coding modes */
+#define CAVLC  0
+#define CABAC  1
+
+/** Picture Types */
+#define I_PIC       0
+#define IP_PIC      1
+#define IPB_PIC     2
+#define SI_PIC      3
+#define SIP_PIC     4
+#define ISI_PIC     5
+#define ISI_PSP_PIC 6
+#define ALL_PIC     7
+
+/* Frame or field picture type */
+#define FRM_PIC         0x00
+#define TOP_FLD         0x01
+#define BOT_FLD         0x02
+#define COMP_FLD_PAIR   0x03 /* TOP_FLD | BOT_FLD */
+#define AFRM_PIC        0x04
+#define TOP_REF         0x08
+#define BOT_REF         0x10
+#define PIC_MASK        0x03
+#define NON_EXISTING    0xff
+
+/* field picture type for display */
+#define DISP_TOP_FLD    0x00
+#define DISP_BOT_FLD    0x01
+
+/** Slice Types */
+#define P_SLICE  0
+#define B_SLICE  1
+#define I_SLICE  2
+#define SP_SLICE 3
+#define SI_SLICE 4
+
+/* Definition for picture skip */
+#define SKIP_NONE  (0x0)
+#define I_SLC_BIT  (0x1)
+#define P_SLC_BIT  (0x2)
+#define B_SLC_BIT  (0x4)
+
+/** Macros used for Deblocking */
+#define D_INTER_MB        0
+#define D_INTRA_MB        1
+#define D_PRED_NON_16x16  2
+#define D_B_SLICE         4
+#define D_B_SUBMB         6 //D_B_SLICE | D_PRED_NON_16x16 | D_INTER_MB
+#define D_FLD_MB          0x80
+
+/** Macros for Cabac checks */
+/** MbType */
+/** |x|x|I_PCM|SKIP|
+ |S|Inter/Intra|P/B|NON-BD16x16/BD16x16,I16x16/I4x4| */
+#define CAB_INTRA         0x00 /* 0000 00xx */
+#define CAB_INTER         0x04 /* 0000 01xx */
+#define CAB_I4x4          0x00 /* 0000 00x0 */
+#define CAB_I16x16        0x01 /* 0000 00x1 */
+#define CAB_BD16x16       0x04 /* 0000 0100 */
+#define CAB_NON_BD16x16   0x05 /* 0000 0101 */
+#define CAB_P             0x07 /* 0000 0111 */
+#define CAB_SI4x4         0x08 /* 0000 10x0 */
+#define CAB_SI16x16       0x09 /* 0000 10x1 */
+#define CAB_SKIP_MASK     0x10 /* 0001 0000 */
+#define CAB_SKIP          0x10 /* 0001 0000 */
+#define CAB_P_SKIP        0x16 /* 0001 x11x */
+#define CAB_B_SKIP        0x14 /* 0001 x100 */
+#define CAB_BD16x16_MASK  0x07 /* 0000 0111 */
+#define CAB_INTRA_MASK    0x04 /* 0000 0100 */
+#define CAB_I_PCM         0x20 /* 001x xxxx */
+
+/** Binarization types for CABAC */
+/* |x|x|x|x|MSB_FIRST_FLC|FLC|TUNARY|UNARY| */
+#define UNARY           1
+#define TUNARY          2
+#define FLC             4
+#define MSB_FIRST_FLC   12
+
+/** Macroblock Types */
+#define I_4x4_MB    0
+#define I_16x16_MB  1
+#define P_MB        2
+#define B_MB        3
+#define SI_MB       4
+#define SP_MB       5
+#define I_PCM_MB    6
+
+#define SI4x4_MB 0xFF
+
+/** Intra luma 16x16 and chroma 8x8 prediction modes */
+#define NUM_INTRA_PRED_MODES  4
+#define VERT    0
+#define HORIZ   1
+#define DC      2
+#define PLANE   3
+#define NOT_VALID -1
+#define DC_DC_DC_DC   0x02020202 /*packed 4 bytes used in Decode Intra Mb*/
+
+/** Intra luma 4x4 prediction modes */
+#define NUM_INTRA4x4_PRED_MODES 9
+
+/** VERT, HORIZ, DC are applicable to 4x4 as well */
+/** D - Down; U - Up; L - Left; R - Right */
+#define DIAG_DL   3
+#define DIAG_DR   4
+#define VERT_R    5
+#define HORIZ_D   6
+#define VERT_L    7
+#define HORIZ_U   8
+
+/** P_MB prediction modes */
+#define NUM_INTER_MB_PRED_MODES 5
+#define PRED_16x16  0
+#define PRED_16x8   1
+#define PRED_8x16   2
+#define PRED_8x8    3
+#define PRED_8x8R0  4
+#define MAGIC_16x16 5
+#define MB_SKIP     255
+
+/* P_MB submb modes */
+#define P_L0_8x8    0
+#define P_L0_8x4    1
+#define P_L0_4x8    2
+#define P_L0_4x4    3
+
+/* B_MB submb modes */
+#define B_DIRECT_8x8    0
+#define B_L0_8x8        1
+#define B_L1_8x8        2
+#define B_BI_8x8        3
+#define B_L0_8x4        4
+#define B_L0_4x8        5
+#define B_L1_8x4        6
+#define B_L1_4x8        7
+#define B_BI_8x4        8
+#define B_BI_4x8        9
+#define B_L0_4x4        10
+#define B_L1_4x4        11
+#define B_BI_4x4        12
+
+/** B_MB prediction modes */
+#define B_8x8    22
+#define PRED_INVALID  -1
+#define B_DIRECT  0
+#define PRED_L0   1
+#define PRED_L1   2
+#define BI_PRED   3
+#define B_DIRECT_BI_PRED  23
+#define B_DIRECT_PRED_L0  24
+#define B_DIRECT_PRED_L1  25
+#define B_DIRECT_SPATIAL  26
+
+#define B_DIRECT8x8_BI_PRED  13
+#define B_DIRECT8x8_PRED_L0  14
+#define B_DIRECT8x8_PRED_L1  15
+
+#define ONE_TO_ONE  0
+#define FRM_TO_FLD  1
+#define FLD_TO_FRM  2
+
+/** Inter Sub MB Pred modes */
+#define NUM_INTER_SUBMB_PRED_MODES 4
+#define SUBMB_8x8    0
+#define SUBMB_8x4    1
+#define SUBMB_4x8    2
+#define SUBMB_4x4    3
+
+/** Coded Block Pattern - Chroma */
+#define CBPC_ALLZERO    0
+#define CBPC_ACZERO     1
+#define CBPC_NONZERO    2
+
+/** Index for accessing the left MB in the MV predictor array */
+#define LEFT  0
+/** Index for accessing the top MB in the MV predictor array */
+#define TOP   1
+/** Index for accessing the top right MB in the MV predictor array */
+#define TOP_R 2
+/** Index for accessing the top Left MB in the MV predictor array */
+#define TOP_L 3
+
+/** Maximum number of Sequence Parameter sets */
+#define MAX_NUM_SEQ_PARAMS 32
+
+/** Maximum number of Picture Parameter sets */
+#define MAX_NUM_PIC_PARAMS 256
+
+#define MASK_ERR_SEQ_SET_ID   (0xFFFFFFE0)
+#define MASK_ERR_PIC_SET_ID   (0xFFFFFF00)
+
+#define MAX_PIC_ORDER_CNT_TYPE    2
+
+#define MAX_BITS_IN_FRAME_NUM     16
+#define MAX_BITS_IN_POC_LSB       16
+
+#define H264_MAX_REF_PICS         16
+#define H264_MAX_REF_IDX          32
+#define MAX_WEIGHT_BIPRED_IDC     2
+#define MAX_CABAC_INIT_IDC        2
+
+#define H264_DEFAULT_NUM_CORES 1
+#define DEFAULT_SEPARATE_PARSE ((H264_DEFAULT_NUM_CORES == 2) ? 1 : 0)
+
+/** Maximum number of Slice groups */
+#define MAX_NUM_SLICE_GROUPS 8
+#define MAX_NUM_REF_FRAMES_OFFSET 255
+
+/** Deblocking modes for a slice */
+#define SLICE_BOUNDARY_DBLK_DISABLED  2
+#define DBLK_DISABLED                 1
+#define DBLK_ENABLED                  0
+#define MIN_DBLK_FIL_OFF              -12
+#define MAX_DBLK_FIL_OFF              12
+
+/** Width of the predictor buffers used for MC */
+#define MB_SIZE             16
+#define BLK8x8SIZE          8
+#define BLK_SIZE             4
+#define NUM_BLKS_PER_MB     24
+#define NUM_LUM_BLKS_PER_MB 16
+
+#define SUB_BLK_WIDTH                 4
+#define SUB_SUB_BLK_SIZE              4 /* 2x2 pixel i4_size */
+#define SUB_BLK_SIZE                  ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
+#define MB_LUM_SIZE                   256
+#define MB_CHROM_SIZE                 64
+
+/**< Width to pad the luminance frame buff    */
+/**< Height to pad the luminance frame buff   */
+/**< Width to pad the chrominance frame buff  */
+/**< Height to pad the chrominance frame buff */
+
+#define PAD_LEN_Y_H                   32
+#define PAD_LEN_Y_V                   20
+#define PAD_LEN_UV_H                  16
+#define PAD_LEN_UV_V                  8
+
+#define PAD_MV_BANK_ROW             64
+
+/**< Maximum u4_ofst by which the Mvs could point outside the frame buffers
+ horizontally in the left and vertically in the top direction */
+#define MAX_OFFSET_OUTSIDE_X_FRM      -20
+#define MAX_OFFSET_OUTSIDE_Y_FRM      -20
+#define MAX_OFFSET_OUTSIDE_UV_FRM     -8
+
+/** UVLC parsing macros */
+#define UEV     1
+#define SEV     2
+#define TEV     3
+
+/** Defines for Boolean values */
+#ifndef TRUE
+#define TRUE    1
+#define FALSE   0
+#endif
+
+#define UNUSED_FOR_REF 0
+#define IS_SHORT_TERM  1
+#define IS_LONG_TERM   2
+
+/** Defines for which field gets displayed first */
+#define MAX_FRAMES              16
+#define INVALID_FRAME_NUM       0x0fffffff
+#define DO_NOT_DISP             254
+#define DISP_FLD_FIRST_UNDEF    0
+#define DISP_TOP_FLD_FIRST      1
+#define DISP_BOT_FLD_FIRST      2
+
+/** Misc error resilience requirements*/
+#define MASK_LOG2_WEIGHT_DENOM      0xFFFFFFF8
+#define MASK_PRED_WEIGHT_OFFSET     0xFFFFFF00
+#define MAX_REDUNDANT_PIC_CNT       127
+
+#define DPB_HACK 0
+#define DPB_HACK_NEW 0
+
+
+
+#define PD_MB_BUF_SIZE (H264_MAX_FRAME_WIDTH * H264_MAX_FRAME_WIDTH / 256) /* NOTE(review): WIDTH * WIDTH, not WIDTH * HEIGHT - confirm this is intended */
+#define PD_MB_BUF_SIZE_MOD 0xffffffff
+#define MAX_PRED_INFO_LIMIT (PD_MB_BUF_SIZE * 32 * 2)
+
+
+
+
+#define NO_DC_SB 0
+#define SUB_BLK_MASK 0xFFFFFF00
+#define NUM_COEFFS_IN_4x4BLK 16
+
+
+#define MEMSET_16BYTES(pu4_start,value)                         \
+    {                                                           \
+        memset(pu4_start,value,16);                             \
+    }
+
+#define MEMCPY_16BYTES(dst,src)                                 \
+{                                                               \
+    memcpy(dst,src,16);                                         \
+}
+
+
+#endif /*_IH264D_DEFS_H_*/
diff --git a/decoder/ih264d_dpb_manager.h b/decoder/ih264d_dpb_manager.h
new file mode 100755
index 0000000..a9539c8
--- /dev/null
+++ b/decoder/ih264d_dpb_manager.h
@@ -0,0 +1,173 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_DPB_MANAGER_H_
+#define _IH264D_DPB_MANAGER_H_
+/*!
+***************************************************************************
+* \file ih264d_dpb_manager.h
+*
+* \brief
+*    Decoded Picture Buffer Manager Include File
+*
+* Detailed_description
+*
+* \date
+*    19-12-2002
+*
+* \author  Sriram Sethuraman
+***************************************************************************
+*/
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_defs.h"
+
+#define END_OF_MMCO                 0
+#define MARK_ST_PICNUM_AS_NONREF    1
+#define MARK_LT_INDEX_AS_NONREF     2
+#define MARK_ST_PICNUM_AS_LT_INDEX  3
+#define SET_MAX_LT_INDEX            4
+#define RESET_REF_PICTURES          5
+#define SET_LT_INDEX                6
+#define RESET_NONREF_PICTURES       7
+#define RESET_ALL_PICTURES          8
+
+struct field_t
+{
+    /* picNum of the reference field              */
+    WORD32 i4_pic_num;
+
+    /* assigned when used for long term reference */
+    /* else MAX_REF_BUFS+1                        */
+    UWORD8 u1_long_term_frame_idx;
+
+    /* 0 : unused for reference                   */
+    /* 1 : used for short term reference          */
+    /* 2 : used for long term reference           */
+    UWORD8 u1_reference_info;
+};
+
+
+struct dpb_info_t
+{
+    struct pic_buffer_t *ps_pic_buf; /** Pointer to picture buffer structure */
+    WORD32 i4_frame_num; /** frame number of picture - unique for each ref*/
+    struct dpb_info_t *ps_prev_short;/** Link to the DPB with previous picNum */
+    struct dpb_info_t *ps_prev_long; /** Link to the DPB with previous long term frame*/
+    struct field_t s_top_field; /** Contains information of the top_field
+                                    reference info, pic num and long term frame idx */
+    struct field_t s_bot_field; /** Contains information of the bot_field
+                                    reference info, pic num and long term frame idx */
+    UWORD8 u1_buf_id; /** bufID from bufAPI */
+    UWORD8 u1_used_as_ref; /** whether buffer is used as ref for frame or
+                               complementary reference field pair */
+    UWORD8 u1_lt_idx; /** If buf is assigned long-term index; else MAX_REF_BUFS+1 */
+
+};
+
+typedef struct
+{
+    struct pic_buffer_t *ps_def_dpb[MAX_REF_BUFS];/** DPB in default index order */
+    struct pic_buffer_t *ps_mod_dpb[2][2 * MAX_REF_BUFS];/** DPB in reordered index order, 0-fwd,1-bwd */
+    struct pic_buffer_t *ps_init_dpb[2][2 * MAX_REF_BUFS];/** DPB in reordered index order, 0-fwd,1-bwd */
+    struct dpb_info_t *ps_dpb_st_head; /** Pointer to the most recent picNum */
+    struct dpb_info_t *ps_dpb_ht_head; /** Pointer to the smallest LT index (head of the long-term list) */
+    struct dpb_info_t as_dpb_info[MAX_REF_BUFS]; /** Physical storage for dpbInfo for ref bufs */
+    UWORD8 u1_num_st_ref_bufs; /** Number of short term ref. buffers */
+    UWORD8 u1_num_lt_ref_bufs; /** Number of long term ref. buffers */
+    UWORD8 u1_max_lt_pic_idx_plus1; /** Maximum long term pictures - 0 to max_long_term_pic_idx */
+    UWORD8 u1_num_gaps; /** Total number of outstanding gaps */
+    void * pv_codec_handle; /* For Error Handling */
+    WORD32 i4_max_frm_num; /** Max frame number */
+    WORD32 ai4_gaps_start_frm_num[MAX_FRAMES];/** start frame number for a gap seqn */
+    WORD32 ai4_gaps_end_frm_num[MAX_FRAMES]; /** end frame number for a gap seqn */
+    WORD8 ai1_gaps_per_seq[MAX_FRAMES]; /** number of gaps with each gap seqn */
+    WORD32 ai4_poc_buf_id_map[MAX_FRAMES][3]; /* presumably {buf id, display POC, frame num} per entry - TODO confirm against ih264d_insert_pic_in_display_list */
+    WORD8 i1_poc_buf_id_entries;
+    WORD8 i1_gaps_deleted;
+    UWORD16 u2_pic_wd;
+    UWORD16 u2_pic_ht;
+}dpb_manager_t;
+
+/** Structure to store the MMC Commands */
+struct MMCParams
+{
+    UWORD32 u4_mmco; /** memory management control operation */
+    UWORD32 u4_diff_pic_num; /** diff Of Pic Nums Minus1 */
+    UWORD32 u4_lt_idx; /** Long Term Pic Idx */
+    UWORD32 u4_max_lt_idx_plus1; /** MaxLongTermPicIdxPlus1 */
+};
+
+typedef struct
+{
+    UWORD8 u1_dpb_commands_read; /** Flag to indicate that DPB commands are read */
+    UWORD8 u1_buf_mode; /** decoder Pic buffering mode*/
+    UWORD8 u1_num_of_commands; /** Number of MMC commands */
+    /* These variables are used in case of IDR pictures only */
+    UWORD8 u1_idr_pic; /** = 1 ,IDR pic */
+    UWORD8 u1_no_output_of_prior_pics_flag;
+    UWORD8 u1_long_term_reference_flag;
+    struct MMCParams as_mmc_params[MAX_REF_BUFS]; /* < Buffer to store MMC commands */
+    UWORD8 u1_dpb_commands_read_slc;
+}dpb_commands_t;
+
+void ih264d_init_ref_bufs(dpb_manager_t *ps_dpb_mgr);
+
+WORD32 ih264d_insert_st_node(dpb_manager_t *ps_dpb_mgr,
+                             struct pic_buffer_t *ps_pic_buf,
+                             UWORD8 u1_buf_id,
+                             UWORD32 u2_cur_pic_num);
+WORD32 ih264d_update_default_index_list(dpb_manager_t *ps_dpb_mgr);
+WORD32 ih264d_do_mmco_buffer(dpb_commands_t *ps_dpb_cmds,
+                             dpb_manager_t *ps_dpb_mgr,
+                             UWORD8 u1_numRef_frames_for_seq,
+                             UWORD32 u4_cur_pic_num,
+                             UWORD32 u2_u4_max_pic_num_minus1,
+                             UWORD8 u1_nal_unit_type,
+                             struct pic_buffer_t *ps_pic_buf,
+                             UWORD8 u1_buf_id,
+                             UWORD8 u1_fld_pic_flag,
+                             UWORD8 u1_curr_pic_in_err);
+void ih264d_release_pics_in_dpb(void *pv_dec,
+                                UWORD8 u1_disp_bufs);
+void ih264d_reset_ref_bufs(dpb_manager_t *ps_dpb_mgr);
+WORD32 ih264d_delete_st_node_or_make_lt(dpb_manager_t *ps_dpb_mgr,
+                                        WORD32 u4_pic_num,
+                                        UWORD32 u4_lt_idx,
+                                        UWORD8 u1_fld_pic_flag);
+
+WORD32 ih264d_delete_gap_frm_mmco(dpb_manager_t *ps_dpb_mgr,
+                                  WORD32 i4_frame_num,
+                                  UWORD8 *pu1_del_node);
+
+WORD32 ih264d_delete_gap_frm_sliding(dpb_manager_t *ps_dpb_mgr,
+                                     WORD32 i4_frame_num,
+                                     UWORD8 *pu1_del_node);
+
+WORD32 ih264d_do_mmco_for_gaps(dpb_manager_t *ps_dpb_mgr,
+                               UWORD8 u1_num_ref_frames);
+
+WORD32 ih264d_insert_pic_in_display_list(dpb_manager_t *ps_dpb_mgr,
+                                         UWORD8 u1_buf_id,
+                                         WORD32 i4_display_poc,
+                                         UWORD32 u4_frame_num);
+void ih264d_delete_nonref_nondisplay_pics(dpb_manager_t *ps_dpb_mgr);
+#endif /* _IH264D_DPB_MANAGER_H_ */
diff --git a/decoder/ih264d_dpb_mgr.c b/decoder/ih264d_dpb_mgr.c
new file mode 100755
index 0000000..205bc9b
--- /dev/null
+++ b/decoder/ih264d_dpb_mgr.c
@@ -0,0 +1,1987 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this
file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "iv.h" +#include "ih264d_dpb_manager.h" +#include "ih264d_bitstrm.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_defs.h" +#include "ih264d_structs.h" +#include "ih264d_process_bslice.h" +#include "ih264d_debug.h" +#include "ih264d_tables.h" +#include "ih264d_error_handler.h" +#include "string.h" +#include "ih264d_defs.h" +#include "ih264_error.h" +#include "ih264_buf_mgr.h" +#include "assert.h" + +/*! + *************************************************************************** + * \file ih264d_dpb_mgr.c + * + * \brief + * Functions for managing the decoded picture buffer + * + * Detailed_description + * + * \date + * 19-12-2002 + * + * \author Sriram Sethuraman + *************************************************************************** + */ + +/*! + ************************************************************************** + * \if Function name : ih264d_init_ref_bufs \endif + * + * \brief + * Called at the start for initialization. 
+ *
+ * \return
+ *  none
+ **************************************************************************
+ */
+void ih264d_init_ref_bufs(dpb_manager_t *ps_dpb_mgr)
+{
+    struct dpb_info_t *ps_node = ps_dpb_mgr->as_dpb_info;
+    struct dpb_info_t * const ps_node_end = ps_node + MAX_REF_BUFS;
+    WORD32 i4_frm;
+
+    /* Every DPB slot starts unlinked, without a picture buffer and */
+    /* unused for reference, both as a frame and per field          */
+    while(ps_node != ps_node_end)
+    {
+        ps_node->ps_pic_buf = NULL;
+        ps_node->ps_prev_short = NULL;
+        ps_node->ps_prev_long = NULL;
+
+        ps_node->u1_used_as_ref = UNUSED_FOR_REF;
+        ps_node->u1_lt_idx = MAX_REF_BUFS + 1;
+
+        ps_node->s_top_field.u1_reference_info = UNUSED_FOR_REF;
+        ps_node->s_top_field.u1_long_term_frame_idx = MAX_REF_BUFS + 1;
+        ps_node->s_bot_field.u1_reference_info = UNUSED_FOR_REF;
+        ps_node->s_bot_field.u1_long_term_frame_idx = MAX_REF_BUFS + 1;
+
+        ps_node++;
+    }
+
+    /* Short-term and long-term lists are empty */
+    ps_dpb_mgr->ps_dpb_st_head = NULL;
+    ps_dpb_mgr->ps_dpb_ht_head = NULL;
+    ps_dpb_mgr->u1_num_lt_ref_bufs = 0;
+    ps_dpb_mgr->u1_num_st_ref_bufs = 0;
+
+    /* No gaps and no POC map entries are outstanding */
+    ps_dpb_mgr->u1_num_gaps = 0;
+    ps_dpb_mgr->i1_gaps_deleted = 0;
+    ps_dpb_mgr->i1_poc_buf_id_entries = 0;
+
+    for(i4_frm = 0; i4_frm < MAX_FRAMES; i4_frm++)
+    {
+        WORD32 *pi4_map_entry = ps_dpb_mgr->ai4_poc_buf_id_map[i4_frm];
+
+        pi4_map_entry[0] = -1;
+        pi4_map_entry[1] = 0x7fffffff;
+        pi4_map_entry[2] = 0;
+
+        ps_dpb_mgr->ai1_gaps_per_seq[i4_frm] = 0;
+        ps_dpb_mgr->ai4_gaps_end_frm_num[i4_frm] = 0;
+        ps_dpb_mgr->ai4_gaps_start_frm_num[i4_frm] = INVALID_FRAME_NUM;
+    }
+}
+
+/*
+ * Releases the BUF_MGR_REF hold on picture buffer pic_buf_id and on the MV
+ * buffer mapped to it through au1_pic_buf_id_mv_buf_id_map.
+ * Nothing is released when pic_buf_id is the decoder's current picture
+ * buffer, the current slice is a field picture and u1_top_bottom_decoded is
+ * still 0 (presumably the complementary field is yet to be decoded into the
+ * same buffer - TODO confirm).
+ */
+void ih264d_free_ref_pic_mv_bufs(void* pv_dec, UWORD8 pic_buf_id)
+{
+    dec_struct_t *ps_dec = (dec_struct_t *)pv_dec;
+    UWORD8 u1_mv_buf_id;
+
+    if(pic_buf_id == ps_dec->u1_pic_buf_id)
+    {
+        if(ps_dec->ps_cur_slice->u1_field_pic_flag)
+        {
+            if(0 == ps_dec->u1_top_bottom_decoded)
+                return;
+        }
+    }
+
+    ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+                          pic_buf_id,
+                          BUF_MGR_REF);
+
+    u1_mv_buf_id = ps_dec->au1_pic_buf_id_mv_buf_id_map[pic_buf_id];
+    ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
+                          u1_mv_buf_id,
+                          BUF_MGR_REF);
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_delete_lt_node \endif
+ *
+ * \brief
+ *    Unmarks the long term reference that carries index u4_lt_idx.
+ *    In frame mode the whole node is marked unused for reference. In field
+ *    mode only the field(s) whose long term frame index equals u4_lt_idx
+ *    are unmarked, and not at all when the node is ps_lt_node_to_insert.
+ *    A node that ends up unused for reference is unlinked from the LT
+ *    linked list and its picture and MV buffers are released.
+ *
+ * \param ps_dpb_mgr           : DPB manager that owns the long term list
+ * \param u4_lt_idx            : long term index to look for
+ * \param u1_fld_pic_flag      : non-zero selects the per-field handling
+ * \param ps_lt_node_to_insert : node whose fields must be left untouched
+ * \param pi4_status           : written 0 on entry, 1 when no node with
+ *                               u4_lt_idx is found in the list
+ *
+ * \return
+ *    OK, or ERROR_DBP_MANAGER_T when, in field mode, neither field of the
+ *    matching node carries u4_lt_idx
+ **************************************************************************
+ */
+WORD32 ih264d_delete_lt_node(dpb_manager_t *ps_dpb_mgr,
+                             UWORD32 u4_lt_idx,
+                             UWORD8 u1_fld_pic_flag,
+                             struct dpb_info_t *ps_lt_node_to_insert,
+                             WORD32 *pi4_status)
+{
+    *pi4_status = 0;
+    if(ps_dpb_mgr->u1_num_lt_ref_bufs > 0)
+    {
+        WORD32 i;
+        /* ps_next_dpb ends up as the list predecessor of ps_unmark_node */
+        /* (or the node itself when the head of the list matches)         */
+        struct dpb_info_t *ps_next_dpb;
+        /* ps_unmark_node points to the node to be removed */
+        /* from long term list.                            */
+        struct dpb_info_t *ps_unmark_node;
+        //Find the node with matching LTIndex
+        /* NOTE(review): the walk trusts u1_num_lt_ref_bufs; neither the  */
+        /* head nor ps_prev_long is checked against NULL                  */
+        ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
+        if(ps_next_dpb->u1_lt_idx == u4_lt_idx)
+        {
+            ps_unmark_node = ps_next_dpb;
+        }
+        else
+        {
+            for(i = 1; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
+            {
+                if(ps_next_dpb->ps_prev_long->u1_lt_idx == u4_lt_idx)
+                    break;
+                ps_next_dpb = ps_next_dpb->ps_prev_long;
+            }
+            /* loop ran to completion: no node carries u4_lt_idx */
+            if(i == ps_dpb_mgr->u1_num_lt_ref_bufs)
+                *pi4_status = 1;
+            else
+                ps_unmark_node = ps_next_dpb->ps_prev_long;
+        }
+
+        /* ps_unmark_node is only valid (and only used) when a match was found */
+        if(*pi4_status == 0)
+        {
+            if(u1_fld_pic_flag)
+            {
+                if(ps_lt_node_to_insert != ps_unmark_node)
+                {
+                    UWORD8 u1_deleted = 0;
+                    /* for the ps_unmark_node mark the field(s) carrying */
+                    /* u4_lt_idx as unused for reference                 */
+
+                    if(ps_unmark_node->s_top_field.u1_long_term_frame_idx
+                                    == u4_lt_idx)
+                    {
+                        ps_unmark_node->s_top_field.u1_reference_info =
+                                        UNUSED_FOR_REF;
+                        ps_unmark_node->s_top_field.u1_long_term_frame_idx =
+                        MAX_REF_BUFS + 1;
+                        u1_deleted = 1;
+                    }
+                    if(ps_unmark_node->s_bot_field.u1_long_term_frame_idx
+                                    == u4_lt_idx)
+                    {
+                        ps_unmark_node->s_bot_field.u1_reference_info =
+                                        UNUSED_FOR_REF;
+                        ps_unmark_node->s_bot_field.u1_long_term_frame_idx =
+                        MAX_REF_BUFS + 1;
+                        u1_deleted = 1;
+                    }
+
+                    /* node index matched but neither field did: report an error */
+                    if(!u1_deleted)
+                    {
+
+                        UWORD32 i4_error_code;
+                        i4_error_code = ERROR_DBP_MANAGER_T;
+
+                        return i4_error_code;
+                    }
+                }
+
+                /* the node stays referenced while either field is still referenced */
+                ps_unmark_node->u1_used_as_ref =
+                                ps_unmark_node->s_top_field.u1_reference_info
+                                                | ps_unmark_node->s_bot_field.u1_reference_info;
+            }
+            else
+                ps_unmark_node->u1_used_as_ref = UNUSED_FOR_REF;
+
+            /* fully unreferenced: unlink the node and give its buffers back */
+            if(UNUSED_FOR_REF == ps_unmark_node->u1_used_as_ref)
+            {
+                if(ps_unmark_node == ps_dpb_mgr->ps_dpb_ht_head)
+                    ps_dpb_mgr->ps_dpb_ht_head = ps_next_dpb->ps_prev_long;
+
+                ps_unmark_node->u1_lt_idx = MAX_REF_BUFS + 1;
+                ps_unmark_node->s_top_field.u1_reference_info =
+                UNUSED_FOR_REF;
+                ps_unmark_node->s_bot_field.u1_reference_info =
+                UNUSED_FOR_REF;
+                // Release the physical buffer
+                ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                            ps_unmark_node->u1_buf_id);
+                ps_next_dpb->ps_prev_long = ps_unmark_node->ps_prev_long; //update link
+                ps_unmark_node->ps_prev_long = NULL;
+                ps_dpb_mgr->u1_num_lt_ref_bufs--; //decrement LT buf count
+            }
+        }
+    }
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_insert_lt_node \endif
+ *
+ * \brief
+ *    Insert a buffer into the LT linked list at a given LT index
+ *
+ * \return
+ *    none
+ **************************************************************************
+ */
+WORD32 ih264d_insert_lt_node(dpb_manager_t *ps_dpb_mgr,
+                             struct dpb_info_t *ps_mov_node,
+                             UWORD32 u4_lt_idx,
+                             UWORD8 u1_fld_pic_flag)
+{
+    UWORD8 u1_mark_top_field_long_term = 0;
+    UWORD8 u1_mark_bot_field_long_term = 0;
+
+    {
+        if(u1_fld_pic_flag)
+        {
+            /* Assign corresponding field (top or bottom) long_term_frame_idx */
+
+            if((ps_mov_node->s_top_field.u1_reference_info == IS_LONG_TERM)
+                            && (ps_mov_node->s_bot_field.u1_reference_info
+                                            == IS_LONG_TERM))
+            {
+                if(ps_mov_node->u1_lt_idx == u4_lt_idx)
+                    u1_mark_bot_field_long_term = 1;
+                else
+                {
+
+                    UWORD32 i4_error_code;
+                    i4_error_code = ERROR_DBP_MANAGER_T;
+
+                    return i4_error_code;
+
+                }
+            }
+            else if(ps_mov_node->s_top_field.u1_reference_info == IS_LONG_TERM)
+            {
+                u1_mark_top_field_long_term = 1;
+            }
+
+
if(!(u1_mark_top_field_long_term || u1_mark_bot_field_long_term)) + { + UWORD32 i4_error_code; + i4_error_code = ERROR_DBP_MANAGER_T; + return i4_error_code; + } + } + else + { + ps_mov_node->s_top_field.u1_reference_info = IS_LONG_TERM; + ps_mov_node->s_bot_field.u1_reference_info = IS_LONG_TERM; + ps_mov_node->s_top_field.u1_long_term_frame_idx = u4_lt_idx; + ps_mov_node->s_bot_field.u1_long_term_frame_idx = u4_lt_idx; + } + + ps_mov_node->u1_lt_idx = u4_lt_idx; //Assign the LT index to the node + ps_mov_node->ps_pic_buf->u1_long_term_frm_idx = u4_lt_idx; + ps_mov_node->u1_used_as_ref = IS_LONG_TERM; + + /* Insert the new long term in the LT list with u4_lt_idx */ + /* in ascending order. */ + if(ps_dpb_mgr->u1_num_lt_ref_bufs > 0) + { + struct dpb_info_t *ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head; + if(u4_lt_idx < ps_next_dpb->u1_lt_idx) + { + //LTIndex to be inserted is the smallest LT index + //Update head and point prev to the next higher index + ps_mov_node->ps_prev_long = ps_next_dpb; + ps_dpb_mgr->ps_dpb_ht_head = ps_mov_node; + } + else + { + WORD32 i; + struct dpb_info_t *ps_nxtDPB = ps_next_dpb; + ps_next_dpb = ps_next_dpb->ps_prev_long; + for(i = 1; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++) + { + if(ps_next_dpb->u1_lt_idx > u4_lt_idx) + break; + ps_nxtDPB = ps_next_dpb; + ps_next_dpb = ps_next_dpb->ps_prev_long; + } + + ps_nxtDPB->ps_prev_long = ps_mov_node; + ps_mov_node->ps_prev_long = ps_next_dpb; + } + } + else + { + ps_dpb_mgr->ps_dpb_ht_head = ps_mov_node; + ps_mov_node->ps_prev_long = NULL; + } + /* Identify the picture buffer as a long term picture buffer */ + ps_mov_node->ps_pic_buf->u1_is_short = 0; + + /* Increment LT buf count only if new LT node inserted */ + /* If Increment during top_field is done, don't increment */ + /* for bottom field, as both them are part of same pic. */ + if(!u1_mark_bot_field_long_term) + ps_dpb_mgr->u1_num_lt_ref_bufs++; + + } + return OK; +} + +/*! 
+ **************************************************************************
+ * \if Function name : ih264d_insert_st_node \endif
+ *
+ * \brief
+ *    Adds a short term reference picture into the ST linked list
+ *
+ * \param ps_dpb_mgr      DPB manager; a free entry of as_dpb_info is claimed
+ *                        and becomes the new ps_dpb_st_head
+ * \param ps_pic_buf      picture buffer being marked as short term reference
+ * \param u1_buf_id       buffer id recorded in the node
+ * \param u4_cur_pic_num  stored as i4_frame_num of the node and of the
+ *                        picture buffer
+ *
+ * \return
+ *    0 when ps_pic_buf already owns a node that is in use (only its bottom
+ *    field is marked short term); OK after a new node was linked;
+ *    ERROR_DBP_MANAGER_T when none of the MAX_REF_BUFS entries is free.
+ *    (0 and OK are presumably the same value - confirm.)
+ *
+ * \note
+ *    Called only for a new coded picture with nal_ref_idc!=0
+ **************************************************************************
+ */
+WORD32 ih264d_insert_st_node(dpb_manager_t *ps_dpb_mgr,
+                             struct pic_buffer_t *ps_pic_buf,
+                             UWORD8 u1_buf_id,
+                             UWORD32 u4_cur_pic_num)
+{
+    WORD32 i;
+    struct dpb_info_t *ps_dpb_info = ps_dpb_mgr->as_dpb_info;
+    UWORD8 u1_picture_type = ps_pic_buf->u1_picturetype;
+    /* Find an unused dpb location */
+    for(i = 0; i < MAX_REF_BUFS; i++)
+    {
+        if((ps_dpb_info[i].ps_pic_buf == ps_pic_buf)
+                        && ps_dpb_info[i].u1_used_as_ref)
+        {
+            /* Can occur only for field bottom pictures */
+            ps_dpb_info[i].s_bot_field.u1_reference_info = IS_SHORT_TERM;
+            return 0;
+        }
+
+        if((ps_dpb_info[i].u1_used_as_ref == UNUSED_FOR_REF)
+                        && (ps_dpb_info[i].s_top_field.u1_reference_info
+                                        == UNUSED_FOR_REF)
+                        && (ps_dpb_info[i].s_bot_field.u1_reference_info
+                                        == UNUSED_FOR_REF))
+            break;
+    }
+    if(i == MAX_REF_BUFS)
+    {
+        UWORD32 i4_error_code;
+        i4_error_code = ERROR_DBP_MANAGER_T;
+        return i4_error_code;
+    }
+
+    /* Create dpb info */
+    ps_dpb_info[i].ps_pic_buf = ps_pic_buf;
+    ps_dpb_info[i].ps_prev_short = ps_dpb_mgr->ps_dpb_st_head;
+    ps_dpb_info[i].u1_buf_id = u1_buf_id;
+    ps_dpb_info[i].u1_used_as_ref = TRUE;
+    ps_dpb_info[i].u1_lt_idx = MAX_REF_BUFS + 1;
+    ps_dpb_info[i].i4_frame_num = u4_cur_pic_num;
+    ps_dpb_info[i].ps_pic_buf->i4_frame_num = u4_cur_pic_num;
+
+    /* update the head node of linked list to point to the cur Pic */
+    ps_dpb_mgr->ps_dpb_st_head = ps_dpb_info + i;
+
+    // Increment Short term bufCount
+    ps_dpb_mgr->u1_num_st_ref_bufs++;
+    /* Identify the picture as a short term picture buffer */
+    ps_pic_buf->u1_is_short = IS_SHORT_TERM;
+
+    if((u1_picture_type & 0x03) == FRM_PIC)
+    {
+        ps_dpb_info[i].u1_used_as_ref = IS_SHORT_TERM;
+        ps_dpb_info[i].s_top_field.u1_reference_info = IS_SHORT_TERM;
+        ps_dpb_info[i].s_bot_field.u1_reference_info = IS_SHORT_TERM;
+    }
+
+    if((u1_picture_type & 0x03) == TOP_FLD)
+        ps_dpb_info[i].s_top_field.u1_reference_info = IS_SHORT_TERM;
+
+    if((u1_picture_type & 0x03) == BOT_FLD)
+        ps_dpb_info[i].s_bot_field.u1_reference_info = IS_SHORT_TERM;
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_delete_st_node_or_make_lt \endif
+ *
+ * \brief
+ *    Delete short term ref with a given picNum from the ST linked list or
+ *    make it an LT node
+ *
+ * \param ps_dpb_mgr       DPB manager owning the ST list (ps_dpb_st_head,
+ *                         chained through ps_prev_short)
+ * \param i4_pic_num       picNum to look for; in field mode the node is
+ *                         located by i4_pic_num >> 1 and the field by the
+ *                         exact picNum
+ * \param u4_lt_idx        MAX_REF_BUFS + 1 requests plain removal; any other
+ *                         value is the long term index to assign
+ * \param u1_fld_pic_flag  non-zero when operating on a single field
+ *
+ * \return
+ *    OK (or 0) on success. ERROR_DBP_MANAGER_T when no node matches and
+ *    there are no gaps, or when the gap lookup leaves its delete flag set;
+ *    otherwise the error code of ih264d_delete_gap_frm_mmco(),
+ *    ih264d_delete_lt_node() or ih264d_insert_lt_node().
+ *
+ * \note
+ *    Common parts to MMCO==1 and MMCO==3 have been combined here.
+ *    The co-located zero flag buffer of the affected picture (or field
+ *    half) is cleared with memset.
+ **************************************************************************
+ */
+WORD32 ih264d_delete_st_node_or_make_lt(dpb_manager_t *ps_dpb_mgr,
+                                        WORD32 i4_pic_num,
+                                        UWORD32 u4_lt_idx,
+                                        UWORD8 u1_fld_pic_flag)
+{
+    WORD32 i;
+    struct dpb_info_t *ps_next_dpb;
+    WORD32 i4_frame_num = i4_pic_num;
+    struct dpb_info_t *ps_unmark_node = NULL;
+    UWORD8 u1_del_node = 0, u1_del_st = 0;
+    UWORD8 u1_reference_type = UNUSED_FOR_REF;
+    WORD32 ret;
+
+    if(u1_fld_pic_flag)
+    {
+        i4_frame_num = i4_frame_num >> 1;
+
+        if(u4_lt_idx == (MAX_REF_BUFS + 1))
+            u1_reference_type = UNUSED_FOR_REF;
+        else
+            u1_reference_type = IS_LONG_TERM;
+    }
+
+    //Find the node with matching picNum; ps_next_dpb ends up as its predecessor (or the head itself)
+    ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+    if((WORD32)ps_next_dpb->i4_frame_num == i4_frame_num)
+    {
+        ps_unmark_node = ps_next_dpb;
+    }
+    else
+    {
+        for(i = 1; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
+        {
+            if((WORD32)ps_next_dpb->ps_prev_short->i4_frame_num == i4_frame_num)
+                break;
+            ps_next_dpb = ps_next_dpb->ps_prev_short;
+        }
+
+        if(i == ps_dpb_mgr->u1_num_st_ref_bufs)
+        {
+            if(ps_dpb_mgr->u1_num_gaps)
+            {
+                ret = ih264d_delete_gap_frm_mmco(ps_dpb_mgr, i4_frame_num, &u1_del_st);
+                if(ret != OK)
+                    return ret;
+            }
+            else
+            {
+                UWORD32 i4_error_code;
+                i4_error_code = ERROR_DBP_MANAGER_T;
+
+                return i4_error_code;
+            }
+
+            if(u1_del_st)
+            {
+                UWORD32 i4_error_code;
+                i4_error_code = ERROR_DBP_MANAGER_T;
+                return i4_error_code;
+            }
+            else
+            {
+                return 0;
+            }
+        }
+        else
+            ps_unmark_node = ps_next_dpb->ps_prev_short;
+    }
+
+    if(u1_fld_pic_flag)
+    {
+        /* Mark the corresponding field ( top or bot) as  */
+        /* UNUSED_FOR_REF or IS_LONG_TERM depending on    */
+        /* u1_reference_type.                             */
+        if(ps_unmark_node->s_top_field.i4_pic_num == i4_pic_num)
+        {
+            ps_unmark_node->s_top_field.u1_reference_info = u1_reference_type;
+            ps_unmark_node->s_top_field.u1_long_term_frame_idx = u4_lt_idx;
+            {
+                UWORD8 *pu1_src = ps_unmark_node->ps_pic_buf->pu1_col_zero_flag;
+                WORD32 i4_size = ((ps_dpb_mgr->u2_pic_wd
+                                * ps_dpb_mgr->u2_pic_ht) >> 5);
+                /* memset the first (wd * ht) >> 5 bytes of the colocated zero flag buffer */
+                memset(pu1_src, 0, i4_size);
+            }
+        }
+
+        else if(ps_unmark_node->s_bot_field.i4_pic_num == i4_pic_num)
+        {
+
+            ps_unmark_node->s_bot_field.u1_reference_info = u1_reference_type;
+            ps_unmark_node->s_bot_field.u1_long_term_frame_idx = u4_lt_idx;
+            {
+                UWORD8 *pu1_src =
+                                ps_unmark_node->ps_pic_buf->pu1_col_zero_flag
+                                                + ((ps_dpb_mgr->u2_pic_wd
+                                                                * ps_dpb_mgr->u2_pic_ht)
+                                                                >> 5);
+                WORD32 i4_size = ((ps_dpb_mgr->u2_pic_wd
+                                * ps_dpb_mgr->u2_pic_ht) >> 5);
+                /* memset the next (wd * ht) >> 5 bytes of the colocated zero flag buffer */
+                memset(pu1_src, 0, i4_size);
+            }
+        }
+        ps_unmark_node->u1_used_as_ref =
+                        ps_unmark_node->s_top_field.u1_reference_info
+                                        | ps_unmark_node->s_bot_field.u1_reference_info;
+    }
+    else
+    {
+        ps_unmark_node->u1_used_as_ref = UNUSED_FOR_REF;
+        ps_unmark_node->s_top_field.u1_reference_info = UNUSED_FOR_REF;
+        ps_unmark_node->s_bot_field.u1_reference_info = UNUSED_FOR_REF;
+
+        {
+            UWORD8 *pu1_src = ps_unmark_node->ps_pic_buf->pu1_col_zero_flag;
+
+            WORD32 i4_size = ((ps_dpb_mgr->u2_pic_wd
+                            * ps_dpb_mgr->u2_pic_ht) >> 4);
+            /* memset (wd * ht) >> 4 bytes of the colocated zero flag buffer */
+            memset(pu1_src, 0, i4_size);
+        }
+    }
+
+    if(!(ps_unmark_node->u1_used_as_ref & IS_SHORT_TERM))
+    {
+        if(ps_unmark_node == ps_dpb_mgr->ps_dpb_st_head)
+            ps_dpb_mgr->ps_dpb_st_head = ps_next_dpb->ps_prev_short;
+        else
+            ps_next_dpb->ps_prev_short = ps_unmark_node->ps_prev_short; //update link
+        ps_dpb_mgr->u1_num_st_ref_bufs--; //decrement ST buf count
+        u1_del_node = 1;
+    }
+
+    if(u4_lt_idx == MAX_REF_BUFS + 1)
+    {
+        if(u1_del_node)
+        {
+            // Release the physical buffer
+            ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                        ps_unmark_node->u1_buf_id);
+            ps_unmark_node->ps_prev_short = NULL;
+        }
+    }
+    else
+    {
+        WORD32 i4_status; /* "not found" flag of ih264d_delete_lt_node; not examined here */
+        //If another node has the same LT index, delete that node
+        ret = ih264d_delete_lt_node(ps_dpb_mgr, u4_lt_idx,
+                                    u1_fld_pic_flag, ps_unmark_node, &i4_status);
+        if(ret != OK)
+            return ret;
+        // Now insert the short term node as a long term node
+        ret = ih264d_insert_lt_node(ps_dpb_mgr, ps_unmark_node, u4_lt_idx,
+                                    u1_fld_pic_flag);
+        if(ret != OK)
+            return ret;
+    }
+    return OK;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_reset_ref_bufs \endif
+ *
+ * \brief
+ *    Called if MMCO==5/7 or on the first slice of an IDR picture
+ *
+ * \param ps_dpb_mgr  DPB manager to reset
+ *
+ * \return
+ *    none
+ *
+ * \note
+ *    Every as_dpb_info entry with u1_used_as_ref set is cleared and its
+ *    buffers are released through ih264d_free_ref_pic_mv_bufs(). Both list
+ *    heads and counts are reset, and all gap bookkeeping
+ *    (u1_num_gaps, ai4_gaps_start_frm_num, ai4_gaps_end_frm_num,
+ *    ai1_gaps_per_seq) is cleared.
+ **************************************************************************
+ */
+void ih264d_reset_ref_bufs(dpb_manager_t *ps_dpb_mgr)
+{
+    WORD32 i;
+    struct dpb_info_t *ps_dpb_info = ps_dpb_mgr->as_dpb_info;
+
+    for(i = 0; i < MAX_REF_BUFS; i++)
+    {
+        if(ps_dpb_info[i].u1_used_as_ref)
+        {
+            ps_dpb_info[i].u1_used_as_ref = UNUSED_FOR_REF;
+            ps_dpb_info[i].u1_lt_idx = MAX_REF_BUFS + 1;
+            ps_dpb_info[i].ps_prev_short = NULL;
+            ps_dpb_info[i].ps_prev_long = NULL;
+            ps_dpb_info[i].ps_pic_buf = NULL;
+            ps_dpb_info[i].s_top_field.u1_reference_info = UNUSED_FOR_REF;
+            ps_dpb_info[i].s_bot_field.u1_reference_info = UNUSED_FOR_REF;
+            ps_dpb_info[i].s_top_field.u1_long_term_frame_idx = MAX_REF_BUFS + 1;
+            ps_dpb_info[i].s_bot_field.u1_long_term_frame_idx = MAX_REF_BUFS + 1;
+
+            //Release physical buffer
+            ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                        ps_dpb_info[i].u1_buf_id);
+        }
+    }
+    ps_dpb_mgr->u1_num_st_ref_bufs = ps_dpb_mgr->u1_num_lt_ref_bufs = 0;
+    ps_dpb_mgr->ps_dpb_st_head = NULL;
+    ps_dpb_mgr->ps_dpb_ht_head = NULL;
+
+    /* release all gaps */
+    ps_dpb_mgr->u1_num_gaps = 0;
+    for(i = 0; i < MAX_FRAMES; i++)
+    {
+        ps_dpb_mgr->ai4_gaps_start_frm_num[i] = INVALID_FRAME_NUM;
+        ps_dpb_mgr->ai4_gaps_end_frm_num[i] = 0;
+        ps_dpb_mgr->ai1_gaps_per_seq[i] = 0;
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_update_default_index_list \endif
+ *
+ * \brief
+ *    create the default index list after an MMCO
+ *
+ * \param ps_dpb_mgr  DPB manager; ps_def_dpb[] is filled with the picture
+ *                    buffers of the u1_num_st_ref_bufs short term nodes
+ *                    (walking ps_prev_short from ps_dpb_st_head) followed by
+ *                    the u1_num_lt_ref_bufs long term nodes (walking
+ *                    ps_prev_long from ps_dpb_ht_head)
+ *
+ * \return
+ *    always 0; no error is detected here
+ *
+ * \note
+ *    NOTE(review): the walks trust the two counts; a list shorter than its
+ *    count would dereference a NULL link - confirm the invariant is
+ *    maintained by all callers.
+ **************************************************************************
+ */
+WORD32 ih264d_update_default_index_list(dpb_manager_t *ps_dpb_mgr)
+{
+    WORD32 i;
+    struct dpb_info_t *ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+
+    for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
+    {
+        ps_dpb_mgr->ps_def_dpb[i] = ps_next_dpb->ps_pic_buf;
+        ps_next_dpb = ps_next_dpb->ps_prev_short;
+    }
+
+    ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
+    for(;i< ps_dpb_mgr->u1_num_st_ref_bufs + ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
+    {
+        ps_dpb_mgr->ps_def_dpb[i] = ps_next_dpb->ps_pic_buf;
+        ps_next_dpb = ps_next_dpb->ps_prev_long;
+    }
+    return 0;
+}
+
+/*!
+ ************************************************************************** + * \if Function name : ref_idx_reordering \endif + * + * \brief + * Parse the bitstream and reorder indices for the current slice + * + * \return + * 0 - if no_error; -1 - error + * + * \note + * Called only if ref_idx_reordering_flag_l0 is decoded as 1 + * Remove error checking for unmatching picNum or LTIndex later (if not needed) + * \para + * This section implements 7.3.3.1 and 8.2.6.4 + * Uses the default index list as the starting point and + * remaps the picNums sent to the next higher index in the + * modified list. The unmodified ones are copied from the + * default to modified list retaining their order in the default list. + * + ************************************************************************** + */ +WORD32 ih264d_ref_idx_reordering(dec_struct_t *ps_dec, UWORD8 uc_lx) +{ + dpb_manager_t *ps_dpb_mgr = ps_dec->ps_dpb_mgr; + UWORD16 u4_cur_pic_num = ps_dec->ps_cur_slice->u2_frame_num; + /*< Maximum Picture Number Minus 1 */ + UWORD16 ui_max_frame_num = + ps_dec->ps_cur_sps->u2_u4_max_pic_num_minus1 + 1; + + WORD32 i; + UWORD32 ui_remapIdc, ui_nextUev; + WORD16 u2_pred_frame_num = u4_cur_pic_num; + WORD32 i_temp; + UWORD16 u2_def_mod_flag = 0; /* Flag to keep track of which indices have been remapped */ + UWORD8 modCount = 0; + UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstrm_ofst = &ps_dec->ps_bitstrm->u4_ofst; + dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice; + UWORD8 u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag; + + if(u1_field_pic_flag) + { + u4_cur_pic_num = u4_cur_pic_num * 2 + 1; + ui_max_frame_num = ui_max_frame_num * 2; + } + + u2_pred_frame_num = u4_cur_pic_num; + + ui_remapIdc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + + while(ui_remapIdc != 3) + { + ui_nextUev = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(ui_remapIdc != 2) + { + ui_nextUev = ui_nextUev + 1; + if(ui_remapIdc == 0) + { + // 
diffPicNum is -ve + i_temp = u2_pred_frame_num - ui_nextUev; + if(i_temp < 0) + i_temp += ui_max_frame_num; + } + else + { + // diffPicNum is +ve + i_temp = u2_pred_frame_num + ui_nextUev; + if(i_temp >= ui_max_frame_num) + i_temp -= ui_max_frame_num; + } + /* Find the dpb with the matching picNum (picNum==frameNum for framePic) */ + + if(i_temp > u4_cur_pic_num) + i_temp = i_temp - ui_max_frame_num; + + for(i = 0; i < (ps_cur_slice->u1_initial_list_size[uc_lx]); i++) + { + if(ps_dpb_mgr->ps_init_dpb[uc_lx][i]->i4_pic_num == i_temp) + break; + } + if(i == (ps_cur_slice->u1_initial_list_size[uc_lx])) + { + UWORD32 i4_error_code; + i4_error_code = ERROR_DBP_MANAGER_T; + return i4_error_code; + } + + u2_def_mod_flag |= (1 << i); + ps_dpb_mgr->ps_mod_dpb[uc_lx][modCount++] = + ps_dpb_mgr->ps_init_dpb[uc_lx][i]; + u2_pred_frame_num = i_temp; //update predictor to be the picNum just obtained + } + else //2 + { + UWORD8 u1_lt_idx = (UWORD8)ui_nextUev; + + for(i = 0; i < (ps_cur_slice->u1_initial_list_size[uc_lx]); i++) + { + if(!ps_dpb_mgr->ps_init_dpb[uc_lx][i]->u1_is_short) + { + if(ps_dpb_mgr->ps_init_dpb[uc_lx][i]->u1_long_term_pic_num + == u1_lt_idx) + break; + } + } + if(i == (ps_cur_slice->u1_initial_list_size[uc_lx])) + { + UWORD32 i4_error_code; + i4_error_code = ERROR_DBP_MANAGER_T; + return i4_error_code; + } + + u2_def_mod_flag |= (1 << i); + ps_dpb_mgr->ps_mod_dpb[uc_lx][modCount++] = + ps_dpb_mgr->ps_init_dpb[uc_lx][i]; + } + + ui_remapIdc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + /* Get the remapping_idc - 0/1/2/3 */ + } + + //Handle the ref indices that were not remapped + for(i = 0; i < (ps_cur_slice->u1_num_ref_idx_lx_active[uc_lx]); i++) + { + if(!(u2_def_mod_flag & (1 << i))) + ps_dpb_mgr->ps_mod_dpb[uc_lx][modCount++] = + ps_dpb_mgr->ps_init_dpb[uc_lx][i]; + } + return OK; +} +/*! 
+ ************************************************************************** + * \if Function name : ih264d_read_mmco_commands \endif + * + * \brief + * Parses MMCO commands and stores them in a structure for later use. + * + * \return + * 0 - No error; -1 - Error + * + * \note + * This function stores MMCO commands in structure only for the first time. + * In case of MMCO commands being issued for same Picture Number, they are + * just parsed and not stored them in the structure. + * + ************************************************************************** + */ +WORD32 ih264d_read_mmco_commands(struct _DecStruct * ps_dec) +{ + dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm; + dpb_commands_t *ps_dpb_cmds = ps_dec->ps_dpb_cmds; + dec_slice_params_t * ps_slice = ps_dec->ps_cur_slice; + WORD32 j; + UWORD8 u1_buf_mode; + struct MMCParams *ps_mmc_params; + UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst; + UWORD32 u4_bit_ofst = ps_dec->ps_bitstrm->u4_ofst; + + ps_slice->u1_mmco_equalto5 = 0; + { + if(ps_dec->u1_nal_unit_type == IDR_SLICE_NAL) + { + ps_slice->u1_no_output_of_prior_pics_flag = + ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("SH: no_output_of_prior_pics_flag", + ps_slice->u1_no_output_of_prior_pics_flag); + ps_slice->u1_long_term_reference_flag = ih264d_get_bit_h264( + ps_bitstrm); + COPYTHECONTEXT("SH: long_term_reference_flag", + ps_slice->u1_long_term_reference_flag); + ps_dpb_cmds->u1_idr_pic = 1; + ps_dpb_cmds->u1_no_output_of_prior_pics_flag = + ps_slice->u1_no_output_of_prior_pics_flag; + ps_dpb_cmds->u1_long_term_reference_flag = + ps_slice->u1_long_term_reference_flag; + } + else + { + u1_buf_mode = ih264d_get_bit_h264(ps_bitstrm); //0 - sliding window; 1 - arbitrary + COPYTHECONTEXT("SH: adaptive_ref_pic_buffering_flag", u1_buf_mode); + ps_dpb_cmds->u1_buf_mode = u1_buf_mode; + j = 0; + + if(u1_buf_mode == 1) + { + UWORD32 u4_mmco; + UWORD32 u4_diff_pic_num; + UWORD32 
u4_lt_idx, u4_max_lt_idx; + + u4_mmco = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + while(u4_mmco != END_OF_MMCO) + { + ps_mmc_params = &ps_dpb_cmds->as_mmc_params[j]; + ps_mmc_params->u4_mmco = u4_mmco; + switch(u4_mmco) + { + case MARK_ST_PICNUM_AS_NONREF: + u4_diff_pic_num = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + //Get absDiffPicnumMinus1 + ps_mmc_params->u4_diff_pic_num = u4_diff_pic_num; + break; + + case MARK_LT_INDEX_AS_NONREF: + u4_lt_idx = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_mmc_params->u4_lt_idx = u4_lt_idx; + break; + + case MARK_ST_PICNUM_AS_LT_INDEX: + u4_diff_pic_num = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_mmc_params->u4_diff_pic_num = u4_diff_pic_num; + u4_lt_idx = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_mmc_params->u4_lt_idx = u4_lt_idx; + break; + + case SET_MAX_LT_INDEX: + { + u4_max_lt_idx = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_mmc_params->u4_max_lt_idx_plus1 = u4_max_lt_idx; + break; + } + case RESET_REF_PICTURES: + { + ps_slice->u1_mmco_equalto5 = 1; + break; + } + + case SET_LT_INDEX: + u4_lt_idx = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_mmc_params->u4_lt_idx = u4_lt_idx; + break; + + default: + break; + } + u4_mmco = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + + j++; + } + ps_dpb_cmds->u1_num_of_commands = j; + + } + } + ps_dpb_cmds->u1_dpb_commands_read = 1; + ps_dpb_cmds->u1_dpb_commands_read_slc = 1; + + } + u4_bit_ofst = ps_dec->ps_bitstrm->u4_ofst - u4_bit_ofst; + return u4_bit_ofst; +} + +/*! 
+ **************************************************************************
+ * \if Function name : ih264d_do_mmco_buffer \endif
+ *
+ * \brief
+ *    Perform decoded picture buffer memory management control operations
+ *
+ * \param ps_dpb_cmds               stored marking commands (u1_buf_mode,
+ *                                  u1_num_of_commands, as_mmc_params)
+ * \param ps_dpb_mgr                DPB manager that is updated
+ * \param u1_numRef_frames_for_seq  num_ref_frames from the active SPS; the
+ *                                  sliding window acts when short term +
+ *                                  long term + gap count equals it
+ * \param u4_cur_pic_num            frame number of the current picture
+ * \param u2_u4_max_pic_num_minus1  unused
+ * \param u1_nal_unit_type          unused
+ * \param ps_pic_buf                picture buffer of the current picture
+ * \param u1_buf_id                 buffer id of the current picture
+ * \param u1_fld_pic_flag           non-zero for field pictures
+ * \param u1_curr_pic_in_err        non-zero suppresses the short term
+ *                                  insertion when the only short term node
+ *                                  already carries u4_cur_pic_num
+ *
+ * \return
+ *    OK on success; ERROR_DBP_MANAGER_T or the error code of a helper
+ *    otherwise
+ *
+ * \note
+ *    The MMCOs are taken from ps_dpb_cmds; no bitstream parsing happens in
+ *    this function. Unless the picture was made long term by SET_LT_INDEX
+ *    (or insertion was suppressed), the current picture is finally added
+ *    as a short term reference through ih264d_insert_st_node().
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_do_mmco_buffer(dpb_commands_t *ps_dpb_cmds,
+                          dpb_manager_t *ps_dpb_mgr,
+                          UWORD8 u1_numRef_frames_for_seq, /*!< num_ref_frames from active SeqParSet*/
+                          UWORD32 u4_cur_pic_num,
+                          UWORD32 u2_u4_max_pic_num_minus1,
+                          UWORD8 u1_nal_unit_type,
+                          struct pic_buffer_t *ps_pic_buf,
+                          UWORD8 u1_buf_id,
+                          UWORD8 u1_fld_pic_flag,
+                          UWORD8 u1_curr_pic_in_err)
+{
+    WORD32 i;
+    UWORD8 u1_buf_mode, u1_marked_lt;
+    struct dpb_info_t *ps_next_dpb;
+    UWORD8 u1_num_gaps;
+    UWORD8 u1_del_node = 1;
+    UWORD8 u1_insert_st_pic = 1;
+    WORD32 ret;
+    UNUSED(u1_nal_unit_type);
+    UNUSED(u2_u4_max_pic_num_minus1);
+    u1_buf_mode = ps_dpb_cmds->u1_buf_mode; //0 - sliding window; 1 - Adaptive
+    u1_marked_lt = 0;
+    u1_num_gaps = ps_dpb_mgr->u1_num_gaps;
+
+    if(!u1_buf_mode)
+    {
+        //Sliding window - implements 8.2.5.3
+        if((ps_dpb_mgr->u1_num_st_ref_bufs
+                        + ps_dpb_mgr->u1_num_lt_ref_bufs + u1_num_gaps)
+                        == u1_numRef_frames_for_seq)
+        {
+            UWORD8 u1_new_node_flag = 1;
+            if((0 == ps_dpb_mgr->u1_num_st_ref_bufs) && (0 == u1_num_gaps))
+            {
+                UWORD32 i4_error_code;
+                i4_error_code = ERROR_DBP_MANAGER_T;
+                return i4_error_code;
+            }
+
+            // Chase the links to reach the last but one picNum, if available
+            ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+
+            if(ps_dpb_mgr->u1_num_st_ref_bufs > 1)
+            {
+                if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num)
+                {
+                    /* In case of field pictures top_field has been allocated  */
+                    /* picture buffer and complementary bottom field pair comes */
+                    /* then the sliding window mechanism should not allocate a  */
+                    /* new node                                                 */
+                    u1_new_node_flag = 0;
+                }
+
+                for(i = 1; i < (ps_dpb_mgr->u1_num_st_ref_bufs - 1); i++)
+                {
+                    if(ps_next_dpb == NULL)
+                    {
+                        UWORD32 i4_error_code;
+                        i4_error_code = ERROR_DBP_MANAGER_T;
+                        return i4_error_code;
+                    }
+                    if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num)
+                    {
+                        /* In case of field pictures top_field has been allocated  */
+                        /* picture buffer and complementary bottom field pair comes */
+                        /* then the sliding window mechanism should not allocate a  */
+                        /* new node                                                 */
+                        u1_new_node_flag = 0;
+                    }
+                    ps_next_dpb = ps_next_dpb->ps_prev_short;
+                }
+
+                if(ps_next_dpb->ps_prev_short->ps_prev_short != NULL) /* the node after ps_next_dpb must be the list tail */
+                {
+                    UWORD32 i4_error_code;
+                    i4_error_code = ERROR_DBP_MANAGER_T;
+                    return i4_error_code;
+                }
+
+                if(u1_new_node_flag)
+                {
+                    if(u1_num_gaps)
+                    {
+                        ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
+                                                            ps_next_dpb->ps_prev_short->i4_frame_num,
+                                                            &u1_del_node);
+                        if(ret != OK)
+                            return ret;
+                    }
+
+                    if(u1_del_node) /* no gap was dropped instead: drop the tail of the ST list */
+                    {
+                        ps_dpb_mgr->u1_num_st_ref_bufs--;
+                        ps_next_dpb->ps_prev_short->u1_used_as_ref =
+                                        UNUSED_FOR_REF;
+                        ps_next_dpb->ps_prev_short->s_top_field.u1_reference_info =
+                                        UNUSED_FOR_REF;
+                        ps_next_dpb->ps_prev_short->s_bot_field.u1_reference_info =
+                                        UNUSED_FOR_REF;
+                        ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                                    ps_next_dpb->ps_prev_short->u1_buf_id);
+                        ps_next_dpb->ps_prev_short->ps_pic_buf = NULL;
+                        ps_next_dpb->ps_prev_short = NULL;
+                    }
+                }
+            }
+            else
+            {
+                if(ps_dpb_mgr->u1_num_st_ref_bufs)
+                {
+                    ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
+                                                       ps_next_dpb->i4_frame_num,
+                                                       &u1_del_node);
+                    if(ret != OK)
+                        return ret;
+                    if((ps_next_dpb->i4_frame_num != (WORD32)u4_cur_pic_num)
+                                    && u1_del_node)
+                    {
+                        ps_dpb_mgr->u1_num_st_ref_bufs--;
+                        ps_next_dpb->u1_used_as_ref = FALSE;
+                        ps_next_dpb->s_top_field.u1_reference_info =
+                                        UNUSED_FOR_REF;
+                        ps_next_dpb->s_bot_field.u1_reference_info =
+                                        UNUSED_FOR_REF;
+                        ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                                    ps_next_dpb->u1_buf_id);
+                        ps_next_dpb->ps_pic_buf = NULL;
+                        ps_next_dpb->ps_prev_short = NULL;
+                        ps_dpb_mgr->ps_dpb_st_head = NULL;
+                        ps_next_dpb = NULL;
+                    }
+                    else if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num)
+                    {
+                        if(u1_curr_pic_in_err)
+                        {
+                            u1_insert_st_pic = 0;
+                        }
+                        else if(ps_dpb_mgr->u1_num_st_ref_bufs > 0)
+                        {
+                            ps_dpb_mgr->u1_num_st_ref_bufs--;
+                            ps_next_dpb->u1_used_as_ref = FALSE;
+                            ps_next_dpb->s_top_field.u1_reference_info =
+                                            UNUSED_FOR_REF;
+                            ps_next_dpb->s_bot_field.u1_reference_info =
+                                            UNUSED_FOR_REF;
+                            ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                                        ps_next_dpb->u1_buf_id);
+                            ps_next_dpb->ps_pic_buf = NULL;
+                            ps_next_dpb = NULL;
+                        }
+                    }
+                }
+                else
+                {
+                    ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
+                                                        INVALID_FRAME_NUM,
+                                                        &u1_del_node);
+                    if(ret != OK)
+                        return ret;
+                    if(u1_del_node) /* nothing but gaps was expected, yet no gap could be dropped */
+                    {
+                        UWORD32 i4_error_code;
+                        i4_error_code = ERROR_DBP_MANAGER_T;
+                        return i4_error_code;
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        //Adaptive memory control - implements 8.2.5.4
+        UWORD32 u4_mmco;
+        UWORD32 u4_diff_pic_num;
+        WORD32 i4_pic_num;
+        UWORD32 u4_lt_idx;
+        WORD32 j;
+        struct MMCParams *ps_mmc_params;
+
+        for(j = 0; j < ps_dpb_cmds->u1_num_of_commands; j++)
+        {
+            ps_mmc_params = &ps_dpb_cmds->as_mmc_params[j];
+            u4_mmco = ps_mmc_params->u4_mmco; //Get MMCO
+
+            switch(u4_mmco)
+            {
+                case MARK_ST_PICNUM_AS_NONREF:
+                {
+
+                    {
+                        UWORD32 i4_cur_pic_num = u4_cur_pic_num;
+                        u4_diff_pic_num = ps_mmc_params->u4_diff_pic_num; //Get absDiffPicnumMinus1
+                        if(u1_fld_pic_flag)
+                            i4_cur_pic_num = i4_cur_pic_num * 2 + 1;
+                        i4_pic_num = i4_cur_pic_num - (u4_diff_pic_num + 1);
+                    }
+
+                    if(ps_dpb_mgr->u1_num_st_ref_bufs > 0)
+                    {
+                        ret = ih264d_delete_st_node_or_make_lt(ps_dpb_mgr,
+                                                               i4_pic_num,
+                                                               MAX_REF_BUFS + 1,
+                                                               u1_fld_pic_flag);
+                        if(ret != OK)
+                            return ret;
+                    }
+                    else
+                    {
+                        UWORD8 u1_dummy;
+                        ret = ih264d_delete_gap_frm_mmco(ps_dpb_mgr, i4_pic_num, &u1_dummy);
+                        if(ret != OK)
+                            return ret;
+                    }
+                    break;
+                }
+                case MARK_LT_INDEX_AS_NONREF:
+                {
+                    WORD32 i4_status;
+                    u4_lt_idx = ps_mmc_params->u4_lt_idx; //Get long term index
+                    ret = ih264d_delete_lt_node(ps_dpb_mgr,
+                                                u4_lt_idx,
+                                                u1_fld_pic_flag,
+                                                0, &i4_status);
+                    if(ret != OK)
+                        return ret;
+                    if(i4_status)
+                    {
+                        UWORD32 i4_error_code;
+                        i4_error_code = ERROR_DBP_MANAGER_T;
+                        return i4_error_code;
+                    }
+                    break;
+                }
+
+                case MARK_ST_PICNUM_AS_LT_INDEX:
+                {
+                    {
+                        UWORD32 i4_cur_pic_num = u4_cur_pic_num;
+                        u4_diff_pic_num = ps_mmc_params->u4_diff_pic_num; //Get absDiffPicnumMinus1
+                        if(u1_fld_pic_flag)
+                            i4_cur_pic_num = i4_cur_pic_num * 2 + 1;
+
+                        i4_pic_num = i4_cur_pic_num - (u4_diff_pic_num + 1);
+                    }
+
+                    u4_lt_idx = ps_mmc_params->u4_lt_idx; //Get long term index
+                    if(ps_dpb_mgr->u1_num_st_ref_bufs > 0)
+                    {
+                        ret = ih264d_delete_st_node_or_make_lt(ps_dpb_mgr,
+                                                               i4_pic_num, u4_lt_idx,
+                                                               u1_fld_pic_flag);
+                        if(ret != OK)
+                            return ret;
+                    }
+                    break;
+                }
+                case SET_MAX_LT_INDEX:
+                {
+                    UWORD8 uc_numLT = ps_dpb_mgr->u1_num_lt_ref_bufs;
+                    u4_lt_idx = ps_mmc_params->u4_max_lt_idx_plus1; //Get Max_long_term_index_plus1
+                    if(u4_lt_idx < ps_dpb_mgr->u1_max_lt_pic_idx_plus1
+                                    && uc_numLT > 0)
+                    {
+                        struct dpb_info_t *ps_nxtDPB;
+                        //Set all LT buffers with index >= u4_lt_idx to nonreference
+                        ps_nxtDPB = ps_dpb_mgr->ps_dpb_ht_head;
+                        ps_next_dpb = ps_nxtDPB->ps_prev_long;
+                        if(ps_nxtDPB->u1_lt_idx >= u4_lt_idx)
+                        {
+                            i = 0;
+                            ps_dpb_mgr->ps_dpb_ht_head = NULL;
+                        }
+                        else
+                        {
+                            for(i = 1; i < uc_numLT; i++)
+                            {
+                                if(ps_next_dpb->u1_lt_idx >= u4_lt_idx)
+                                    break;
+                                ps_nxtDPB = ps_next_dpb;
+                                ps_next_dpb = ps_next_dpb->ps_prev_long;
+                            }
+                            ps_nxtDPB->ps_prev_long = NULL; //Terminate the link of the closest LTIndex that is <=Max
+                        }
+                        ps_dpb_mgr->u1_num_lt_ref_bufs = i; /* i nodes survive; the rest are released below */
+                        if(i == 0)
+                            ps_next_dpb = ps_nxtDPB;
+
+                        for(; i < uc_numLT; i++)
+                        {
+                            ps_nxtDPB = ps_next_dpb;
+                            ps_nxtDPB->u1_lt_idx = MAX_REF_BUFS + 1;
+                            ps_nxtDPB->u1_used_as_ref = UNUSED_FOR_REF;
+                            ps_nxtDPB->s_top_field.u1_reference_info =
+                                            UNUSED_FOR_REF;
+                            ps_nxtDPB->s_bot_field.u1_reference_info =
+                                            UNUSED_FOR_REF;
+
+                            ps_nxtDPB->ps_pic_buf = NULL;
+                            //Release buffer
+                            ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                                        ps_nxtDPB->u1_buf_id);
+                            ps_next_dpb = ps_nxtDPB->ps_prev_long;
+                            ps_nxtDPB->ps_prev_long = NULL;
+                        }
+                    }
+                    ps_dpb_mgr->u1_max_lt_pic_idx_plus1 = u4_lt_idx;
+
+                    break;
+                }
+                case SET_LT_INDEX:
+                {
+                    u4_lt_idx = ps_mmc_params->u4_lt_idx; //Get long term index
+                    ret = ih264d_insert_st_node(ps_dpb_mgr, ps_pic_buf, u1_buf_id,
+                                                u4_cur_pic_num);
+                    if(ret != OK)
+                        return ret;
+                    ret = ih264d_delete_st_node_or_make_lt(ps_dpb_mgr,
+                                                           u4_cur_pic_num, u4_lt_idx,
+                                                           u1_fld_pic_flag);
+                    if(ret != OK)
+                        return ret;
+                    u1_marked_lt = 1;
+                    break;
+                }
+
+                default:
+                    break;
+            }
+            if(u4_mmco == RESET_REF_PICTURES || u4_mmco == RESET_ALL_PICTURES)
+            {
+                ih264d_reset_ref_bufs(ps_dpb_mgr);
+                u4_cur_pic_num = 0;
+            }
+        }
+    }
+    if(!u1_marked_lt && u1_insert_st_pic)
+    {
+        ret = ih264d_insert_st_node(ps_dpb_mgr, ps_pic_buf, u1_buf_id,
+                                    u4_cur_pic_num);
+        if(ret != OK)
+            return ret;
+    }
+    return OK;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_release_pics_in_dpb                               */
+/*                                                                           */
+/*  Description   : Releases the BUF_MGR_REF hold on picture buffer ids      */
+/*                  0 .. u1_disp_bufs - 1 and on their mapped MV buffers     */
+/*  Inputs        : pv_dec: decoder context (cast to dec_struct_t *)         */
+/*                  u1_disp_bufs: number of picture buffer ids to release    */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*  Outputs       : None                                                     */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 06 2005   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+void ih264d_release_pics_in_dpb(void *pv_dec,
+                                UWORD8 u1_disp_bufs)
+{
+    WORD8 i; /* NOTE(review): signed 8-bit counter against a UWORD8 bound - confirm u1_disp_bufs never exceeds 127 */
+    dec_struct_t *ps_dec = (dec_struct_t *)pv_dec;
+
+    for(i = 0; i < u1_disp_bufs; i++)
+    {
+        ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+                              i,
+                              BUF_MGR_REF);
+        ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
+                              ps_dec->au1_pic_buf_id_mv_buf_id_map[i],
+                              BUF_MGR_REF);
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_delete_gap_frm_sliding                            */
+/*                                                                           */
+/*  Description   : This function deletes a picture from the list of gaps,   */
+/*                  if the frame number of gap frame is lesser than the one  */
+/*                  to be deleted by sliding window                          */
+/*  Inputs        : ps_dpb_mgr: pointer to dpb manager                       */
+/*                  i4_frame_num: frame number of picture that's going to    */
+/*                  be deleted by sliding window, or INVALID_FRAME_NUM       */
+/*                  pu1_del_node: holds 0 if a gap is deleted else 1         */
+/*  Globals       : None                                                     */
+/*  Processing    : Picks a gap sequence, then the least frame number of     */
+/*                  ai4_poc_buf_id_map inside that sequence, and drops it    */
+/*  Outputs       : *pu1_del_node, gap counters and ai4_poc_buf_id_map entry */
+/*  Returns       : OK; ERROR_DBP_MANAGER_T when i4_frame_num is             */
+/*                  INVALID_FRAME_NUM and no valid gap start is found        */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 06 2005   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_delete_gap_frm_sliding(dpb_manager_t *ps_dpb_mgr,
+                                    WORD32 i4_frame_num,
+                                    UWORD8 *pu1_del_node)
+{
+    WORD8 i1_gap_idx, i, j, j_min;
+    WORD32 *pi4_gaps_start_frm_num, *pi4_gaps_end_frm_num, i4_gap_frame_num;
+    WORD32 i4_start_frm_num, i4_end_frm_num;
+    WORD32 i4_max_frm_num;
+    WORD32 i4_frm_num, i4_gap_frm_num_min;
+
+    /* find the least frame num from gaps and current DPB node    */
+    /* Delete the least one; default answer is "no gap deleted"   */
+    *pu1_del_node = 1;
+    if(0 == ps_dpb_mgr->u1_num_gaps)
+        return OK;
+    pi4_gaps_start_frm_num = ps_dpb_mgr->ai4_gaps_start_frm_num;
+    pi4_gaps_end_frm_num = ps_dpb_mgr->ai4_gaps_end_frm_num;
+    i4_gap_frame_num = INVALID_FRAME_NUM;
+    i4_max_frm_num = ps_dpb_mgr->i4_max_frm_num;
+
+    i1_gap_idx = -1; /* -1: no gap sequence selected */
+    if(INVALID_FRAME_NUM != i4_frame_num)
+    {
+        i4_gap_frame_num = i4_frame_num;
+        for(i = 0; i < MAX_FRAMES; i++)
+        {
+            i4_start_frm_num = pi4_gaps_start_frm_num[i];
+            if(INVALID_FRAME_NUM != i4_start_frm_num)
+            {
+                i4_end_frm_num = pi4_gaps_end_frm_num[i];
+                if(i4_end_frm_num < i4_max_frm_num)
+                {
+                    if(i4_start_frm_num <= i4_gap_frame_num)
+                    {
+                        i4_gap_frame_num = i4_start_frm_num;
+                        i1_gap_idx = i;
+                    }
+                }
+                else
+                {
+                    if(((i4_start_frm_num <= i4_gap_frame_num)
+                                    && (i4_gap_frame_num <= i4_max_frm_num))
+                                    || ((i4_start_frm_num >= i4_gap_frame_num)
+                                                    && ((i4_gap_frame_num
+                                                                    + i4_max_frm_num)
+                                                                    >= i4_end_frm_num)))
+                    {
+                        i4_gap_frame_num = i4_start_frm_num;
+                        i1_gap_idx = i;
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        /* no valid short term buffers, delete one gap from the least start */
+        /* of gap sequence                                                  */
+        i4_gap_frame_num = pi4_gaps_start_frm_num[0];
+        i1_gap_idx = 0;
+        for(i = 1; i < MAX_FRAMES; i++)
+        {
+            if(INVALID_FRAME_NUM != pi4_gaps_start_frm_num[i])
+            {
+                if(pi4_gaps_start_frm_num[i] < i4_gap_frame_num)
+                {
+                    i4_gap_frame_num = pi4_gaps_start_frm_num[i];
+                    i1_gap_idx = i;
+                }
+            }
+        }
+        if(INVALID_FRAME_NUM == i4_gap_frame_num)
+        {
+            UWORD32 i4_error_code;
+            i4_error_code = ERROR_DBP_MANAGER_T;
+            return i4_error_code;
+        }
+    }
+
+    if(-1 != i1_gap_idx)
+    {
+        /* find least frame_num in the poc_map, which is in this range */
+        i4_start_frm_num = pi4_gaps_start_frm_num[i1_gap_idx];
+        if(i4_start_frm_num < 0)
+            i4_start_frm_num += i4_max_frm_num;
+        i4_end_frm_num = pi4_gaps_end_frm_num[i1_gap_idx];
+        if(i4_end_frm_num < 0)
+            i4_end_frm_num += i4_max_frm_num;
+
+        i4_gap_frm_num_min = 0xfffffff;
+        j_min = MAX_FRAMES; /* MAX_FRAMES: no map entry found in the range */
+        for(j = 0; j < MAX_FRAMES; j++)
+        {
+            i4_frm_num = ps_dpb_mgr->ai4_poc_buf_id_map[j][2];
+            if((i4_start_frm_num <= i4_frm_num)
+                            && (i4_end_frm_num >= i4_frm_num))
+            {
+                if(i4_frm_num < i4_gap_frm_num_min)
+                {
+                    j_min = j;
+                    i4_gap_frm_num_min = i4_frm_num;
+                }
+            }
+        }
+
+        if(j_min != MAX_FRAMES)
+        {
+
+            ps_dpb_mgr->ai4_poc_buf_id_map[j_min][0] = -1;
+            ps_dpb_mgr->ai4_poc_buf_id_map[j_min][1] = 0x7fffffff;
+            ps_dpb_mgr->ai4_poc_buf_id_map[j_min][2] = GAP_FRAME_NUM;
+            ps_dpb_mgr->i1_gaps_deleted++;
+
+            ps_dpb_mgr->ai1_gaps_per_seq[i1_gap_idx]--;
+            ps_dpb_mgr->u1_num_gaps--;
+            *pu1_del_node = 0; /* a gap was dropped */
+            if(0 == ps_dpb_mgr->ai1_gaps_per_seq[i1_gap_idx])
+            {
+                ps_dpb_mgr->ai4_gaps_start_frm_num[i1_gap_idx] =
+                                INVALID_FRAME_NUM;
+                ps_dpb_mgr->ai4_gaps_end_frm_num[i1_gap_idx] = 0;
+            }
+        }
+    }
+
+    return OK;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_delete_gap_frm_mmco                               */
+/*                                                                           */
+/*  Description   : This function deletes a picture from the list of gaps,   */
+/*                  if the frame number (specified by mmco commands) to be   */
+/*                  deleted is in the range by gap sequence.                 */
+/*                                                                           */
+/*  Inputs        : ps_dpb_mgr: pointer to dpb manager                       */
+/*                  i4_frame_num: frame number of picture that's going to    */
+/*                  be deleted by mmco                                       */
+/*                  pu1_del_node: holds 0 if a gap is deleted else 1         */
+/*  Globals       : None                                                     */
+/*  Processing    : Function searches for frame number lesser in the range   */
+/*                  specified by gap sequence                                */
+/*  Outputs       : None                                                     */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 06 2005   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_delete_gap_frm_mmco(dpb_manager_t *ps_dpb_mgr,
+                                  WORD32 i4_frame_num,
+                                  UWORD8 *pu1_del_node)
+{
+    WORD8 i, j;
+    WORD32 *pi4_start, *pi4_end;
+    WORD32 i4_start_frm_num, i4_end_frm_num, i4_max_frm_num;
+
+    /* find the least frame num from gaps and current DPB node    */
+    /* Delete the gaps                                            */
+    *pu1_del_node = 1;
+    pi4_start = ps_dpb_mgr->ai4_gaps_start_frm_num;
+    pi4_end = ps_dpb_mgr->ai4_gaps_end_frm_num;
+    i4_max_frm_num = ps_dpb_mgr->i4_max_frm_num;
+
+    if(0 == ps_dpb_mgr->u1_num_gaps)
+        return OK;
+
+    if(i4_frame_num < 0)
+        i4_frame_num += i4_max_frm_num;
+    for(i = 0; i < MAX_FRAMES; i++)
+    {
+        i4_start_frm_num = pi4_start[i];
+        if(i4_start_frm_num < 0)
+            i4_start_frm_num += i4_max_frm_num;
+        if(INVALID_FRAME_NUM != i4_start_frm_num)
+        {
+            i4_end_frm_num = pi4_end[i];
+            if(i4_end_frm_num < 0)
+                i4_end_frm_num += i4_max_frm_num;
+
+            if((i4_frame_num >= 
i4_start_frm_num) + && (i4_frame_num <= i4_end_frm_num)) + { + break; + } + else + { + if(((i4_frame_num + i4_max_frm_num) >= i4_start_frm_num) + && ((i4_frame_num + i4_max_frm_num) + <= i4_end_frm_num)) + { + UWORD32 i4_error_code; + i4_error_code = ERROR_DBP_MANAGER_T; + return i4_error_code; + } + } + } + } + + /* find frame_num index, in the poc_map which needs to be deleted */ + for(j = 0; j < MAX_FRAMES; j++) + { + if(i4_frame_num == ps_dpb_mgr->ai4_poc_buf_id_map[j][2]) + break; + } + + if(MAX_FRAMES != i) + { + if(j == MAX_FRAMES) + { + UWORD32 i4_error_code; + i4_error_code = ERROR_DBP_MANAGER_T; + return i4_error_code; + } + + ps_dpb_mgr->ai4_poc_buf_id_map[j][0] = -1; + ps_dpb_mgr->ai4_poc_buf_id_map[j][1] = 0x7fffffff; + ps_dpb_mgr->ai4_poc_buf_id_map[j][2] = GAP_FRAME_NUM; + ps_dpb_mgr->i1_gaps_deleted++; + + ps_dpb_mgr->ai1_gaps_per_seq[i]--; + ps_dpb_mgr->u1_num_gaps--; + *pu1_del_node = 0; + if(0 == ps_dpb_mgr->ai1_gaps_per_seq[i]) + { + ps_dpb_mgr->ai4_gaps_start_frm_num[i] = INVALID_FRAME_NUM; + ps_dpb_mgr->ai4_gaps_end_frm_num[i] = 0; + } + } + else + { + UWORD32 i4_error_code; + i4_error_code = ERROR_DBP_MANAGER_T; + return i4_error_code; + } + + return OK; +} + +/*! 
+ ************************************************************************** + * \if Function name : ih264d_do_mmco_for_gaps \endif + * + * \brief + * Perform decoded picture buffer memory management control operations + * + * \return + * 0 - No error; -1 - Error + * + * \note + * Bitstream is also parsed here to get the MMCOs + * + ************************************************************************** + */ +WORD32 ih264d_do_mmco_for_gaps(dpb_manager_t *ps_dpb_mgr, + UWORD8 u1_num_ref_frames /*!< num_ref_frames from active SeqParSet*/ + ) +{ + struct dpb_info_t *ps_next_dpb; + UWORD8 u1_num_gaps; + UWORD8 u1_st_ref_bufs, u1_lt_ref_bufs, u1_del_node; + WORD8 i; + WORD32 i4_frame_gaps = 1; + WORD32 ret; + + //Sliding window - implements 8.2.5.3, flush out buffers + u1_st_ref_bufs = ps_dpb_mgr->u1_num_st_ref_bufs; + u1_lt_ref_bufs = ps_dpb_mgr->u1_num_lt_ref_bufs; + + while(1) + { + u1_num_gaps = ps_dpb_mgr->u1_num_gaps; + if((u1_st_ref_bufs + u1_lt_ref_bufs + u1_num_gaps + i4_frame_gaps) + > u1_num_ref_frames) + { + if(0 == (u1_st_ref_bufs + u1_num_gaps)) + { + i4_frame_gaps = 0; + ps_dpb_mgr->u1_num_gaps = (u1_num_ref_frames + - u1_lt_ref_bufs); + } + else + { + u1_del_node = 1; + ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head; + + if(u1_st_ref_bufs > 1) + { + for(i = 1; i < (u1_st_ref_bufs - 1); i++) + { + if(ps_next_dpb == NULL) + { + UWORD32 i4_error_code; + i4_error_code = ERROR_DBP_MANAGER_T; + return i4_error_code; + } + ps_next_dpb = ps_next_dpb->ps_prev_short; + } + + if(ps_next_dpb->ps_prev_short->ps_prev_short != NULL) + { + return ERROR_DBP_MANAGER_T; + } + + if(u1_num_gaps) + { + ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr, + ps_next_dpb->ps_prev_short->i4_frame_num, + &u1_del_node); + if(ret != OK) + return ret; + } + + if(u1_del_node) + { + u1_st_ref_bufs--; + ps_next_dpb->ps_prev_short->u1_used_as_ref = + UNUSED_FOR_REF; + ps_next_dpb->ps_prev_short->s_top_field.u1_reference_info = + UNUSED_FOR_REF; + 
ps_next_dpb->ps_prev_short->s_bot_field.u1_reference_info = + UNUSED_FOR_REF; + ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle, + ps_next_dpb->ps_prev_short->u1_buf_id); + ps_next_dpb->ps_prev_short->ps_pic_buf = NULL; + ps_next_dpb->ps_prev_short = NULL; + } + } + else + { + if(u1_st_ref_bufs) + { + if(u1_num_gaps) + { + ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr, + ps_next_dpb->i4_frame_num, + &u1_del_node); + if(ret != OK) + return ret; + } + + if(u1_del_node) + { + u1_st_ref_bufs--; + ps_next_dpb->u1_used_as_ref = FALSE; + ps_next_dpb->s_top_field.u1_reference_info = + UNUSED_FOR_REF; + ps_next_dpb->s_bot_field.u1_reference_info = + UNUSED_FOR_REF; + ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle, + ps_next_dpb->u1_buf_id); + ps_next_dpb->ps_pic_buf = NULL; + ps_next_dpb = NULL; + ps_dpb_mgr->ps_dpb_st_head = NULL; + ps_dpb_mgr->u1_num_st_ref_bufs = u1_st_ref_bufs; + } + } + else + { + ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr, + INVALID_FRAME_NUM, + &u1_del_node); + if(ret != OK) + return ret; + if(u1_del_node) + { + return ERROR_DBP_MANAGER_T; + } + } + } + } + } + else + { + ps_dpb_mgr->u1_num_gaps += i4_frame_gaps; + break; + } + } + + ps_dpb_mgr->u1_num_st_ref_bufs = u1_st_ref_bufs; + + return OK; +} +/****************************************************************************/ +/* */ +/* Function Name : ih264d_free_node_from_dpb */ +/* */ +/* Description : */ +/* */ +/* Inputs : */ +/* */ +/* Globals : */ +/* */ +/* Processing : */ +/* */ +/* Outputs : */ +/* */ +/* Returns : */ +/* */ +/* Known Issues : */ +/* */ +/* Revision History */ +/* */ +/* DD MM YY Author Changes */ +/* Sarat */ +/****************************************************************************/ +/**** Function Added for Error Resilience *****/ +WORD32 ih264d_free_node_from_dpb(dpb_manager_t *ps_dpb_mgr, + UWORD32 u4_cur_pic_num, + UWORD8 u1_numRef_frames_for_seq) +{ + WORD32 i; + UWORD8 u1_num_gaps = ps_dpb_mgr->u1_num_gaps; + struct dpb_info_t 
*ps_next_dpb; + UWORD8 u1_del_node = 1; + WORD32 ret; + + //Sliding window - implements 8.2.5.3 + if((ps_dpb_mgr->u1_num_st_ref_bufs + ps_dpb_mgr->u1_num_lt_ref_bufs + + u1_num_gaps) == u1_numRef_frames_for_seq) + { + UWORD8 u1_new_node_flag = 1; + if((0 == ps_dpb_mgr->u1_num_st_ref_bufs) && (0 == u1_num_gaps)) + { + return ERROR_DBP_MANAGER_T; + } + + // Chase the links to reach the last but one picNum, if available + ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head; + + if(ps_dpb_mgr->u1_num_st_ref_bufs > 1) + { + if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num) + { + /* Incase of filed pictures top_field has been allocated */ + /* picture buffer and complementary bottom field pair comes */ + /* then the sliding window mechanism should not allocate a */ + /* new node */ + u1_new_node_flag = 0; + } + + for(i = 1; i < (ps_dpb_mgr->u1_num_st_ref_bufs - 1); i++) + { + if(ps_next_dpb == NULL) + return ERROR_DBP_MANAGER_T; + + if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num) + { + /* Incase of field pictures top_field has been allocated */ + /* picture buffer and complementary bottom field pair comes */ + /* then the sliding window mechanism should not allocate a */ + /* new node */ + u1_new_node_flag = 0; + } + ps_next_dpb = ps_next_dpb->ps_prev_short; + } + + if(ps_next_dpb->ps_prev_short->ps_prev_short != NULL) + return ERROR_DBP_MANAGER_T; + + if(u1_new_node_flag) + { + if(u1_num_gaps) + { + ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr, + ps_next_dpb->ps_prev_short->i4_frame_num, + &u1_del_node); + if(ret != OK) + return ret; + } + + if(u1_del_node) + { + ps_dpb_mgr->u1_num_st_ref_bufs--; + ps_next_dpb->ps_prev_short->u1_used_as_ref = UNUSED_FOR_REF; + ps_next_dpb->ps_prev_short->s_top_field.u1_reference_info = + UNUSED_FOR_REF; + ps_next_dpb->ps_prev_short->s_bot_field.u1_reference_info = + UNUSED_FOR_REF; + ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle, + ps_next_dpb->ps_prev_short->u1_buf_id); + ps_next_dpb->ps_prev_short->ps_pic_buf = NULL; + 
ps_next_dpb->ps_prev_short = NULL; + } + } + } + else + { + if(ps_dpb_mgr->u1_num_st_ref_bufs) + { + ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr, + ps_next_dpb->i4_frame_num, + &u1_del_node); + if(ret != OK) + return ret; + if((ps_next_dpb->i4_frame_num != (WORD32)u4_cur_pic_num) + && u1_del_node) + { + ps_dpb_mgr->u1_num_st_ref_bufs--; + ps_next_dpb->u1_used_as_ref = FALSE; + ps_next_dpb->s_top_field.u1_reference_info = UNUSED_FOR_REF; + ps_next_dpb->s_bot_field.u1_reference_info = UNUSED_FOR_REF; + ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle, + ps_next_dpb->u1_buf_id); + ps_next_dpb->ps_pic_buf = NULL; + ps_next_dpb = NULL; + } + } + else + { + ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr, INVALID_FRAME_NUM, &u1_del_node); + if(ret != OK) + return ret; + if(u1_del_node) + return ERROR_DBP_MANAGER_T; + } + } + } + return OK; +} +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_delete_nonref_nondisplay_pics */ +/* */ +/* Description : */ +/* */ +/* */ +/* Inputs : */ +/* Globals : */ +/* Processing : */ +/* */ +/* Outputs : */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 05 06 2007 Varun Draft */ +/* */ +/*****************************************************************************/ + +void ih264d_delete_nonref_nondisplay_pics(dpb_manager_t *ps_dpb_mgr) +{ + WORD8 i; + WORD32 (*i4_poc_buf_id_map)[3] = ps_dpb_mgr->ai4_poc_buf_id_map; + + /* remove all gaps marked as unused for ref */ + for(i = 0; (i < MAX_FRAMES) && ps_dpb_mgr->i1_gaps_deleted; i++) + { + if(GAP_FRAME_NUM == i4_poc_buf_id_map[i][2]) + { + ps_dpb_mgr->i1_gaps_deleted--; + ps_dpb_mgr->i1_poc_buf_id_entries--; + i4_poc_buf_id_map[i][0] = -1; + i4_poc_buf_id_map[i][1] = 0x7fffffff; + i4_poc_buf_id_map[i][2] = 0; + } + } +} +/*****************************************************************************/ +/* */ +/* Function 
Name : ih264d_insert_pic_in_display_list */ +/* */ +/* Description : */ +/* */ +/* */ +/* Inputs : */ +/* Globals : */ +/* Processing : */ +/* */ +/* Outputs : */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 05 06 2007 Varun Draft */ +/* */ +/*****************************************************************************/ + +WORD32 ih264d_insert_pic_in_display_list(dpb_manager_t *ps_dpb_mgr, + UWORD8 u1_buf_id, + WORD32 i4_display_poc, + UWORD32 u4_frame_num) +{ + WORD8 i; + WORD32 (*i4_poc_buf_id_map)[3] = ps_dpb_mgr->ai4_poc_buf_id_map; + + for(i = 0; i < MAX_FRAMES; i++) + { + /* Find an empty slot */ + if(i4_poc_buf_id_map[i][0] == -1) + { + if(GAP_FRAME_NUM == i4_poc_buf_id_map[i][2]) + ps_dpb_mgr->i1_gaps_deleted--; + else + ps_dpb_mgr->i1_poc_buf_id_entries++; + + i4_poc_buf_id_map[i][0] = u1_buf_id; + i4_poc_buf_id_map[i][1] = i4_display_poc; + i4_poc_buf_id_map[i][2] = u4_frame_num; + + break; + } + } + + if(MAX_FRAMES == i) + { + + UWORD32 i4_error_code; + i4_error_code = ERROR_GAPS_IN_FRM_NUM; + return i4_error_code; + } + return OK; +} + diff --git a/decoder/ih264d_error_handler.h b/decoder/ih264d_error_handler.h new file mode 100755 index 0000000..20c0f89 --- /dev/null +++ b/decoder/ih264d_error_handler.h @@ -0,0 +1,115 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +#ifndef _IH264D_ERROR_HANDLER_H_ +#define _IH264D_ERROR_HANDLER_H_ + +/*! + ************************************************************************* + * \file ih264d_error_handler.h + * + * \brief + * Contains declaration of ih264d_global_error_handler function + * + * \date + * 21/11/2002 + * + * \author AI + ************************************************************************* + */ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" + +typedef enum +{ + + ERROR_MEM_ALLOC_ISRAM_T = 0x50, + ERROR_MEM_ALLOC_SDRAM_T = 0x51, + ERROR_BUF_MGR = 0x52, + ERROR_DBP_MANAGER_T = 0x53, + ERROR_GAPS_IN_FRM_NUM = 0x54, + ERROR_UNKNOWN_NAL = 0x55, + ERROR_INV_MB_SLC_GRP_T = 0x56, + ERROR_MULTIPLE_SLC_GRP_T = 0x57, + ERROR_UNKNOWN_LEVEL = 0x58, + ERROR_FEATURE_UNAVAIL = 0x59, + ERROR_NOT_SUPP_RESOLUTION = 0x5A, + ERROR_INVALID_PIC_PARAM = 0x5B, + ERROR_INVALID_SEQ_PARAM = 0x5C, + ERROR_EGC_EXCEED_32_1_T = 0x5D, + ERROR_EGC_EXCEED_32_2_T = 0x5E, + ERROR_INV_RANGE_TEV_T = 0x5F, + ERROR_INV_SLC_TYPE_T = 0x60, + ERROR_UNAVAIL_PICBUF_T = 0x61, + ERROR_UNAVAIL_MVBUF_T = 0x62, + ERROR_UNAVAIL_DISPBUF_T = 0x63, + ERROR_INV_POC_TYPE_T = 0x64, + ERROR_PIC1_NOT_FOUND_T = 0x65, + ERROR_PIC0_NOT_FOUND_T = 0x66, + ERROR_NUM_REF = 0x67, + ERROR_REFIDX_ORDER_T = 0x68, + ERROR_EOB_FLUSHBITS_T = 0x69, + ERROR_EOB_GETBITS_T = 0x6A, + ERROR_EOB_GETBIT_T = 0x6B, + ERROR_EOB_BYPASS_T = 0x6C, + ERROR_EOB_DECISION_T = 0x6D, + ERROR_EOB_TERMINATE_T = 0x6E, + ERROR_EOB_READCOEFF4X4CAB_T = 0x6F, + ERROR_INV_RANGE_QP_T = 0x70, + ERROR_END_OF_FRAME_EXPECTED_T = 0x71, + ERROR_MB_TYPE = 0x72, + ERROR_SUB_MB_TYPE = 0x73, + ERROR_CBP = 0x74, + ERROR_REF_IDX = 0x75, + ERROR_NUM_MV = 0x76, + 
ERROR_CHROMA_PRED_MODE = 0x77, + ERROR_INTRAPRED = 0x78, + ERROR_NEXT_MB_ADDRESS_T = 0x79, + ERROR_MB_ADDRESS_T = 0x7A, + ERROR_MB_GROUP_ASSGN_T = 0x7B, + ERROR_CAVLC_NUM_COEFF_T = 0x7C, + ERROR_CAVLC_SCAN_POS_T = 0x7D, + ERROR_CABAC_RENORM_T = 0x7E, + ERROR_CABAC_SIG_COEFF1_T = 0x7F, + ERROR_CABAC_SIG_COEFF2_T = 0x80, + ERROR_CABAC_ENCODE_COEFF_T = 0x81, + ERROR_INV_SPS_PPS_T = 0x82, + ERROR_INV_SLICE_HDR_T = 0x83, + ERROR_PRED_WEIGHT_TABLE_T = 0x84, + IH264D_VERS_BUF_INSUFFICIENT = 0x85, + ERROR_ACTUAL_LEVEL_GREATER_THAN_INIT = 0x86, + ERROR_CORRUPTED_SLICE = 0x87, + ERROR_FRAME_LIMIT_OVER = 0x88, + ERROR_ACTUAL_RESOLUTION_GREATER_THAN_INIT = 0x89, + ERROR_PROFILE_NOT_SUPPORTED = 0x8A, + ERROR_DISP_WIDTH_RESET_TO_PIC_WIDTH = 0x8B, + ERROR_DISP_WIDTH_INVALID = 0x8C, + ERROR_DANGLING_FIELD_IN_PIC = 0x8D, + ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED = 0x8E, + ERROR_INIT_NOT_DONE = 0x8F, + ERROR_LEVEL_UNSUPPORTED = 0x90, + ERROR_START_CODE_NOT_FOUND = 0x91, + ERROR_PIC_NUM_IS_REPEATED = 0x92, + +} h264_decoder_error_code_t; + +#endif /* _IH264D_ERROR_HANDLER_H_ */ diff --git a/decoder/ih264d_format_conv.c b/decoder/ih264d_format_conv.c new file mode 100755 index 0000000..9a8494e --- /dev/null +++ b/decoder/ih264d_format_conv.c @@ -0,0 +1,838 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ih264d_format_conv.c */ +/* */ +/* Description : Contains functions needed to convert the images in */ +/* different color spaces to yuv 422i color space */ +/* */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 08 2007 Naveen Kumar T Draft */ +/* */ +/*****************************************************************************/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <string.h> +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_structs.h" +#include "ih264d_format_conv.h" +#include "ih264d_defs.h" + + + +#ifdef LOGO_EN +#include "ih264d_ittiam_logo.h" +#define INSERT_LOGO(pu1_buf_y,pu1_buf_u,pu1_buf_v, u4_stride, u4_x_pos, u4_y_pos, u4_yuv_fmt, u4_disp_wd, u4_disp_ht) \ + ih264d_insert_logo(pu1_buf_y,pu1_buf_u,pu1_buf_v, u4_stride,\ + u4_x_pos, u4_y_pos, u4_yuv_fmt, u4_disp_wd, u4_disp_ht) +#else +#define INSERT_LOGO(pu1_buf_y,pu1_buf_u,pu1_buf_v, u4_stride, u4_x_pos, u4_y_pos, u4_yuv_fmt, u4_disp_wd, u4_disp_ht) +#endif + +/** + ******************************************************************************* + * + * @brief Function used from copying a 420SP buffer + * + * @par Description + * Function used from copying a 420SP buffer + * + * @param[in] pu1_y_src + * Input Y pointer + * + * @param[in] pu1_uv_src + * Input UV pointer (UV is interleaved either in UV or VU format) + * + * @param[in] pu1_y_dst + * Output Y pointer + * + * 
@param[in] pu1_uv_dst + * Output UV pointer (UV is interleaved in the same format as that of input) + * + * @param[in] wd + * Width + * + * @param[in] ht + * Height + * + * @param[in] src_y_strd + * Input Y Stride + * + * @param[in] src_uv_strd + * Input UV stride + * + * @param[in] dst_y_strd + * Output Y stride + * + * @param[in] dst_uv_strd + * Output UV stride + * + * @returns None + * + * @remarks In case there is a need to perform partial frame copy then + * by passion appropriate source and destination pointers and appropriate + * values for wd and ht it can be done + * + ******************************************************************************* + */ +void ih264d_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD16 *pu2_rgb_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_strd, + WORD32 is_u_first) +{ + + WORD16 i2_r, i2_g, i2_b; + UWORD32 u4_r, u4_g, u4_b; + WORD16 i2_i, i2_j; + UWORD8 *pu1_y_src_nxt; + UWORD16 *pu2_rgb_dst_next_row; + + UWORD8 *pu1_u_src, *pu1_v_src; + + if(is_u_first) + { + pu1_u_src = (UWORD8 *)pu1_uv_src; + pu1_v_src = (UWORD8 *)pu1_uv_src + 1; + } + else + { + pu1_u_src = (UWORD8 *)pu1_uv_src + 1; + pu1_v_src = (UWORD8 *)pu1_uv_src; + } + + pu1_y_src_nxt = pu1_y_src + src_y_strd; + pu2_rgb_dst_next_row = pu2_rgb_dst + dst_strd; + + for(i2_i = 0; i2_i < (ht >> 1); i2_i++) + { + for(i2_j = (wd >> 1); i2_j > 0; i2_j--) + { + i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13); + i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) + >> 13; + i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13; + + pu1_u_src += 2; + pu1_v_src += 2; + /* pixel 0 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src + i2_b); + u4_b >>= 3; + /* G */ + u4_g = CLIP_U8(*pu1_y_src + i2_g); + u4_g >>= 2; + /* R */ + u4_r = CLIP_U8(*pu1_y_src + i2_r); + u4_r >>= 3; + + pu1_y_src++; + *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b); + + /* pixel 1 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src + i2_b); + u4_b >>= 3; + 
/* G */ + u4_g = CLIP_U8(*pu1_y_src + i2_g); + u4_g >>= 2; + /* R */ + u4_r = CLIP_U8(*pu1_y_src + i2_r); + u4_r >>= 3; + + pu1_y_src++; + *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b); + + /* pixel 2 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); + u4_b >>= 3; + /* G */ + u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); + u4_g >>= 2; + /* R */ + u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); + u4_r >>= 3; + + pu1_y_src_nxt++; + *pu2_rgb_dst_next_row++ = ((u4_r << 11) | (u4_g << 5) | u4_b); + + /* pixel 3 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); + u4_b >>= 3; + /* G */ + u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); + u4_g >>= 2; + /* R */ + u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); + u4_r >>= 3; + + pu1_y_src_nxt++; + *pu2_rgb_dst_next_row++ = ((u4_r << 11) | (u4_g << 5) | u4_b); + + } + + pu1_u_src = pu1_u_src + src_uv_strd - wd; + pu1_v_src = pu1_v_src + src_uv_strd - wd; + + pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd; + pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd; + + pu2_rgb_dst = pu2_rgb_dst_next_row - wd + dst_strd; + pu2_rgb_dst_next_row = pu2_rgb_dst_next_row + (dst_strd << 1) - wd; + } + +} + +void ih264d_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD32 *pu4_rgba_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_strd, + WORD32 is_u_first) +{ + + WORD16 i2_r, i2_g, i2_b; + UWORD32 u4_r, u4_g, u4_b; + WORD16 i2_i, i2_j; + UWORD8 *pu1_y_src_nxt; + UWORD32 *pu4_rgba_dst_next_row; + + UWORD8 *pu1_u_src, *pu1_v_src; + + if(is_u_first) + { + pu1_u_src = (UWORD8 *)pu1_uv_src; + pu1_v_src = (UWORD8 *)pu1_uv_src + 1; + } + else + { + pu1_u_src = (UWORD8 *)pu1_uv_src + 1; + pu1_v_src = (UWORD8 *)pu1_uv_src; + } + + pu1_y_src_nxt = pu1_y_src + src_y_strd; + pu4_rgba_dst_next_row = pu4_rgba_dst + dst_strd; + + for(i2_i = 0; i2_i < (ht >> 1); i2_i++) + { + for(i2_j = (wd >> 1); i2_j > 0; i2_j--) + { + i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13); + i2_g = ((*pu1_u_src - 128) * COEFF2 + 
(*pu1_v_src - 128) * COEFF3) + >> 13; + i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13; + + pu1_u_src += 2; + pu1_v_src += 2; + /* pixel 0 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src + i2_b); + /* G */ + u4_g = CLIP_U8(*pu1_y_src + i2_g); + /* R */ + u4_r = CLIP_U8(*pu1_y_src + i2_r); + + pu1_y_src++; + *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); + + /* pixel 1 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src + i2_b); + /* G */ + u4_g = CLIP_U8(*pu1_y_src + i2_g); + /* R */ + u4_r = CLIP_U8(*pu1_y_src + i2_r); + + pu1_y_src++; + *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); + + /* pixel 2 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); + /* G */ + u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); + /* R */ + u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); + + pu1_y_src_nxt++; + *pu4_rgba_dst_next_row++ = + ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); + + /* pixel 3 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); + /* G */ + u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); + /* R */ + u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); + + pu1_y_src_nxt++; + *pu4_rgba_dst_next_row++ = + ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); + + } + + pu1_u_src = pu1_u_src + src_uv_strd - wd; + pu1_v_src = pu1_v_src + src_uv_strd - wd; + + pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd; + pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd; + + pu4_rgba_dst = pu4_rgba_dst_next_row - wd + dst_strd; + pu4_rgba_dst_next_row = pu4_rgba_dst_next_row + (dst_strd << 1) - wd; + } + +} + +/** + ******************************************************************************* + * + * @brief Function used from copying a 420SP buffer + * + * @par Description + * Function used from copying a 420SP buffer + * + * @param[in] pu1_y_src + * Input Y pointer + * + * @param[in] pu1_uv_src + * Input UV pointer (UV is interleaved either in UV or VU format) + * + * @param[in] pu1_y_dst + * Output Y pointer + * + * @param[in] pu1_uv_dst + * Output UV pointer (UV is interleaved in the same format as that of input) 
+ * + * @param[in] wd + * Width + * + * @param[in] ht + * Height + * + * @param[in] src_y_strd + * Input Y Stride + * + * @param[in] src_uv_strd + * Input UV stride + * + * @param[in] dst_y_strd + * Output Y stride + * + * @param[in] dst_uv_strd + * Output UV stride + * + * @returns None + * + * @remarks In case there is a need to perform partial frame copy then + * by passion appropriate source and destination pointers and appropriate + * values for wd and ht it can be done + * + ******************************************************************************* + */ + +void ih264d_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_uv_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd) +{ + UWORD8 *pu1_src, *pu1_dst; + WORD32 num_rows, num_cols, src_strd, dst_strd; + WORD32 i; + + /* copy luma */ + pu1_src = (UWORD8 *)pu1_y_src; + pu1_dst = (UWORD8 *)pu1_y_dst; + + num_rows = ht; + num_cols = wd; + + src_strd = src_y_strd; + dst_strd = dst_y_strd; + + for(i = 0; i < num_rows; i++) + { + memcpy(pu1_dst, pu1_src, num_cols); + pu1_dst += dst_strd; + pu1_src += src_strd; + } + + /* copy U and V */ + pu1_src = (UWORD8 *)pu1_uv_src; + pu1_dst = (UWORD8 *)pu1_uv_dst; + + num_rows = ht >> 1; + num_cols = wd; + + src_strd = src_uv_strd; + dst_strd = dst_uv_strd; + + for(i = 0; i < num_rows; i++) + { + memcpy(pu1_dst, pu1_src, num_cols); + pu1_dst += dst_strd; + pu1_src += src_strd; + } + return; +} + +/** + ******************************************************************************* + * + * @brief Function used from copying a 420SP buffer + * + * @par Description + * Function used from copying a 420SP buffer + * + * @param[in] pu1_y_src + * Input Y pointer + * + * @param[in] pu1_uv_src + * Input UV pointer (UV is interleaved either in UV or VU format) + * + * @param[in] pu1_y_dst + * Output Y pointer + * + * @param[in] pu1_uv_dst + * Output UV pointer (UV is 
interleaved in the same format as that of input) + * + * @param[in] wd + * Width + * + * @param[in] ht + * Height + * + * @param[in] src_y_strd + * Input Y Stride + * + * @param[in] src_uv_strd + * Input UV stride + * + * @param[in] dst_y_strd + * Output Y stride + * + * @param[in] dst_uv_strd + * Output UV stride + * + * @returns None + * + * @remarks In case there is a need to perform partial frame copy then + * by passion appropriate source and destination pointers and appropriate + * values for wd and ht it can be done + * + ******************************************************************************* + */ +void ih264d_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_uv_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd) +{ + UWORD8 *pu1_src, *pu1_dst; + WORD32 num_rows, num_cols, src_strd, dst_strd; + WORD32 i; + + /* copy luma */ + pu1_src = (UWORD8 *)pu1_y_src; + pu1_dst = (UWORD8 *)pu1_y_dst; + + num_rows = ht; + num_cols = wd; + + src_strd = src_y_strd; + dst_strd = dst_y_strd; + + for(i = 0; i < num_rows; i++) + { + memcpy(pu1_dst, pu1_src, num_cols); + pu1_dst += dst_strd; + pu1_src += src_strd; + } + + /* copy U and V */ + pu1_src = (UWORD8 *)pu1_uv_src; + pu1_dst = (UWORD8 *)pu1_uv_dst; + + num_rows = ht >> 1; + num_cols = wd; + + src_strd = src_uv_strd; + dst_strd = dst_uv_strd; + + for(i = 0; i < num_rows; i++) + { + WORD32 j; + for(j = 0; j < num_cols; j += 2) + { + pu1_dst[j + 0] = pu1_src[j + 1]; + pu1_dst[j + 1] = pu1_src[j + 0]; + } + pu1_dst += dst_strd; + pu1_src += src_strd; + } + return; +} +/** + ******************************************************************************* + * + * @brief Function used from copying a 420SP buffer + * + * @par Description + * Function used from copying a 420SP buffer + * + * @param[in] pu1_y_src + * Input Y pointer + * + * @param[in] pu1_uv_src + * Input UV pointer (UV is interleaved 
either in UV or VU format) + * + * @param[in] pu1_y_dst + * Output Y pointer + * + * @param[in] pu1_u_dst + * Output U pointer + * + * @param[in] pu1_v_dst + * Output V pointer + * + * @param[in] wd + * Width + * + * @param[in] ht + * Height + * + * @param[in] src_y_strd + * Input Y Stride + * + * @param[in] src_uv_strd + * Input UV stride + * + * @param[in] dst_y_strd + * Output Y stride + * + * @param[in] dst_uv_strd + * Output UV stride + * + * @param[in] is_u_first + * Flag to indicate if U is the first byte in input chroma part + * + * @returns none + * + * @remarks In case there is a need to perform partial frame copy then + * by passion appropriate source and destination pointers and appropriate + * values for wd and ht it can be done + * + ******************************************************************************* + */ + +void ih264d_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, + UWORD8 *pu1_v_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd, + WORD32 is_u_first, + WORD32 disable_luma_copy) +{ + UWORD8 *pu1_src, *pu1_dst; + UWORD8 *pu1_u_src, *pu1_v_src; + WORD32 num_rows, num_cols, src_strd, dst_strd; + WORD32 i, j; + + if(0 == disable_luma_copy) + { + /* copy luma row by row (skipped entirely when disable_luma_copy is non-zero) */ + pu1_src = (UWORD8 *)pu1_y_src; + pu1_dst = (UWORD8 *)pu1_y_dst; + + num_rows = ht; + num_cols = wd; + + src_strd = src_y_strd; + dst_strd = dst_y_strd; + + for(i = 0; i < num_rows; i++) + { + memcpy(pu1_dst, pu1_src, num_cols); + pu1_dst += dst_strd; + pu1_src += src_strd; + } + } + /* de-interleave U and V and copy to destination; is_u_first selects which byte of each interleaved pair is read as U */ + if(is_u_first) + { + pu1_u_src = (UWORD8 *)pu1_uv_src; + pu1_v_src = (UWORD8 *)pu1_uv_src + 1; + } + else + { + pu1_u_src = (UWORD8 *)pu1_uv_src + 1; + pu1_v_src = (UWORD8 *)pu1_uv_src; + } + + num_rows = ht >> 1; + num_cols = wd >> 1; /* one U and one V sample are written per interleaved source pair */ + + src_strd = src_uv_strd; + dst_strd = dst_uv_strd; /* the same destination stride is applied to both the U and the V plane */ + + for(i = 0; i < num_rows; i++) + { + 
for(j = 0; j < num_cols; j++) + { + pu1_u_dst[j] = pu1_u_src[j * 2]; /* source index advances by 2: every other byte belongs to the same component */ + pu1_v_dst[j] = pu1_v_src[j * 2]; + } + + pu1_u_dst += dst_strd; + pu1_v_dst += dst_strd; + pu1_u_src += src_strd; + pu1_v_src += src_strd; + } + return; +} + +/*****************************************************************************/ +/* Function Name : ih264d_format_convert */ +/* */ +/* Description : Implements format conversion/frame copy */ +/* Inputs : ps_dec - Decoder parameters; pv_disp_op - display op (format, destination buffers); u4_start_y, u4_num_rows_y - luma row range */ +/* Globals : None */ +/* Processing : Dispatches on pv_disp_op->e_output_format (420P, 420SP_UV, 420SP_VU, RGB_565) to the matching ih264d_fmt_conv_* routine */ +/* Outputs : Converted rows are written into pv_disp_op->s_disp_frm_buf */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 27 04 2005 NS Draft */ +/* */ +/*****************************************************************************/ +void ih264d_format_convert(dec_struct_t *ps_dec, + ivd_get_display_frame_op_t *pv_disp_op, + UWORD32 u4_start_y, + UWORD32 u4_num_rows_y) +{ + UWORD32 convert_uv_only = 0; + iv_yuv_buf_t *ps_op_frm; + + if(1 == pv_disp_op->u4_error_code) /* nothing is converted when the display op carries error code 1 */ + return; + + ps_op_frm = &(ps_dec->s_disp_frame_info); /* source frame for every conversion below */ + + /* Requires u4_start_y and u4_num_rows_y to be even; NOTE(review): only u4_start_y is actually tested here */ + if(u4_start_y & 1) + { + H264_DEC_DEBUG_PRINT( + "Requires even number of rows and even u4_start_y for format conversion\n"); + return; + } + + if((1 == ps_dec->u4_share_disp_buf) + && ((pv_disp_op->e_output_format == IV_YUV_420SP_UV))) + { /* u4_share_disp_buf set and output is 420SP_UV: return without copying anything */ + return; + } + if(pv_disp_op->e_output_format == IV_YUV_420P) + { + UWORD8 *pu1_src, *pu1_dst; /* NOTE(review): pu1_src, pu1_dst, i, iter, e_output_format and num_rows_uv are not referenced in this branch */ + UWORD16 i; + UWORD16 iter; + + IV_COLOR_FORMAT_T e_output_format = pv_disp_op->e_output_format; + UWORD32 start_uv = u4_start_y >> 1; + UWORD32 num_rows_uv = (u4_num_rows_y + 1) >> 1; + if(0 == ps_dec->u4_share_disp_buf) + { + convert_uv_only = 0; + } + else + { + convert_uv_only = 1; /* passed below as disable_luma_copy, so luma rows are not copied when u4_share_disp_buf is set */ + } + { + + UWORD8 *pu1_y_src, *pu1_u_src, *pu1_v_src; + UWORD8 *pu1_y_dst, *pu1_u_dst, *pu1_v_dst; + UWORD32 width, height; + UWORD32 src_luma_stride, 
src_chroma_stride; + UWORD32 dst_luma_stride, dst_chroma_stride; + + pu1_y_src = (UWORD8 *)ps_op_frm->pv_y_buf; + pu1_y_src += u4_start_y * ps_op_frm->u4_y_strd; + + pu1_y_dst = (UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf; + pu1_y_dst += u4_start_y * pv_disp_op->s_disp_frm_buf.u4_y_strd; + + pu1_u_src = (UWORD8 *)ps_op_frm->pv_u_buf; + pu1_u_src += start_uv * ps_op_frm->u4_u_strd; + + pu1_u_dst = (UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_u_buf; + pu1_u_dst += start_uv * pv_disp_op->s_disp_frm_buf.u4_u_strd; + + pu1_v_src = (UWORD8 *)ps_op_frm->pv_v_buf; + pu1_v_src += start_uv * ps_op_frm->u4_v_strd; /* NOTE(review): pu1_v_src is not used after this point; pu1_u_src is what gets passed as the interleaved source */ + + pu1_v_dst = (UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_v_buf; + pu1_v_dst += start_uv * pv_disp_op->s_disp_frm_buf.u4_v_strd; + + src_luma_stride = ps_op_frm->u4_y_strd; + src_chroma_stride = ps_op_frm->u4_u_strd; + + dst_luma_stride = pv_disp_op->s_disp_frm_buf.u4_y_strd; + dst_chroma_stride = pv_disp_op->s_disp_frm_buf.u4_u_strd; /* the U stride is handed over for both destination chroma planes */ + + width = ps_op_frm->u4_y_wd; + height = u4_num_rows_y; + ih264d_fmt_conv_420sp_to_420p(pu1_y_src, pu1_u_src, pu1_y_dst, + pu1_u_dst, pu1_v_dst, width, height, + src_luma_stride, src_chroma_stride, + dst_luma_stride, dst_chroma_stride, 1, + convert_uv_only); /* is_u_first = 1, disable_luma_copy = convert_uv_only */ + } + + } + + else if((pv_disp_op->e_output_format == IV_YUV_420SP_UV) + || (pv_disp_op->e_output_format == IV_YUV_420SP_VU)) + + { + + UWORD32 start_uv = u4_start_y >> 1; + UWORD32 num_rows_uv = (u4_num_rows_y + 1) >> 1; /* NOTE(review): num_rows_uv is not referenced in this branch */ + + + if(pv_disp_op->e_output_format == IV_YUV_420SP_UV) + { + ih264d_fmt_conv_420sp_to_420sp( + (UWORD8 *)ps_op_frm->pv_y_buf + + u4_start_y * ps_op_frm->u4_y_strd, + ((UWORD8 *)ps_op_frm->pv_u_buf + + start_uv * ps_op_frm->u4_u_strd), + ((UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf + + u4_start_y + * pv_disp_op->s_disp_frm_buf.u4_y_strd), + ((UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_u_buf + + start_uv + * pv_disp_op->s_disp_frm_buf.u4_u_strd), + ps_op_frm->u4_y_wd, u4_num_rows_y, + ps_op_frm->u4_y_strd, ps_op_frm->u4_u_strd, + pv_disp_op->s_disp_frm_buf.u4_y_strd, + 
pv_disp_op->s_disp_frm_buf.u4_u_strd); + } + else + { + + ih264d_fmt_conv_420sp_to_420sp_swap_uv( + (UWORD8 *)ps_op_frm->pv_y_buf + + u4_start_y * ps_op_frm->u4_y_strd, + ((UWORD8 *)ps_op_frm->pv_u_buf + + start_uv * ps_op_frm->u4_u_strd), + ((UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf + + u4_start_y + * pv_disp_op->s_disp_frm_buf.u4_y_strd), + ((UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_u_buf + + start_uv + * pv_disp_op->s_disp_frm_buf.u4_u_strd), + ps_op_frm->u4_y_wd, u4_num_rows_y, + ps_op_frm->u4_y_strd, ps_op_frm->u4_u_strd, + pv_disp_op->s_disp_frm_buf.u4_y_strd, + pv_disp_op->s_disp_frm_buf.u4_u_strd); + + } + + } + else if(pv_disp_op->e_output_format == IV_RGB_565) + { + UWORD32 temp = 0; /* NOTE(review): temp and u2_width_rem are not referenced in this branch */ + UWORD32 u2_width_rem; + + UWORD32 start_uv = u4_start_y >> 1; + + ih264d_fmt_conv_420sp_to_rgb565( + (UWORD8 *)ps_op_frm->pv_y_buf + + u4_start_y * ps_op_frm->u4_y_strd, + ((UWORD8 *)ps_op_frm->pv_u_buf + + start_uv * ps_op_frm->u4_u_strd), + ((UWORD16 *)pv_disp_op->s_disp_frm_buf.pv_y_buf + + u4_start_y + * pv_disp_op->s_disp_frm_buf.u4_y_strd), /* offset is added to a UWORD16 pointer, so u4_y_strd counts 16-bit elements here */ + ps_op_frm->u4_y_wd, u4_num_rows_y, ps_op_frm->u4_y_strd, + ps_op_frm->u4_u_strd, + pv_disp_op->s_disp_frm_buf.u4_y_strd, 1); /* trailing 1 is is_u_first */ + + + } + + if((u4_start_y + u4_num_rows_y) >= ps_dec->s_disp_frame_info.u4_y_ht) /* true once this call reaches the last row of the frame */ + { + + INSERT_LOGO(pv_disp_op->s_disp_frm_buf.pv_y_buf, + pv_disp_op->s_disp_frm_buf.pv_u_buf, + pv_disp_op->s_disp_frm_buf.pv_v_buf, pv_disp_op->s_disp_frm_buf.u4_y_strd, + ps_dec->u2_disp_width, + ps_dec->u2_disp_height, + pv_disp_op->e_output_format, + ps_op_frm->u4_y_wd, + ps_op_frm->u4_y_ht); + } + + return; +} diff --git a/decoder/ih264d_format_conv.h b/decoder/ih264d_format_conv.h new file mode 100755 index 0000000..81a8a0f --- /dev/null +++ b/decoder/ih264d_format_conv.h @@ -0,0 +1,120 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ih264d_format_conv.h */ +/* */ +/* Description : Contains coefficients and constant reqquired for */ +/* converting from rgb and gray color spaces to yuv422i */ +/* color space */ +/* */ +/* List of Functions : None */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 27 08 2007 Naveen Kumar T Draft */ +/* */ +/*****************************************************************************/ + +#ifndef _IH264D_FORMAT_CONV_H_ +#define _IH264D_FORMAT_CONV_H_ + +/*****************************************************************************/ +/* Typedefs */ +/*****************************************************************************/ + +#define COEFF_0_Y 66 +#define COEFF_1_Y 129 +#define COEFF_2_Y 25 +#define COEFF_0_U -38 +#define COEFF_1_U -75 +#define COEFF_2_U 112 +#define COEFF_0_V 112 +#define COEFF_1_V -94 +#define COEFF_2_V -18 +#define CONST_RGB_YUV1 4096 +#define CONST_RGB_YUV2 32768 +#define CONST_GRAY_YUV 128 +#define COEF_2_V2_U 0xFFEE0070 + +#define COF_2Y_0Y 0X00190042 +#define COF_1U_0U 0XFFB5FFDA +#define COF_1V_0V 0XFFA20070 + +void ih264d_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, 
+ UWORD8 *pu1_u_dst, + UWORD8 *pu1_v_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd, + WORD32 is_u_first, + WORD32 disable_luma_copy); + +void ih264d_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_uv_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd); + +void ih264d_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_uv_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd); + +void ih264d_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD16 *pu2_rgb_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_strd, + WORD32 is_u_first); +#define COEFF1 13073 +#define COEFF2 -3207 +#define COEFF3 -6664 +#define COEFF4 16530 + +void ih264d_format_convert(dec_struct_t *ps_dec, + ivd_get_display_frame_op_t *pv_disp_op, + UWORD32 u4_start_y, + UWORD32 u4_num_rows_y); + + +#endif /* _IH264D_FORMAT_CONV_H_ */ diff --git a/decoder/ih264d_function_selector.h b/decoder/ih264d_function_selector.h new file mode 100755 index 0000000..92ad959 --- /dev/null +++ b/decoder/ih264d_function_selector.h @@ -0,0 +1,75 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** + ******************************************************************************* + * @file + * ih264d_function_selector.h + * + * @brief + * Structure definitions used in the decoder + * + * @author + * Harish + * + * @par List of Functions: + * + * @remarks + * None + * + ******************************************************************************* + */ + +#ifndef _IH264D_FUNCTION_SELECTOR_H_ +#define _IH264D_FUNCTION_SELECTOR_H_ + +#define D_ARCH_NA 1 +#define D_ARCH_ARM_NONEON 2 +#define D_ARCH_ARM_A9Q 3 +#define D_ARCH_ARM_A9A 4 +#define D_ARCH_ARM_A9 5 +#define D_ARCH_ARM_A7 6 +#define D_ARCH_ARM_A5 7 +#define D_ARCH_ARM_A15 8 +#define D_ARCH_ARM_NEONINTR 9 +#define D_ARCH_ARMV8_GENERIC 10 +#define D_ARCH_X86_GENERIC 11 +#define D_ARCH_X86_SSSE3 12 +#define D_ARCH_X86_SSE42 13 +#define D_ARCH_X86_AVX2 14 +#define D_ARCH_MIPS_GENERIC 15 +#define D_ARCH_MIPS_32 16 + +void ih264d_init_arch(dec_struct_t *ps_codec); + +void ih264d_init_function_ptr(dec_struct_t *ps_codec); + +void ih264d_init_function_ptr_generic(dec_struct_t *ps_codec); +void ih264d_init_function_ptr_ssse3(dec_struct_t *ps_codec); +void ih264d_init_function_ptr_sse42(dec_struct_t *ps_codec); + +#ifndef DISABLE_AVX2 +void ih264d_init_function_ptr_avx2(dec_struct_t *ps_codec); +#endif + +void ih264d_init_function_ptr_a9q(dec_struct_t *ps_codec); +void ih264d_init_function_ptr_av8(dec_struct_t *ps_codec); + +#endif /* _IH264D_FUNCTION_SELECTOR_H_ */ diff --git a/decoder/ih264d_function_selector_generic.c b/decoder/ih264d_function_selector_generic.c new file mode 100755 index 0000000..48956ef --- /dev/null +++ b/decoder/ih264d_function_selector_generic.c @@ -0,0 +1,222 @@ 
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264d_function_selector_generic.c + * + * @brief + * Contains functions to initialize function pointers of codec context + * + * @author + * Ittiam + * + * @par List of Functions: + * - ih264d_init_function_ptr_generic + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv.h" +#include "ivd.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264_error.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" + +#include "ih264d_structs.h" +#include "ih264d_function_selector.h" + +/** + 
******************************************************************************* + * + * @brief Initialize the intra/inter/transform/deblk function pointers of + * codec context + * + * @par Description: the current routine initializes the function pointers of + * codec context basing on the architecture in use + * + * @param[in] ps_codec + * Codec context pointer + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +void ih264d_init_function_ptr_generic(dec_struct_t *ps_codec) +{ + + WORD32 i = 0; /* NOTE(review): i is never used in this function */ + + /* Init function pointers for intra pred leaf level functions luma + * Intra 16x16 */ + ps_codec->apf_intra_pred_luma_16x16[0] = + ih264_intra_pred_luma_16x16_mode_vert; + ps_codec->apf_intra_pred_luma_16x16[1] = + ih264_intra_pred_luma_16x16_mode_horz; + ps_codec->apf_intra_pred_luma_16x16[2] = + ih264_intra_pred_luma_16x16_mode_dc; + ps_codec->apf_intra_pred_luma_16x16[3] = + ih264_intra_pred_luma_16x16_mode_plane; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 4x4 */ + ps_codec->apf_intra_pred_luma_4x4[0] = ih264_intra_pred_luma_4x4_mode_vert; + ps_codec->apf_intra_pred_luma_4x4[1] = ih264_intra_pred_luma_4x4_mode_horz; + ps_codec->apf_intra_pred_luma_4x4[2] = ih264_intra_pred_luma_4x4_mode_dc; + ps_codec->apf_intra_pred_luma_4x4[3] = + ih264_intra_pred_luma_4x4_mode_diag_dl; + ps_codec->apf_intra_pred_luma_4x4[4] = + ih264_intra_pred_luma_4x4_mode_diag_dr; + ps_codec->apf_intra_pred_luma_4x4[5] = + ih264_intra_pred_luma_4x4_mode_vert_r; + ps_codec->apf_intra_pred_luma_4x4[6] = + ih264_intra_pred_luma_4x4_mode_horz_d; + ps_codec->apf_intra_pred_luma_4x4[7] = + ih264_intra_pred_luma_4x4_mode_vert_l; + ps_codec->apf_intra_pred_luma_4x4[8] = + ih264_intra_pred_luma_4x4_mode_horz_u; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 8x8 */ + ps_codec->apf_intra_pred_luma_8x8[0] = ih264_intra_pred_luma_8x8_mode_vert; + 
ps_codec->apf_intra_pred_luma_8x8[1] = ih264_intra_pred_luma_8x8_mode_horz; + ps_codec->apf_intra_pred_luma_8x8[2] = ih264_intra_pred_luma_8x8_mode_dc; + ps_codec->apf_intra_pred_luma_8x8[3] = + ih264_intra_pred_luma_8x8_mode_diag_dl; + ps_codec->apf_intra_pred_luma_8x8[4] = + ih264_intra_pred_luma_8x8_mode_diag_dr; + ps_codec->apf_intra_pred_luma_8x8[5] = + ih264_intra_pred_luma_8x8_mode_vert_r; + ps_codec->apf_intra_pred_luma_8x8[6] = + ih264_intra_pred_luma_8x8_mode_horz_d; + ps_codec->apf_intra_pred_luma_8x8[7] = + ih264_intra_pred_luma_8x8_mode_vert_l; + ps_codec->apf_intra_pred_luma_8x8[8] = + ih264_intra_pred_luma_8x8_mode_horz_u; + + ps_codec->pf_intra_pred_ref_filtering = + ih264_intra_pred_luma_8x8_mode_ref_filtering; + + /* Init function pointers for intra pred leaf level functions chroma + * Intra 8x8 */ + ps_codec->apf_intra_pred_chroma[0] = ih264_intra_pred_chroma_8x8_mode_vert; + ps_codec->apf_intra_pred_chroma[1] = ih264_intra_pred_chroma_8x8_mode_horz; + ps_codec->apf_intra_pred_chroma[2] = ih264_intra_pred_chroma_8x8_mode_dc; + ps_codec->apf_intra_pred_chroma[3] = ih264_intra_pred_chroma_8x8_mode_plane; + + ps_codec->pf_default_weighted_pred_luma = ih264_default_weighted_pred_luma; /* weighted prediction routines (six pointers) */ + ps_codec->pf_default_weighted_pred_chroma = + ih264_default_weighted_pred_chroma; + ps_codec->pf_weighted_pred_luma = ih264_weighted_pred_luma; + ps_codec->pf_weighted_pred_chroma = ih264_weighted_pred_chroma; + ps_codec->pf_weighted_bi_pred_luma = ih264_weighted_bi_pred_luma; + ps_codec->pf_weighted_bi_pred_chroma = ih264_weighted_bi_pred_chroma; + + /* Padding Functions */ + ps_codec->pf_pad_top = ih264_pad_top; + ps_codec->pf_pad_bottom = ih264_pad_bottom; + ps_codec->pf_pad_left_luma = ih264_pad_left_luma; + ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma; + ps_codec->pf_pad_right_luma = ih264_pad_right_luma; + ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma; + + ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4; /* iquant/itrans/recon and ihadamard routines follow */ + 
ps_codec->pf_iquant_itrans_recon_luma_4x4_dc = + ih264_iquant_itrans_recon_4x4_dc; + ps_codec->pf_iquant_itrans_recon_luma_8x8 = ih264_iquant_itrans_recon_8x8; + ps_codec->pf_iquant_itrans_recon_luma_8x8_dc = + ih264_iquant_itrans_recon_8x8_dc; + ps_codec->pf_iquant_itrans_recon_chroma_4x4 = + ih264_iquant_itrans_recon_chroma_4x4; + ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = + ih264_iquant_itrans_recon_chroma_4x4_dc; + ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4; + + /* Init fn ptr luma deblocking */ + ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4; + ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4; + ps_codec->pf_deblk_luma_vert_bs4_mbaff = ih264_deblk_luma_vert_bs4_mbaff; + ps_codec->pf_deblk_luma_vert_bslt4_mbaff = + ih264_deblk_luma_vert_bslt4_mbaff; + + ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4; + ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4; + + /* Init fn ptr chroma deblocking */ + ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4; + ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4; + ps_codec->pf_deblk_chroma_vert_bs4_mbaff = + ih264_deblk_chroma_vert_bs4_mbaff; + ps_codec->pf_deblk_chroma_vert_bslt4_mbaff = + ih264_deblk_chroma_vert_bslt4_mbaff; + + ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4; + ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4; + + /* Inter pred leaf level functions: 16 table entries; several indices share one routine (1 and 3; 4 and 12; 6 and 14; 9 and 11; 5, 7, 13 and 15). Presumably the index is the quarter-pel fraction (dy << 2) + dx - TODO confirm against the caller */ + ps_codec->apf_inter_pred_luma[0] = ih264_inter_pred_luma_copy; + ps_codec->apf_inter_pred_luma[1] = ih264_inter_pred_luma_horz_qpel; + ps_codec->apf_inter_pred_luma[2] = ih264_inter_pred_luma_horz; + ps_codec->apf_inter_pred_luma[3] = ih264_inter_pred_luma_horz_qpel; + ps_codec->apf_inter_pred_luma[4] = ih264_inter_pred_luma_vert_qpel; + ps_codec->apf_inter_pred_luma[5] = + ih264_inter_pred_luma_horz_qpel_vert_qpel; + ps_codec->apf_inter_pred_luma[6] = + ih264_inter_pred_luma_horz_hpel_vert_qpel; + 
ps_codec->apf_inter_pred_luma[7] = + ih264_inter_pred_luma_horz_qpel_vert_qpel; + ps_codec->apf_inter_pred_luma[8] = ih264_inter_pred_luma_vert; + ps_codec->apf_inter_pred_luma[9] = + ih264_inter_pred_luma_horz_qpel_vert_hpel; + ps_codec->apf_inter_pred_luma[10] = + ih264_inter_pred_luma_horz_hpel_vert_hpel; + ps_codec->apf_inter_pred_luma[11] = + ih264_inter_pred_luma_horz_qpel_vert_hpel; + ps_codec->apf_inter_pred_luma[12] = ih264_inter_pred_luma_vert_qpel; + ps_codec->apf_inter_pred_luma[13] = + ih264_inter_pred_luma_horz_qpel_vert_qpel; + ps_codec->apf_inter_pred_luma[14] = + ih264_inter_pred_luma_horz_hpel_vert_qpel; + ps_codec->apf_inter_pred_luma[15] = + ih264_inter_pred_luma_horz_qpel_vert_qpel; + + ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma; /* single chroma inter pred routine, no per-fraction table */ + + return; +} diff --git a/decoder/ih264d_inter_pred.c b/decoder/ih264d_inter_pred.c new file mode 100755 index 0000000..fa818b5 --- /dev/null +++ b/decoder/ih264d_inter_pred.c @@ -0,0 +1,1614 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! 
+ ************************************************************************** + * \file ih264d_inter_pred.c + * + * \brief + * This file contains routines to perform MotionCompensation tasks + * + * Detailed_description + * + * \date + * 20/11/2002 + * + * \author Arvind Raman + ************************************************************************** + */ + +#include <string.h> +#include "ih264d_defs.h" +#include "ih264d_mvpred.h" +#include "ih264d_error_handler.h" +#include "ih264d_structs.h" +#include "ih264d_defs.h" +#include "ih264d_inter_pred.h" +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_debug.h" +#include "ih264d_tables.h" +#include "ih264d_mb_utils.h" + + +void ih264d_pad_on_demand(pred_info_t *ps_pred, UWORD8 lum_chrom_blk); + + +/* Copies uc_h rows of uc_w bytes from puc_Source (row stride ui16_sourceWidth) to puc_To (row stride ui16_toWidth); returns nothing. */ +void ih264d_copy_multiplex_data(UWORD8 *puc_Source, + UWORD8 *puc_To, + UWORD32 uc_w, + UWORD32 uc_h, + UWORD32 ui16_sourceWidth, + UWORD32 ui16_toWidth) +{ + UWORD32 uc_i; /* same width as uc_h: an 8-bit counter wraps at 256 and would never reach a larger uc_h */ + + for(uc_i = 0; uc_i < uc_h; uc_i++) + { + memcpy(puc_To, puc_Source, uc_w); + puc_To += ui16_toWidth; + puc_Source += ui16_sourceWidth; + } +} + + +/*! + ************************************************************************** + * \if Function name : ih264d_copy_2d1d \endif + * + * \brief + * 2D -> 1D linear copy: packs ui16_heightToFill rows of ui16_widthToFill bytes (source row stride ui16_srcWidth) back to back into puc_dest + * + * \return + * None + ************************************************************************** + */ +void ih264d_copy_2d1d(UWORD8 *puc_src, + UWORD8 *puc_dest, + UWORD16 ui16_srcWidth, + UWORD16 ui16_widthToFill, + UWORD16 ui16_heightToFill) +{ + UWORD32 uc_h; /* remaining rows, counted down to zero */ + for(uc_h = ui16_heightToFill; uc_h != 0; uc_h--) + { + memcpy(puc_dest, puc_src, ui16_widthToFill); + puc_dest += ui16_widthToFill; /* destination is contiguous: advance by the row length, not by a stride */ + puc_src += ui16_srcWidth; + } +} + +/*! 
+ ************************************************************************** + * \if Function name : ih264d_fill_pred_info \endif + * + * \brief + * Fills inter prediction related info + * + * \return + * None + ************************************************************************** + */ +void ih264d_fill_pred_info(WORD16 *pi2_mv,WORD32 part_width,WORD32 part_height, WORD32 sub_mb_num, + WORD32 pred_dir,pred_info_pkd_t *ps_pred_pkd,WORD8 i1_buf_id, + WORD8 i1_ref_idx,UWORD32 *pu4_wt_offset,UWORD8 u1_pic_type) +{ + WORD32 insert_bits; /* scratch value for each field packed into i1_size_pos_info */ + + ps_pred_pkd->i2_mv[0] = pi2_mv[0]; /* the two motion vector components are copied as they are */ + ps_pred_pkd->i2_mv[1] = pi2_mv[1]; + + insert_bits = sub_mb_num & 3; /* sub mb x: low two bits of sub_mb_num, stored from bit 0 */ + ps_pred_pkd->i1_size_pos_info = insert_bits; + insert_bits = sub_mb_num >> 2;/* sub mb y: sub_mb_num >> 2, stored from bit 2 */ + ps_pred_pkd->i1_size_pos_info |= insert_bits << 2; + insert_bits = part_width >> 1; + ps_pred_pkd->i1_size_pos_info |= insert_bits << 4; /* part_width >> 1, stored from bit 4 */ + insert_bits = part_height >> 1; + ps_pred_pkd->i1_size_pos_info |= insert_bits << 6; /* part_height >> 1, stored from bit 6; NOTE(review): values are not masked before being OR-ed in, so callers must keep each field within its bits - confirm */ + + ps_pred_pkd->i1_ref_idx_info = i1_ref_idx; + ps_pred_pkd->i1_ref_idx_info |= (pred_dir << 6); /* pred_dir is OR-ed in from bit 6, above i1_ref_idx */ + ps_pred_pkd->i1_buf_id = i1_buf_id; + ps_pred_pkd->pu4_wt_offst = pu4_wt_offset; /* pointer is stored, the data it points to is not copied */ + ps_pred_pkd->u1_pic_type = u1_pic_type; + + +} + + + + + + + +/*****************************************************************************/ +/* \if Function name : formMbPartInfo \endif */ +/* */ +/* \brief */ +/* Form the Mb partition information structure, to be used by the MC */ +/* routine */ +/* */ +/* \return */ +/* None */ +/* \note */ +/* c_bufx is used to select PredBuffer, */ +/* if it's only Forward/Backward prediction always buffer used is */ +/* puc_MbLumaPredBuffer[0 to X1],pu1_mb_cb_pred_buffer[0 to X1] and */ +/* pu1_mb_cr_pred_buffer[0 to X1] */ +/* */ +/* if it's bidirect for forward ..PredBuffer[0 to X1] buffer is used and */ +/* ..PredBuffer[X2 to X3] for backward prediction. 
and */ +/* */ +/* Final predicted samples values are the average of ..PredBuffer[0 to X1]*/ +/* and ..PredBuffer[X2 to X3] */ +/* */ +/* X1 is 255 for Luma and 63 for Chroma */ +/* X2 is 256 for Luma and 64 for Chroma */ +/* X3 is 511 for Luma and 127 for Chroma */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 11 05 2005 SWRN Modified to handle pod */ +/*****************************************************************************/ + +WORD32 ih264d_form_mb_part_info_bp(pred_info_pkd_t *ps_pred_pkd, + dec_struct_t * ps_dec, + UWORD16 u2_mb_x, + UWORD16 u2_mb_y, + WORD32 mb_index, + dec_mb_info_t *ps_cur_mb_info) +{ + /* The reference buffer pointer */ + WORD32 i2_frm_x, i2_frm_y; + WORD32 i2_tmp_mv_x, i2_tmp_mv_y; + WORD32 i2_rec_x, i2_rec_y; + + WORD32 u2_pic_ht; + WORD32 u2_frm_wd; + WORD32 u2_rec_wd; + UWORD8 u1_sub_x = 0,u1_sub_y=0 ; + UWORD8 u1_part_wd = 0,u1_part_ht = 0; + WORD16 i2_mv_x,i2_mv_y; + + + + /********************************************/ + /* i1_mc_wd width reqd for mcomp */ + /* u1_dma_ht height reqd for mcomp */ + /* u1_dma_wd width aligned to 4 bytes */ + /* u1_dx fractional part of width */ + /* u1_dx fractional part of height */ + /********************************************/ + WORD32 u1_ofst_in_word; + UWORD32 i1_mc_wd; + + WORD32 u1_dma_ht; + + UWORD32 u1_dma_wd; + UWORD32 u1_dx; + UWORD32 u1_dy; + pred_info_t * ps_pred = ps_dec->ps_pred + ps_dec->u4_pred_info_idx; + dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice; + tfr_ctxt_t *ps_frame_buf; + struct pic_buffer_t *ps_ref_frm; + UWORD8 u1_scale_ref,u1_mbaff,u1_field; + pic_buffer_t **pps_ref_frame; + WORD8 i1_size_pos_info,i1_buf_id; + + PROFILE_DISABLE_MB_PART_INFO() + + UNUSED(ps_cur_mb_info); + i1_size_pos_info = ps_pred_pkd->i1_size_pos_info; + GET_XPOS_PRED(u1_sub_x,i1_size_pos_info); + GET_YPOS_PRED(u1_sub_y,i1_size_pos_info); + GET_WIDTH_PRED(u1_part_wd,i1_size_pos_info); + GET_HEIGHT_PRED(u1_part_ht,i1_size_pos_info); + i2_mv_x = 
ps_pred_pkd->i2_mv[0]; + i2_mv_y = ps_pred_pkd->i2_mv[1]; + i1_buf_id = ps_pred_pkd->i1_buf_id; + + + ps_ref_frm = ps_dec->apv_buf_id_pic_buf_map[i1_buf_id]; + + + { + ps_frame_buf = &ps_dec->s_tran_addrecon; + } + + + /* Transfer Setup Y */ + { + UWORD8 *pu1_pred, *pu1_rec; + + /* calculating rounded motion vectors and fractional components */ + i2_tmp_mv_x = i2_mv_x; + i2_tmp_mv_y = i2_mv_y; + u1_dx = i2_tmp_mv_x & 0x3; + u1_dy = i2_tmp_mv_y & 0x3; + i2_tmp_mv_x >>= 2; + i2_tmp_mv_y >>= 2; + i1_mc_wd = u1_part_wd << 2; + u1_dma_ht = u1_part_ht << 2; + if(u1_dx) + { + i2_tmp_mv_x -= 2; + i1_mc_wd += 5; + } + if(u1_dy) + { + i2_tmp_mv_y -= 2; + u1_dma_ht += 5; + } + + /********************************************************************/ + /* Calulating the horizontal and the vertical u4_ofst from top left */ + /* edge of the reference frame, and subsequent clipping */ + /********************************************************************/ + u2_pic_ht = ps_dec->u2_pic_ht; + u2_frm_wd = ps_dec->u2_frm_wd_y; + i2_rec_x = u1_sub_x << 2; + i2_rec_y = u1_sub_y << 2; + + i2_frm_x = (u2_mb_x << 4) + i2_rec_x + i2_tmp_mv_x; + i2_frm_y = (u2_mb_y << 4) + i2_rec_y + i2_tmp_mv_y; + + i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_X_FRM, (ps_dec->u2_pic_wd - 1), + i2_frm_x); + i2_frm_y = CLIP3(((1 - u1_dma_ht)), (u2_pic_ht - (1)), i2_frm_y); + + pu1_pred = ps_ref_frm->pu1_buf1 + i2_frm_y * u2_frm_wd + i2_frm_x; + + + u1_ofst_in_word = 0; + u1_dma_wd = (i1_mc_wd + u1_ofst_in_word + 3) & 0xFC; + + /********************************************************************/ + /* Calulating the horizontal and the vertical u4_ofst from top left */ + /* edge of the recon buffer */ + /********************************************************************/ + /* CHANGED CODE */ + u2_rec_wd = MB_SIZE; + { + u2_rec_wd = ps_dec->u2_frm_wd_y; + i2_rec_x += (mb_index << 4); + pu1_rec = ps_frame_buf->pu1_dest_y + i2_rec_y * u2_rec_wd + + i2_rec_x; + } + + /* CHANGED CODE */ + + /* filling the pred and dma 
structures for Y */ + u2_frm_wd = ps_dec->u2_frm_wd_y; + + ps_pred->u2_u1_ref_buf_wd = u1_dma_wd; + ps_pred->i1_dma_ht = u1_dma_ht; + ps_pred->i1_mc_wd = i1_mc_wd; + ps_pred->u2_frm_wd = u2_frm_wd; + ps_pred->pu1_rec_y_u = pu1_rec; + ps_pred->u2_dst_stride = u2_rec_wd; + + ps_pred->i1_mb_partwidth = u1_part_wd << 2; + ps_pred->i1_mb_partheight = u1_part_ht << 2; + ps_pred->u1_mc_addr_ofst = u1_ofst_in_word; + ps_pred->u1_dydx = (u1_dy << 2) + u1_dx; + + ps_pred->pu1_y_ref = pu1_pred; + + } + + /* Increment ps_pred index */ + ps_pred++; + + /* Transfer Setup U & V */ + { + WORD32 i4_ref_offset, i4_rec_offset; + UWORD8 *pu1_pred_u, *pu1_pred_v; + + + /* calculating rounded motion vectors and fractional components */ + i2_tmp_mv_x = i2_mv_x; + i2_tmp_mv_y = i2_mv_y; + + /************************************************************************/ + /* Table 8-9: Derivation of the vertical component of the chroma vector */ + /* in field coding mode */ + /************************************************************************/ + + /* Eighth sample of the chroma MV */ + u1_dx = i2_tmp_mv_x & 0x7; + u1_dy = i2_tmp_mv_y & 0x7; + + /********************************************************************/ + /* Calculating the full pel MV for chroma which is 1/2 of the Luma */ + /* MV in full pel units */ + /********************************************************************/ + i2_mv_x = i2_tmp_mv_x; + i2_mv_y = i2_tmp_mv_y; + i2_tmp_mv_x = SIGN_POW2_DIV(i2_tmp_mv_x, 3); + i2_tmp_mv_y = SIGN_POW2_DIV(i2_tmp_mv_y, 3); + i1_mc_wd = u1_part_wd << 1; + u1_dma_ht = u1_part_ht << 1; + if(u1_dx) + { + i2_tmp_mv_x -= (i2_mv_x < 0); + i1_mc_wd++; + } + if(u1_dy != 0) + { + i2_tmp_mv_y -= (i2_mv_y < 0); + u1_dma_ht++; + } + + /********************************************************************/ + /* Calulating the horizontal and the vertical u4_ofst from top left */ + /* edge of the reference frame, and subsequent clipping */ + 
/********************************************************************/ + u2_pic_ht >>= 1; + u2_frm_wd = ps_dec->u2_frm_wd_uv; + i2_rec_x = u1_sub_x << 1; + i2_rec_y = u1_sub_y << 1; + + i2_frm_x = (u2_mb_x << 3) + i2_rec_x + i2_tmp_mv_x; + i2_frm_y = (u2_mb_y << 3) + i2_rec_y + i2_tmp_mv_y; + + i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_UV_FRM, + ((ps_dec->u2_pic_wd >> 1) - 1), i2_frm_x); + i2_frm_y = CLIP3(((1 - u1_dma_ht)), (u2_pic_ht - (1)), i2_frm_y); + + i4_ref_offset = i2_frm_y * u2_frm_wd + i2_frm_x * YUV420SP_FACTOR; + u1_ofst_in_word = 0; + u1_dma_wd = (i1_mc_wd + u1_ofst_in_word + 3) & 0xFC; + i4_ref_offset -= u1_ofst_in_word; + + /********************************************************************/ + /* Calulating the horizontal and the vertical u4_ofst from top left */ + /* edge of the recon buffer */ + /********************************************************************/ + /* CHANGED CODE */ + u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR; + i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR; + + { + u2_rec_wd = ps_dec->u2_frm_wd_uv; + i2_rec_x += (mb_index << 3); + i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR; + ps_pred->pu1_rec_y_u = ps_frame_buf->pu1_dest_u + i4_rec_offset; + ps_pred->u1_pi1_wt_ofst_rec_v = ps_frame_buf->pu1_dest_v + + i4_rec_offset; + } + + /* CHANGED CODE */ + + /* filling the common pred structures for U */ + u2_frm_wd = ps_dec->u2_frm_wd_uv; + + ps_pred->u2_u1_ref_buf_wd = u1_dma_wd; + ps_pred->i1_dma_ht = u1_dma_ht; + ps_pred->i1_mc_wd = i1_mc_wd; + + ps_pred->u2_frm_wd = u2_frm_wd; + ps_pred->u2_dst_stride = u2_rec_wd; + + ps_pred->i1_mb_partwidth = u1_part_wd << 1; + ps_pred->i1_mb_partheight = u1_part_ht << 1; + ps_pred->u1_mc_addr_ofst = u1_ofst_in_word; + ps_pred->u1_dydx = (u1_dy << 3) + u1_dx; + + pu1_pred_u = ps_ref_frm->pu1_buf2 + i4_ref_offset; + pu1_pred_v = ps_ref_frm->pu1_buf3 + i4_ref_offset; + + /* Copy U & V partitions */ + ps_pred->pu1_u_ref = pu1_pred_u; + + /* Increment the reference 
buffer Index */ + ps_pred->pu1_v_ref = pu1_pred_v; + } + + /* Increment ps_pred index */ + ps_dec->u4_pred_info_idx += 2; + + return OK; + +} + + +/*****************************************************************************/ +/* \if Function name : formMbPartInfo \endif */ +/* */ +/* \brief */ +/* Form the Mb partition information structure, to be used by the MC */ +/* routine */ +/* */ +/* \return */ +/* None */ +/* \note */ +/* c_bufx is used to select PredBuffer, */ +/* if it's only Forward/Backward prediction always buffer used is */ +/* puc_MbLumaPredBuffer[0 to X1],pu1_mb_cb_pred_buffer[0 to X1] and */ +/* pu1_mb_cr_pred_buffer[0 to X1] */ +/* */ +/* if it's bidirect for forward ..PredBuffer[0 to X1] buffer is used and */ +/* ..PredBuffer[X2 to X3] for backward prediction. and */ +/* */ +/* Final predicted samples values are the average of ..PredBuffer[0 to X1]*/ +/* and ..PredBuffer[X2 to X3] */ +/* */ +/* X1 is 255 for Luma and 63 for Chroma */ +/* X2 is 256 for Luma and 64 for Chroma */ +/* X3 is 511 for Luma and 127 for Chroma */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 11 05 2005 SWRN Modified to handle pod */ +/*****************************************************************************/ +WORD32 ih264d_form_mb_part_info_mp(pred_info_pkd_t *ps_pred_pkd, + dec_struct_t * ps_dec, + UWORD16 u2_mb_x, + UWORD16 u2_mb_y, + WORD32 mb_index, + dec_mb_info_t *ps_cur_mb_info) +{ + /* The reference buffer pointer */ + UWORD8 *pu1_ref_buf; + WORD16 i2_frm_x, i2_frm_y, i2_tmp_mv_x, i2_tmp_mv_y, i2_pod_ht; + WORD16 i2_rec_x, i2_rec_y; + UWORD16 u2_pic_ht, u2_frm_wd, u2_rec_wd; + UWORD8 u1_wght_pred_type, u1_wted_bipred_idc; + UWORD16 u2_tot_ref_scratch_size; + UWORD8 u1_sub_x = 0; + UWORD8 u1_sub_y = 0; + UWORD8 u1_is_bi_dir = 0; + + /********************************************/ + /* i1_mc_wd width reqd for mcomp */ + /* u1_dma_ht height reqd for mcomp */ + /* u1_dma_wd width aligned to 4 bytes */ + /* u1_dx fractional part of 
width */ + /* u1_dx fractional part of height */ + /********************************************/ + UWORD8 u1_ofst_in_word, i1_mc_wd, u1_dma_ht, u1_dma_wd, u1_dx, u1_dy; + pred_info_t * ps_pred ; + dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice; + const UWORD8 u1_slice_type = ps_cur_slice->u1_slice_type; + UWORD8 u1_pod_bot, u1_pod_top; + + /* load the pictype for pod u4_flag & chroma motion vector derivation */ + UWORD8 u1_ref_pic_type ; + + /* set default value to flags specifying field nature of picture & mb */ + UWORD32 u1_mb_fld = 0, u1_mb_or_pic_fld; + UWORD32 u1_mb_bot = 0, u1_pic_bot = 0, u1_mb_or_pic_bot; + tfr_ctxt_t *ps_frame_buf; + /* calculate flags specifying field nature of picture & mb */ + const UWORD32 u1_pic_fld = ps_cur_slice->u1_field_pic_flag; + WORD8 i1_pred; + WORD8 i1_size_pos_info,i1_buf_id,i1_ref_idx; + UWORD8 u1_part_wd,u1_part_ht; + WORD16 i2_mv_x,i2_mv_y; + struct pic_buffer_t *ps_ref_frm; + UWORD32 *pu4_wt_offset; + UWORD8 *pu1_buf1,*pu1_buf2,*pu1_buf3; + + + PROFILE_DISABLE_MB_PART_INFO() + + ps_pred = ps_dec->ps_pred + ps_dec->u4_pred_info_idx; + + + i1_size_pos_info = ps_pred_pkd->i1_size_pos_info; + GET_XPOS_PRED(u1_sub_x,i1_size_pos_info); + GET_YPOS_PRED(u1_sub_y,i1_size_pos_info); + GET_WIDTH_PRED(u1_part_wd,i1_size_pos_info); + GET_HEIGHT_PRED(u1_part_ht,i1_size_pos_info); + i2_mv_x = ps_pred_pkd->i2_mv[0]; + i2_mv_y = ps_pred_pkd->i2_mv[1]; + i1_ref_idx = ps_pred_pkd->i1_ref_idx_info & 0x3f; + i1_buf_id = ps_pred_pkd->i1_buf_id; + ps_ref_frm = ps_dec->apv_buf_id_pic_buf_map[i1_buf_id]; + + i1_pred = (ps_pred_pkd->i1_ref_idx_info & 0xC0) >> 6; + u1_is_bi_dir = (i1_pred == BI_PRED); + + + u1_ref_pic_type = ps_pred_pkd->u1_pic_type & PIC_MASK; + + pu1_buf1 = ps_ref_frm->pu1_buf1; + pu1_buf2 = ps_ref_frm->pu1_buf2; + pu1_buf3 = ps_ref_frm->pu1_buf3; + + if(u1_ref_pic_type == BOT_FLD) + { + pu1_buf1 += ps_ref_frm->u2_frm_wd_y; + pu1_buf2 += ps_ref_frm->u2_frm_wd_uv; + pu1_buf3 += ps_ref_frm->u2_frm_wd_uv; + + } + + 
+ + if(ps_dec->ps_cur_pps->u1_wted_pred_flag) + { + pu4_wt_offset = (UWORD32*)&ps_dec->pu4_wt_ofsts[2 + * X3(i1_ref_idx)]; + } + + + pu4_wt_offset = ps_pred_pkd->pu4_wt_offst; + + + /* Pointer to the frame buffer */ + { + ps_frame_buf = &ps_dec->s_tran_addrecon; + /* CHANGED CODE */ + } + + if(!u1_pic_fld) + { + u1_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag; + u1_mb_bot = 1 - ps_cur_mb_info->u1_topmb; + } + else + u1_pic_bot = ps_cur_slice->u1_bottom_field_flag; + + /****************************************************************/ + /* calculating the flags the tell whether to use frame-padding */ + /* or use software pad-on-demand */ + /****************************************************************/ + u1_mb_or_pic_bot = u1_mb_bot | u1_pic_bot; + u1_mb_or_pic_fld = u1_mb_fld | u1_pic_fld; + u1_pod_bot = u1_mb_or_pic_fld && (u1_ref_pic_type == TOP_FLD); + u1_pod_top = u1_mb_or_pic_fld && (u1_ref_pic_type == BOT_FLD); + + /* Weighted Pred additions */ + u1_wted_bipred_idc = ps_dec->ps_cur_pps->u1_wted_bipred_idc; + + if((u1_slice_type == P_SLICE) || (u1_slice_type == SP_SLICE)) + { + /* P Slice only */ + u1_wght_pred_type = ps_dec->ps_cur_pps->u1_wted_pred_flag; + + } + else + { + /* B Slice only */ + u1_wght_pred_type = 1 + u1_is_bi_dir; + if(u1_wted_bipred_idc == 0) + u1_wght_pred_type = 0; + if((u1_wted_bipred_idc == 2) && (!u1_is_bi_dir)) + u1_wght_pred_type = 0; + } + /* load the scratch reference buffer index */ + pu1_ref_buf = ps_dec->pu1_ref_buff + ps_dec->u4_dma_buf_idx; + u2_tot_ref_scratch_size = 0; + + + /* Transfer Setup Y */ + { + UWORD8 *pu1_pred, *pu1_rec; + /* calculating rounded motion vectors and fractional components */ + i2_tmp_mv_x = i2_mv_x; + i2_tmp_mv_y = i2_mv_y; + + u1_dx = i2_tmp_mv_x & 0x3; + u1_dy = i2_tmp_mv_y & 0x3; + i2_tmp_mv_x >>= 2; + i2_tmp_mv_y >>= 2; + i1_mc_wd = u1_part_wd << 2; + u1_dma_ht = u1_part_ht << 2; + if(u1_dx) + { + i2_tmp_mv_x -= 2; + i1_mc_wd += 5; + } + if(u1_dy) + { + i2_tmp_mv_y -= 2; + u1_dma_ht += 5; 
+ } + + /********************************************************************/ + /* Calulating the horizontal and the vertical u4_ofst from top left */ + /* edge of the reference frame, and subsequent clipping */ + /********************************************************************/ + u2_pic_ht = ps_dec->u2_pic_ht >> u1_pic_fld; + u2_frm_wd = ps_dec->u2_frm_wd_y << u1_pic_fld; + i2_frm_x = (u2_mb_x << 4) + (u1_sub_x << 2) + i2_tmp_mv_x; + i2_frm_y = ((u2_mb_y + (u1_mb_bot && !u1_mb_fld)) << 4) + + (((u1_sub_y << 2) + i2_tmp_mv_y) << u1_mb_fld); + + i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_X_FRM, (ps_dec->u2_pic_wd - 1), + i2_frm_x); + i2_frm_y = CLIP3(((1 - u1_dma_ht) << u1_mb_fld), + (u2_pic_ht - (1 << u1_mb_fld)), i2_frm_y); + + pu1_pred = pu1_buf1 + i2_frm_y * u2_frm_wd + i2_frm_x; + u1_ofst_in_word = 0; + + u1_dma_wd = (i1_mc_wd + u1_ofst_in_word + 3) & 0xFC; + + /********************************************************************/ + /* Calulating the horizontal and the vertical u4_ofst from top left */ + /* edge of the recon buffer */ + /********************************************************************/ + /* CHANGED CODE */ + u2_rec_wd = MB_SIZE; + i2_rec_x = u1_sub_x << 2; + i2_rec_y = u1_sub_y << 2; + { + u2_rec_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld; + i2_rec_x += (mb_index << 4); + pu1_rec = ps_frame_buf->pu1_dest_y + i2_rec_y * u2_rec_wd + + i2_rec_x; + if(u1_mb_bot) + pu1_rec += ps_dec->u2_frm_wd_y << ((u1_mb_fld) ? 
0 : 4); + } + + /* CHANGED CODE */ + + /* filling the pred and dma structures for Y */ + u2_frm_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld; + + ps_pred->pu1_dma_dest_addr = pu1_ref_buf; + ps_pred->u2_u1_ref_buf_wd = u1_dma_wd; + ps_pred->u2_frm_wd = u2_frm_wd; + ps_pred->i1_dma_ht = u1_dma_ht; + ps_pred->i1_mc_wd = i1_mc_wd; + ps_pred->pu1_rec_y_u = pu1_rec; + ps_pred->u2_dst_stride = u2_rec_wd; + + ps_pred->i1_mb_partwidth = u1_part_wd << 2; + ps_pred->i1_mb_partheight = u1_part_ht << 2; + ps_pred->u1_mc_addr_ofst = u1_ofst_in_word; + ps_pred->u1_dydx = (u1_dy << 2) + u1_dx; + ps_pred->u1_is_bi_direct = u1_is_bi_dir; + ps_pred->u1_pi1_wt_ofst_rec_v = (UWORD8 *)pu4_wt_offset; + ps_pred->u1_wght_pred_type = u1_wght_pred_type; + ps_pred->i1_pod_ht = 0; + + /* Increment the Reference buffer Indices */ + pu1_ref_buf += u1_dma_wd * u1_dma_ht; + u2_tot_ref_scratch_size += u1_dma_wd * u1_dma_ht; + + /* unrestricted field motion comp for top region outside frame */ + i2_pod_ht = (-i2_frm_y) >> u1_mb_fld; + if((i2_pod_ht > 0) && u1_pod_top) + { + ps_pred->i1_pod_ht = (WORD8)(-i2_pod_ht); + u1_dma_ht -= i2_pod_ht; + pu1_pred += i2_pod_ht * u2_frm_wd; + } + /* unrestricted field motion comp for bottom region outside frame */ + else if(u1_pod_bot) + { + i2_pod_ht = u1_dma_ht + ((i2_frm_y - u2_pic_ht) >> u1_mb_fld); + if(i2_pod_ht > 0) + { + u1_dma_ht -= i2_pod_ht; + ps_pred->i1_pod_ht = (WORD8)i2_pod_ht; + } + } + + /* Copy Y partition */ + + /* + * ps_pred->i1_pod_ht is non zero when MBAFF is present. 
In case of MBAFF the reference data + * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data + */ + if(ps_pred->i1_pod_ht) + { + ps_pred->pu1_pred = pu1_pred; + ps_pred->u1_dma_ht_y = u1_dma_ht; + ps_pred->u1_dma_wd_y = u1_dma_wd; + } + ps_pred->pu1_y_ref = pu1_pred; + } + + + + /* Increment ps_pred index */ + ps_pred++; + + /* Transfer Setup U & V */ + { + WORD32 i4_ref_offset, i4_rec_offset; + UWORD8 *pu1_pred_u, *pu1_pred_v, u1_tmp_dma_ht; + /* CHANGED CODE */ + UWORD8 u1_chroma_cbp = (UWORD8)(ps_cur_mb_info->u1_cbp >> 4); + /* CHANGED CODE */ + + /* calculating rounded motion vectors and fractional components */ + i2_tmp_mv_x = i2_mv_x; + i2_tmp_mv_y = i2_mv_y; + + /************************************************************************/ + /* Table 8-9: Derivation of the vertical component of the chroma vector */ + /* in field coding mode */ + /************************************************************************/ + if(u1_pod_bot && u1_mb_or_pic_bot) + i2_tmp_mv_y += 2; + if(u1_pod_top && !u1_mb_or_pic_bot) + i2_tmp_mv_y -= 2; + + /* Eighth sample of the chroma MV */ + u1_dx = i2_tmp_mv_x & 0x7; + u1_dy = i2_tmp_mv_y & 0x7; + + /********************************************************************/ + /* Calculating the full pel MV for chroma which is 1/2 of the Luma */ + /* MV in full pel units */ + /********************************************************************/ + i2_mv_x = i2_tmp_mv_x; + i2_mv_y = i2_tmp_mv_y; + i2_tmp_mv_x = SIGN_POW2_DIV(i2_tmp_mv_x, 3); + i2_tmp_mv_y = SIGN_POW2_DIV(i2_tmp_mv_y, 3); + i1_mc_wd = u1_part_wd << 1; + u1_dma_ht = u1_part_ht << 1; + if(u1_dx) + { + if(i2_mv_x < 0) + i2_tmp_mv_x -= 1; + i1_mc_wd++; + } + if(u1_dy != 0) + { + if(i2_mv_y < 0) + i2_tmp_mv_y -= 1; + u1_dma_ht++; + } + + /********************************************************************/ + /* Calulating the horizontal and the vertical u4_ofst from top left */ + /* edge of the reference frame, and subsequent clipping 
*/ + /********************************************************************/ + u2_pic_ht >>= 1; + u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_pic_fld; + i2_frm_x = (u2_mb_x << 3) + (u1_sub_x << 1) + i2_tmp_mv_x; + i2_frm_y = ((u2_mb_y + (u1_mb_bot && !u1_mb_fld)) << 3) + + (((u1_sub_y << 1) + i2_tmp_mv_y) << u1_mb_fld); + + i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_UV_FRM, + ((ps_dec->u2_pic_wd >> 1) - 1), i2_frm_x); + i2_frm_y = CLIP3(((1 - u1_dma_ht) << u1_mb_fld), + (u2_pic_ht - (1 << u1_mb_fld)), i2_frm_y); + + i4_ref_offset = i2_frm_y * u2_frm_wd + i2_frm_x * YUV420SP_FACTOR; + u1_ofst_in_word = 0; + u1_dma_wd = (i1_mc_wd + u1_ofst_in_word + 3) & 0xFC; + i4_ref_offset -= u1_ofst_in_word; + + /********************************************************************/ + /* Calulating the horizontal and the vertical u4_ofst from top left */ + /* edge of the recon buffer */ + /********************************************************************/ + /* CHANGED CODE */ + u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR; + i2_rec_x = u1_sub_x << 1; + i2_rec_y = u1_sub_y << 1; + i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR; + { + u2_rec_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld; + + i2_rec_x += (mb_index << 3); + i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR; + if(u1_mb_bot) + i4_rec_offset += ps_dec->u2_frm_wd_uv << ((u1_mb_fld) ? 
0 : 3); + ps_pred->pu1_rec_y_u = ps_frame_buf->pu1_dest_u + i4_rec_offset; + ps_pred->u1_pi1_wt_ofst_rec_v = ps_frame_buf->pu1_dest_v + + i4_rec_offset; + + } + + /* CHANGED CODE */ + + /* filling the common pred structures for U */ + u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld; + u1_tmp_dma_ht = u1_dma_ht; + ps_pred->u2_u1_ref_buf_wd = u1_dma_wd; + ps_pred->u2_frm_wd = u2_frm_wd; + ps_pred->i1_dma_ht = u1_dma_ht; + ps_pred->i1_mc_wd = i1_mc_wd; + ps_pred->u2_dst_stride = u2_rec_wd; + + ps_pred->i1_mb_partwidth = u1_part_wd << 1; + ps_pred->i1_mb_partheight = u1_part_ht << 1; + ps_pred->u1_mc_addr_ofst = u1_ofst_in_word; + ps_pred->u1_dydx = (u1_dy << 3) + u1_dx; + ps_pred->u1_is_bi_direct = u1_is_bi_dir; + ps_pred->u1_wght_pred_type = u1_wght_pred_type; + ps_pred->i1_pod_ht = 0; + + ps_pred->pu1_dma_dest_addr = pu1_ref_buf; + + /* unrestricted field motion comp for top region outside frame */ + i2_pod_ht = (-i2_frm_y) >> u1_mb_fld; + if((i2_pod_ht > 0) && u1_pod_top) + { + i4_ref_offset += i2_pod_ht * u2_frm_wd; + u1_dma_ht -= i2_pod_ht; + ps_pred->i1_pod_ht = (WORD8)(-i2_pod_ht); + } + /* unrestricted field motion comp for bottom region outside frame */ + else if(u1_pod_bot) + { + i2_pod_ht = u1_dma_ht + ((i2_frm_y - u2_pic_ht) >> u1_mb_fld); + if(i2_pod_ht > 0) + { + u1_dma_ht -= i2_pod_ht; + ps_pred->i1_pod_ht = (WORD8)i2_pod_ht; + } + } + + pu1_pred_u = pu1_buf2 + i4_ref_offset; + pu1_pred_v = pu1_buf3 + i4_ref_offset; + + /* Copy U & V partitions */ + if(ps_pred->i1_pod_ht) + { + ps_pred->pu1_pred_u = pu1_pred_u; + ps_pred->u1_dma_ht_uv = u1_dma_ht; + ps_pred->u1_dma_wd_uv = u1_dma_wd; + + } + ps_pred->pu1_u_ref = pu1_pred_u; + + /* Increment the reference buffer Index */ + u2_tot_ref_scratch_size += (u1_dma_wd * u1_tmp_dma_ht) << 1; + + if(ps_pred->i1_pod_ht) + { + ps_pred->pu1_pred_v = pu1_pred_v; + ps_pred->u1_dma_ht_uv = u1_dma_ht; + ps_pred->u1_dma_wd_uv = u1_dma_wd; + } + + ps_pred->pu1_v_ref = pu1_pred_v; + } + + /* Increment ps_pred index */ 
+    ps_dec->u4_pred_info_idx += 2;
+
+
+    /* Increment the reference buffer Index */
+    ps_dec->u4_dma_buf_idx += u2_tot_ref_scratch_size;
+
+    if(ps_dec->u4_dma_buf_idx > ps_dec->u4_ref_buf_size)
+        return ERROR_NUM_MV;
+
+    return OK;
+
+
+
+}
+
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_motion_compensate_bp \endif
+ *
+ * \brief
+ *    Forms the inter predictors for one macroblock. Walks the pred_info_t
+ *    entries starting at ps_dec->ps_pred (one luma entry followed by one
+ *    chroma entry per partition) and calls the luma and chroma interpolation
+ *    function pointers for each pair, until the processed luma partitions
+ *    add up to 256 pixels. Per the original note, this variant is meant for
+ *    BASELINE profile only.
+ *
+ * \param ps_dec: Decoder context. Supplies the pred_info_t array (ps_pred),
+ *     the intermediate buffer (pi2_pred1) and the interpolation function
+ *     pointers.
+ * \param ps_cur_mb_info: Not used by this function.
+ *
+ * \return
+ *    None
+ *
+ * \note
+ *    The routine forms predictors for all the luma and the chroma MB
+ *    partitions.
+ **************************************************************************
+ */
+
+void ih264d_motion_compensate_bp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info)
+{
+    pred_info_t *ps_pred ;
+    UWORD8 *puc_ref, *pu1_dest_y;
+    UWORD8 *pu1_dest_u;
+    UWORD32 u2_num_pels, u2_ref_wd_y, u2_ref_wd_uv, u2_dst_wd;
+
+    UWORD32 u4_wd_y, u4_ht_y, u4_wd_uv;
+    UWORD32 u4_ht_uv;
+    UWORD8 *puc_pred0 = (UWORD8 *)(ps_dec->pi2_pred1);
+
+
+    PROFILE_DISABLE_INTER_PRED()
+    UNUSED(ps_cur_mb_info);
+    ps_pred = ps_dec->ps_pred ;
+
+    /* Each iteration consumes two pred_info_t entries: luma, then chroma */
+    for(u2_num_pels = 0; u2_num_pels < 256;)
+    {
+        UWORD32 uc_dx, uc_dy;
+        /* Pointer to the destination buffer. If the CBPs of all 8x8 blocks in
+         the MB partition are zero then it would be better to copy the
+         predictor values directly to the current frame buffer */
+        /*
+         * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data
+         * is copied in the Scratch buffer so that the padding_on_demand does not corrupt the frame data.
+         * Note: this function does not read i1_pod_ht; it always fetches from pu1_y_ref / pu1_u_ref.
+         */
+
+        u2_ref_wd_y = ps_pred->u2_frm_wd;
+        puc_ref = ps_pred->pu1_y_ref;
+        /* Luma u1_dydx: low 2 bits = fractional x, remaining bits = fractional y. */
+        /* A non-zero fraction skips 2 columns / 2 rows of the reference; */
+        /* presumably this undoes the filter margin added when the entry was */
+        /* formed - TODO confirm against ih264d_form_mb_part_info_bp */
+        if(ps_pred->u1_dydx & 0x3)
+            puc_ref += 2;
+        if(ps_pred->u1_dydx >> 2)
+            puc_ref += 2 * u2_ref_wd_y;
+
+        u4_wd_y = ps_pred->i1_mb_partwidth;
+        u4_ht_y = ps_pred->i1_mb_partheight;
+        /* These luma dx/dy values are overwritten in the chroma section */
+        /* below without being read */
+        uc_dx = ps_pred->u1_dydx;
+        uc_dy = uc_dx >> 2;
+        uc_dx &= 0x3;
+
+        pu1_dest_y = ps_pred->pu1_rec_y_u;
+        u2_dst_wd = ps_pred->u2_dst_stride;
+
+        /* The luma interpolation routine is selected by the packed dydx value */
+        ps_dec->apf_inter_pred_luma[ps_pred->u1_dydx](puc_ref, pu1_dest_y,
+                                                      u2_ref_wd_y,
+                                                      u2_dst_wd,
+                                                      u4_ht_y,
+                                                      u4_wd_y, puc_pred0,
+                                                      ps_pred->u1_dydx);
+
+        ps_pred++;
+
+        /* Interpolate samples for the chroma components */
+        {
+            UWORD8 *pu1_ref_u;
+
+            u2_ref_wd_uv = ps_pred->u2_frm_wd;
+            pu1_ref_u = ps_pred->pu1_u_ref + ps_pred->u1_mc_addr_ofst;
+
+            u4_wd_uv = ps_pred->i1_mb_partwidth;
+            u4_ht_uv = ps_pred->i1_mb_partheight;
+            uc_dx = ps_pred->u1_dydx; /* 8*dy + dx */
+            uc_dy = uc_dx >> 3;
+            uc_dx &= 0x7;
+
+            pu1_dest_u = ps_pred->pu1_rec_y_u;
+            u2_dst_wd = ps_pred->u2_dst_stride;
+
+            ps_pred++;
+            ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_dest_u, u2_ref_wd_uv,
+                                         u2_dst_wd, uc_dx, uc_dy,
+                                         u4_ht_uv, u4_wd_uv);
+
+        }
+
+        /* Accumulate the luma partition area; the loop ends at 16x16 = 256 */
+        u2_num_pels += (UWORD8)u4_wd_y * (UWORD8)u4_ht_y;
+
+    }
+}
+
+
+/*
+ **************************************************************************
+ * \if Function name : MotionCompensateB \endif
+ *
+ * \brief
+ *    The routine forms predictor blocks for the entire MB and stores it in
+ *    predictor buffers.
+ *
+ * \param ps_dec: Pointer to the structure decStruct. This is used to get
+ *     pointers to the current and the reference frame and to the MbParams
+ *     structure.
+ *
+ * \return
+ *    None
+ *
+ * \note
+ *    The routine forms predictors for all the luma and the chroma MB
+ *    partitions.
+ ************************************************************************** + */ + +void ih264d_motion_compensate_mp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info) +{ + pred_info_t *ps_pred ; + pred_info_t *ps_pred_y_forw, *ps_pred_y_back, *ps_pred_cr_forw; + UWORD8 *puc_ref, *pu1_dest_y, *puc_pred0, *puc_pred1; + UWORD8 *pu1_dest_u, *pu1_dest_v; + WORD16 *pi16_intm; + UWORD32 u2_num_pels, u2_ref_wd_y, u2_ref_wd_uv, u2_dst_wd; + UWORD32 u2_dest_wd_y, u2_dest_wd_uv; + UWORD32 u2_row_buf_wd_y = ps_dec->u2_mb_group_cols_y1; + UWORD32 u2_row_buf_wd_uv = ps_dec->u2_mb_group_cols_cr1; + UWORD32 u2_log2Y_crwd = ps_dec->ps_cur_slice->u2_log2Y_crwd; + UWORD32 u4_wd_y, u4_ht_y, u1_dir, u4_wd_uv; + UWORD32 u4_ht_uv; + UWORD8 *pu1_temp_mc_buffer = ps_dec->pu1_temp_mc_buffer; + WORD32 i2_pod_ht; + UWORD32 u2_pic_ht, u2_frm_wd, u2_rec_wd; + UWORD32 u1_pod_bot, u1_pod_top; + UWORD8 *pu1_pred, *pu1_dma_dst; + UWORD32 u1_dma_wd, u1_dma_ht; + + dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice; + + /* set default value to flags specifying field nature of picture & mb */ + UWORD32 u1_mb_fld = 0, u1_mb_or_pic_fld; + UWORD32 u1_mb_or_pic_bot; + /* calculate flags specifying field nature of picture & mb */ + const UWORD8 u1_pic_fld = ps_cur_slice->u1_field_pic_flag; + + PROFILE_DISABLE_INTER_PRED() + ps_pred = ps_dec->ps_pred ; + /* Initialize both ps_pred_y_forw an y_back to avoid static analysis warnigns */ + ps_pred_y_forw = ps_pred; + ps_pred_y_back = ps_pred; + + if(ps_dec->u1_separate_parse) + u2_log2Y_crwd = ps_dec->ps_decode_cur_slice->u2_log2Y_crwd; + + if(!u1_pic_fld) + { + u1_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag; + } + + u1_mb_or_pic_fld = u1_mb_fld | u1_pic_fld; + + pi16_intm = ps_dec->pi2_pred1; + puc_pred0 = (UWORD8 *)pi16_intm; + puc_pred1 = puc_pred0 + MB_SIZE * MB_SIZE; + + for(u2_num_pels = 0; u2_num_pels < 256;) + { + UWORD8 uc_dx, uc_dy; + const UWORD8 u1_is_bi_direct = ps_pred->u1_is_bi_direct; + for(u1_dir = 0; u1_dir <= 
u1_is_bi_direct; u1_dir++) + { + /* Pointer to the destination buffer. If the CBPs of all 8x8 blocks in + the MB partition are zero then it would be better to copy the + predictor valus directly to the current frame buffer */ + /* + * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data + * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data + */ + + if(ps_pred->i1_pod_ht) + { + u2_ref_wd_y = ps_pred->u2_u1_ref_buf_wd; + puc_ref = ps_pred->pu1_dma_dest_addr; + } + else + { + u2_ref_wd_y = ps_pred->u2_frm_wd; + puc_ref = ps_pred->pu1_y_ref; + + } + + if(ps_pred->u1_dydx & 0x3) + puc_ref += 2; + if(ps_pred->u1_dydx >> 2) + puc_ref += 2 * u2_ref_wd_y; + u4_wd_y = ps_pred->i1_mb_partwidth; + u4_ht_y = ps_pred->i1_mb_partheight; + + if(ps_pred->i1_pod_ht) + { + pu1_pred = ps_pred->pu1_pred; + pu1_dma_dst = ps_pred->pu1_dma_dest_addr; + u1_dma_wd = ps_pred->u1_dma_wd_y; + u1_dma_ht = ps_pred->u1_dma_ht_y; + u2_frm_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld; + } + + uc_dx = ps_pred->u1_dydx; + uc_dy = uc_dx >> 2; + uc_dx &= 0x3; + if(u1_dir == 0) + { + pu1_dest_y = ps_pred->pu1_rec_y_u; + u2_row_buf_wd_y = ps_pred->u2_dst_stride; + u2_dst_wd = ps_pred->u2_dst_stride; + u2_dest_wd_y = u2_dst_wd; + ps_pred_y_forw = ps_pred; + } + else + { + pu1_dest_y = pu1_temp_mc_buffer; + u2_dst_wd = MB_SIZE; + u2_dest_wd_y = u2_dst_wd; + ps_pred_y_back = ps_pred; + ps_pred_y_back->pu1_rec_y_u = pu1_dest_y; + } + + /* padding on demand (POD) for y done here */ + + if(ps_pred->i1_pod_ht) + { + if(ps_pred->i1_pod_ht < 0) + { + pu1_dma_dst = + pu1_dma_dst + - (ps_pred->i1_pod_ht + * ps_pred->u2_u1_ref_buf_wd); + } + ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd, u1_dma_wd, + u1_dma_ht); + ih264d_pad_on_demand(ps_pred, LUM_BLK); + } + ps_dec->apf_inter_pred_luma[ps_pred->u1_dydx](puc_ref, pu1_dest_y, + u2_ref_wd_y, + u2_dst_wd, + u4_ht_y, + u4_wd_y, + puc_pred0, + ps_pred->u1_dydx); + ps_pred++; + + /* 
Interpolate samples for the chroma components */ + { + UWORD8 *pu1_ref_u; + UWORD32 u1_dma_ht; + + /* padding on demand (POD) for U and V done here */ + u1_dma_ht = ps_pred->i1_dma_ht; + + if(ps_pred->i1_pod_ht) + { + pu1_pred = ps_pred->pu1_pred_u; + pu1_dma_dst = ps_pred->pu1_dma_dest_addr; + u1_dma_ht = ps_pred->u1_dma_ht_uv; + u1_dma_wd = ps_pred->u1_dma_wd_uv * YUV420SP_FACTOR; + u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld; + if(ps_pred->i1_pod_ht < 0) + { + /*Top POD*/ + pu1_dma_dst -= (ps_pred->i1_pod_ht + * ps_pred->u2_u1_ref_buf_wd + * YUV420SP_FACTOR); + } + + ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd, + u1_dma_wd, u1_dma_ht); + + pu1_dma_dst += (ps_pred->i1_dma_ht + * ps_pred->u2_u1_ref_buf_wd); + pu1_pred = ps_pred->pu1_pred_v; + + ih264d_pad_on_demand(ps_pred, CHROM_BLK); + } + + if(ps_pred->i1_pod_ht) + { + pu1_ref_u = ps_pred->pu1_dma_dest_addr; + + u2_ref_wd_uv = ps_pred->u2_u1_ref_buf_wd + * YUV420SP_FACTOR; + } + else + { + u2_ref_wd_uv = ps_pred->u2_frm_wd; + pu1_ref_u = ps_pred->pu1_u_ref; + + } + + u4_wd_uv = ps_pred->i1_mb_partwidth; + u4_ht_uv = ps_pred->i1_mb_partheight; + uc_dx = ps_pred->u1_dydx; /* 8*dy + dx */ + uc_dy = uc_dx >> 3; + uc_dx &= 0x7; + if(u1_dir == 0) + { + pu1_dest_u = ps_pred->pu1_rec_y_u; + + pu1_dest_v = ps_pred->u1_pi1_wt_ofst_rec_v; + u2_row_buf_wd_uv = ps_pred->u2_dst_stride; + u2_dst_wd = ps_pred->u2_dst_stride; + u2_dest_wd_uv = u2_dst_wd; + ps_pred_cr_forw = ps_pred; + } + else + { + pu1_dest_u = puc_pred0; + + pu1_dest_v = puc_pred1; + u2_dest_wd_uv = BUFFER_WIDTH; + u2_dst_wd = BUFFER_WIDTH; + ps_pred->pu1_rec_y_u = pu1_dest_u; + ps_pred->u1_pi1_wt_ofst_rec_v = pu1_dest_v; + } + + ps_pred++; + ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_dest_u, + u2_ref_wd_uv, u2_dst_wd, + uc_dx, uc_dy, u4_ht_uv, + u4_wd_uv); + + if(ps_cur_mb_info->u1_Mux == 1) + { + /******************************************************************/ + /* padding on demand (POD) for U and V done here */ + /* ps_pred now points 
to the Y entry of the 0,0 component */ + /* Y need not be checked for POD because Y lies within */ + /* the picture((0,0) mv for Y doesnot get changed. But (0,0) for */ + /* U and V can need POD beacause of cross-field mv adjustments */ + /* (Table 8-9 of standard) */ + /******************************************************************/ + if((ps_pred + 1)->i1_pod_ht) + + { + + pu1_pred = (ps_pred + 1)->pu1_pred_u; + pu1_dma_dst = (ps_pred + 1)->pu1_dma_dest_addr; + u1_dma_ht = (ps_pred + 1)->u1_dma_ht_uv; + u1_dma_wd = (ps_pred + 1)->u1_dma_wd_uv + * YUV420SP_FACTOR; + u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld; + if((ps_pred + 1)->i1_pod_ht < 0) + { + /*Top POD*/ + pu1_dma_dst -= ((ps_pred + 1)->i1_pod_ht + * (ps_pred + 1)->u2_u1_ref_buf_wd + * YUV420SP_FACTOR); + } + ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd, + u1_dma_wd, u1_dma_ht); + pu1_dma_dst += ((ps_pred + 1)->i1_dma_ht + * (ps_pred + 1)->u2_u1_ref_buf_wd); //(u1_dma_ht * u1_dma_wd);// + pu1_pred = (ps_pred + 1)->pu1_pred_v; + ih264d_pad_on_demand(ps_pred + 1, CHROM_BLK); + + } + + ih264d_multiplex_ref_data(ps_dec, ps_pred, pu1_dest_y, + pu1_dest_u, pu1_dest_v, ps_cur_mb_info, + u2_dest_wd_y, u2_dest_wd_uv, + u1_dir); + ps_pred += 2; + } + } + } + if(u1_dir != 0) + u2_ref_wd_y = MB_SIZE; + + u2_num_pels += u4_wd_y * u4_ht_y; + /* if BI_DIRECT, average the two pred's, and put in ..PredBuffer[0] */ + if((u1_is_bi_direct != 0) || (ps_pred_y_forw->u1_wght_pred_type != 0)) + { + + switch(ps_pred_y_forw->u1_wght_pred_type) + { + case 0: + ps_dec->pf_default_weighted_pred_luma( + ps_pred_y_forw->pu1_rec_y_u, pu1_dest_y, + ps_pred_y_forw->pu1_rec_y_u, + u2_row_buf_wd_y, u2_ref_wd_y, + u2_row_buf_wd_y, u4_ht_uv * 2, + u4_wd_uv * 2); + + ps_dec->pf_default_weighted_pred_chroma( + ps_pred_cr_forw->pu1_rec_y_u, pu1_dest_u, + ps_pred_cr_forw->pu1_rec_y_u, + u2_row_buf_wd_uv, u2_dst_wd, + u2_row_buf_wd_uv, u4_ht_uv, + u4_wd_uv); + + break; + case 1: + { + UWORD32 *pu4_weight_ofst = + 
(UWORD32*)ps_pred_y_forw->u1_pi1_wt_ofst_rec_v; + UWORD32 u4_wt_ofst_u, u4_wt_ofst_v; + UWORD32 u4_wt_ofst_y = + (UWORD32)(pu4_weight_ofst[0]); + WORD32 weight = (WORD16)(u4_wt_ofst_y & 0xffff); + WORD32 ofst = (WORD8)(u4_wt_ofst_y >> 16); + + ps_dec->pf_weighted_pred_luma(ps_pred_y_forw->pu1_rec_y_u, + ps_pred_y_forw->pu1_rec_y_u, + u2_row_buf_wd_y, + u2_row_buf_wd_y, + (u2_log2Y_crwd & 0x0ff), + weight, ofst, u4_ht_y, + u4_wd_y); + + u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[2]); + u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[4]); + weight = ((u4_wt_ofst_v & 0xffff) << 16) + | (u4_wt_ofst_u & 0xffff); + ofst = ((u4_wt_ofst_v >> 16) << 8) + | ((u4_wt_ofst_u >> 16) & 0xFF); + + ps_dec->pf_weighted_pred_chroma( + ps_pred_cr_forw->pu1_rec_y_u, + ps_pred_cr_forw->pu1_rec_y_u, + u2_row_buf_wd_uv, u2_row_buf_wd_uv, + (u2_log2Y_crwd >> 8), weight, ofst, + u4_ht_y >> 1, u4_wd_y >> 1); + } + + break; + case 2: + { + UWORD32 *pu4_weight_ofst = + (UWORD32*)ps_pred_y_forw->u1_pi1_wt_ofst_rec_v; + UWORD32 u4_wt_ofst_u, u4_wt_ofst_v; + UWORD32 u4_wt_ofst_y; + WORD32 weight1, weight2; + WORD32 ofst1, ofst2; + + u4_wt_ofst_y = (UWORD32)(pu4_weight_ofst[0]); + + weight1 = (WORD16)(u4_wt_ofst_y & 0xffff); + ofst1 = (WORD8)(u4_wt_ofst_y >> 16); + + u4_wt_ofst_y = (UWORD32)(pu4_weight_ofst[1]); + weight2 = (WORD16)(u4_wt_ofst_y & 0xffff); + ofst2 = (WORD8)(u4_wt_ofst_y >> 16); + + ps_dec->pf_weighted_bi_pred_luma(ps_pred_y_forw->pu1_rec_y_u, + ps_pred_y_back->pu1_rec_y_u, + ps_pred_y_forw->pu1_rec_y_u, + u2_row_buf_wd_y, + u2_ref_wd_y, + u2_row_buf_wd_y, + (u2_log2Y_crwd & 0x0ff), + weight1, weight2, ofst1, + ofst2, u4_ht_y, + u4_wd_y); + + u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[2]); + u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[4]); + weight1 = ((u4_wt_ofst_v & 0xffff) << 16) + | (u4_wt_ofst_u & 0xffff); + ofst1 = ((u4_wt_ofst_v >> 16) << 8) + | ((u4_wt_ofst_u >> 16) & 0xFF); + + u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[3]); + u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[5]); + weight2 = 
((u4_wt_ofst_v & 0xffff) << 16)
+                                    | (u4_wt_ofst_u & 0xffff);
+                    ofst2 = ((u4_wt_ofst_v >> 16) << 8)
+                                    | ((u4_wt_ofst_u >> 16) & 0xFF);
+
+                    ps_dec->pf_weighted_bi_pred_chroma(
+                                    (ps_pred_y_forw + 1)->pu1_rec_y_u,
+                                    (ps_pred_y_back + 1)->pu1_rec_y_u,
+                                    (ps_pred_y_forw + 1)->pu1_rec_y_u,
+                                    u2_row_buf_wd_uv, u2_dst_wd,
+                                    u2_row_buf_wd_uv, (u2_log2Y_crwd >> 8),
+                                    weight1, weight2, ofst1, ofst2,
+                                    u4_ht_y >> 1, u4_wd_y >> 1);
+                }
+
+                    break;
+            }
+
+        }
+    }
+}
+
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_multiplex_ref_data \endif
+ *
+ * \brief
+ *    Copies selected 4x4 luma blocks, and the matching 4-byte x 2-row
+ *    interleaved chroma blocks, of a macroblock from the reference data into
+ *    the destination buffers. The 16 blocks are visited in raster order
+ *    (4 rows of 4); block k is copied only when bit k (LSB first) of
+ *    ps_cur_mb_info->u2_mask[u1_dir] is set, otherwise it is skipped.
+ *
+ *    ps_pred must point to a luma entry immediately followed by its chroma
+ *    entry. For either entry, a non-zero i1_pod_ht selects the scratch copy
+ *    (pu1_dma_dest_addr) as source instead of the frame reference. When the
+ *    chroma entry has a non-zero fractional vector, chroma is first
+ *    interpolated into the scratch buffer of the entry preceding the luma
+ *    entry, and copied from there.
+ *
+ * \param ps_dec         decoder context (supplies pf_inter_pred_chroma)
+ * \param ps_pred        luma pred entry, followed by the chroma entry
+ * \param pu1_dest_y     luma destination
+ * \param pu1_dest_u     interleaved chroma destination
+ * \param pu1_dest_v     only advanced, never written through, in this routine
+ * \param ps_cur_mb_info source of the per-direction 4x4 block mask
+ * \param u2_dest_wd_y   luma destination stride
+ * \param u2_dest_wd_uv  chroma destination stride
+ * \param u1_dir         prediction direction index into u2_mask[]
+ *
+ * \return
+ *    None
+ **************************************************************************
+ */
+
+void ih264d_multiplex_ref_data(dec_struct_t * ps_dec,
+                               pred_info_t *ps_pred,
+                               UWORD8* pu1_dest_y,
+                               UWORD8* pu1_dest_u,
+                               UWORD8* pu1_dest_v,
+                               dec_mb_info_t *ps_cur_mb_info,
+                               UWORD16 u2_dest_wd_y,
+                               UWORD16 u2_dest_wd_uv,
+                               UWORD8 u1_dir)
+{
+    UWORD16 u2_mask = ps_cur_mb_info->u2_mask[u1_dir];
+    UWORD8 *pu1_ref_y, *pu1_ref_u, *pu1_ref_v;
+    UWORD8 uc_cond, i, j, u1_dydx;
+    UWORD16 u2_ref_wd_y, u2_ref_wd_uv;
+
+    PROFILE_DISABLE_INTER_PRED()
+
+    /* Luma source: scratch copy when pad-on-demand was applied, else frame */
+    if(ps_pred->i1_pod_ht)
+    {
+        pu1_ref_y = ps_pred->pu1_dma_dest_addr + ps_pred->u1_mc_addr_ofst;
+
+        u2_ref_wd_y = ps_pred->u2_u1_ref_buf_wd;
+    }
+    else
+    {
+        pu1_ref_y = ps_pred->pu1_y_ref + ps_pred->u1_mc_addr_ofst;
+        u2_ref_wd_y = ps_pred->u2_frm_wd;
+    }
+
+    /* Move on to the chroma entry and pick its source the same way */
+    ps_pred++;
+    if(ps_pred->i1_pod_ht)
+    {
+        pu1_ref_u = ps_pred->pu1_dma_dest_addr + ps_pred->u1_mc_addr_ofst;
+        pu1_ref_v = pu1_ref_u + ps_pred->u2_u1_ref_buf_wd * ps_pred->i1_dma_ht;
+        u2_ref_wd_uv = ps_pred->u2_u1_ref_buf_wd * YUV420SP_FACTOR;
+
+    }
+    else
+    {
+        pu1_ref_u = ps_pred->pu1_u_ref + ps_pred->u1_mc_addr_ofst;
+        pu1_ref_v = ps_pred->pu1_v_ref + ps_pred->u1_mc_addr_ofst;
+        u2_ref_wd_uv = ps_pred->u2_frm_wd;
+
+    }
+
+    u1_dydx = ps_pred->u1_dydx;
+
+    {
+        UWORD8 uc_dx, uc_dy;
+        UWORD8 *pu1_scratch_v, *pu1_scratch_u;
+
+        /* Chroma u1_dydx is packed as 8*dy + dx, so dx occupies the low */
+        /* three bits (the motion compensation routines mask with 0x7 too). */
+        uc_dx = u1_dydx & 0x7;
+        uc_dy = u1_dydx >> 3;
+        if(u1_dydx != 0)
+        {
+            /* ps_pred was incremented above, so this is the entry just */
+            /* before the luma entry that was passed in */
+            pred_info_t * ps_prv_pred = ps_pred - 2;
+            pu1_scratch_u = ps_prv_pred->pu1_dma_dest_addr
+                            + ps_prv_pred->u1_mc_addr_ofst;
+            pu1_scratch_v = pu1_scratch_u
+                            + ps_prv_pred->u2_u1_ref_buf_wd
+                                            * ps_prv_pred->i1_dma_ht;
+            ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_scratch_u,
+                                         u2_ref_wd_uv, 16, uc_dx, uc_dy, 8,
+                                         8);
+
+            /* Modify ref pointer and refWidth to point to scratch */
+            /* buffer to be used by the copy loop below */
+            /* CHANGED CODE */
+            pu1_ref_u = pu1_scratch_u;
+            pu1_ref_v = pu1_scratch_v;
+            u2_ref_wd_uv = 8 * YUV420SP_FACTOR;
+        }
+    }
+    {
+        for(i = 0; i < 4; i++)
+        {
+            for(j = 0; j < 4; j++)
+            {
+
+                uc_cond = u2_mask & 1;
+                u2_mask >>= 1;
+                if(uc_cond)
+                {
+                    /* Luma: rows 1..3 first, then row 0 while stepping */
+                    /* both pointers 4 bytes to the next block */
+                    *(UWORD32 *)(pu1_dest_y + u2_dest_wd_y) =
+                                    *(UWORD32 *)(pu1_ref_y + u2_ref_wd_y);
+                    *(UWORD32 *)(pu1_dest_y + 2 * u2_dest_wd_y) =
+                                    *(UWORD32 *)(pu1_ref_y + 2 * u2_ref_wd_y);
+                    *(UWORD32 *)(pu1_dest_y + 3 * u2_dest_wd_y) =
+                                    *(UWORD32 *)(pu1_ref_y + 3 * u2_ref_wd_y);
+                    {
+                        UWORD32 *dst, *src;
+                        dst = (UWORD32 *)pu1_dest_y;
+                        src = (UWORD32 *)pu1_ref_y;
+                        *dst = *src;
+                        dst++;
+                        src++;
+                        pu1_dest_y = (UWORD8 *)dst;
+                        pu1_ref_y = (UWORD8 *)src;
+                    }
+                    /* Chroma: row 1, then row 0 (4 interleaved bytes each) */
+                    *(UWORD32 *)(pu1_dest_u + u2_dest_wd_uv) =
+                                    *(UWORD32 *)(pu1_ref_u + u2_ref_wd_uv);
+                    {
+                        UWORD32 *dst, *src;
+                        dst = (UWORD32 *)pu1_dest_u;
+                        src = (UWORD32 *)pu1_ref_u;
+                        *dst = *src;
+                        dst++;
+                        src++;
+                        pu1_dest_u = (UWORD8 *)dst;
+                        pu1_ref_u = (UWORD8 *)src;
+                    }
+
+                }
+                else
+                {
+                    /* Block not selected: just step past it */
+                    pu1_dest_y += 4;
+                    pu1_ref_y += 4;
+                    pu1_dest_u += 2 * YUV420SP_FACTOR;
+                    pu1_ref_u += 2 * YUV420SP_FACTOR;
+                    pu1_dest_v += 2;
+                    pu1_ref_v += 2;
+                }
+            }
+            /* Rewind to column 0 and go down 4 luma rows / 2 chroma rows */
+            pu1_ref_y += 4 * (u2_ref_wd_y - 4);
+            pu1_ref_u += 2 * (u2_ref_wd_uv - 4 * YUV420SP_FACTOR);
+            pu1_ref_v += 2 * (u2_ref_wd_uv - 4);
+            pu1_dest_y += 4 * (u2_dest_wd_y - 4);
+            pu1_dest_u += 2 * (u2_dest_wd_uv - 4 * YUV420SP_FACTOR);
+            pu1_dest_v += 2 * (u2_dest_wd_uv - 4);
+        }
+    }
+}
+
+/*!
+ **************************************************************************
+ * \brief
+ *    Pad-on-demand: replicates one row of the scratch buffer at
+ *    ps_pred->pu1_dma_dest_addr into |ps_pred->i1_pod_ht| rows, 32 bits at a
+ *    time.
+ *    i1_pod_ht < 0 (top):    row |i1_pod_ht| is copied into rows
+ *                            0 .. |i1_pod_ht| - 1.
+ *    i1_pod_ht > 0 (bottom): row (i1_dma_ht - i1_pod_ht - 1) is copied into
+ *                            the last i1_pod_ht rows.
+ *
+ * \param ps_pred        pred entry describing the scratch buffer
+ * \param lum_chrom_blk  CHROM_BLK scales the row width by YUV420SP_FACTOR
+ *                       (interleaved chroma); any other value uses the
+ *                       plain luma width
+ **************************************************************************
+ */
+void ih264d_pad_on_demand(pred_info_t *ps_pred, UWORD8 lum_chrom_blk)
+{
+    if(CHROM_BLK == lum_chrom_blk)
+    {
+        /* Only one interleaved buffer is padded here; the separate V-plane */
+        /* pointers the original computed were never read and are dropped. */
+        UWORD32 *pu4_pod_src_u, *pu4_pod_dst_u;
+        WORD32 j, u1_wd_stride;
+        WORD32 i, u1_dma_ht, i1_ht;
+
+        /* Row width in 32-bit words */
+        u1_wd_stride = (ps_pred->u2_u1_ref_buf_wd >> 2) * YUV420SP_FACTOR;
+        u1_dma_ht = ps_pred->i1_dma_ht;
+        pu4_pod_src_u = (UWORD32 *)ps_pred->pu1_dma_dest_addr;
+        pu4_pod_dst_u = pu4_pod_src_u;
+
+        i1_ht = ps_pred->i1_pod_ht;
+        pu4_pod_src_u -= u1_wd_stride * i1_ht;
+        if(i1_ht < 0)
+            /* Top POD */
+            i1_ht = -i1_ht;
+        else
+        {
+            /* Bottom POD */
+            pu4_pod_src_u += (u1_dma_ht - 1) * u1_wd_stride;
+            pu4_pod_dst_u += (u1_dma_ht - i1_ht) * u1_wd_stride;
+        }
+
+        for(i = 0; i < i1_ht; i++)
+            for(j = 0; j < u1_wd_stride; j++)
+            {
+                *pu4_pod_dst_u++ = *(pu4_pod_src_u + j);
+
+            }
+    }
+    else
+    {
+        UWORD32 *pu4_pod_src, *pu4_pod_dst;
+        WORD32 j, u1_wd_stride;
+        WORD32 i, i1_ht;
+        pu4_pod_src = (UWORD32 *)ps_pred->pu1_dma_dest_addr;
+        pu4_pod_dst = pu4_pod_src;
+        /* Row width in 32-bit words */
+        u1_wd_stride = ps_pred->u2_u1_ref_buf_wd >> 2;
+        i1_ht = ps_pred->i1_pod_ht;
+        pu4_pod_src -= u1_wd_stride * i1_ht;
+        if(i1_ht < 0)
+            /* Top POD */
+            i1_ht = -i1_ht;
+        else
+        {
+            /* Bottom POD */
+            pu4_pod_src += (ps_pred->i1_dma_ht - 1) * u1_wd_stride;
+            pu4_pod_dst += (ps_pred->i1_dma_ht - i1_ht) * u1_wd_stride;
+        }
+
+        for(i = 0; i < i1_ht; i++)
+            for(j = 0; j < u1_wd_stride; j++)
+                *pu4_pod_dst++ = *(pu4_pod_src + j);
+    }
+}
+
diff --git a/decoder/ih264d_inter_pred.h b/decoder/ih264d_inter_pred.h
new file mode 100755
index 0000000..52d648a
--- /dev/null
+++ b/decoder/ih264d_inter_pred.h
@@ -0,0 +1,93 @@
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +#ifndef _IH264D_INTER_PRED_H_ +#define _IH264D_INTER_PRED_H_ + +/*! + ************************************************************************** + * \file ih264d_inter_pred.h + * + * \brief + * Declarations for the inter prediction / motion compensation routines + * + * Detailed_description + * + * \date + * creation_date + * + * \author Arvind Raman + ************************************************************************** + */ + +#include "ih264d_structs.h" + +#define BUFFER_WIDTH 16 +/*! + ************************************************************************** + * \brief PRED_BUFFER_WIDTH / HEIGHT + * + * Width and height of the 16 bit buffer (also reused as two 8 bit buffers). The + * required dimensions for these buffers are 21x21, however to align the + * start of every row to a WORD aligned boundary the width has been increased + * to 24. 
+ ************************************************************************** + */ +//#define PRED_BUFFER_WIDTH 24 +//#define PRED_BUFFER_HEIGHT 21 +#define PRED_BUFFER_WIDTH 24*2 +#define PRED_BUFFER_HEIGHT 24*2 /* NOTE(review): the active values are 24*2, not the 24 / 21 described above, and the expansions are unparenthesized - take care when using them inside larger expressions */ + +void ih264d_fill_pred_info(WORD16 *pi2_mv,WORD32 part_width,WORD32 part_height, WORD32 sub_mb_num, + WORD32 pred_dir,pred_info_pkd_t *ps_pred_pkd,WORD8 i1_buf_id, + WORD8 i1_ref_idx,UWORD32 *pu4_wt_offset,UWORD8 u1_pic_type); + +WORD32 ih264d_form_mb_part_info_bp(pred_info_pkd_t *ps_pred_pkd, + dec_struct_t * ps_dec, + UWORD16 u2_mb_x, + UWORD16 u2_mb_y, + WORD32 mb_index, + dec_mb_info_t *ps_cur_mb_info); + +WORD32 ih264d_form_mb_part_info_mp(pred_info_pkd_t *ps_pred_pkd, + dec_struct_t * ps_dec, + UWORD16 u2_mb_x, + UWORD16 u2_mb_y, + WORD32 mb_index, + dec_mb_info_t *ps_cur_mb_info); + + +void ih264d_motion_compensate_bp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info); +void ih264d_motion_compensate_mp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info); + + +void TransferRefBuffs(dec_struct_t *ps_dec); + +void ih264d_multiplex_ref_data(dec_struct_t * ps_dec, + pred_info_t *ps_pred, + UWORD8* pu1_dest_y, + UWORD8* pu1_dest_u, + UWORD8* pu1_dest_v, + dec_mb_info_t *ps_cur_mb_info, + UWORD16 u2_dest_wd_y, + UWORD16 u2_dest_wd_uv, + UWORD8 u1_dir); +#endif /* _IH264D_INTER_PRED_H_ */ + diff --git a/decoder/ih264d_mb_utils.c b/decoder/ih264d_mb_utils.c new file mode 100755 index 0000000..4cbfca5 --- /dev/null +++ b/decoder/ih264d_mb_utils.c @@ -0,0 +1,1496 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! + ************************************************************************** + * \file ih264d_mb_utils.c + * + * \brief + * Contains utility functions needed for Macroblock decoding + * + * \date + * 18/12/2002 + * + * \author AI + ************************************************************************** + */ +#include <string.h> +#include <stdlib.h> +#include "ih264d_bitstrm.h" +#include "ih264d_defs.h" +#include "ih264d_debug.h" +#include "ih264d_structs.h" +#include "ih264d_defs.h" +#include "ih264d_mb_utils.h" +#include "ih264d_parse_slice.h" +#include "ih264d_error_handler.h" +#include "ih264d_parse_mb_header.h" +#include "ih264d_cabac.h" +#include "ih264d_defs.h" +#include "ih264d_tables.h" /* NOTE(review): ih264d_defs.h is included three times in the list above */ + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_get_mb_info_cavlc_nonmbaff */ +/* */ +/* Description : This function sets the following information of cur MB */ +/* (a) mb_x and mb_y */ +/* (b) Neighbour availability */ +/* (c) Macroblock location in the frame buffer */ +/* (e) For mbaff predicts field/frame u4_flag for topMb */ +/* and sets the field/frame for botMb. 
This is */ +/* written in ps_dec->u1_cur_mb_fld_dec_flag */ +/* */ +/* Inputs : pointer to decstruct */ +/* pointer to current mb info */ +/* currentMbaddress */ +/* u4_mbskip_run (not used by this function) */ +/* */ +/* Processing : leftMb and TopMb params are used by DecMbskip and */ +/* DecCtxMbfield modules so that these modules do not */ +/* check for neighbour availability and then find the */ +/* neighbours for context increments */ +/* */ +/* Returns : OK */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 13 07 2002 Jay Draft */ +/* */ +/*****************************************************************************/ + +UWORD32 ih264d_get_mb_info_cavlc_nonmbaff(dec_struct_t *ps_dec, + const UWORD16 u2_cur_mb_address, + dec_mb_info_t * ps_cur_mb_info, + UWORD32 u4_mbskip_run) +{ + UWORD16 u2_mb_x; + UWORD16 u2_mb_y; + UWORD8 u1_mb_ngbr_avail = 0; + UWORD16 u2_frm_width_in_mb = ps_dec->u2_frm_wd_in_mbs; + WORD16 i2_prev_slice_mbx = ps_dec->i2_prev_slice_mbx; + UWORD16 u2_top_right_mask = TOP_RIGHT_DEFAULT_AVAILABLE; + UWORD16 u2_top_left_mask = TOP_LEFT_DEFAULT_AVAILABLE; + UNUSED(u4_mbskip_run); /* the skip run is not needed in the non-MBAFF CAVLC path */ + /*--------------------------------------------------------------------*/ + /* Calculate values of mb_x and mb_y */ + /*--------------------------------------------------------------------*/ + u2_mb_x = ps_dec->u2_mbx; + u2_mb_y = ps_dec->u2_mby; + + if(ps_dec->u1_separate_parse) + { + ps_dec->u2_cur_mb_addr = u2_cur_mb_address; + } + u2_mb_x++; /* step to the next MB; wraps to column 0 of the next row below */ + + if(u2_mb_x == u2_frm_width_in_mb) + { + u2_mb_x = 0; + u2_mb_y++; + } + if(u2_mb_y > ps_dec->i2_prev_slice_mby) + { + /* if not in the immediate row of prev slice end then top + will be available */ + if(u2_mb_y > (ps_dec->i2_prev_slice_mby + 1)) + i2_prev_slice_mbx = -1; /* with -1 every column passes the top / top-left comparisons below */ + + if(u2_mb_x > i2_prev_slice_mbx) + { + u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK; + u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE; + u2_top_left_mask |= 
TOP_LEFT_TOP_AVAILABLE; + } + + if((u2_mb_x > (i2_prev_slice_mbx - 1)) + && (u2_mb_x != (u2_frm_width_in_mb - 1))) /* no top-right neighbour in the last column */ + { + u1_mb_ngbr_avail |= TOP_RIGHT_MB_AVAILABLE_MASK; + u2_top_right_mask |= TOP_RIGHT_TOPR_AVAILABLE; + } + + if(u2_mb_x > (i2_prev_slice_mbx + 1)) + { + u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK; + u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE; + } + + /* Next row Left will be available*/ + i2_prev_slice_mbx = -1; /* so the same-row test below reduces to u2_mb_x > 0 */ + } + + /* Same row */ + if(u2_mb_x > (i2_prev_slice_mbx + 1)) + { + u1_mb_ngbr_avail |= LEFT_MB_AVAILABLE_MASK; + u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE; + } + + { + mb_neigbour_params_t *ps_cur_mb_row = ps_dec->ps_cur_mb_row; + mb_neigbour_params_t *ps_top_mb_row = ps_dec->ps_top_mb_row; + + /* copy the parameters of topleft Mb */ + ps_cur_mb_info->u1_topleft_mbtype = ps_dec->u1_topleft_mbtype; + /* Neighbour pointer assignments*/ + ps_cur_mb_info->ps_curmb = ps_cur_mb_row + u2_mb_x; + ps_cur_mb_info->ps_left_mb = ps_cur_mb_row + u2_mb_x - 1; + ps_cur_mb_info->ps_top_mb = ps_top_mb_row + u2_mb_x; + ps_cur_mb_info->ps_top_right_mb = ps_top_mb_row + u2_mb_x + 1; + + /* Update the parameters of topleftmb*/ + ps_dec->u1_topleft_mbtype = ps_cur_mb_info->ps_top_mb->u1_mb_type; /* the current top MB is the next call's top-left */ + } + + ps_dec->u2_mby = u2_mb_y; + ps_dec->u2_mbx = u2_mb_x; + ps_cur_mb_info->u2_mbx = u2_mb_x; + ps_cur_mb_info->u2_mby = u2_mb_y; + ps_cur_mb_info->u1_topmb = 1; /* always 1 in the non-MBAFF path */ + ps_dec->i4_submb_ofst += SUB_BLK_SIZE; + ps_dec->u1_mb_ngbr_availablity = u1_mb_ngbr_avail; + ps_cur_mb_info->u1_mb_ngbr_availablity = u1_mb_ngbr_avail; + ps_cur_mb_info->ps_curmb->u1_mb_fld = ps_dec->u1_cur_mb_fld_dec_flag; + ps_cur_mb_info->u1_mb_field_decodingflag = ps_dec->u1_cur_mb_fld_dec_flag; + ps_cur_mb_info->u2_top_left_avail_mask = u2_top_left_mask; + ps_cur_mb_info->u2_top_right_avail_mask = u2_top_right_mask; + return (OK); + +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_get_mb_info_cavlc_mbaff */ +/* */ +/* Description : 
This function sets the following information of cur MB */ +/* (a) mb_x and mb_y */ +/* (b) Neighbour availability */ +/* (c) Macroblock location in the frame buffer */ +/* (e) For mbaff predicts field/frame u4_flag for topMb */ +/* and sets the field/frame for botMb. This is */ +/* written in ps_dec->u1_cur_mb_fld_dec_flag */ +/* */ +/* Inputs : pointer to decstruct */ +/* pointer to current mb info */ +/* currentMbaddress (even = top MB, odd = bottom MB) */ +/* u4_mbskip_run (field flag is read when it is <= 1) */ +/* */ +/* Processing : leftMb and TopMb params are used by DecMbskip and */ +/* DecCtxMbfield modules so that these modules do not */ +/* check for neighbour availability and then find the */ +/* neighbours for context increments */ +/* */ +/* Returns : OK */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 13 07 2002 Jay Draft */ +/* */ +/*****************************************************************************/ + +UWORD32 ih264d_get_mb_info_cavlc_mbaff(dec_struct_t *ps_dec, + const UWORD16 u2_cur_mb_address, + dec_mb_info_t * ps_cur_mb_info, + UWORD32 u4_mbskip_run) +{ + UWORD16 u2_mb_x; + UWORD16 u2_mb_y; + UWORD8 u1_mb_ngbr_avail = 0; + UWORD16 u2_frm_width_in_mb = ps_dec->u2_frm_wd_in_mbs; + + UWORD8 u1_top_mb = 1 - (u2_cur_mb_address & 0x01); /* 1 for even MB addresses, 0 for odd ones */ + WORD16 i2_prev_slice_mbx = ps_dec->i2_prev_slice_mbx; + UWORD8 u1_cur_mb_field = 0; + UWORD16 u2_top_right_mask = TOP_RIGHT_DEFAULT_AVAILABLE; + UWORD16 u2_top_left_mask = TOP_LEFT_DEFAULT_AVAILABLE; + + /*--------------------------------------------------------------------*/ + /* Calculate values of mb_x and mb_y */ + /*--------------------------------------------------------------------*/ + u2_mb_x = ps_dec->u2_mbx; + u2_mb_y = ps_dec->u2_mby; + + if(ps_dec->u1_separate_parse) + { + ps_dec->u2_cur_mb_addr = u2_cur_mb_address; + } + + + if(u1_top_mb) + { + u2_mb_x++; /* the position only advances on the top MB */ + if(u2_mb_x == u2_frm_width_in_mb) + { + u2_mb_x = 0; + u2_mb_y += 2; /* rows advance in steps of two */ + } + if(u2_mb_y > 
ps_dec->i2_prev_slice_mby) + { + /* if not in the immediate row of prev slice end then top + will be available */ + if(u2_mb_y > (ps_dec->i2_prev_slice_mby + 2)) + i2_prev_slice_mbx = -1; + if(u2_mb_x > i2_prev_slice_mbx) + { + u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK; + u1_cur_mb_field = ps_dec->ps_top_mb_row[u2_mb_x << 1].u1_mb_fld; /* initial field flag taken from the top row entry */ + u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE; + u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE; + } + if((u2_mb_x > (i2_prev_slice_mbx - 1)) + && (u2_mb_x != (u2_frm_width_in_mb - 1))) + { + u1_mb_ngbr_avail |= TOP_RIGHT_MB_AVAILABLE_MASK; + u2_top_right_mask |= TOP_RIGHT_TOPR_AVAILABLE; + } + + if(u2_mb_x > (i2_prev_slice_mbx + 1)) + { + u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK; + u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE; + } + + i2_prev_slice_mbx = -1; /* so the same-row test below reduces to u2_mb_x > 0 */ + } + /* Same row */ + if(u2_mb_x > (i2_prev_slice_mbx + 1)) + { + u1_mb_ngbr_avail |= LEFT_MB_AVAILABLE_MASK; + u1_cur_mb_field = + ps_dec->ps_cur_mb_row[(u2_mb_x << 1) - 1].u1_mb_fld; /* the left entry, when available, overrides the value taken from the top */ + u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE; + } + /* Read u1_cur_mb_field from the bitstream if u4_mbskip_run <= 1*/ + if(u4_mbskip_run <= 1) + u1_cur_mb_field = (UWORD8)ih264d_get_bit_h264(ps_dec->ps_bitstrm); + + ps_dec->u1_cur_mb_fld_dec_flag = u1_cur_mb_field; + ps_dec->u2_top_left_mask = u2_top_left_mask; /* saved for the bottom MB call, which reloads them in the else branch */ + ps_dec->u2_top_right_mask = u2_top_right_mask; + } + else + { + u1_mb_ngbr_avail = ps_dec->u1_mb_ngbr_availablity; /* bottom MB: start from the values stored in ps_dec by the previous call */ + u1_cur_mb_field = ps_dec->u1_cur_mb_fld_dec_flag; + u2_top_left_mask = ps_dec->u2_top_left_mask; + u2_top_right_mask = ps_dec->u2_top_right_mask; + + if(!u1_cur_mb_field) + { + /* Top is available */ + u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK; + u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE; + u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE; + /* Top Right not available */ + u1_mb_ngbr_avail &= TOP_RT_SUBBLOCK_MASK_MOD; + u2_top_right_mask &= (~TOP_RIGHT_TOPR_AVAILABLE); + + if(u1_mb_ngbr_avail & LEFT_MB_AVAILABLE_MASK) + { + u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK; 
+ u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE; + u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE; + } + } + } + + ps_dec->u2_mby = u2_mb_y; + ps_dec->u2_mbx = u2_mb_x; + ps_cur_mb_info->u2_mbx = u2_mb_x; + ps_cur_mb_info->u2_mby = u2_mb_y; + ps_cur_mb_info->u1_topmb = u1_top_mb; + ps_dec->i4_submb_ofst += SUB_BLK_SIZE; + ps_dec->u1_mb_ngbr_availablity = u1_mb_ngbr_avail; + ps_cur_mb_info->u1_mb_ngbr_availablity = u1_mb_ngbr_avail; + ps_cur_mb_info->u1_mb_field_decodingflag = u1_cur_mb_field; + ps_cur_mb_info->u2_top_left_avail_mask = u2_top_left_mask; + ps_cur_mb_info->u2_top_right_avail_mask = u2_top_right_mask; + ih264d_get_mbaff_neighbours(ps_dec, ps_cur_mb_info, u1_cur_mb_field); /* neighbour pointers are set by this helper in the MBAFF path */ + return (OK); +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_get_mb_info_cabac_nonmbaff */ +/* */ +/* Description : This function sets the following information of cur MB */ +/* (a) mb_x and mb_y */ +/* (b) Neighbour availability */ +/* (c) Macroblock location in the frame buffer */ +/* (e) leftMb params and TopMb params of curMB */ +/* (f) For Mbaff case leftMb params and TopMb params of */ +/* bottomMb are also set if curMB is top */ +/* (g) For mbaff predicts field/frame u4_flag for topMb */ +/* and sets the field/frame for botMb. 
This is */ +/* written in ps_dec->u1_cur_mb_fld_dec_flag */ +/* */ +/* Inputs : pointer to decstruct */ +/* pointer to current mb info */ +/* currentMbaddress */ +/* u4_mbskip (non-zero: decode the MB skip flag) */ +/* */ +/* Processing : leftMb and TopMb params are used by DecMbskip and */ +/* DecCtxMbfield modules so that these modules do not */ +/* check for neighbour availability and then find the */ +/* neighbours for context increments */ +/* */ +/* Returns : u4_mbskip - returned unchanged when passed in as 0, */ +/* otherwise the skip flag decoded from the bitstream */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 13 07 2002 Jay Draft */ +/* */ +/*****************************************************************************/ +UWORD32 ih264d_get_mb_info_cabac_nonmbaff(dec_struct_t *ps_dec, + const UWORD16 u2_cur_mb_address, + dec_mb_info_t * ps_cur_mb_info, + UWORD32 u4_mbskip) +{ + WORD32 u2_mb_x; + WORD32 u2_mb_y; + UWORD32 u1_mb_ngbr_avail = 0; + UWORD32 u2_frm_width_in_mb = ps_dec->u2_frm_wd_in_mbs; + UWORD32 u1_top_mb = 1; + WORD32 i2_prev_slice_mbx = ps_dec->i2_prev_slice_mbx; + UWORD32 u2_top_right_mask = TOP_RIGHT_DEFAULT_AVAILABLE; + UWORD32 u2_top_left_mask = TOP_LEFT_DEFAULT_AVAILABLE; + ctxt_inc_mb_info_t * const p_ctx_inc_mb_map = ps_dec->p_ctxt_inc_mb_map; + + /*--------------------------------------------------------------------*/ + /* Calculate values of mb_x and mb_y */ + /*--------------------------------------------------------------------*/ + u2_mb_x = (WORD16)ps_dec->u2_mbx; + u2_mb_y = ps_dec->u2_mby; + + if(ps_dec->u1_separate_parse) + { + ps_dec->u2_cur_mb_addr = u2_cur_mb_address; + } + + u2_mb_x++; /* step to the next MB; wraps to column 0 of the next row below */ + if((UWORD32)u2_mb_x == u2_frm_width_in_mb) + { + u2_mb_x = 0; + u2_mb_y++; + } + /*********************************************************************/ + /* Cabac Context Initialisations */ + /*********************************************************************/ + ps_dec->ps_curr_ctxt_mb_info = p_ctx_inc_mb_map + u2_mb_x; + ps_dec->p_left_ctxt_mb_info = 
p_ctx_inc_mb_map - 1; + ps_dec->p_top_ctxt_mb_info = p_ctx_inc_mb_map - 1; /* left and top both default to the entry just before the map; replaced below when the neighbour is available */ + + /********************************************************************/ + /* neighbour availability */ + /********************************************************************/ + if(u2_mb_y > ps_dec->i2_prev_slice_mby) + { + /* if not in the immediate row of prev slice end then top + will be available */ + if(u2_mb_y > (ps_dec->i2_prev_slice_mby + 1)) + i2_prev_slice_mbx = -1; + + if(u2_mb_x > i2_prev_slice_mbx) + { + u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK; + u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE; + u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE; + ps_dec->p_top_ctxt_mb_info = ps_dec->ps_curr_ctxt_mb_info; /* the top context is read from the current MB's own map entry */ + } + if((u2_mb_x > (i2_prev_slice_mbx - 1)) + && ((UWORD32)u2_mb_x != (u2_frm_width_in_mb - 1))) + { + u1_mb_ngbr_avail |= TOP_RIGHT_MB_AVAILABLE_MASK; + u2_top_right_mask |= TOP_RIGHT_TOPR_AVAILABLE; + } + + if(u2_mb_x > (i2_prev_slice_mbx + 1)) + { + u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK; + u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE; + } + /* Next row */ + i2_prev_slice_mbx = -1; /* so the same-row test below reduces to u2_mb_x > 0 */ + } + /* Same row */ + if(u2_mb_x > (i2_prev_slice_mbx + 1)) + { + u1_mb_ngbr_avail |= LEFT_MB_AVAILABLE_MASK; + u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE; + ps_dec->p_left_ctxt_mb_info = ps_dec->ps_curr_ctxt_mb_info - 1; + } + { + mb_neigbour_params_t *ps_cur_mb_row = ps_dec->ps_cur_mb_row; + mb_neigbour_params_t *ps_top_mb_row = ps_dec->ps_top_mb_row; + /* copy the parameters of topleft Mb */ + ps_cur_mb_info->u1_topleft_mbtype = ps_dec->u1_topleft_mbtype; + /* Neighbour pointer assignments*/ + ps_cur_mb_info->ps_curmb = ps_cur_mb_row + u2_mb_x; + ps_cur_mb_info->ps_left_mb = ps_cur_mb_row + u2_mb_x - 1; + ps_cur_mb_info->ps_top_mb = ps_top_mb_row + u2_mb_x; + ps_cur_mb_info->ps_top_right_mb = ps_top_mb_row + u2_mb_x + 1; + + /* Update the parameters of topleftmb*/ + ps_dec->u1_topleft_mbtype = ps_cur_mb_info->ps_top_mb->u1_mb_type; + } + + ps_dec->u2_mby = u2_mb_y; + ps_dec->u2_mbx = 
u2_mb_x; + ps_cur_mb_info->u2_mbx = u2_mb_x; + ps_cur_mb_info->u2_mby = u2_mb_y; + ps_cur_mb_info->u1_topmb = u1_top_mb; + ps_dec->i4_submb_ofst += SUB_BLK_SIZE; + ps_dec->u1_mb_ngbr_availablity = u1_mb_ngbr_avail; + ps_cur_mb_info->u1_mb_ngbr_availablity = u1_mb_ngbr_avail; + ps_cur_mb_info->ps_curmb->u1_mb_fld = ps_dec->u1_cur_mb_fld_dec_flag; + ps_cur_mb_info->u1_mb_field_decodingflag = ps_dec->u1_cur_mb_fld_dec_flag; + ps_cur_mb_info->u2_top_left_avail_mask = u2_top_left_mask; + ps_cur_mb_info->u2_top_right_avail_mask = u2_top_right_mask; + + /*********************************************************************/ + /* Assign the neighbours */ + /*********************************************************************/ + if(u4_mbskip) + { + UWORD32 u4_ctx_inc = + 2 + - ((!!(ps_dec->p_top_ctxt_mb_info->u1_mb_type + & CAB_SKIP_MASK)) + + (!!(ps_dec->p_left_ctxt_mb_info->u1_mb_type + & CAB_SKIP_MASK))); /* 2 minus the number of neighbours (top, left) whose u1_mb_type has a CAB_SKIP_MASK bit set */ + + u4_mbskip = ih264d_decode_bin(u4_ctx_inc, ps_dec->p_mb_skip_flag_t, + ps_dec->ps_bitstrm, &ps_dec->s_cab_dec_env); + + if(!u4_mbskip) + { + if(!(u1_mb_ngbr_avail & LEFT_MB_AVAILABLE_MASK)) + { /* MB is not skipped and has no left neighbour: clear the left-side context state */ + UWORD32 *pu4_buf; + UWORD8 *pu1_buf; + + pu1_buf = ps_dec->pu1_left_nnz_y; + pu4_buf = (UWORD32 *)pu1_buf; + *pu4_buf = 0; + pu1_buf = ps_dec->pu1_left_nnz_uv; + pu4_buf = (UWORD32 *)pu1_buf; + *pu4_buf = 0; + + + *(ps_dec->pu1_left_yuv_dc_csbp) = 0; + MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0); + *(UWORD32 *)ps_dec->pi1_left_ref_idx_ctxt_inc = 0; + } + if(!(u1_mb_ngbr_avail & TOP_MB_AVAILABLE_MASK)) + { /* no top neighbour: clear the mv / ref_idx context held in the current map entry */ + MEMSET_16BYTES(ps_dec->ps_curr_ctxt_mb_info->u1_mv, 0); + memset(ps_dec->ps_curr_ctxt_mb_info->i1_ref_idx, 0, 4); + } + } + } + return (u4_mbskip); +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_get_mb_info_cabac_mbaff */ +/* */ +/* Description : This function sets the following information of cur MB */ +/* (a) mb_x and mb_y */ +/* (b) Neighbour availability */ +/* (c) Macroblock location in the frame 
buffer */ +/* (e) leftMb params and TopMb params of curMB */ +/* (f) For Mbaff case leftMb params and TopMb params of */ +/* bottomMb are also set if curMB is top */ +/* (g) For mbaff predicts field/frame u4_flag for topMb */ +/* and sets the field/frame for botMb. This is */ +/* written in ps_dec->u1_cur_mb_fld_dec_flag */ +/* */ +/* Inputs : pointer to decstruct */ +/* pointer to current mb info */ +/* currentMbaddress (even = top MB, odd = bottom MB) */ +/* u4_mbskip (non-zero: determine the MB skip flag) */ +/* */ +/* Processing : leftMb and TopMb params are used by DecMbskip and */ +/* DecCtxMbfield modules so that these modules do not */ +/* check for neighbour availability and then find the */ +/* neighbours for context increments */ +/* */ +/* Returns : u4_mbskip - returned unchanged when passed in as 0, */ +/* otherwise the skip flag of the current MB */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 13 07 2002 Jay Draft */ +/* */ +/*****************************************************************************/ + +UWORD32 ih264d_get_mb_info_cabac_mbaff(dec_struct_t *ps_dec, + const UWORD16 u2_cur_mb_address, + dec_mb_info_t * ps_cur_mb_info, + UWORD32 u4_mbskip) +{ + UWORD16 u2_mb_x; + UWORD16 u2_mb_y; + UWORD8 u1_mb_ngbr_avail = 0; + UWORD16 u2_frm_width_in_mb = ps_dec->u2_frm_wd_in_mbs; + ctxt_inc_mb_info_t * const p_ctx_inc_mb_map = ps_dec->p_ctxt_inc_mb_map; + ctxt_inc_mb_info_t *ps_curr_ctxt, *ps_top_ctxt, *ps_left_ctxt; + mb_neigbour_params_t *ps_cur_mb_row = ps_dec->ps_cur_mb_row; + mb_neigbour_params_t *ps_top_mb_row = ps_dec->ps_top_mb_row; + UWORD32 u4_left_mb_pair_fld = 0; + UWORD32 u4_top_mb_pair_fld = 0; + UWORD8 u1_cur_mb_field = 0; + UWORD8 u1_top_mb = 1 - (u2_cur_mb_address & 0x01); /* 1 for even MB addresses, 0 for odd ones */ + WORD16 i2_prev_slice_mbx = ps_dec->i2_prev_slice_mbx; + UWORD16 u2_top_right_mask = TOP_RIGHT_DEFAULT_AVAILABLE; + UWORD16 u2_top_left_mask = TOP_LEFT_DEFAULT_AVAILABLE; + + /*--------------------------------------------------------------------*/ + /* Calculate values of mb_x and mb_y */ + 
/*--------------------------------------------------------------------*/ + u2_mb_x = ps_dec->u2_mbx; + u2_mb_y = ps_dec->u2_mby; + + if(ps_dec->u1_separate_parse) + { + ps_dec->u2_cur_mb_addr = u2_cur_mb_address; + } + + ps_top_ctxt = ps_left_ctxt = p_ctx_inc_mb_map - 1; /* both default to the entry just before the map; replaced below when the neighbour is available */ + + if(u1_top_mb) + { + ctxt_inc_mb_info_t *ps_left_mb_of_bot = ps_left_ctxt; + ctxt_inc_mb_info_t *ps_top_mb_of_bot = ps_top_ctxt; + + u2_mb_x++; /* the position only advances on the top MB */ + + if(u2_mb_x == u2_frm_width_in_mb) + { + u2_mb_x = 0; + u2_mb_y += 2; /* rows advance in steps of two */ + } + + ps_curr_ctxt = p_ctx_inc_mb_map + (u2_mb_x << 1); /* two map entries per column: top MB at 2*x, bottom MB at 2*x + 1 */ + if(u2_mb_y > ps_dec->i2_prev_slice_mby) + { + UWORD8 u1_cur_mb_fld_flag_known = 0; + /* Next row */ + if(u2_mb_x > 0) + { + /***********************************************************************/ + /* Left Mb is available */ + /***********************************************************************/ + u1_mb_ngbr_avail |= LEFT_MB_AVAILABLE_MASK; + ps_left_ctxt = ps_curr_ctxt - 2; + ps_left_mb_of_bot = ps_curr_ctxt - 1; + u1_cur_mb_field = u4_left_mb_pair_fld = ps_cur_mb_row[(u2_mb_x + << 1) - 1].u1_mb_fld; + u1_cur_mb_fld_flag_known = 1; /* the value from the left entry takes precedence over the top one below */ + u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE; + } + /* if not in the immediate row of prev slice end then top + will be available */ + if(u2_mb_y > (ps_dec->i2_prev_slice_mby + 2)) + i2_prev_slice_mbx = -1; + if(u2_mb_x > i2_prev_slice_mbx) + { + /*********************************************************************/ + /* Top Mb is available */ + /*********************************************************************/ + u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK; + u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE; + u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE; + + /* point to MbAddrB + 1 */ + ps_top_ctxt = ps_curr_ctxt + 1; + u4_top_mb_pair_fld = ps_top_mb_row[(u2_mb_x << 1)].u1_mb_fld; + + u1_cur_mb_field = + u1_cur_mb_fld_flag_known ? + u1_cur_mb_field : + u4_top_mb_pair_fld; + ps_top_mb_of_bot = u1_cur_mb_field ? 
ps_top_ctxt : ps_curr_ctxt; + + /* MbAddrB */ + ps_top_ctxt -= (u1_cur_mb_field && u4_top_mb_pair_fld); + } + + if((u2_mb_x > (i2_prev_slice_mbx - 1)) + && (u2_mb_x != (u2_frm_width_in_mb - 1))) + { + u1_mb_ngbr_avail |= TOP_RIGHT_MB_AVAILABLE_MASK; + u2_top_right_mask |= TOP_RIGHT_TOPR_AVAILABLE; + } + + if(u2_mb_x > (i2_prev_slice_mbx + 1)) + { + u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK; + u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE; + } + } + else + { + /* Same row */ + if(u2_mb_x > (i2_prev_slice_mbx + 1)) + { + /***************************************************************/ + /* Left Mb is available */ + /***************************************************************/ + u1_mb_ngbr_avail |= LEFT_MB_AVAILABLE_MASK; + + u1_cur_mb_field = u4_left_mb_pair_fld = ps_cur_mb_row[(u2_mb_x + << 1) - 1].u1_mb_fld; + ps_left_ctxt = ps_curr_ctxt - 2; + ps_left_mb_of_bot = ps_curr_ctxt - 1; + u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE; + } + } + /*********************************************************/ + /* Check whether the call is from I slice or Inter slice */ + /*********************************************************/ + if(u4_mbskip) + { + UWORD32 u4_ctx_inc = 2 + - ((!!(ps_top_ctxt->u1_mb_type & CAB_SKIP_MASK)) + + (!!(ps_left_ctxt->u1_mb_type + & CAB_SKIP_MASK))); /* 2 minus the number of neighbours (top, left) whose u1_mb_type has a CAB_SKIP_MASK bit set */ + dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm; + decoding_envirnoment_t *ps_cab_dec_env = &ps_dec->s_cab_dec_env; + bin_ctxt_model_t *p_mb_skip_flag_t = ps_dec->p_mb_skip_flag_t; + + ps_dec->u4_next_mb_skip = 0; + u4_mbskip = ih264d_decode_bin(u4_ctx_inc, p_mb_skip_flag_t, + ps_bitstrm, ps_cab_dec_env); + + if(u4_mbskip) + { + UWORD32 u4_next_mbskip; + ps_curr_ctxt->u1_mb_type = CAB_SKIP; + + u4_ctx_inc = + 2 + - ((!!(ps_top_mb_of_bot->u1_mb_type + & CAB_SKIP_MASK)) + + (!!(ps_left_mb_of_bot->u1_mb_type + & CAB_SKIP_MASK))); + + /* Decode the skip u4_flag of bottom Mb */ + u4_next_mbskip = ih264d_decode_bin(u4_ctx_inc, p_mb_skip_flag_t, + ps_bitstrm, + ps_cab_dec_env); + + 
ps_dec->u4_next_mb_skip = u4_next_mbskip; /* kept in ps_dec so the bottom MB call can return it without decoding again */ + + if(!u4_next_mbskip) + { + u4_ctx_inc = u4_top_mb_pair_fld + u4_left_mb_pair_fld; + + u1_cur_mb_field = ih264d_decode_bin( + u4_ctx_inc, ps_dec->p_mb_field_dec_flag_t, + ps_bitstrm, ps_cab_dec_env); /* top MB skipped, bottom MB not skipped: the field flag is decoded here */ + } + } + } + + if(!u4_mbskip) + { + UWORD32 u4_ctx_inc = u4_top_mb_pair_fld + u4_left_mb_pair_fld; /* number of neighbouring entries (top, left) with u1_mb_fld set */ + u1_cur_mb_field = ih264d_decode_bin(u4_ctx_inc, + ps_dec->p_mb_field_dec_flag_t, + ps_dec->ps_bitstrm, + &ps_dec->s_cab_dec_env); + } + + ps_dec->u1_cur_mb_fld_dec_flag = u1_cur_mb_field; + ps_dec->u2_top_left_mask = u2_top_left_mask; /* saved for the bottom MB call, which reloads them in the else branch */ + ps_dec->u2_top_right_mask = u2_top_right_mask; + ps_dec->u2_mby = u2_mb_y; + ps_dec->u2_mbx = u2_mb_x; + } + else + { + u1_cur_mb_field = ps_dec->u1_cur_mb_fld_dec_flag; /* bottom MB: start from the values stored in ps_dec by the previous call */ + u1_mb_ngbr_avail = ps_dec->u1_mb_ngbr_availablity; + u2_top_left_mask = ps_dec->u2_top_left_mask; + u2_top_right_mask = ps_dec->u2_top_right_mask; + ps_curr_ctxt = p_ctx_inc_mb_map + (u2_mb_x << 1) + 1; + + if(u1_mb_ngbr_avail & LEFT_MB_AVAILABLE_MASK) + { + u4_left_mb_pair_fld = ps_cur_mb_row[(u2_mb_x << 1) - 1].u1_mb_fld; + + /* point to A if top else A+1 */ + ps_left_ctxt = ps_curr_ctxt - 2 + - (u4_left_mb_pair_fld != u1_cur_mb_field); + } + + if(u1_cur_mb_field) + { + if(u1_mb_ngbr_avail & TOP_MB_AVAILABLE_MASK) + { + /* point to MbAddrB + 1 */ + ps_top_ctxt = ps_curr_ctxt; + } + } + else + { + /* Top is available */ + u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK; + u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE; + u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE; + /* Top Right not available */ + u1_mb_ngbr_avail &= TOP_RT_SUBBLOCK_MASK_MOD; + u2_top_right_mask &= (~TOP_RIGHT_TOPR_AVAILABLE); + + if(u1_mb_ngbr_avail & LEFT_MB_AVAILABLE_MASK) + { + u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK; + u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE; + u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE; + } + + /* CurMbAddr - 1 */ + ps_top_ctxt = ps_curr_ctxt - 1; + } + + if(u4_mbskip) + { + if(ps_curr_ctxt[-1].u1_mb_type & CAB_SKIP_MASK) + { + /* 
If previous mb is skipped, return value of next mb skip */ + u4_mbskip = ps_dec->u4_next_mb_skip; + + } + else + { + /* If previous mb is not skipped then call DecMbSkip */ + UWORD32 u4_ctx_inc = + 2 + - ((!!(ps_top_ctxt->u1_mb_type + & CAB_SKIP_MASK)) + + (!!(ps_left_ctxt->u1_mb_type + & CAB_SKIP_MASK))); + + u4_mbskip = ih264d_decode_bin(u4_ctx_inc, + ps_dec->p_mb_skip_flag_t, + ps_dec->ps_bitstrm, + &ps_dec->s_cab_dec_env); + } + } + } + + ps_cur_mb_info->u2_mbx = u2_mb_x; + ps_cur_mb_info->u2_mby = u2_mb_y; + ps_cur_mb_info->u1_topmb = u1_top_mb; + ps_dec->i4_submb_ofst += SUB_BLK_SIZE; + ps_dec->u1_mb_ngbr_availablity = u1_mb_ngbr_avail; + ps_cur_mb_info->u1_mb_ngbr_availablity = u1_mb_ngbr_avail; + ps_cur_mb_info->u1_mb_field_decodingflag = u1_cur_mb_field; + ps_cur_mb_info->u2_top_left_avail_mask = u2_top_left_mask; + ps_cur_mb_info->u2_top_right_avail_mask = u2_top_right_mask; + + ih264d_get_mbaff_neighbours(ps_dec, ps_cur_mb_info, u1_cur_mb_field); + { + ih264d_get_cabac_context_mbaff(ps_dec, ps_cur_mb_info, u4_mbskip); + } + + { + bin_ctxt_model_t *p_cabac_ctxt_table_t = ps_dec->p_cabac_ctxt_table_t; + + if(u1_cur_mb_field) /* select the field or frame significant-coefficient context tables */ + { + p_cabac_ctxt_table_t += SIGNIFICANT_COEFF_FLAG_FLD; + } + else + { + p_cabac_ctxt_table_t += SIGNIFICANT_COEFF_FLAG_FRAME; + } + { + bin_ctxt_model_t * * p_significant_coeff_flag_t = + ps_dec->p_significant_coeff_flag_t; + p_significant_coeff_flag_t[0] = p_cabac_ctxt_table_t + + SIG_COEFF_CTXT_CAT_0_OFFSET; + p_significant_coeff_flag_t[1] = p_cabac_ctxt_table_t + + SIG_COEFF_CTXT_CAT_1_OFFSET; + p_significant_coeff_flag_t[2] = p_cabac_ctxt_table_t + + SIG_COEFF_CTXT_CAT_2_OFFSET; + p_significant_coeff_flag_t[3] = p_cabac_ctxt_table_t + + SIG_COEFF_CTXT_CAT_3_OFFSET; + p_significant_coeff_flag_t[4] = p_cabac_ctxt_table_t + + SIG_COEFF_CTXT_CAT_4_OFFSET; + p_significant_coeff_flag_t[5] = p_cabac_ctxt_table_t + + SIG_COEFF_CTXT_CAT_5_OFFSET; + + } + } + return (u4_mbskip); +} +
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_get_cabac_context_mbaff                           */
+/*                                                                           */
+/*  Description   : Gets the current macroblock Cabac Context and sets the   */
+/*                  top and left cabac context ptrs in CtxIncMbMap            */
+/*                  1. For Cross field left neighbours it alters coded block */
+/*                     flag, motion vectors, reference indices, cbp of       */
+/*                     the left neighbours which increases the code size     */
+/*                  2. For Cross field top neighbours it alters motion       */
+/*                     vectors reference indices of the top neighbours       */
+/*                     which further increases the code size                 */
+/*                                                                           */
+/*  Inputs        : 1. ps_dec: decoder state (context pointers updated)      */
+/*                  2. ps_cur_mb_info: current MB flags, neighbour pointers  */
+/*                  3. u4_mbskip: if set, MV / ref-idx ctxInc of the top and */
+/*                     of an unavailable left MB are not rewritten           */
+/*                                                                           */
+/*  Returns       : OK                                                       */
+/*                                                                           */
+/*  Issues        : code size can be reduced if ui_CodedBlockFlag storage    */
+/*                  structure in context is changed. This change however     */
+/*                  would break the parseResidual4x4Cabac asm routine.       */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         18 06 2005   Jay                                                  */
+/*                                                                           */
+/*****************************************************************************/
+UWORD32 ih264d_get_cabac_context_mbaff(dec_struct_t * ps_dec,
+                                       dec_mb_info_t *ps_cur_mb_info,
+                                       UWORD32 u4_mbskip)
+{
+    const UWORD8 u1_mb_ngbr_availablity = ps_dec->u1_mb_ngbr_availablity;
+    ctxt_inc_mb_info_t * const p_ctx_inc_mb_map = ps_dec->p_ctxt_inc_mb_map;
+    /* Both left pointers are re-assigned by the top/bottom branch below */
+    UWORD8 (*pu1_left_mv_ctxt_inc_2d)[4] = &ps_dec->pu1_left_mv_ctxt_inc[0];
+    WORD8 (*pi1_left_ref_idx_ctxt_inc) = ps_dec->pi1_left_ref_idx_ctxt_inc;
+    const UWORD8 u1_cur_mb_fld_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
+    const UWORD8 u1_topmb = ps_cur_mb_info->u1_topmb;
+    const UWORD8 uc_botMb = 1 - ps_cur_mb_info->u1_topmb;
+
+    ctxt_inc_mb_info_t * ps_leftMB;
+    /* Two map entries per MB pair: 2 * mbx is the top MB, 2 * mbx + 1 bottom */
+    ps_dec->ps_curr_ctxt_mb_info = p_ctx_inc_mb_map + (ps_dec->u2_mbx << 1);
+    ps_dec->p_top_ctxt_mb_info = ps_dec->ps_curr_ctxt_mb_info;
+
+    if(u1_topmb)
+    {
+        pu1_left_mv_ctxt_inc_2d = ps_dec->u1_left_mv_ctxt_inc_arr[0];
+        pi1_left_ref_idx_ctxt_inc = &ps_dec->i1_left_ref_idx_ctx_inc_arr[0][0];
+        ps_dec->pu1_left_yuv_dc_csbp = &ps_dec->u1_yuv_dc_csbp_topmb;
+    }
+    else
+    {
+        /* uc_botMb */
+        pu1_left_mv_ctxt_inc_2d = ps_dec->u1_left_mv_ctxt_inc_arr[1];
+        pi1_left_ref_idx_ctxt_inc = &ps_dec->i1_left_ref_idx_ctx_inc_arr[1][0];
+        ps_dec->pu1_left_yuv_dc_csbp = &ps_dec->u1_yuv_dc_csbp_bot_mb;
+        ps_dec->ps_curr_ctxt_mb_info += 1;
+    }
+
+    ps_dec->pu1_left_mv_ctxt_inc = pu1_left_mv_ctxt_inc_2d;
+    ps_dec->pi1_left_ref_idx_ctxt_inc = pi1_left_ref_idx_ctxt_inc;
+
+    if(u1_mb_ngbr_availablity & LEFT_MB_AVAILABLE_MASK)
+    {
+        const UWORD8 u1_left_mb_fld_flag = ps_cur_mb_info->ps_left_mb->u1_mb_fld;
+        /* Entry of the left pair with the same top/bottom parity as current */
+        ps_leftMB = ps_dec->ps_curr_ctxt_mb_info - 2;
+        if(u1_left_mb_fld_flag != u1_cur_mb_fld_flag)
+        {
+            ctxt_inc_mb_info_t *ps_tempLeft;
+            UWORD8 u1_cbp_t, u1_cbp_b;
+            UWORD8 u1_cr_cpb;
+            /* Frame/field mismatch: start from the top MB of the left pair */
+            ps_leftMB -= uc_botMb;
+            ps_tempLeft = ps_dec->ps_left_mb_ctxt_info;
+            ps_tempLeft->u1_mb_type = ps_leftMB->u1_mb_type;
+            ps_tempLeft->u1_intra_chroma_pred_mode =
+                            ps_leftMB->u1_intra_chroma_pred_mode;
+
+            ps_tempLeft->u1_transform8x8_ctxt = ps_leftMB->u1_transform8x8_ctxt;
+            /* Bits 4 and above of this cbp are re-attached after the reform */
+            u1_cr_cpb = ps_leftMB->u1_cbp;
+            /*****************************************************************/
+            /* reform RefIdx, CBP, MV and CBF ctxInc taking care of A and A+1*/
+            /*****************************************************************/
+            if(u1_cur_mb_fld_flag)
+            {
+                /* current MB is a FLD and left a FRM */
+                UWORD8 (* const pu1_left_mv_ctxt_inc_2d_arr_top)[4] =
+                                ps_dec->u1_left_mv_ctxt_inc_arr[0];
+                UWORD8 (* const pu1_left_mv_ctxt_inc_2d_arr_bot)[4] =
+                                ps_dec->u1_left_mv_ctxt_inc_arr[1];
+                WORD8 (* const i1_left_ref_idx_ctxt_inc_arr_top) =
+                                &ps_dec->i1_left_ref_idx_ctx_inc_arr[0][0];
+                WORD8 (* const i1_left_ref_idx_ctxt_inc_arr_bot) =
+                                &ps_dec->i1_left_ref_idx_ctx_inc_arr[1][0];
+
+                u1_cbp_t = ps_leftMB->u1_cbp;
+                u1_cbp_b = (ps_leftMB + 1)->u1_cbp;
+                ps_tempLeft->u1_cbp = (u1_cbp_t & 0x02)
+                                | ((u1_cbp_b & 0x02) << 2);
+
+                // set motionvectors as
+                // 0T = 0T  0B = 0T
+                // 1T = 2T  1B = 2T
+                // 2T = 0B  2B = 0B
+                // 3T = 2B  3B = 2B
+                if(u1_topmb)
+                {
+                    /********************************************/
+                    /* Bottoms  DC CBF = Top DC CBF             */
+                    /********************************************/
+                    ps_dec->u1_yuv_dc_csbp_bot_mb =
+                                    ps_dec->u1_yuv_dc_csbp_topmb;
+
+                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[3] =
+                                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d_arr_bot[2];
+                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[1] =
+                                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d_arr_top[2];
+                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[2] =
+                                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d_arr_bot[0];
+                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[0] =
+                                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d_arr_top[0];
+
+                    i1_left_ref_idx_ctxt_inc_arr_top[1] =
+                                    i1_left_ref_idx_ctxt_inc_arr_bot[0];
+                    i1_left_ref_idx_ctxt_inc_arr_top[3] =
+                                    i1_left_ref_idx_ctxt_inc_arr_bot[2];
+
+                    *(UWORD32 *)(i1_left_ref_idx_ctxt_inc_arr_bot) =
+                                    *(UWORD32 *)(i1_left_ref_idx_ctxt_inc_arr_top);
+                    /* arr[0] and arr[1] are distinct rows: memcpy is fine here */
+                    memcpy(pu1_left_mv_ctxt_inc_2d_arr_bot,
+                           pu1_left_mv_ctxt_inc_2d_arr_top, 16);
+                }
+
+                {
+                    UWORD8 i;
+                    for(i = 0; i < 4; i++)
+                    {
+                        pu1_left_mv_ctxt_inc_2d[i][1] >>= 1;
+                        pu1_left_mv_ctxt_inc_2d[i][3] >>= 1;
+                    }
+                }
+            }
+            else
+            {
+                /* current MB is a FRM and left FLD */
+                if(u1_topmb)
+                {
+                    u1_cbp_t = ps_leftMB->u1_cbp;
+                    u1_cbp_t = (u1_cbp_t & 0x02);
+                    ps_tempLeft->u1_cbp = (u1_cbp_t | (u1_cbp_t << 2));
+
+                    /********************************************/
+                    /* Bottoms  DC CBF = Top DC CBF             */
+                    /********************************************/
+                    ps_dec->u1_yuv_dc_csbp_bot_mb =
+                                    ps_dec->u1_yuv_dc_csbp_topmb;
+
+                    // set motionvectors as
+                    // 3B = 2B = 3T
+                    // 1B = 0B = 2T
+                    // 3T = 2T = 1T
+                    // 1T = 0T = 0T
+                    /* rows 4..7 reach into u1_left_mv_ctxt_inc_arr[1] (bottom MB) */
+                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[7] =
+                                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[3];
+                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[6] =
+                                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[3];
+
+                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[5] =
+                                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[2];
+                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[4] =
+                                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[2];
+
+                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[3] =
+                                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[1];
+                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[2] =
+                                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[1];
+
+                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[1] =
+                                    *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[0];
+
+                    pi1_left_ref_idx_ctxt_inc[7] = (pi1_left_ref_idx_ctxt_inc[3]
+                                    - 1);
+                    pi1_left_ref_idx_ctxt_inc[6] = (pi1_left_ref_idx_ctxt_inc[3]
+                                    - 1);
+
+                    pi1_left_ref_idx_ctxt_inc[5] = (pi1_left_ref_idx_ctxt_inc[1]
+                                    - 1);
+                    pi1_left_ref_idx_ctxt_inc[4] = (pi1_left_ref_idx_ctxt_inc[1]
+                                    - 1);
+
+                    pi1_left_ref_idx_ctxt_inc[3] = (pi1_left_ref_idx_ctxt_inc[2]
+                                    - 1);
+                    pi1_left_ref_idx_ctxt_inc[2] = (pi1_left_ref_idx_ctxt_inc[2]
+                                    - 1);
+
+                    pi1_left_ref_idx_ctxt_inc[1] = (pi1_left_ref_idx_ctxt_inc[0]
+                                    - 1);
+                    pi1_left_ref_idx_ctxt_inc[0] = (pi1_left_ref_idx_ctxt_inc[0]
+                                    - 1);
+                }
+                else
+                {
+                    u1_cbp_t = ps_leftMB->u1_cbp;
+                    u1_cbp_t = (u1_cbp_t & 0x08);
+                    ps_tempLeft->u1_cbp = (u1_cbp_t | (u1_cbp_t >> 2));
+                }
+
+                {
+                    UWORD8 i;
+                    for(i = 0; i < 4; i++)
+                    {
+                        pu1_left_mv_ctxt_inc_2d[i][1] <<= 1;
+                        pu1_left_mv_ctxt_inc_2d[i][3] <<= 1;
+                    }
+                }
+
+            }
+            /* Re-attach bits 4 and above of the left MB's original cbp */
+            ps_tempLeft->u1_cbp = ps_tempLeft->u1_cbp + ((u1_cr_cpb >> 4) << 4);
+            ps_leftMB = ps_tempLeft;
+        }
+
+        ps_dec->p_left_ctxt_mb_info = ps_leftMB;
+    }
+    else
+    {
+        ps_dec->p_left_ctxt_mb_info = p_ctx_inc_mb_map - 1;
+        if(!u4_mbskip)
+        {
+            *(ps_dec->pu1_left_yuv_dc_csbp) = 0;
+
+            MEMSET_16BYTES(&pu1_left_mv_ctxt_inc_2d[0][0], 0);
+            *(UWORD32 *)pi1_left_ref_idx_ctxt_inc = 0;
+        }
+    }
+
+    /*************************************************************************/
+    /*                Now get the top context mb info                        */
+    /*************************************************************************/
+    {
+        UWORD8 (*u1_top_mv_ctxt_inc_arr_2d)[4] =
+                        ps_dec->ps_curr_ctxt_mb_info->u1_mv;
+        WORD8 (*pi1_top_ref_idx_ctxt_inc) =
+                        ps_dec->ps_curr_ctxt_mb_info->i1_ref_idx;
+        UWORD8 uc_topMbFldDecFlag = ps_cur_mb_info->ps_top_mb->u1_mb_fld;
+
+        if(u1_mb_ngbr_availablity & TOP_MB_AVAILABLE_MASK)
+        {
+            if(ps_cur_mb_info->i1_offset)
+                ps_dec->p_top_ctxt_mb_info += 1;
+            /* Source may be the current entry itself, so memmove, not memcpy */
+            if(!u4_mbskip)
+            {
+                memmove(u1_top_mv_ctxt_inc_arr_2d,
+                        &ps_dec->p_top_ctxt_mb_info->u1_mv, 16);
+                memmove(pi1_top_ref_idx_ctxt_inc,
+                        &ps_dec->p_top_ctxt_mb_info->i1_ref_idx, 4);
+                if(uc_topMbFldDecFlag ^ u1_cur_mb_fld_flag)
+                {
+                    UWORD8 i;
+                    if(u1_cur_mb_fld_flag)
+                    {
+                        for(i = 0; i < 4; i++)
+                        {
+                            u1_top_mv_ctxt_inc_arr_2d[i][1] >>= 1;
+                            u1_top_mv_ctxt_inc_arr_2d[i][3] >>= 1;
+                        }
+                    }
+                    else
+                    {
+                        for(i = 0; i < 4; i++)
+                        {
+                            u1_top_mv_ctxt_inc_arr_2d[i][1] <<= 1;
+                            u1_top_mv_ctxt_inc_arr_2d[i][3] <<= 1;
+                            pi1_top_ref_idx_ctxt_inc[i] -= 1;
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            ps_dec->p_top_ctxt_mb_info = p_ctx_inc_mb_map - 1;
+            if(!u4_mbskip)
+            {
+
+                MEMSET_16BYTES(&u1_top_mv_ctxt_inc_arr_2d[0][0], 0);
+                memset(pi1_top_ref_idx_ctxt_inc, 0, 4);
+            }
+        }
+    }
+
+    return OK;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_update_mbaff_left_nnz                             */
+/*                                                                           */
+/*  Description   : This function updates the left luma and chroma nnz for   */
+/*                  mbaff cases.                                             */
+/*                                                                           */
+/*  Inputs        : ps_dec, ps_cur_mb_info (only u1_topmb is read)           */
+/*  Globals       : None                                                     */
+/*  Processing    : Top MB saves the packed left NNZ words; bottom MB        */
+/*                  stores saved + current words as the MB pair NNZ          */
+/*  Outputs       : u4_n_left_temp_y/uv (top) or u4_n_leftY[], u4_n_left_cr[]*/
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : NNZ bytes are read through a UWORD32 cast                */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         13 07 2002   Ittiam          Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+void ih264d_update_mbaff_left_nnz(dec_struct_t * ps_dec,
+                                  dec_mb_info_t * ps_cur_mb_info)
+{
+    if(!ps_cur_mb_info->u1_topmb)
+    {
+        /*
+         * Bottom MB of the pair: entry [0] takes the word parked while
+         * the top MB was handled, entry [1] takes the four NNZ bytes
+         * currently held in the left NNZ buffer. Luma is written first,
+         * then chroma, each parked word before the freshly read one.
+         */
+        ps_dec->u4_n_leftY[0] = ps_dec->u4_n_left_temp_y;
+        ps_dec->u4_n_leftY[1] = *(UWORD32 *)ps_dec->pu1_left_nnz_y;
+
+        ps_dec->u4_n_left_cr[0] = ps_dec->u4_n_left_temp_uv;
+        ps_dec->u4_n_left_cr[1] = *(UWORD32 *)ps_dec->pu1_left_nnz_uv;
+
+        return;
+    }
+
+    /*
+     * Top MB of the pair: park the four luma and the four chroma NNZ
+     * bytes (each group read as one 32-bit word) so that they are still
+     * available when the bottom MB of the pair calls this function.
+     */
+    ps_dec->u4_n_left_temp_y = *(UWORD32 *)ps_dec->pu1_left_nnz_y;
+
+    ps_dec->u4_n_left_temp_uv = *(UWORD32 *)ps_dec->pu1_left_nnz_uv;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_get_mbaff_neighbours \endif
+ *
+ * \brief
+ *    Gets the neighbors for the current MB if it is of type MB-AFF
+ *  frame.
+ * \param uc_curMbFldDecFlag  field decoding flag of the current MB
+ * \return
+ *    None
+ * \note  Results go to ps_cur_mb_info and ps_dec (incl. the left NNZ bytes)
+ **************************************************************************
+ */
+void ih264d_get_mbaff_neighbours(dec_struct_t * ps_dec,
+                                 dec_mb_info_t * ps_cur_mb_info,
+                                 UWORD8 uc_curMbFldDecFlag)
+{
+
+    mb_neigbour_params_t *ps_left_mb;
+    mb_neigbour_params_t *ps_top_mb;
+    mb_neigbour_params_t *ps_top_right_mb = NULL;
+    mb_neigbour_params_t *ps_curmb;
+    const UWORD8 u1_topmb = ps_cur_mb_info->u1_topmb;
+    const UWORD8 uc_botMb = 1 - u1_topmb;
+    const UWORD32 u4_mb_x = ps_cur_mb_info->u2_mbx;
+
+    /* Current MbParams location in the current MB row buffer */
+    ps_curmb = ps_dec->ps_cur_mb_row + (u4_mb_x << 1) + uc_botMb;
+    ps_left_mb = ps_curmb - 2;
+    /* point to A if top else A+1 */
+    if(uc_botMb && (ps_left_mb->u1_mb_fld != uc_curMbFldDecFlag))
+    {
+        /* move from A + 1 to A */
+        ps_left_mb--;
+    }
+    ps_cur_mb_info->i1_offset = 0;
+    if((uc_curMbFldDecFlag == 0) && uc_botMb)
+    {
+        mb_neigbour_params_t *ps_topleft_mb;
+        /* CurMbAddr - 1 */
+        ps_top_mb = ps_curmb - 1;
+
+        /* Mark Top right Not available (ps_top_right_mb stays NULL) */
+        /* point to A */
+        ps_topleft_mb = ps_curmb - 3;
+
+        if(ps_topleft_mb->u1_mb_fld)
+        {
+            /* point to A + 1 */
+            ps_topleft_mb++;
+        }
+        ps_cur_mb_info->u1_topleft_mb_fld = ps_topleft_mb->u1_mb_fld;
+        ps_cur_mb_info->u1_topleft_mbtype = ps_topleft_mb->u1_mb_type;
+    }
+    else
+    {
+        /* Top = B + 1 */
+        ps_top_mb = ps_dec->ps_top_mb_row + (u4_mb_x << 1) + 1;
+        ps_top_right_mb = ps_top_mb + 2;
+        ps_cur_mb_info->i1_offset = 4;
+        /* TopRight = C + 1 */
+
+        /* TopLeft = D+1 */
+        ps_cur_mb_info->u1_topleft_mb_fld = ps_dec->u1_topleft_mb_fld_bot;
+        ps_cur_mb_info->u1_topleft_mbtype = ps_dec->u1_topleft_mbtype_bot;
+
+        if(uc_curMbFldDecFlag && u1_topmb)
+        {
+            if(ps_top_mb->u1_mb_fld)
+            {
+                /* MbAddrB */
+                ps_top_mb--;
+                ps_cur_mb_info->i1_offset = 0;
+            }
+            /* If topright is field then point to C */
+            ps_top_right_mb -= ps_top_right_mb->u1_mb_fld ? 1 : 0;
+            if(ps_cur_mb_info->u1_topleft_mb_fld)
+            {
+                /* TopLeft = D */
+                ps_cur_mb_info->u1_topleft_mb_fld = ps_dec->u1_topleft_mb_fld;
+                ps_cur_mb_info->u1_topleft_mbtype = ps_dec->u1_topleft_mbtype;
+            }
+        }
+    }
+    if(u1_topmb)
+    {
+        /* Update the parameters of topleftmb*/
+        ps_dec->u1_topleft_mb_fld = ps_top_mb->u1_mb_fld;
+        ps_dec->u1_topleft_mbtype = ps_top_mb->u1_mb_type;
+        /* Set invscan and dequantMatrixScan*/
+        if(uc_curMbFldDecFlag)
+        {
+            ps_dec->pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_fld;
+        }
+        else
+        {
+            ps_dec->pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan;
+        }
+        ps_dec->pu2_quant_scale_y =
+                        gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_y_rem6];
+        ps_dec->pu2_quant_scale_u =
+                        gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_u_rem6];
+        ps_dec->pu2_quant_scale_v =
+                        gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_v_rem6];
+
+    }
+    else
+    {
+        /* Update the parameters of topleftmb*/
+        mb_neigbour_params_t *ps_top_mb_temp = ps_dec->ps_top_mb_row
+                        + (u4_mb_x << 1) + 1;
+        ps_dec->u1_topleft_mb_fld_bot = ps_top_mb_temp->u1_mb_fld;
+        ps_dec->u1_topleft_mbtype_bot = ps_top_mb_temp->u1_mb_type;
+    }
+
+    ps_cur_mb_info->ps_left_mb = ps_left_mb;
+    ps_cur_mb_info->ps_top_mb = ps_top_mb;
+    ps_cur_mb_info->ps_top_right_mb = ps_top_right_mb;
+    ps_cur_mb_info->ps_curmb = ps_curmb;
+    ps_curmb->u1_mb_fld = uc_curMbFldDecFlag;
+
+    {
+        /* Form Left NNZ: 4 luma + 4 chroma bytes picked from the left pair */
+        UWORD8 u1_is_left_mb_fld = ps_left_mb->u1_mb_fld;
+        UWORD8 *pu1_left_mb_pair_nnz_y = (UWORD8 *)&ps_dec->u4_n_leftY[0];
+        UWORD8 *pu1_left_mb_pair_nnz_uv = (UWORD8 *)&ps_dec->u4_n_left_cr[0];
+        UWORD8 *pu1_left_nnz_y = ps_dec->pu1_left_nnz_y;
+        UWORD8 *pu1_left_nnz_uv = ps_dec->pu1_left_nnz_uv;
+
+        if(uc_curMbFldDecFlag == u1_is_left_mb_fld)
+        {
+            *(UWORD32 *)pu1_left_nnz_y = *(UWORD32 *)(pu1_left_mb_pair_nnz_y
+                            + (uc_botMb << 2));
+            *(UWORD32 *)pu1_left_nnz_uv = *(UWORD32 *)(pu1_left_mb_pair_nnz_uv
+                            + (uc_botMb << 2));
+        }
+        else if((uc_curMbFldDecFlag == 0) && u1_topmb && u1_is_left_mb_fld)
+        {
+            /* 0 0 1 1 of u4_n_leftY[0], 0 0 2 2 of u4_n_left_cr[0] */
+            pu1_left_nnz_y[0] = pu1_left_nnz_y[1] = pu1_left_mb_pair_nnz_y[0];
+            pu1_left_nnz_y[2] = pu1_left_nnz_y[3] = pu1_left_mb_pair_nnz_y[1];
+            pu1_left_nnz_uv[0] = pu1_left_nnz_uv[1] =
+                            pu1_left_mb_pair_nnz_uv[0];
+            pu1_left_nnz_uv[2] = pu1_left_nnz_uv[3] =
+                            pu1_left_mb_pair_nnz_uv[2];
+        }
+        else if((uc_curMbFldDecFlag == 0) && uc_botMb && u1_is_left_mb_fld)
+        {
+            /* 2 2 3 3 of u4_n_leftY[0] , 1 1 3 3 of u4_n_left_cr[0] */
+            pu1_left_nnz_y[0] = pu1_left_nnz_y[1] = pu1_left_mb_pair_nnz_y[2];
+            pu1_left_nnz_y[2] = pu1_left_nnz_y[3] = pu1_left_mb_pair_nnz_y[3];
+            pu1_left_nnz_uv[0] = pu1_left_nnz_uv[1] =
+                            pu1_left_mb_pair_nnz_uv[1];
+            pu1_left_nnz_uv[2] = pu1_left_nnz_uv[3] =
+                            pu1_left_mb_pair_nnz_uv[3];
+        }
+        else
+        {
+            /* 0 2 0 2 of u4_n_leftY[0], u4_n_leftY[1] */
+            pu1_left_nnz_y[0] = pu1_left_mb_pair_nnz_y[0];
+            pu1_left_nnz_y[1] = pu1_left_mb_pair_nnz_y[2];
+            pu1_left_nnz_y[2] = pu1_left_mb_pair_nnz_y[4 + 0];
+            pu1_left_nnz_y[3] = pu1_left_mb_pair_nnz_y[4 + 2];
+
+            /* 0 of u4_n_left_cr[0] and 0 u4_n_left_cr[1]
+             2 of u4_n_left_cr[0] and 2 u4_n_left_cr[1] */
+            pu1_left_nnz_uv[0] = pu1_left_mb_pair_nnz_uv[0];
+            pu1_left_nnz_uv[1] = pu1_left_mb_pair_nnz_uv[4 + 0];
+            pu1_left_nnz_uv[2] = pu1_left_mb_pair_nnz_uv[2];
+            pu1_left_nnz_uv[3] = pu1_left_mb_pair_nnz_uv[4 + 2];
+        }
+    }
+}
+
+/*
+ **************************************************************************
+ * \if Function name : ih264d_transfer_mb_group_data \endif
+ *
+ * \brief
+ *     Transfer the Following things
+ *     N-Mb DeblkParams Data    ( To Ext DeblkParams Buffer )
+ *     N-Mb Recon Data          ( To Ext Frame Buffer )
+ *     N-Mb Intrapredline Data  ( Updated Internally)
+ *     N-Mb MV Data             ( To Ext MV Buffer )
+ *     N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers)
+ *
+ * \return
+ *    None
+ *
+ **************************************************************************
+ */
+void ih264d_transfer_mb_group_data(dec_struct_t * ps_dec,
+                                   const WORD8 c_numMbs,
+                                   const UWORD8 u1_end_of_row, /* Cur
n-Mb End of Row Flag */
+                                   const UWORD8 u1_end_of_row_next /* Next n-Mb End of Row Flag */
+                                   )
+{
+    dec_mb_info_t *ps_cur_mb_info = ps_dec->ps_nmb_info; /* not referenced below */
+    tfr_ctxt_t *ps_trns_addr = &ps_dec->s_tran_addrecon;
+    UWORD16 u2_mb_y;
+    UWORD32 y_offset;
+    UWORD32 u4_frame_stride;
+    mb_neigbour_params_t *ps_temp;
+    const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+    UNUSED(u1_end_of_row_next);
+    /* Advance the destination pointers; the step is indexed by end-of-row */
+    ps_trns_addr->pu1_dest_y += ps_trns_addr->u4_inc_y[u1_end_of_row];
+    ps_trns_addr->pu1_dest_u += ps_trns_addr->u4_inc_uv[u1_end_of_row];
+    ps_trns_addr->pu1_dest_v += ps_trns_addr->u4_inc_uv[u1_end_of_row];
+
+    /* Swap top and current pointers */
+    if(u1_end_of_row)
+    {
+
+        if(ps_dec->u1_separate_parse)
+        {
+            u2_mb_y = ps_dec->i2_dec_thread_mb_y;
+        }
+        else
+        {
+            ps_temp = ps_dec->ps_cur_mb_row;
+            ps_dec->ps_cur_mb_row = ps_dec->ps_top_mb_row;
+            ps_dec->ps_top_mb_row = ps_temp;
+            /* Next row: one MB row down, two when decoding MB pairs */
+            u2_mb_y = ps_dec->u2_mby + (1 + u1_mbaff);
+        }
+        /* Recompute the destination pointers from the row index (luma) */
+        u4_frame_stride = ps_dec->u2_frm_wd_y
+                        << ps_dec->ps_cur_slice->u1_field_pic_flag;
+        y_offset = (u2_mb_y * u4_frame_stride) << 4;
+        ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + y_offset;
+        /* Same for chroma, with 8 rows per MB instead of 16 */
+        u4_frame_stride = ps_dec->u2_frm_wd_uv
+                        << ps_dec->ps_cur_slice->u1_field_pic_flag;
+        y_offset = (u2_mb_y * u4_frame_stride) << 3;
+        ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + y_offset;
+        ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + y_offset;
+
+        ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y;
+        ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u;
+        ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v;
+    }
+
+    /*
+     * The Slice boundary is also a valid condition to transfer. So recalculate
+     * the Left increment, in case the number of MBs is lesser than the
+     * N MB value. c_numMbs will be equal to N of N MB if the entire N Mb is
+     * decoded.
+     */
+    ps_dec->s_tran_addrecon.u2_mv_left_inc = ((c_numMbs >> u1_mbaff) - 1)
+                    << (4 + u1_mbaff);
+    ps_dec->s_tran_addrecon.u2_mv_top_left_inc = (c_numMbs << 2) - 1
+                    - (u1_mbaff << 2);
+
+    if(ps_dec->u1_separate_parse == 0)
+    {
+        /* reassign left MV and cur MV pointers */
+        ps_dec->ps_mv_left = ps_dec->ps_mv_cur
+                        + ps_dec->s_tran_addrecon.u2_mv_left_inc;
+        /* 16 MV entries per MB */
+        ps_dec->ps_mv_cur += (c_numMbs << 4);
+    }
+
+    /* Increment deblock parameters pointer in external memory */
+
+    if(ps_dec->u1_separate_parse == 1)
+    {
+        ps_dec->ps_deblk_mbn_dec_thrd += c_numMbs;
+    }
+    else
+    {
+        if(ps_dec->u4_mb_level_deblk == 0)
+            ps_dec->ps_deblk_mbn += c_numMbs;
+        else
+        {
+            deblk_mb_t *temp;
+
+            /*swap previous and curr pointers*/
+            ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_mbn_prev;
+            temp = ps_dec->ps_deblk_mbn_curr;
+            ps_dec->ps_deblk_mbn_curr = ps_dec->ps_deblk_mbn_prev;
+            ps_dec->ps_deblk_mbn_prev = temp;
+        }
+    }
+
+}
+
diff --git a/decoder/ih264d_mb_utils.h b/decoder/ih264d_mb_utils.h
new file mode 100755
index 0000000..6e359f5
--- /dev/null
+++ b/decoder/ih264d_mb_utils.h
@@ -0,0 +1,293 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore
+*/
+#ifndef _IH264D_MB_UTILS_H_
+#define _IH264D_MB_UTILS_H_
+/*!
+ **************************************************************************
+ * \file ih264d_mb_utils.h
+ *
+ * \brief
+ *    Contains declarations of the utility functions needed to decode MB
+ *
+ * \date
+ *    18/12/2002
+ *
+ * \author  AI
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+
+/*--------------------------------------------------------------------*/
+/* Macros to get raster scan position of a block[8x8] / sub block[4x4]*/
+/*--------------------------------------------------------------------*/
+
+#define GET_BLK_RASTER_POS_X(x)     (((x) & 0x01) << 1)
+#define GET_BLK_RASTER_POS_Y(y)     (((y) >> 1)   << 1)
+#define GET_SUB_BLK_RASTER_POS_X(x) (((x) & 0x01))
+#define GET_SUB_BLK_RASTER_POS_Y(y) (((y) >> 1))
+
+/*--------------------------------------------------------------------*/
+/* Masks used in decoding of Macroblock                               */
+/*--------------------------------------------------------------------*/
+
+#define LEFT_MB_AVAILABLE_MASK      0x01
+#define TOP_LEFT_MB_AVAILABLE_MASK  0x02
+#define TOP_MB_AVAILABLE_MASK       0x04
+#define TOP_RIGHT_MB_AVAILABLE_MASK 0x08
+
+#define TOP_RT_SUBBLOCK_MASK_MOD               0xFFF7
+
+#define TOP_RIGHT_DEFAULT_AVAILABLE            0x5750
+#define TOP_RIGHT_TOPR_AVAILABLE               0x0008
+#define TOP_RIGHT_TOP_AVAILABLE                0x0007
+
+#define TOP_LEFT_DEFAULT_AVAILABLE            0xEEE0
+#define TOP_LEFT_TOPL_AVAILABLE               0x0001
+#define TOP_LEFT_TOP_AVAILABLE                0x000E
+#define TOP_LEFT_LEFT_AVAILABLE               0x1110
+
+#define CHECK_MB_MAP(u4_mb_num, mb_map, u4_cond)                                                    \
+{                                                                                                   \
+        UWORD32 u4_bit_number;                                                                      \
+        volatile UWORD8 *pu1_mb_flag;                                                               \
+                                                                                                    \
+        u4_bit_number = (u4_mb_num) & 0x07;                                                         \
+        pu1_mb_flag = (UWORD8 *)(mb_map) + ((u4_mb_num) >> 3);                                      \
+                                                                                                    \
+        (u4_cond) = CHECKBIT((*pu1_mb_flag), u4_bit_number);                                        \
+}
+
+#define CHECK_MB_MAP_BYTE(u4_mb_num, mb_map, u4_cond)                                               \
+{                                                                                                   \
+        volatile UWORD8 *pu1_mb_flag;                                                               \
+                                                                                                    \
+        pu1_mb_flag = (UWORD8 *)(mb_map) + (u4_mb_num);                                             \
+                                                                                                    \
+        (u4_cond) = (*pu1_mb_flag);                                                                 \
+}
+
+#define UPDATE_MB_MAP(u2_frm_wd_in_mbs, u2_mbx, u2_mby, mb_map, mb_count)                           \
+{                                                                                                   \
+        UWORD32 u4_bit_number;                                                                      \
+        UWORD32 u4_mb_number;                                                                       \
+                                                                                                    \
+        u4_mb_number = (u2_frm_wd_in_mbs) * ((u2_mby) >> u1_mbaff) + (u2_mbx);                      \
+                                                                                                    \
+        u4_bit_number = u4_mb_number & 0x07;                                                        \
+        /*                                                                                          \
+         * NOTE: u1_mbaff is not a macro parameter; it must be visible at the expansion site.      \
+         * u2_mby is shifted right by u1_mbaff before the MB number is formed.                      \
+         */                                                                                         \
+        SET_BIT((mb_map)[u4_mb_number >> 3], u4_bit_number);                                        \
+                                                                                                    \
+        if (1 == u1_mbaff)                                                                          \
+        {                                                                                           \
+            /*                                                                                      \
+             * If MBAFF flag is set, set this MB and the MB just below this.                        \
+             * So, add frame width to the MB number and set that bit.                               \
+             */                                                                                     \
+            /*                                                                                      \
+            u4_mb_number += u2_frm_wd_in_mbs;                                                       \
+                                                                                                    \
+            u4_bit_number = u4_mb_number & 0x07;                                                    \
+                                                                                                    \
+            SET_BIT(mb_map[u4_mb_number >> 3], u4_bit_number);                                      \
+            */                                                                                      \
+        }                                                                                           \
+                                                                                                    \
+        /*H264_DEC_DEBUG_PRINT("SETBIT: %d\n", u4_mb_number);*/                                     \
+        (mb_count)++;                                                                               \
+}
+
+#define UPDATE_MB_MAP_MBNUM(mb_map, u4_mb_number)                                                   \
+{                                                                                                   \
+        UWORD32 u4_bit_number;                                                                      \
+        volatile UWORD8 *pu1_mb_flag;                                                               \
+                                                                                                    \
+        u4_bit_number = (u4_mb_number) & 0x07;                                                      \
+        pu1_mb_flag = (UWORD8 *)(mb_map) + ((u4_mb_number) >> 3);                                   \
+        /*                                                                                          \
+         * In case of MbAff, update the mb_map only if the entire MB is done. We can check that     \
+         * by checking if Y is odd, implying that this is the second row in the MbAff MB            \
+         */                                                                                         \
+        SET_BIT((*pu1_mb_flag), u4_bit_number);                                                     \
+}
+
+#define UPDATE_MB_MAP_MBNUM_BYTE(mb_map, u4_mb_number)                                              \
+{                                                                                                   \
+        volatile UWORD8 *pu1_mb_flag;                                                               \
+                                                                                                    \
+        pu1_mb_flag = (UWORD8 *)(mb_map) + (u4_mb_number);                                          \
+        /*                                                                                          \
+         * In case of MbAff, update the mb_map only if the entire MB is done. We can check that     \
+         * by checking if Y is odd, implying that this is the second row in the MbAff MB            \
+         */                                                                                         \
+        (*pu1_mb_flag) = 1;                                                                         \
+}
+
+#define UPDATE_SLICE_NUM_MAP(slice_map, u4_mb_number,u2_slice_num)                                  \
+{                                                                                                   \
+        volatile UWORD16 *pu2_slice_map;                                                            \
+                                                                                                    \
+        pu2_slice_map = (UWORD16 *)(slice_map) + (u4_mb_number);                                    \
+        (*pu2_slice_map) = (u2_slice_num);                                                          \
+}
+
+#define GET_SLICE_NUM_MAP(slice_map, mb_number,u2_slice_num)                                        \
+{                                                                                                   \
+        volatile UWORD16 *pu2_slice_map;                                                            \
+                                                                                                    \
+        pu2_slice_map = (UWORD16 *)(slice_map) + (mb_number);                                       \
+        (u2_slice_num) = (*pu2_slice_map) ;                                                         \
+}
+
+
+#define GET_XPOS_PRED(u1_out,pkd_info)                        \
+{                                                               \
+    WORD32 bit_field;                                           \
+    bit_field = (pkd_info) & 0x3;                               \
+    (u1_out) = bit_field;                                       \
+}
+
+
+#define GET_YPOS_PRED(u1_out,pkd_info)                        \
+{                                                               \
+    WORD32 bit_field;                                           \
+    bit_field = (pkd_info) >> 2;                                \
+    (u1_out) = bit_field & 0x3;                                 \
+}
+
+
+
+#define GET_WIDTH_PRED(u1_out,pkd_info)                        \
+{                                                               \
+    WORD32 bit_field;                                           \
+    bit_field = (pkd_info) >> 4;                                \
+    bit_field = (bit_field & 0x3) << 1 ;                        \
+    (u1_out) = (bit_field == 0)?1:bit_field;                    \
+       }
+
+#define GET_HEIGHT_PRED(u1_out,pkd_info)                        \
+{                                                               \
+    WORD32 bit_field;                                           \
+    bit_field = (pkd_info) >> 6;                                \
+    bit_field = (bit_field & 0x3) << 1 ;                        \
+    (u1_out) = (bit_field == 0)?1:bit_field;                    \
+}
+
+/*!
+ **************************************************************************
+ *   \brief   Masks for elements present in the first column but not on the
+ *   first row.
+ **************************************************************************
+ */
+#define FIRST_COL_NOT_FIRST_ROW             0xFAFB
+#define FIRST_ROW_MASK                      0xFFCC
+/*!
+ **************************************************************************
+ *   \brief   Mask for elements present in the first row but not in the
+ *   last column.
+ **************************************************************************
+ */
+#define FIRST_ROW_NOT_LAST_COL             0xFFEC
+/*!
+ **************************************************************************
+ *   \brief   Mask for elements present in the first row but not in the
+ *   first column.
+ **************************************************************************
+ */
+#define FIRST_ROW_NOT_FIRST_COL            0xFFCD
+/*!
+ **************************************************************************
+ *   \brief   Masks for the top right subMB of a 4x4 block
+ **************************************************************************
+ */
+#define TOP_RT_SUBBLOCK_MASK                0xFFDF
+/*!
+ **************************************************************************
+ *   \brief   Masks for the top left subMB of a 4x4 block
+ **************************************************************************
+ */
+#define TOP_LT_SUBBLOCK_MASK                0xFFFE
+/*!
+ **************************************************************************
+ *   \brief   Indicates if a subMB has a top right subMB available
+ **************************************************************************
+ */
+#define TOP_RT_SUBBLOCK_MB_MASK  0x5F4C
+
+#define FIRST_COL_MASK           0xFAFA
+
+/*--------------------------------------------------------------------*/
+/* Macros to calculate the current position of a MB wrt picture       */
+/*--------------------------------------------------------------------*/
+#define MB_LUMA_PIC_OFFSET(mb_x,mb_y,frmWidthY)       (((mb_y)*(frmWidthY) + (mb_x))<<4)
+#define MB_CHROMA_PIC_OFFSET(mb_x,mb_y,frmWidthUV)    (((mb_y)*(frmWidthUV) + (mb_x))<<3)
+
+/*--------------------------------------------------------------------*/
+/* Macros to calculate the current position of a MB wrt N[ Num coeff] Array */
+/*--------------------------------------------------------------------*/
+#define MB_PARAM_OFFSET(mb_x,mb_y,frmWidthInMbs,u1_mbaff,u1_topmb) \
+        (((mb_x) << (u1_mbaff)) + (1 - (u1_topmb)) + ((mb_y) * (frmWidthInMbs)))
+
+UWORD32 ih264d_get_mb_info_cavlc_mbaff(dec_struct_t * ps_dec,
+                                       const UWORD16 ui16_curMbAddress,
+                                       dec_mb_info_t * ps_cur_mb_info,
+                                       UWORD32 u4_mbskip_run);
+UWORD32 ih264d_get_mb_info_cavlc_nonmbaff(dec_struct_t * ps_dec,
+                                          const UWORD16 ui16_curMbAddress,
+                                          dec_mb_info_t * ps_cur_mb_info,
+                                          UWORD32 u4_mbskip_run);
+
+UWORD32 ih264d_get_mb_info_cabac_mbaff(dec_struct_t * ps_dec,
+                                       const UWORD16 ui16_curMbAddress,
+                                       dec_mb_info_t * ps_cur_mb_info,
+                                       UWORD32 u4_mbskip_run);
+
+UWORD32 ih264d_get_mb_info_cabac_nonmbaff(dec_struct_t * ps_dec,
+                                          const UWORD16 ui16_curMbAddress,
+                                          dec_mb_info_t * ps_cur_mb_info,
+                                          UWORD32 u4_mbskip_run);
+
+UWORD8 get_cabac_context_non_mbaff(dec_struct_t * ps_dec, UWORD16 u2_mbskip);
+
+UWORD32 ih264d_get_cabac_context_mbaff(dec_struct_t * ps_dec,
+                                       dec_mb_info_t * ps_cur_mb_info,
+                                       UWORD32 u4_mbskip);
+
+WORD32 PutMbToFrame(dec_struct_t * ps_dec);
+void ih264d_get_mbaff_neighbours(dec_struct_t * ps_dec,
+                                 dec_mb_info_t * ps_cur_mb_info,
+                                 UWORD8 uc_curMbFldDecFlag);
+
+void ih264d_update_mbaff_left_nnz(dec_struct_t * ps_dec,
+                                  dec_mb_info_t * ps_cur_mb_info);
+void ih264d_transfer_mb_group_data(dec_struct_t * ps_dec,
+                                   const WORD8 c_numMbs,
+                                   const UWORD8 u1_end_of_row, /* Cur n-Mb End of Row Flag */
+                                   const UWORD8 u1_end_of_row_next /* Next n-Mb End of Row Flag */
+                                   );
+
+//void FillRandomData(UWORD8 *pu1_buf, WORD32 u4_bufSize);
+
+#endif  /* _IH264D_MB_UTILS_H_ */
diff --git a/decoder/ih264d_mem_request.h b/decoder/ih264d_mem_request.h
new file mode 100755
index 0000000..3c60c72
--- /dev/null
+++ b/decoder/ih264d_mem_request.h
@@ -0,0 +1,82 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _IH264D_MEM_REQUEST_H_
+#define _IH264D_MEM_REQUEST_H_
+/*!
+ ***************************************************************************
+ * \file ih264d_mem_request.h
+ *
+ * \brief
+ *    This file contains declarations and data structures of the APIs which
+ *    are required to interact with the Picture Buffer.
+ *
+ *
+ * \date
+ *    11/12/2002
+ *
+ * \author  NS
+ ***************************************************************************/
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_defs.h"
+#include "ih264d_structs.h"
+
+#define MAX_MEM_BLOCKS      (64 + 8)
+
+struct MemBlock
+{
+    void ** v_memLocation; /** memory location where address of allocated memory should be stored*/
+    UWORD32 u4_mem_size; /** Size of the memory block */
+};
+
+struct MemReq
+{
+    UWORD32 u4_num_memBlocks; /** Number of memory blocks */
+    struct MemBlock s_memBlock[MAX_MEM_BLOCKS]; /** Array of memory block descriptors */
+};
+
+struct PicMemBlock
+{
+    void * buf1; /** memory location for buf1 */
+    void * buf2; /** memory location for buf2 */
+    void * buf3; /** memory location for buf3 */
+};
+
+struct PicMemReq
+{
+    WORD32 i4_num_pic_memBlocks; /** Number of memory blocks */
+    UWORD32 u4_size1; /** Size of the buf1 in PicMemBlock */
+    UWORD32 u4_size2; /** Size of the buf2 in PicMemBlock */
+    UWORD32 u4_size3; /** Size of the buf3 in PicMemBlock */
+    struct PicMemBlock s_PicMemBlock[MAX_DISP_BUFS_NEW];
+};
+
+WORD32 ih264d_create_pic_buffers(UWORD8 u1_num_of_buf,
+                               dec_struct_t *ps_dec);
+
+WORD32 ih264d_create_mv_bank(void * pv_codec_handle,
+                             UWORD32 u4_wd,
+                             UWORD32 u4_ht);
+WORD16 ih264d_get_memory_dec_params(dec_struct_t * ps_dec);
+
+
+#endif  /* _IH264D_MEM_REQUEST_H_ */
diff --git a/decoder/ih264d_mvpred.c b/decoder/ih264d_mvpred.c
new file mode 100755
index
0000000..fb4932f
--- /dev/null
+++ b/decoder/ih264d_mvpred.c
@@ -0,0 +1,1193 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_mvpred.c
+ *
+ * \brief
+ *    This file contains function specific to decoding Motion vector.
+ *
+ * Detailed_description
+ *
+ * \date
+ *    10-12-2002
+ *
+ * \author  Arvind Raman
+ **************************************************************************
+ */
+#include <string.h>
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_defs.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_process_bslice.h"
+#include "ih264d_mvpred.h"
+#include "ih264d_inter_pred.h"
+#include "ih264d_tables.h"
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_get_motion_vector_predictor \endif
+ *
+ * \brief
+ *    Derives the motion vector predictor of a block for entry u1_B from
+ *    its LEFT, TOP and TOP_R candidate predictors.
+ *
+ * \param ps_result: receives the predicted MV components for entry u1_B
+ * \param ps_mv_pred: candidate predictors, indexed by LEFT / TOP / TOP_R
+ * \param u1_ref_idx: reference index the candidates are compared against
+ *
+ * \return
+ *    None; the x & y components are written to ps_result->i2_mv[].
+ *
+ * \note
+ *    The code implements the logic as described in sec 8.4.1.2.1.
+ *    pu1_mv_pred_condition maps the 3-bit mask of matching candidates to
+ *    the candidate to copy, or to -1 when the median has to be used.
+ *
+ **************************************************************************
+ */
+
+void ih264d_get_motion_vector_predictor(mv_pred_t * ps_result,
+                                        mv_pred_t **ps_mv_pred,
+                                        UWORD8 u1_ref_idx,
+                                        UWORD8 u1_B,
+                                        const UWORD8 *pu1_mv_pred_condition)
+{
+    /* The two components predicted for u1_B live at i2_mv[2 * u1_B]
+     and i2_mv[2 * u1_B + 1] */
+    const UWORD8 u1_mv_base = (u1_B << 1);
+    UWORD8 u1_comp;
+    WORD8 i1_sel;
+
+    /* Build a 3-bit mask: bit 0 / 1 / 2 is set when the LEFT / TOP / TOP_R
+     candidate uses the same reference frame as the current block */
+    i1_sel = (ps_mv_pred[LEFT]->i1_ref_frame[u1_B] == u1_ref_idx);
+    i1_sel |= ((ps_mv_pred[TOP]->i1_ref_frame[u1_B] == u1_ref_idx) << 1);
+    i1_sel |= ((ps_mv_pred[TOP_R]->i1_ref_frame[u1_B] == u1_ref_idx) << 2);
+
+    /* The table turns the mask into the index of the candidate to copy,
+     or into -1 when no single candidate can be used */
+    i1_sel = pu1_mv_pred_condition[i1_sel];
+
+    for(u1_comp = 0; u1_comp < 2; u1_comp++)
+    {
+        const UWORD8 u1_idx = u1_mv_base + u1_comp;
+
+        if(i1_sel != -1)
+        {
+            /* Case when only one of the candidate blocks has the same
+             reference frame as the current block: copy its component */
+            ps_result->i2_mv[u1_idx] = ps_mv_pred[i1_sel]->i2_mv[u1_idx];
+        }
+        else
+        {
+            /* Median of the three: max(min(a, b), min(max(a, b), c)) */
+            WORD32 i4_lo = MIN(ps_mv_pred[0]->i2_mv[u1_idx],
+                               ps_mv_pred[1]->i2_mv[u1_idx]);
+            WORD32 i4_hi = MAX(ps_mv_pred[0]->i2_mv[u1_idx],
+                               ps_mv_pred[1]->i2_mv[u1_idx]);
+
+            i4_hi = MIN(i4_hi, ps_mv_pred[2]->i2_mv[u1_idx]);
+            ps_result->i2_mv[u1_idx] = (WORD16)(MAX(i4_lo, i4_hi));
+        }
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if ih264d_mbaff_mv_pred name : Name \endif
+ *
+ * \brief
+ *    The routine calculates the motion vector predictor for a given block,
+ *    given the candidate MV predictors.
+ *
+ * \param ps_mv_pred: Candidate predictors for the current block
+ * \param ps_currMv: Pointer to the left top edge of the current block in
+ *     the MV bank
+ *
+ * \return
+ *    _mvPred: The x & y components of the MV predictor.
+ *
+ * \note
+ *    The code implements the logic as described in sec 8.4.1.2.1. Given
+ *    the candidate predictors and the pointer to the top left edge of the
+ *    block in the MV bank.
+ *
+ **************************************************************************
+ */
+
+void ih264d_mbaff_mv_pred(mv_pred_t **ps_mv_pred,
+                          UWORD8 u1_sub_mb_num,
+                          mv_pred_t *ps_mv_nmb,
+                          mv_pred_t *ps_mv_ntop,
+                          dec_struct_t *ps_dec,
+                          UWORD8 uc_mb_part_width,
+                          dec_mb_info_t *ps_cur_mb_info,
+                          UWORD8* pu0_scale)
+{
+    UWORD16 u2_a_in = 0, u2_b_in = 0, u2_c_in = 0, u2_d_in = 0;
+    mv_pred_t *ps_mvpred_l, *ps_mvpred_tmp;
+    UWORD8 u1_sub_mb_x = (u1_sub_mb_num & 3), uc_sub_mb_y = (u1_sub_mb_num >> 2);
+    UWORD8 u1_is_cur_mb_fld, u1_is_left_mb_fld, u1_is_top_mb_fld;
+    UWORD8 u1_is_cur_mb_top;
+
+    u1_is_cur_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
+    u1_is_cur_mb_top = ps_cur_mb_info->u1_topmb;
+
+    u1_is_left_mb_fld = ps_cur_mb_info->ps_left_mb->u1_mb_fld;
+    u1_is_top_mb_fld = ps_cur_mb_info->ps_top_mb->u1_mb_fld;
+
+    /* Checking in the subMB exists, calculating their motion vectors to be
+     used as predictors and the reference frames of those subMBs */
+    ps_mv_pred[LEFT] = &ps_dec->s_default_mv_pred;
+    ps_mv_pred[TOP] = &(ps_dec->s_default_mv_pred);
+    ps_mv_pred[TOP_R] =
&(ps_dec->s_default_mv_pred); + + /* Check if the left subMb is available */ + if(u1_sub_mb_x) + { + u2_a_in = 1; + ps_mv_pred[LEFT] = (ps_mv_nmb - 1); + } + else + { + UWORD8 uc_temp; + u2_a_in = (ps_cur_mb_info->u1_mb_ngbr_availablity & LEFT_MB_AVAILABLE_MASK); + if(u2_a_in) + { + ps_mvpred_l = (ps_dec->u4_num_pmbair) ? + ps_mv_nmb : + (ps_dec->ps_mv_left + (uc_sub_mb_y << 2) + 48 + - (u1_is_cur_mb_top << 4)); + uc_temp = 29; + if(u1_is_cur_mb_fld ^ u1_is_left_mb_fld) + { + if(u1_is_left_mb_fld) + { + uc_temp += + (((uc_sub_mb_y & 1) << 2) + + ((uc_sub_mb_y & 2) << 1)); + uc_temp += ((u1_is_cur_mb_top) ? 0 : 8); + } + else + { + uc_temp = uc_temp - (uc_sub_mb_y << 2); + uc_temp += ((u1_is_cur_mb_top) ? 0 : 16); + } + } + ps_mv_pred[LEFT] = (ps_mvpred_l - uc_temp); + pu0_scale[LEFT] = u1_is_cur_mb_fld - u1_is_left_mb_fld; + } + } + + /* Check if the top subMB is available */ + if((uc_sub_mb_y > 0) || ((u1_is_cur_mb_top | u1_is_cur_mb_fld) == 0)) + { + u2_b_in = 1; + ps_mv_pred[TOP] = ps_mv_nmb - 4; + } + else + { + u2_b_in = (ps_cur_mb_info->u1_mb_ngbr_availablity & TOP_MB_AVAILABLE_MASK); + if(u2_b_in) + { + /* CHANGED CODE */ + + if(u1_is_top_mb_fld && u1_is_cur_mb_fld) + ps_mvpred_tmp = ps_mv_ntop; + else + { + ps_mvpred_tmp = ps_mv_ntop; + if(u1_is_cur_mb_top) + ps_mvpred_tmp += 16; + } + + ps_mv_pred[TOP] = ps_mvpred_tmp; + pu0_scale[TOP] = u1_is_cur_mb_fld - u1_is_top_mb_fld; + } + } + + /* Check if the top right subMb is available. The top right subMb is + defined as the top right subMb at the top right corner of the MB + partition. 
The top right subMb index starting from the top left + corner of the MB partition is given by + TopRightSubMbIndx = TopLeftSubMbIndx + (WidthOfMbPartition - 6) / 2 + */ + u2_c_in = CHECKBIT(ps_cur_mb_info->u2_top_right_avail_mask, + (u1_sub_mb_num + uc_mb_part_width - 1)); + if(u2_c_in) + { + ps_mv_pred[TOP_R] = ps_mv_pred[TOP] + uc_mb_part_width; + pu0_scale[TOP_R] = pu0_scale[TOP]; + if((uc_sub_mb_y == 0) && ((u1_sub_mb_x + uc_mb_part_width) > 3)) + { + UWORD8 uc_isTopRtMbFld; + uc_isTopRtMbFld = ps_cur_mb_info->ps_top_right_mb->u1_mb_fld; + /* CHANGED CODE */ + ps_mvpred_tmp = ps_mv_ntop + uc_mb_part_width + 12; + ps_mvpred_tmp += (u1_is_cur_mb_top) ? 16 : 0; + ps_mvpred_tmp += (u1_is_cur_mb_fld && u1_is_cur_mb_top && uc_isTopRtMbFld) ? + 0 : 16; + ps_mv_pred[TOP_R] = ps_mvpred_tmp; + pu0_scale[TOP_R] = u1_is_cur_mb_fld - uc_isTopRtMbFld; + } + } + else + { + u2_d_in = CHECKBIT(ps_cur_mb_info->u2_top_left_avail_mask, u1_sub_mb_num); + + /* Check if the the top left subMB is available */ + if(u2_d_in) + { + UWORD8 uc_isTopLtMbFld; + + ps_mv_pred[TOP_R] = ps_mv_pred[TOP] - 1; + pu0_scale[TOP_R] = pu0_scale[TOP]; + + if(u1_sub_mb_x == 0) + { + if((uc_sub_mb_y > 0) || ((u1_is_cur_mb_top | u1_is_cur_mb_fld) == 0)) + { + uc_isTopLtMbFld = u1_is_left_mb_fld; + ps_mvpred_tmp = ps_mv_pred[LEFT] - 4; + + if((u1_is_cur_mb_fld == 0) && uc_isTopLtMbFld) + { + ps_mvpred_tmp = ps_mv_pred[LEFT] + 16; + ps_mvpred_tmp -= (uc_sub_mb_y & 1) ? 0 : 4; + } + } + else + { + UWORD32 u4_cond = ps_dec->u4_num_pmbair; + uc_isTopLtMbFld = ps_cur_mb_info->u1_topleft_mb_fld; + + /* CHANGED CODE */ + ps_mvpred_tmp = ps_mv_ntop - 29; + ps_mvpred_tmp += (u1_is_cur_mb_top) ? 16 : 0; + if(u1_is_cur_mb_fld && u1_is_cur_mb_top) + ps_mvpred_tmp -= (uc_isTopLtMbFld) ? 
16 : 0; + } + ps_mv_pred[TOP_R] = ps_mvpred_tmp; + pu0_scale[TOP_R] = u1_is_cur_mb_fld - uc_isTopLtMbFld; + } + } + else if(u2_b_in == 0) + { + /* If all the subMBs B, C, D are all out of the frame then their MV + and their reference picture is equal to that of A */ + ps_mv_pred[TOP] = ps_mv_pred[LEFT]; + ps_mv_pred[TOP_R] = ps_mv_pred[LEFT]; + pu0_scale[TOP] = pu0_scale[LEFT]; + pu0_scale[TOP_R] = pu0_scale[LEFT]; + } + } +} + +/*! + ************************************************************************** + * \if ih264d_non_mbaff_mv_pred name : Name \endif + * + * \brief + * The routine calculates the motion vector predictor for a given block, + * given the candidate MV predictors. + * + * \param ps_mv_pred: Candidate predictors for the current block + * \param ps_currMv: Pointer to the left top edge of the current block in + * the MV bank + * + * \return + * _mvPred: The x & y components of the MV predictor. + * + * \note + * The code implements the logic as described in sec 8.4.1.2.1. Given + * the candidate predictors and the pointer to the top left edge of the + * block in the MV bank. 
+ *
+ **************************************************************************
+ */
+#if(!MVPRED_NONMBAFF)
+void ih264d_non_mbaff_mv_pred(mv_pred_t **ps_mv_pred,
+                              UWORD8 u1_sub_mb_num,
+                              mv_pred_t *ps_mv_nmb,
+                              mv_pred_t *ps_mv_ntop,
+                              dec_struct_t *ps_dec,
+                              UWORD8 uc_mb_part_width,
+                              dec_mb_info_t *ps_cur_mb_info)
+{
+    /* Column and row of the block's top left subMb inside the MB */
+    const UWORD8 u1_col = (u1_sub_mb_num & 3);
+    const UWORD8 u1_row = (u1_sub_mb_num >> 2);
+    mv_pred_t * const ps_fallback = &ps_dec->s_default_mv_pred;
+    UWORD16 u2_top_avail;
+    UWORD16 u2_corner_avail;
+
+    /* Until a neighbour is found every candidate is the default predictor
+     of the decoder */
+    ps_mv_pred[LEFT] = ps_fallback;
+    ps_mv_pred[TOP] = ps_fallback;
+    ps_mv_pred[TOP_R] = ps_fallback;
+
+    /* Left candidate: inside the current MB, or in the left MB when that
+     one is available */
+    if(0 != u1_col)
+    {
+        ps_mv_pred[LEFT] = ps_mv_nmb - 1;
+    }
+    else if(ps_cur_mb_info->u1_mb_ngbr_availablity & LEFT_MB_AVAILABLE_MASK)
+    {
+        ps_mv_pred[LEFT] = ps_mv_nmb - 13;
+    }
+
+    /* Top candidate: inside the current MB (ps_mv_ntop is then redirected
+     to the row above the block), or in the top MB when it is available */
+    if(0 != u1_row)
+    {
+        u2_top_avail = 1;
+        ps_mv_ntop = ps_mv_nmb - 4;
+        ps_mv_pred[TOP] = ps_mv_ntop;
+    }
+    else
+    {
+        u2_top_avail = (ps_cur_mb_info->u1_mb_ngbr_availablity
+                        & TOP_MB_AVAILABLE_MASK);
+        if(0 != u2_top_avail)
+        {
+            ps_mv_pred[TOP] = ps_mv_ntop;
+        }
+    }
+
+    /* Top right candidate: the subMb above and to the right of the top
+     right corner of the block */
+    u2_corner_avail = CHECKBIT(ps_cur_mb_info->u2_top_right_avail_mask,
+                               (u1_sub_mb_num + uc_mb_part_width - 1));
+    if(0 != u2_corner_avail)
+    {
+        ps_mv_pred[TOP_R] = ps_mv_ntop + uc_mb_part_width;
+
+        /* Step over to the top right MB when the corner leaves the row of
+         the top MB */
+        if((0 == u1_row) && ((u1_col + uc_mb_part_width) > 3))
+        {
+            ps_mv_pred[TOP_R] += 12;
+        }
+        return;
+    }
+
+    /* Top right is missing: fall back to the top left subMb */
+    u2_corner_avail = CHECKBIT(ps_cur_mb_info->u2_top_left_avail_mask,
+                               u1_sub_mb_num);
+    if(0 != u2_corner_avail)
+    {
+        if(0 != u1_col)
+        {
+            ps_mv_pred[TOP_R] = ps_mv_ntop - 1;
+        }
+        else if(0 != u1_row)
+        {
+            ps_mv_pred[TOP_R] = ps_mv_nmb - 17;
+        }
+        else
+        {
+            ps_mv_pred[TOP_R] = ps_mv_ntop - 1 - 12;
+        }
+    }
+    else if(0 == u2_top_avail)
+    {
+        /* None of B, C and D exists: all of them take the MV and the
+         reference picture of A */
+        ps_mv_pred[TOP] = ps_mv_pred[LEFT];
+        ps_mv_pred[TOP_R] = ps_mv_pred[LEFT];
+    }
+}
+#endif
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_mvpred_nonmbaffB                                  */
+/*                                                                           */
+/*  Description   : This function calculates the motion vector predictor,    */
+/*                  for B-Slices                                             */
+/*  Inputs        : <What inputs does the function take?>                    */
+/*  Globals       : None                                                     */
+/*  Processing    : The neighbours A(Left),B(Top),C(TopRight) are calculated */
+/*                  and based on the type of Mb the prediction is            */
+/*                  appropriately done                                       */
+/*  Outputs       : populates ps_mv_final_pred structure                     */
+/*  Returns       : u1_direct_zero_pred_flag which is used only in           */
+/*                  decodeSpatialdirect()                                    */
+/*                                                                           */
+/*  Issues        : <List any issues or problems with this function>         */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         03 05 2005   TA              First Draft                          */
+/*                                                                           */
+/*****************************************************************************/ +#if(!MVPRED_NONMBAFF) +UWORD8 ih264d_mvpred_nonmbaffB(dec_struct_t *ps_dec, + dec_mb_info_t *ps_cur_mb_info, + mv_pred_t *ps_mv_nmb, + mv_pred_t *ps_mv_ntop, + mv_pred_t *ps_mv_final_pred, + UWORD8 u1_sub_mb_num, + UWORD8 uc_mb_part_width, + UWORD8 u1_lx_start, + UWORD8 u1_lxend, + UWORD8 u1_mb_mc_mode) +{ + UWORD8 u1_a_in, u1_b_in, uc_temp1, uc_temp2, uc_temp3; + mv_pred_t *ps_mv_pred[3]; + UWORD8 uc_B2, uc_lx, u1_ref_idx; + UWORD8 u1_direct_zero_pred_flag = 0; + + ih264d_non_mbaff_mv_pred(ps_mv_pred, u1_sub_mb_num, ps_mv_nmb, ps_mv_ntop, + ps_dec, uc_mb_part_width, ps_cur_mb_info); + + for(uc_lx = u1_lx_start; uc_lx < u1_lxend; uc_lx++) + { + u1_ref_idx = ps_mv_final_pred->i1_ref_frame[uc_lx]; + uc_B2 = (uc_lx << 1); + switch(u1_mb_mc_mode) + { + case PRED_16x8: + /* Directional prediction for a 16x8 MB partition */ + if(u1_sub_mb_num == 0) + { + /* Calculating the MV pred for the top 16x8 block */ + if(ps_mv_pred[TOP]->i1_ref_frame[uc_lx] == u1_ref_idx) + { + /* If the reference frame used by the top subMB is same as the + reference frame used by the current block then MV predictor to + be used for the current block is same as the MV of the top + subMB */ + ps_mv_final_pred->i2_mv[uc_B2 + 0] = + ps_mv_pred[TOP]->i2_mv[uc_B2 + 0]; + ps_mv_final_pred->i2_mv[uc_B2 + 1] = + ps_mv_pred[TOP]->i2_mv[uc_B2 + 1]; + } + else + { + /* The MV predictor is calculated according to the process + defined in 8.4.1.2.1 */ + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, + ps_mv_pred, + u1_ref_idx, + uc_lx, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + } + } + else + { + if(ps_mv_pred[LEFT]->i1_ref_frame[uc_lx] == u1_ref_idx) + { + /* If the reference frame used by the left subMB is same as the + reference frame used by the current block then MV predictor to + be used for the current block is same as the MV of the left + subMB */ + ps_mv_final_pred->i2_mv[uc_B2 + 0] = + 
ps_mv_pred[LEFT]->i2_mv[uc_B2 + 0]; + ps_mv_final_pred->i2_mv[uc_B2 + 1] = + ps_mv_pred[LEFT]->i2_mv[uc_B2 + 1]; + } + else + { + /* The MV predictor is calculated according to the process + defined in 8.4.1.2.1 */ + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, + ps_mv_pred, + u1_ref_idx, + uc_lx, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + } + } + break; + case PRED_8x16: + /* Directional prediction for a 8x16 MB partition */ + if(u1_sub_mb_num == 0) + { + if(ps_mv_pred[LEFT]->i1_ref_frame[uc_lx] == u1_ref_idx) + { + /* If the reference frame used by the left subMB is same as the + reference frame used by the current block then MV predictor to + be used for the current block is same as the MV of the left + subMB */ + ps_mv_final_pred->i2_mv[uc_B2 + 0] = + ps_mv_pred[LEFT]->i2_mv[uc_B2 + 0]; + ps_mv_final_pred->i2_mv[uc_B2 + 1] = + ps_mv_pred[LEFT]->i2_mv[uc_B2 + 1]; + } + else + { + /* The MV predictor is calculated according to the process + defined in 8.4.1.2.1 */ + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, + ps_mv_pred, + u1_ref_idx, + uc_lx, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + } + } + else + { + if(ps_mv_pred[TOP_R]->i1_ref_frame[uc_lx] == u1_ref_idx) + { + /* If the reference frame used by the top right subMB is same as + the reference frame used by the current block then MV + predictor to be used for the current block is same as the MV + of the left subMB */ + ps_mv_final_pred->i2_mv[uc_B2 + 0] = + ps_mv_pred[TOP_R]->i2_mv[uc_B2 + 0]; + ps_mv_final_pred->i2_mv[uc_B2 + 1] = + ps_mv_pred[TOP_R]->i2_mv[uc_B2 + 1]; + } + else + { + /* The MV predictor is calculated according to the process + defined in 8.4.1.2.1 */ + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, + ps_mv_pred, + u1_ref_idx, + uc_lx, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + } + } + break; + case B_DIRECT_SPATIAL: + /* Case when the MB has been skipped */ + /* If either of left or the top subMB is not present + OR + If both the 
MV components of either the left or the top subMB are + zero and their reference frame pointer pointing to 0 + then MV for the skipped MB is zero + else the Median of the mv_pred_t is used */ + uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[0]; + uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[0]; + uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[0]; + + ps_mv_final_pred->i1_ref_frame[0] = MIN(uc_temp1, + MIN(uc_temp2, uc_temp3)); + + uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[1]; + uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[1]; + uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[1]; + + ps_mv_final_pred->i1_ref_frame[1] = MIN(uc_temp1, + MIN(uc_temp2, uc_temp3)); + + if((ps_mv_final_pred->i1_ref_frame[0] < 0) + && (ps_mv_final_pred->i1_ref_frame[1] < 0)) + { + u1_direct_zero_pred_flag = 1; + ps_mv_final_pred->i1_ref_frame[0] = 0; + ps_mv_final_pred->i1_ref_frame[1] = 0; + } + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, ps_mv_pred, + ps_mv_final_pred->i1_ref_frame[0], 0, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, ps_mv_pred, + ps_mv_final_pred->i1_ref_frame[1], 1, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + + break; + case MB_SKIP: + /* Case when the MB has been skipped */ + /* If either of left or the top subMB is not present + OR + If both the MV components of either the left or the top subMB are + zero and their reference frame pointer pointing to 0 + then MV for the skipped MB is zero + else the Median of the mv_pred_t is used */ + u1_a_in = (ps_cur_mb_info->u1_mb_ngbr_availablity & + LEFT_MB_AVAILABLE_MASK); + u1_b_in = (ps_cur_mb_info->u1_mb_ngbr_availablity & + TOP_MB_AVAILABLE_MASK); + if(((u1_a_in * u1_b_in) == 0) + || ((ps_mv_pred[LEFT]->i2_mv[0] + | ps_mv_pred[LEFT]->i2_mv[1] + | ps_mv_pred[LEFT]->i1_ref_frame[0]) + == 0) + || ((ps_mv_pred[TOP]->i2_mv[0] + | ps_mv_pred[TOP]->i2_mv[1] + | ps_mv_pred[TOP]->i1_ref_frame[0]) + == 0)) + { + 
ps_mv_final_pred->i2_mv[0] = 0; + ps_mv_final_pred->i2_mv[1] = 0; + break; + } + /* If the condition above is not true calculate the MV predictor + according to the process defined in sec 8.4.1.2.1 */ + default: + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, ps_mv_pred, u1_ref_idx, uc_lx, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + break; + } + } + return (u1_direct_zero_pred_flag); +} +#endif + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_mvpred_nonmbaff */ +/* */ +/* Description : This function calculates the motion vector predictor, */ +/* for all the slice types other than B_SLICE */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : None */ +/* Processing : The neighbours A(Left),B(Top),C(TopRight) are calculated */ +/* and based on the type of Mb the prediction is */ +/* appropriately done */ +/* Outputs : populates ps_mv_final_pred structure */ +/* Returns : u1_direct_zero_pred_flag which is used only in */ +/* decodeSpatialdirect() */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 03 05 2005 TA First Draft */ +/* */ +/*****************************************************************************/ +#if(!MVPRED_NONMBAFF) +UWORD8 ih264d_mvpred_nonmbaff(dec_struct_t *ps_dec, + dec_mb_info_t *ps_cur_mb_info, + mv_pred_t *ps_mv_nmb, + mv_pred_t *ps_mv_ntop, + mv_pred_t *ps_mv_final_pred, + UWORD8 u1_sub_mb_num, + UWORD8 uc_mb_part_width, + UWORD8 u1_lx_start, + UWORD8 u1_lxend, + UWORD8 u1_mb_mc_mode) +{ + UWORD8 u1_a_in, u1_b_in, uc_temp1, uc_temp2, uc_temp3; + mv_pred_t *ps_mv_pred[3]; + UWORD8 u1_ref_idx; + UWORD8 u1_direct_zero_pred_flag = 0; + UNUSED(u1_lx_start); + UNUSED(u1_lxend); + ih264d_non_mbaff_mv_pred(ps_mv_pred, u1_sub_mb_num, ps_mv_nmb, ps_mv_ntop, + ps_dec, uc_mb_part_width, ps_cur_mb_info); + + u1_ref_idx = 
ps_mv_final_pred->i1_ref_frame[0]; + + switch(u1_mb_mc_mode) + { + case PRED_16x8: + /* Directional prediction for a 16x8 MB partition */ + if(u1_sub_mb_num == 0) + { + /* Calculating the MV pred for the top 16x8 block */ + if(ps_mv_pred[TOP]->i1_ref_frame[0] == u1_ref_idx) + { + /* If the reference frame used by the top subMB is same as the + reference frame used by the current block then MV predictor to + be used for the current block is same as the MV of the top + subMB */ + + ps_mv_final_pred->i2_mv[0] = ps_mv_pred[TOP]->i2_mv[0]; + ps_mv_final_pred->i2_mv[1] = ps_mv_pred[TOP]->i2_mv[1]; + } + else + { + /* The MV predictor is calculated according to the process + defined in 8.4.1.2.1 */ + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, + ps_mv_pred, + u1_ref_idx, + 0, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + } + } + else + { + if(ps_mv_pred[LEFT]->i1_ref_frame[0] == u1_ref_idx) + { + /* If the reference frame used by the left subMB is same as the + reference frame used by the current block then MV predictor to + be used for the current block is same as the MV of the left + subMB */ + + ps_mv_final_pred->i2_mv[0] = ps_mv_pred[LEFT]->i2_mv[0]; + ps_mv_final_pred->i2_mv[1] = ps_mv_pred[LEFT]->i2_mv[1]; + } + else + { + /* The MV predictor is calculated according to the process + defined in 8.4.1.2.1 */ + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, + ps_mv_pred, + u1_ref_idx, + 0, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + } + } + break; + case PRED_8x16: + /* Directional prediction for a 8x16 MB partition */ + if(u1_sub_mb_num == 0) + { + if(ps_mv_pred[LEFT]->i1_ref_frame[0] == u1_ref_idx) + { + /* If the reference frame used by the left subMB is same as the + reference frame used by the current block then MV predictor to + be used for the current block is same as the MV of the left + subMB */ + + ps_mv_final_pred->i2_mv[0] = ps_mv_pred[LEFT]->i2_mv[0]; + ps_mv_final_pred->i2_mv[1] = ps_mv_pred[LEFT]->i2_mv[1]; + } + else + 
{ + /* The MV predictor is calculated according to the process + defined in 8.4.1.2.1 */ + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, + ps_mv_pred, + u1_ref_idx, + 0, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + } + } + else + { + if(ps_mv_pred[TOP_R]->i1_ref_frame[0] == u1_ref_idx) + { + /* If the reference frame used by the top right subMB is same as + the reference frame used by the current block then MV + predictor to be used for the current block is same as the MV + of the left subMB */ + + ps_mv_final_pred->i2_mv[0] = ps_mv_pred[TOP_R]->i2_mv[0]; + ps_mv_final_pred->i2_mv[1] = ps_mv_pred[TOP_R]->i2_mv[1]; + } + else + { + /* The MV predictor is calculated according to the process + defined in 8.4.1.2.1 */ + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, + ps_mv_pred, + u1_ref_idx, + 0, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + } + } + break; + case B_DIRECT_SPATIAL: + /* Case when the MB has been skipped */ + /* If either of left or the top subMB is not present + OR + If both the MV components of either the left or the top subMB are + zero and their reference frame pointer pointing to 0 + then MV for the skipped MB is zero + else the Median of the mv_pred_t is used */ + uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[0]; + uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[0]; + uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[0]; + + ps_mv_final_pred->i1_ref_frame[0] = MIN(uc_temp1, + MIN(uc_temp2, uc_temp3)); + + uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[1]; + uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[1]; + uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[1]; + + ps_mv_final_pred->i1_ref_frame[1] = MIN(uc_temp1, + MIN(uc_temp2, uc_temp3)); + + if((ps_mv_final_pred->i1_ref_frame[0] < 0) + && (ps_mv_final_pred->i1_ref_frame[1] < 0)) + { + u1_direct_zero_pred_flag = 1; + ps_mv_final_pred->i1_ref_frame[0] = 0; + ps_mv_final_pred->i1_ref_frame[1] = 0; + } + ih264d_get_motion_vector_predictor( + 
ps_mv_final_pred, ps_mv_pred, + ps_mv_final_pred->i1_ref_frame[0], 0, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, ps_mv_pred, + ps_mv_final_pred->i1_ref_frame[1], 1, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + + break; + case MB_SKIP: + /* Case when the MB has been skipped */ + /* If either of left or the top subMB is not present + OR + If both the MV components of either the left or the top subMB are + zero and their reference frame pointer pointing to 0 + then MV for the skipped MB is zero + else the Median of the mv_pred_t is used */ + u1_a_in = (ps_cur_mb_info->u1_mb_ngbr_availablity & + LEFT_MB_AVAILABLE_MASK); + u1_b_in = (ps_cur_mb_info->u1_mb_ngbr_availablity & + TOP_MB_AVAILABLE_MASK); + if(((u1_a_in * u1_b_in) == 0) + || ((ps_mv_pred[LEFT]->i2_mv[0] + | ps_mv_pred[LEFT]->i2_mv[1] + | ps_mv_pred[LEFT]->i1_ref_frame[0]) + == 0) + || ((ps_mv_pred[TOP]->i2_mv[0] + | ps_mv_pred[TOP]->i2_mv[1] + | ps_mv_pred[TOP]->i1_ref_frame[0]) + == 0)) + { + + ps_mv_final_pred->i2_mv[0] = 0; + ps_mv_final_pred->i2_mv[1] = 0; + break; + } + /* If the condition above is not true calculate the MV predictor + according to the process defined in sec 8.4.1.2.1 */ + default: + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, ps_mv_pred, u1_ref_idx, 0, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + break; + } + + return (u1_direct_zero_pred_flag); +} +#endif + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_mvpred_mbaff */ +/* */ +/* Description : This function calculates the motion vector predictor, */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : None */ +/* Processing : The neighbours A(Left),B(Top),C(TopRight) are calculated */ +/* and based on the type of Mb the prediction is */ +/* appropriately done */ +/* Outputs : populates ps_mv_final_pred structure */ +/* Returns : u1_direct_zero_pred_flag which is 
used only in */ +/* decodeSpatialdirect() */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 03 05 2005 TA First Draft */ +/* */ +/*****************************************************************************/ + +UWORD8 ih264d_mvpred_mbaff(dec_struct_t *ps_dec, + dec_mb_info_t *ps_cur_mb_info, + mv_pred_t *ps_mv_nmb, + mv_pred_t *ps_mv_ntop, + mv_pred_t *ps_mv_final_pred, + UWORD8 u1_sub_mb_num, + UWORD8 uc_mb_part_width, + UWORD8 u1_lx_start, + UWORD8 u1_lxend, + UWORD8 u1_mb_mc_mode) +{ + UWORD8 u1_a_in, u1_b_in, uc_temp1, uc_temp2, uc_temp3; + mv_pred_t *ps_mv_pred[3], s_mvPred[3]; + UWORD8 uc_B2, pu0_scale[3], i, uc_lx, u1_ref_idx; + UWORD8 u1_direct_zero_pred_flag = 0; + + pu0_scale[0] = pu0_scale[1] = pu0_scale[2] = 0; + ih264d_mbaff_mv_pred(ps_mv_pred, u1_sub_mb_num, ps_mv_nmb, ps_mv_ntop, ps_dec, + uc_mb_part_width, ps_cur_mb_info, pu0_scale); + for(i = 0; i < 3; i++) + { + if(pu0_scale[i] != 0) + { + memcpy(&s_mvPred[i], ps_mv_pred[i], sizeof(mv_pred_t)); + if(pu0_scale[i] == 1) + { + s_mvPred[i].i1_ref_frame[0] = s_mvPred[i].i1_ref_frame[0] << 1; + s_mvPred[i].i1_ref_frame[1] = s_mvPred[i].i1_ref_frame[1] << 1; + s_mvPred[i].i2_mv[1] = SIGN_POW2_DIV(s_mvPred[i].i2_mv[1], 1); + s_mvPred[i].i2_mv[3] = SIGN_POW2_DIV(s_mvPred[i].i2_mv[3], 1); + } + else + { + s_mvPred[i].i1_ref_frame[0] = s_mvPred[i].i1_ref_frame[0] >> 1; + s_mvPred[i].i1_ref_frame[1] = s_mvPred[i].i1_ref_frame[1] >> 1; + s_mvPred[i].i2_mv[1] = s_mvPred[i].i2_mv[1] << 1; + s_mvPred[i].i2_mv[3] = s_mvPred[i].i2_mv[3] << 1; + } + ps_mv_pred[i] = &s_mvPred[i]; + } + } + + for(uc_lx = u1_lx_start; uc_lx < u1_lxend; uc_lx++) + { + u1_ref_idx = ps_mv_final_pred->i1_ref_frame[uc_lx]; + uc_B2 = (uc_lx << 1); + switch(u1_mb_mc_mode) + { + case PRED_16x8: + /* Directional prediction for a 16x8 MB partition */ + if(u1_sub_mb_num == 0) + { + /* Calculating the MV pred for the 
top 16x8 block */ + if(ps_mv_pred[TOP]->i1_ref_frame[uc_lx] == u1_ref_idx) + { + /* If the reference frame used by the top subMB is same as the + reference frame used by the current block then MV predictor to + be used for the current block is same as the MV of the top + subMB */ + ps_mv_final_pred->i2_mv[uc_B2 + 0] = + ps_mv_pred[TOP]->i2_mv[uc_B2 + 0]; + ps_mv_final_pred->i2_mv[uc_B2 + 1] = + ps_mv_pred[TOP]->i2_mv[uc_B2 + 1]; + } + else + { + /* The MV predictor is calculated according to the process + defined in 8.4.1.2.1 */ + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, + ps_mv_pred, + u1_ref_idx, + uc_lx, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + } + } + else + { + if(ps_mv_pred[LEFT]->i1_ref_frame[uc_lx] == u1_ref_idx) + { + /* If the reference frame used by the left subMB is same as the + reference frame used by the current block then MV predictor to + be used for the current block is same as the MV of the left + subMB */ + ps_mv_final_pred->i2_mv[uc_B2 + 0] = + ps_mv_pred[LEFT]->i2_mv[uc_B2 + 0]; + ps_mv_final_pred->i2_mv[uc_B2 + 1] = + ps_mv_pred[LEFT]->i2_mv[uc_B2 + 1]; + } + else + { + /* The MV predictor is calculated according to the process + defined in 8.4.1.2.1 */ + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, + ps_mv_pred, + u1_ref_idx, + uc_lx, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + } + } + break; + case PRED_8x16: + /* Directional prediction for a 8x16 MB partition */ + if(u1_sub_mb_num == 0) + { + if(ps_mv_pred[LEFT]->i1_ref_frame[uc_lx] == u1_ref_idx) + { + /* If the reference frame used by the left subMB is same as the + reference frame used by the current block then MV predictor to + be used for the current block is same as the MV of the left + subMB */ + ps_mv_final_pred->i2_mv[uc_B2 + 0] = + ps_mv_pred[LEFT]->i2_mv[uc_B2 + 0]; + ps_mv_final_pred->i2_mv[uc_B2 + 1] = + ps_mv_pred[LEFT]->i2_mv[uc_B2 + 1]; + } + else + { + /* The MV predictor is calculated according to the process + defined in 
8.4.1.2.1 */ + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, + ps_mv_pred, + u1_ref_idx, + uc_lx, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + } + } + else + { + if(ps_mv_pred[TOP_R]->i1_ref_frame[uc_lx] == u1_ref_idx) + { + /* If the reference frame used by the top right subMB is same as + the reference frame used by the current block then MV + predictor to be used for the current block is same as the MV + of the left subMB */ + ps_mv_final_pred->i2_mv[uc_B2 + 0] = + ps_mv_pred[TOP_R]->i2_mv[uc_B2 + 0]; + ps_mv_final_pred->i2_mv[uc_B2 + 1] = + ps_mv_pred[TOP_R]->i2_mv[uc_B2 + 1]; + } + else + { + /* The MV predictor is calculated according to the process + defined in 8.4.1.2.1 */ + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, + ps_mv_pred, + u1_ref_idx, + uc_lx, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + } + } + break; + case B_DIRECT_SPATIAL: + /* Case when the MB has been skipped */ + /* If either of left or the top subMB is not present + OR + If both the MV components of either the left or the top subMB are + zero and their reference frame pointer pointing to 0 + then MV for the skipped MB is zero + else the Median of the mv_pred_t is used */ + uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[0]; + uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[0]; + uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[0]; + + ps_mv_final_pred->i1_ref_frame[0] = MIN(uc_temp1, + MIN(uc_temp2, uc_temp3)); + + uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[1]; + uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[1]; + uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[1]; + + ps_mv_final_pred->i1_ref_frame[1] = MIN(uc_temp1, + MIN(uc_temp2, uc_temp3)); + + /* If the reference indices are negative clip the scaled reference indices to -1 */ + /* i.e invalid reference index */ + + /*if(ps_mv_final_pred->i1_ref_frame[0] < 0) + ps_mv_final_pred->i1_ref_frame[0] = -1; + + if(ps_mv_final_pred->i1_ref_frame[1] < 0) + ps_mv_final_pred->i1_ref_frame[1] 
= -1; */ + + if((ps_mv_final_pred->i1_ref_frame[0] < 0) + && (ps_mv_final_pred->i1_ref_frame[1] < 0)) + { + u1_direct_zero_pred_flag = 1; + ps_mv_final_pred->i1_ref_frame[0] = 0; + ps_mv_final_pred->i1_ref_frame[1] = 0; + } + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, ps_mv_pred, + ps_mv_final_pred->i1_ref_frame[0], 0, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, ps_mv_pred, + ps_mv_final_pred->i1_ref_frame[1], 1, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + + break; + case MB_SKIP: + /* Case when the MB has been skipped */ + /* If either of left or the top subMB is not present + OR + If both the MV components of either the left or the top subMB are + zero and their reference frame pointer pointing to 0 + then MV for the skipped MB is zero + else the Median of the mv_pred_t is used */ + u1_a_in = (ps_cur_mb_info->u1_mb_ngbr_availablity & + LEFT_MB_AVAILABLE_MASK); + u1_b_in = (ps_cur_mb_info->u1_mb_ngbr_availablity & + TOP_MB_AVAILABLE_MASK); + if(((u1_a_in * u1_b_in) == 0) + || ((ps_mv_pred[LEFT]->i2_mv[0] + | ps_mv_pred[LEFT]->i2_mv[1] + | ps_mv_pred[LEFT]->i1_ref_frame[0]) + == 0) + || ((ps_mv_pred[TOP]->i2_mv[0] + | ps_mv_pred[TOP]->i2_mv[1] + | ps_mv_pred[TOP]->i1_ref_frame[0]) + == 0)) + { + ps_mv_final_pred->i2_mv[0] = 0; + ps_mv_final_pred->i2_mv[1] = 0; + break; + } + /* If the condition above is not true calculate the MV predictor + according to the process defined in sec 8.4.1.2.1 */ + default: + ih264d_get_motion_vector_predictor( + ps_mv_final_pred, ps_mv_pred, u1_ref_idx, uc_lx, + (const UWORD8 *)gau1_ih264d_mv_pred_condition); + break; + } + } + return (u1_direct_zero_pred_flag); +} + + + + +void ih264d_rep_mv_colz(dec_struct_t *ps_dec, + mv_pred_t *ps_mv_pred_src, + mv_pred_t *ps_mv_pred_dst, + UWORD8 u1_sub_mb_num, + UWORD8 u1_colz, + UWORD8 u1_ht, + UWORD8 u1_wd) +{ + + UWORD8 k, m; + UWORD8 *pu1_colz = ps_dec->pu1_col_zero_flag + ps_dec->i4_submb_ofst + + 
u1_sub_mb_num; + + for(k = 0; k < u1_ht; k++) + { + for(m = 0; m < u1_wd; m++) + { + *(ps_mv_pred_dst + m) = *(ps_mv_pred_src); + *(pu1_colz + m) = u1_colz; + + } + pu1_colz += SUB_BLK_WIDTH; + ps_mv_pred_dst += SUB_BLK_WIDTH; + } +} + diff --git a/decoder/ih264d_mvpred.h b/decoder/ih264d_mvpred.h new file mode 100755 index 0000000..66366ca --- /dev/null +++ b/decoder/ih264d_mvpred.h @@ -0,0 +1,153 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +#ifndef _IH264D_MVPRED_H_ +#define _IH264D_MVPRED_H_ + +/** +************************************************************************** +* \file ih264d_mvpred.h +* +* \brief +* This file contains declarations of functions specific to decoding +* Motion vector. 
+* +* Detailed_description +* +* \date +* 10-12-2002 +* +* \author Arvind Raman +************************************************************************** +*/ +#include "ih264d_structs.h" +#include "ih264d_defs.h" +//#include "structs.h" + +/** Reference number that is not valid */ +#define OUT_OF_RANGE_REF -1 + +#define ONE_TO_ONE 0 +#define FRM_TO_FLD 1 +#define FLD_TO_FRM 2 + +/** +************************************************************************** +* \brief POSITION_IN_MVBANK +* +* a: Pointer to the top left subMb of the MB in the MV bank array +* b: Horiz posn in terms of subMbs +* c: Vert posn in terms of subMbs +* d: subMb number +************************************************************************** +*/ +#define POSITION_IN_MVBANK(a, b, c, d) (a) + (c) * (d) + (b) + + + +/** +************************************************************************** +* \brief col4x4_t +* +* Container to return the information related to the co-located 4x4 +* sub-macroblock. +************************************************************************** +*/ +typedef struct +{ + mv_pred_t *ps_mv; /** Ptr to the Mv bank */ + UWORD16 u2_mb_addr_col; /** Addr of the co-located MB */ + WORD16 i2_mv[2]; /** Mv of the colocated MB */ + WORD8 i1_ref_idx_col; /** Ref idx of the co-located picture */ + UWORD8 u1_col_pic; /** Idx of the colocated pic */ + UWORD8 u1_yM; /** "y" coord of the colocated MB addr */ + UWORD8 u1_vert_mv_scale; /** as defined in sec 8.4.1.2.1 */ +} col4x4_t; + + + + + +void ih264d_update_nnz_for_skipmb(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_entrpy); + +void ih264d_get_motion_vector_predictor(mv_pred_t * ps_result, + mv_pred_t **ps_mv_pred, + UWORD8 u1_ref_idx, + UWORD8 u1_B, + const UWORD8 *pu1_mv_pred_condition); +void ih264d_mbaff_mv_pred(mv_pred_t **ps_mv_pred, + UWORD8 u1_sub_mb_num, + mv_pred_t *ps_mv_nmb, + mv_pred_t *ps_mv_ntop, + dec_struct_t *ps_dec, + UWORD8 uc_mb_part_width, + dec_mb_info_t *ps_cur_mb_info, + 
UWORD8* pu0_scale); +void ih264d_non_mbaff_mv_pred(mv_pred_t **ps_mv_pred, + UWORD8 u1_sub_mb_num, + mv_pred_t *ps_mv_nmb, + mv_pred_t *ps_mv_ntop, + dec_struct_t *ps_dec, + UWORD8 uc_mb_part_width, + dec_mb_info_t *ps_cur_mb_info); +UWORD8 ih264d_mvpred_nonmbaff(dec_struct_t *ps_dec, + dec_mb_info_t *ps_cur_mb_info, + mv_pred_t *ps_mv_nmb, + mv_pred_t *ps_mv_ntop, + mv_pred_t *ps_mv_final_pred, + UWORD8 u1_sub_mb_num, + UWORD8 uc_mb_part_width, + UWORD8 u1_lx_start, + UWORD8 u1_lxend, + UWORD8 u1_mb_mc_mode); + +UWORD8 ih264d_mvpred_nonmbaffB(dec_struct_t *ps_dec, + dec_mb_info_t *ps_cur_mb_info, + mv_pred_t *ps_mv_nmb, + mv_pred_t *ps_mv_ntop, + mv_pred_t *ps_mv_final_pred, + UWORD8 u1_sub_mb_num, + UWORD8 uc_mb_part_width, + UWORD8 u1_lx_start, + UWORD8 u1_lxend, + UWORD8 u1_mb_mc_mode); + +UWORD8 ih264d_mvpred_mbaff(dec_struct_t *ps_dec, + dec_mb_info_t *ps_cur_mb_info, + mv_pred_t *ps_mv_nmb, + mv_pred_t *ps_mv_ntop, + mv_pred_t *ps_mv_final_pred, + UWORD8 u1_sub_mb_num, + UWORD8 uc_mb_part_width, + UWORD8 u1_lx_start, + UWORD8 u1_lxend, + UWORD8 u1_mb_mc_mode); + +void ih264d_rep_mv_colz(dec_struct_t *ps_dec, + mv_pred_t *ps_mv_pred_src, + mv_pred_t *ps_mv_pred_dst, + UWORD8 u1_sub_mb_num, + UWORD8 u1_colz, + UWORD8 u1_ht, + UWORD8 u1_wd); + +#endif /* _IH264D_MVPRED_H_ */ diff --git a/decoder/ih264d_nal.c b/decoder/ih264d_nal.c new file mode 100755 index 0000000..48450c8 --- /dev/null +++ b/decoder/ih264d_nal.c @@ -0,0 +1,393 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! + ************************************************************************** + * \file ih264d_nal.c + * + * \brief NAL parsing routines + * + * Detailed_description + * + * \author + * - AI 19 11 2002 Creation + ************************************************************************** + */ +#include "ih264d_bitstrm.h" +#include "ih264d_defs.h" +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_defs.h" +#define NUM_OF_ZERO_BYTES_BEFORE_START_CODE 2 +#define EMULATION_PREVENTION_BYTE 0x03 + +#define NAL_FIRST_BYTE_SIZE 1 + +/*! + ************************************************************************** + * \if Function name : ih264d_find_start_code \endif + * + * \brief + * This function searches for the Start Code Prefix. + * + * \param pu1_buf : Pointer to char buffer which contains bitstream. + * \param u4_cur_pos : Current position in the buffer. + * \param u4_max_ofst : Number of bytes in Buffer. + * \param pu4_length_of_start_code : Poiter to length of Start Code. + * + * \return + * Returns 0 on success and -1 on error. 
+ * + ************************************************************************** + */ +#define START_CODE_NOT_FOUND -1 +#define END_OF_STREAM_BUFFER -2 +#define END_OF_STREAM -1 + +void ih264d_check_if_aud(UWORD8 *pu1_buf, + UWORD32 u4_cur_pos, + UWORD32 u4_max_ofst, + UWORD32 *pu4_next_is_aud) +{ + UWORD8 u1_first_byte, u1_nal_unit_type; + if(u4_cur_pos + 1 < u4_max_ofst) + { + u1_first_byte = pu1_buf[u4_cur_pos + 1]; + u1_nal_unit_type = NAL_UNIT_TYPE(u1_first_byte); + + if(u1_nal_unit_type == ACCESS_UNIT_DELIMITER_RBSP) + { + *pu4_next_is_aud = 1; + } + } + +} +WORD32 ih264d_find_start_code(UWORD8 *pu1_buf, + UWORD32 u4_cur_pos, + UWORD32 u4_max_ofst, + UWORD32 *pu4_length_of_start_code, + UWORD32 *pu4_next_is_aud) +{ + WORD32 zero_byte_cnt = 0; + UWORD32 ui_curPosTemp; + + *pu4_length_of_start_code = 0; + /*Find first start code */ + while(u4_cur_pos < u4_max_ofst) + { + if(pu1_buf[u4_cur_pos] == 0) + zero_byte_cnt++; + else if(pu1_buf[u4_cur_pos] + == 0x01 && zero_byte_cnt >= NUM_OF_ZERO_BYTES_BEFORE_START_CODE) + { + /* Found the start code */ + u4_cur_pos++; + break; + } + else + { + zero_byte_cnt = 0; + } + u4_cur_pos++; + } + /*Find Next Start Code */ + *pu4_length_of_start_code = u4_cur_pos; + zero_byte_cnt = 0; + ui_curPosTemp = u4_cur_pos; + while(u4_cur_pos < u4_max_ofst) + { + + if(pu1_buf[u4_cur_pos] == 0) + zero_byte_cnt++; + else if(pu1_buf[u4_cur_pos] + == 0x01 && zero_byte_cnt >= NUM_OF_ZERO_BYTES_BEFORE_START_CODE) + { + /* Found the start code */ + ih264d_check_if_aud(pu1_buf, u4_cur_pos, u4_max_ofst, + pu4_next_is_aud); + return (u4_cur_pos - zero_byte_cnt - ui_curPosTemp); + } + else + { + zero_byte_cnt = 0; + } + u4_cur_pos++; + } + + return (u4_cur_pos - zero_byte_cnt - ui_curPosTemp); //(START_CODE_NOT_FOUND); +} + +/*! + ************************************************************************** + * \if Function name : ih264d_get_next_nal_unit \endif + * + * \brief + * This function reads one NAl unit. 
+ * + * \param ps_nalStream : Poiter to NalUnitStream structure. + * \param ps_nalUnit : Pointer to NalUnit. + * + * \return + * Returns 0 on success and -1 on error. + * + ************************************************************************** + */ +WORD32 ih264d_get_next_nal_unit(UWORD8 *pu1_buf, + UWORD32 u4_cur_pos, + UWORD32 u4_max_ofst, + UWORD32 *pu4_length_of_start_code) +{ + + WORD32 i_length_of_nal_unit = 0; + UWORD32 u4_next_is_aud; + + /* NAL Thread starts */ + + ih264d_find_start_code(pu1_buf, u4_cur_pos, u4_max_ofst, + pu4_length_of_start_code, &u4_next_is_aud); + + return (i_length_of_nal_unit); +} + +/*! + ************************************************************************** + * \if Function name : ih264d_process_nal_unit \endif + * + * \brief + * This function removes emulation byte "0x03" from bitstream (EBSP to RBSP). + * It also converts bytestream format into 32 bit little-endian format. + * + * \param ps_bitstrm : Poiter to dec_bit_stream_t structure. + * \param pu1_nal_unit : Pointer to char buffer of NalUnit. + * \param u4_numbytes_in_nal_unit : Number bytes in NalUnit buffer. + * + * \return + * Returns number of bytes in RBSP ps_bitstrm. + * + * \note + * This function is same as nal_unit() of 7.3.1. Apart from nal_unit() + * implementation it converts char buffer into 32 bit Buffer. This + * facilitates efficient access of bitstream. This has been done taking + * into account present processor architectures. 
+ * + ************************************************************************** + */ +WORD32 ih264d_process_nal_unit(dec_bit_stream_t *ps_bitstrm, + UWORD8 *pu1_nal_unit, + UWORD32 u4_numbytes_in_nal_unit) +{ + UWORD32 u4_num_bytes_in_rbsp; + UWORD8 u1_cur_byte; + WORD32 i = 0; + WORD8 c_count; + UWORD32 ui_word; + UWORD32 *puc_bitstream_buffer = (UWORD32*)pu1_nal_unit; + ps_bitstrm->pu4_buffer = puc_bitstream_buffer; + + /*--------------------------------------------------------------------*/ + /* First Byte of the NAL Unit */ + /*--------------------------------------------------------------------*/ + + ui_word = *pu1_nal_unit++; + + /*--------------------------------------------------------------------*/ + /* Convertion of the EBSP to RBSP */ + /* ie Remove the emulation_prevention_byte [equal to 0x03] */ + /*--------------------------------------------------------------------*/ + u4_num_bytes_in_rbsp = 0; + c_count = 0; + +//first iteration + + u1_cur_byte = *pu1_nal_unit++; + + ui_word = ((ui_word << 8) | u1_cur_byte); + + c_count++; + if(u1_cur_byte != 0x00) + c_count = 0; + +//second iteration + + u1_cur_byte = *pu1_nal_unit++; + + ui_word = ((ui_word << 8) | u1_cur_byte); + u4_num_bytes_in_rbsp = 2; + + c_count++; + if(u1_cur_byte != 0x00) + c_count = 0; + + if(u4_numbytes_in_nal_unit > 2) + { + i = ((u4_numbytes_in_nal_unit - 3)); + } + + for(; i > 8; i -= 4) + { + +// loop 0 + u1_cur_byte = *pu1_nal_unit++; + + if(c_count == NUM_OF_ZERO_BYTES_BEFORE_START_CODE + && u1_cur_byte == EMULATION_PREVENTION_BYTE) + { + c_count = 0; + u1_cur_byte = *pu1_nal_unit++; + i--; + } + + ui_word = ((ui_word << 8) | u1_cur_byte); + *puc_bitstream_buffer = ui_word; + puc_bitstream_buffer++; + c_count++; + if(u1_cur_byte != 0x00) + c_count = 0; + +// loop 1 + u1_cur_byte = *pu1_nal_unit++; + + if(c_count == NUM_OF_ZERO_BYTES_BEFORE_START_CODE + && u1_cur_byte == EMULATION_PREVENTION_BYTE) + { + c_count = 0; + u1_cur_byte = *pu1_nal_unit++; + i--; + } + ui_word = ((ui_word 
<< 8) | u1_cur_byte); + + c_count++; + if(u1_cur_byte != 0x00) + c_count = 0; + +// loop 2 + u1_cur_byte = *pu1_nal_unit++; + + if(c_count == NUM_OF_ZERO_BYTES_BEFORE_START_CODE + && u1_cur_byte == EMULATION_PREVENTION_BYTE) + { + c_count = 0; + u1_cur_byte = *pu1_nal_unit++; + i--; + } + + ui_word = ((ui_word << 8) | u1_cur_byte); + + c_count++; + if(u1_cur_byte != 0x00) + c_count = 0; + +// loop 3 + u1_cur_byte = *pu1_nal_unit++; + + if(c_count == NUM_OF_ZERO_BYTES_BEFORE_START_CODE + && u1_cur_byte == EMULATION_PREVENTION_BYTE) + { + c_count = 0; + u1_cur_byte = *pu1_nal_unit++; + i--; + } + + ui_word = ((ui_word << 8) | u1_cur_byte); + + c_count++; + if(u1_cur_byte != 0x00) + c_count = 0; + + u4_num_bytes_in_rbsp += 4; + + } + + for(; i > 0; i--) + { + u1_cur_byte = *pu1_nal_unit++; + + if(c_count == NUM_OF_ZERO_BYTES_BEFORE_START_CODE + && u1_cur_byte == EMULATION_PREVENTION_BYTE) + { + c_count = 0; + i--; + u1_cur_byte = *pu1_nal_unit++; + } + + ui_word = ((ui_word << 8) | u1_cur_byte); + u4_num_bytes_in_rbsp++; + + if((u4_num_bytes_in_rbsp & 0x03) == 0x03) + { + *puc_bitstream_buffer = ui_word; + puc_bitstream_buffer++; + } + c_count++; + if(u1_cur_byte != 0x00) + c_count = 0; + + } + + *puc_bitstream_buffer = (ui_word + << ((3 - (((u4_num_bytes_in_rbsp << 30) >> 30))) << 3)); + ps_bitstrm->u4_ofst = 0; + ps_bitstrm->u4_max_ofst = ((u4_num_bytes_in_rbsp + NAL_FIRST_BYTE_SIZE) << 3); + + return (u4_num_bytes_in_rbsp); +} + + +/*! + ************************************************************************** + * \if Function name : ih264d_rbsp_to_sodb \endif + * + * \brief + * This function converts RBSP to SODB. + * + * \param ps_bitstrm : Poiter to dec_bit_stream_t structure. + * + * \return + * None. 
+ * + ************************************************************************** + */ +void ih264d_rbsp_to_sodb(dec_bit_stream_t *ps_bitstrm) +{ + UWORD32 ui_lastWord; + UWORD32 ui_word; + UWORD8 uc_lastByte; + WORD8 i; + + ui_lastWord = (ps_bitstrm->u4_max_ofst >> 5); + i = (ps_bitstrm->u4_max_ofst >> 3) & 0x03; + + if(i) + { + ui_word = ps_bitstrm->pu4_buffer[ui_lastWord]; + uc_lastByte = ((ui_word << ((i - 1) << 3)) >> 24); + } + else + { + ui_word = ps_bitstrm->pu4_buffer[ui_lastWord - 1]; + uc_lastByte = ((ui_word << 24) >> 24); + } + /*--------------------------------------------------------------------*/ + /* Find out the rbsp_stop_bit position in the last byte of rbsp */ + /*--------------------------------------------------------------------*/ + for(i = 0; (i < 8) && !CHECKBIT(uc_lastByte, i); ++i) + ; + ps_bitstrm->u4_max_ofst = ps_bitstrm->u4_max_ofst - (i + 1); +} diff --git a/decoder/ih264d_nal.h b/decoder/ih264d_nal.h new file mode 100755 index 0000000..3778881 --- /dev/null +++ b/decoder/ih264d_nal.h @@ -0,0 +1,56 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +#ifndef _IH264D_NAL_H_ +#define _IH264D_NAL_H_ + +/*! 
+************************************************************************* +* \file ih264d_nal.h +* +* \brief +* short_description +* +* Detailed_description +* +* \date +* 21/11/2002 +* +* \author AI +************************************************************************* +*/ +#include <stdio.h> +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_bitstrm.h" + +WORD32 ih264d_process_nal_unit(dec_bit_stream_t *ps_bitstrm, + UWORD8 *pu1_nal_unit, + UWORD32 u4_numbytes_in_nal_unit); +void ih264d_rbsp_to_sodb(dec_bit_stream_t *ps_bitstrm); +WORD32 ih264d_find_start_code(UWORD8 *pu1_buf, + UWORD32 u4_cur_pos, + UWORD32 u4_max_ofst, + UWORD32 *pu4_length_of_start_code, + UWORD32 *pu4_next_is_aud); + + +#endif /* _IH264D_NAL_H_ */ diff --git a/decoder/ih264d_parse_bslice.c b/decoder/ih264d_parse_bslice.c new file mode 100755 index 0000000..89cf5ed --- /dev/null +++ b/decoder/ih264d_parse_bslice.c @@ -0,0 +1,1696 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! 
+ ************************************************************************** + * \file ih264d_parse_bslice.c + * + * \brief + * Contains routines that decode a I slice type + * + * Detailed_description + * + * \date + * 07/07/2003 + * + * \author NS + ************************************************************************** + */ + +#include <string.h> +#include "ih264d_bitstrm.h" +#include "ih264d_defs.h" +#include "ih264d_debug.h" +#include "ih264d_tables.h" +#include "ih264d_structs.h" +#include "ih264d_defs.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_mb_utils.h" +#include "ih264d_parse_slice.h" +#include "ih264d_process_intra_mb.h" +#include "ih264d_mvpred.h" +#include "ih264d_parse_islice.h" +#include "ih264d_inter_pred.h" +#include "ih264d_process_pslice.h" +#include "ih264d_process_bslice.h" +#include "ih264d_deblocking.h" +#include "ih264d_cabac.h" +#include "ih264d_parse_mb_header.h" +#include "ih264d_error_handler.h" +#include "ih264d_mvpred.h" +#include "ih264d_cabac.h" +#include "ih264d_utils.h" + +void ih264d_init_cabac_contexts(UWORD8 u1_slice_type, dec_struct_t * ps_dec); + +/*! + ************************************************************************** + * \if Function name : ParseMb_SubMb_PredBCav\endif + * + * \brief + * Implements sub_mb_pred() of 7.3.5.2. & mb_pred() of 7.3.5.1 + * + * \return + * None. 
 *
 **************************************************************************
 */
/*
 * Parses, with CAVLC, the prediction syntax of a B macroblock that is not
 * B_DIRECT: sub-MB types (B_8x8 only), reference indices for both lists and
 * the motion vector residuals.
 *
 * u1_mb_num selects the MB's 16 entries in ps_dec->ps_mv_cur.
 * u1_num_mbsNby2 selects the entry of ps_dec->ps_parse_mb_data to fill.
 *
 * Returns OK on success, ERROR_SUB_MB_TYPE when a decoded sub_mb_type is
 * larger than 12, or the non-OK status of ih264d_parse_bmb_ref_index_cavlc().
 */
WORD32 ih264d_parse_bmb_non_direct_cavlc(dec_struct_t * ps_dec,
                                       dec_mb_info_t * ps_cur_mb_info,
                                       UWORD8 u1_mb_num,
                                       UWORD8 u1_num_mbsNby2)
{
    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
    UWORD8 * pu1_sub_mb_pred_modes = (UWORD8 *)(gau1_ih264d_submb_pred_modes) + 4;
    const UWORD8 (*pu1_mb_pred_modes)[32] =
                    (const UWORD8 (*)[32])gau1_ih264d_mb_pred_modes;
    const UWORD8 * pu1_num_mb_part = (const UWORD8 *)gau1_ih264d_num_mb_part;
    const UWORD8 * pu1_sub_mb_mc_mode = (const UWORD8 *)(gau1_ih264d_submb_mc_mode)
                    + 4;

    parse_pmbarams_t * ps_parse_mb_data = ps_dec->ps_parse_mb_data
                    + u1_num_mbsNby2;
    UWORD8 * pu1_col_info = ps_parse_mb_data->u1_col_info;
    WORD8 (*pi1_ref_idx)[MAX_REFIDX_INFO_PER_MB] = ps_parse_mb_data->i1_ref_idx;
    UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
    /* u1_sub_mb is 1 exactly when the MB type is B_8x8 */
    UWORD8 u1_mb_mc_mode, u1_num_mb_part, u1_sub_mb = !(u1_mb_type ^ B_8x8);
    /* One byte per partition, first partition in bits 31..24 */
    UWORD32 u4_mb_mc_mode = 0, u4_mb_pred_mode = 0;
    WORD32 ret;

    if(u1_sub_mb)
    {
        UWORD8 uc_i;
        u1_mb_mc_mode = 0;
        u1_num_mb_part = 4;
        /* Reading the subMB type */
        for(uc_i = 0; uc_i < 4; uc_i++)
        {

            UWORD32 ui_sub_mb_mode;

//Inlined ih264d_uev
            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
            UWORD32 u4_word, u4_ldz;

            /***************************************************************/
            /* Find leading zeros in next 32 bits                          */
            /***************************************************************/
            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
            u4_ldz = CLZ(u4_word);
            /* Flush the ps_bitstrm */
            u4_bitstream_offset += (u4_ldz + 1);
            /* Read the suffix from the ps_bitstrm */
            u4_word = 0;
            if(u4_ldz)
                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
                        u4_ldz);
            *pu4_bitstrm_ofst = u4_bitstream_offset;
            /* NOTE(review): 1 << u4_ldz is an int shift; if CLZ can yield */
            /* 31 or more this is out of range - confirm CLZ's contract.   */
            ui_sub_mb_mode = ((1 << u4_ldz) + u4_word - 1);
//Inlined ih264d_uev

            if(ui_sub_mb_mode > 12)
                return ERROR_SUB_MB_TYPE;
            else
            {
                UWORD8 u1_subMbPredMode = pu1_sub_mb_pred_modes[ui_sub_mb_mode];
                u4_mb_mc_mode = (u4_mb_mc_mode << 8)
                                | pu1_sub_mb_mc_mode[ui_sub_mb_mode];
                u4_mb_pred_mode = (u4_mb_pred_mode << 8) | u1_subMbPredMode;
                /* (0 - 1) >> 1 when the list bit is clear: relies on an */
                /* arithmetic right shift to give -1                     */
                pi1_ref_idx[0][uc_i] = ((u1_subMbPredMode & PRED_L0) - 1) >> 1;
                pi1_ref_idx[1][uc_i] = ((u1_subMbPredMode & PRED_L1) - 1) >> 1;
                COPYTHECONTEXT("sub_mb_type", u1_subMbPredMode);
            }
            /* Storing collocated Mb and SubMb mode information */
            *pu1_col_info++ = ((PRED_8x8) << 6)
                            | ((pu1_sub_mb_mc_mode[ui_sub_mb_mode] << 4));
            if(ui_sub_mb_mode != B_DIRECT_8x8)
            {
                if(ui_sub_mb_mode > B_BI_8x8)
                {
                    ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
                }
            }
            else if(!ps_dec->s_high_profile.u1_direct_8x8_inference_flag)
            {
                ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
            }
        }
    }
    else
    {
        UWORD8 u1_mb_pred_mode_idx = 5 + u1_mb_type;
        UWORD8 u1_mb_pred_mode_part0 = pu1_mb_pred_modes[0][u1_mb_pred_mode_idx];
        UWORD8 u1_mb_pred_mode_part1 = pu1_mb_pred_modes[1][u1_mb_pred_mode_idx];
        u1_mb_mc_mode = ps_cur_mb_info->u1_mb_mc_mode;
        u1_num_mb_part = pu1_num_mb_part[u1_mb_mc_mode];

        pi1_ref_idx[0][0] = ((u1_mb_pred_mode_part0 & PRED_L0) - 1) >> 1;
        pi1_ref_idx[1][0] = ((u1_mb_pred_mode_part0 & PRED_L1) - 1) >> 1;
        pi1_ref_idx[0][1] = ((u1_mb_pred_mode_part1 & PRED_L0) - 1) >> 1;
        pi1_ref_idx[1][1] = ((u1_mb_pred_mode_part1 & PRED_L1) - 1) >> 1;

        /* Partition 0 ends up in bits 31..24, partition 1 in bits 23..16 */
        u4_mb_pred_mode = (u1_mb_pred_mode_part0 << 8) | u1_mb_pred_mode_part1;
        u4_mb_mc_mode = u1_mb_mc_mode | (u1_mb_mc_mode << 8);
        u4_mb_mc_mode <<= 16;
        u4_mb_pred_mode <<= 16;

        /* Storing collocated Mb and SubMb mode information */
        *pu1_col_info++ = (u1_mb_mc_mode << 6);
        if(u1_mb_mc_mode)
            *pu1_col_info++ = (u1_mb_mc_mode << 6);
    }

    /* Reference indices: list 0 first, then list 1 */
    {
        UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
        UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
        UWORD8 *pu1_num_ref_idx_lx_active =
                        ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active;
        const UWORD8 u1_mbaff_field = (u1_mbaff & uc_field);
        /* Despite the u4_ prefix this is a UWORD8 */
        UWORD8 u4_num_ref_idx_lx_active;

        u4_num_ref_idx_lx_active = (pu1_num_ref_idx_lx_active[0]
                        << u1_mbaff_field) - 1;

        /* Nothing is read from the stream when the value is 0 */
        if(u4_num_ref_idx_lx_active)
        {
            if(1 == u4_num_ref_idx_lx_active)
                ih264d_parse_bmb_ref_index_cavlc_range1(
                                u1_num_mb_part, ps_bitstrm, pi1_ref_idx[0],
                                u4_num_ref_idx_lx_active);
            else
            {
                ret = ih264d_parse_bmb_ref_index_cavlc(u1_num_mb_part, ps_bitstrm,
                                                 pi1_ref_idx[0],
                                                 u4_num_ref_idx_lx_active);
                if(ret != OK)
                    return ret;
            }
        }

        u4_num_ref_idx_lx_active = (pu1_num_ref_idx_lx_active[1]
                        << u1_mbaff_field) - 1;

        if(u4_num_ref_idx_lx_active)
        {
            if(1 == u4_num_ref_idx_lx_active)
                ih264d_parse_bmb_ref_index_cavlc_range1(
                                u1_num_mb_part, ps_bitstrm, pi1_ref_idx[1],
                                u4_num_ref_idx_lx_active);
            else
            {
                ret = ih264d_parse_bmb_ref_index_cavlc(u1_num_mb_part, ps_bitstrm,
                                                 pi1_ref_idx[1],
                                                 u4_num_ref_idx_lx_active);
                if(ret != OK)
                    return ret;
            }
        }
    }

    /* Read MotionVectors */
    {
        const UWORD8 * pu1_top_left_sub_mb_indx;

        const UWORD8 * pu1_sub_mb_indx_mod =
                        (const UWORD8 *)(gau1_ih264d_submb_indx_mod)
                                        + (u1_sub_mb * 6);
        const UWORD8 * pu1_sub_mb_partw = (const UWORD8 *)gau1_ih264d_submb_partw;
        const UWORD8 * pu1_sub_mb_parth = (const UWORD8 *)gau1_ih264d_submb_parth;
        const UWORD8 * pu1_num_sub_mb_part =
                        (const UWORD8 *)gau1_ih264d_num_submb_part;
        const UWORD8 * pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
        const UWORD8 * pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
        UWORD8 u1_p_idx = 0, u1_num_submb_part, uc_lx;
        parse_part_params_t * ps_part;
        mv_pred_t *ps_mv_start = ps_dec->ps_mv_cur + (u1_mb_num << 4);
        UWORD8 u1_mb_part_wd, u1_mb_part_ht;

        /* Initialisations */
        ps_part = ps_dec->ps_part;
        /* Default Initialization for Non subMb Case Mode */
        u1_mb_part_wd = pu1_mb_partw[u1_mb_mc_mode];
        u1_mb_part_ht = pu1_mb_parth[u1_mb_mc_mode];
        u1_num_submb_part = 1;

        /* Decoding the MV for the subMB */
        /* Pass uc_lx == 0 handles PRED_L0, pass uc_lx == 1 handles PRED_L1 */
        for(uc_lx = 0; uc_lx < 2; uc_lx++)
        {
            UWORD8 u1_sub_mb_num = 0, u1_pred_mode, uc_i;
            UWORD32 u4_mb_mc_mode_tmp = u4_mb_mc_mode;
            UWORD32 u4_mb_pred_mode_tmp = u4_mb_pred_mode;
            /* Nibbles 0, 2, 8, 10 consumed top-down: one start index per */
            /* partition in the B_8x8 case                                */
            UWORD16 u2_sub_mb_num = 0x028A; // for sub mb case
            /* i2_mv[] slot pair written in this pass: 0,1 or 2,3 */
            UWORD8 u1_b2 = uc_lx << 1;
            u1_pred_mode = (uc_lx) ? PRED_L1 : PRED_L0;
            pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode << 1);

            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
            {
                /* This u1_mb_mc_mode shadows the function-level variable */
                UWORD8 u1_mb_mc_mode, uc_j;
                /* Take this partition's bytes from the top of the packed words */
                UWORD8 i1_pred = u4_mb_pred_mode_tmp >> 24;
                u1_mb_mc_mode = u4_mb_mc_mode_tmp >> 24;
                u4_mb_pred_mode_tmp <<= 8;
                u4_mb_mc_mode_tmp <<= 8;
                /* subMb prediction mode */
                if(u1_sub_mb)
                {

                    u1_mb_part_wd = pu1_sub_mb_partw[u1_mb_mc_mode];
                    u1_mb_part_ht = pu1_sub_mb_parth[u1_mb_mc_mode];
                    u1_sub_mb_num = u2_sub_mb_num >> 12;
                    u1_num_submb_part = pu1_num_sub_mb_part[u1_mb_mc_mode];
                    pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod
                                    + (u1_mb_mc_mode << 1);
                    u2_sub_mb_num <<= 4;
                }
                for(uc_j = 0; uc_j < u1_num_submb_part;
                                uc_j++, pu1_top_left_sub_mb_indx++)
                {
                    mv_pred_t * ps_mv;
                    u1_sub_mb_num = u1_sub_mb_num + *pu1_top_left_sub_mb_indx;
                    ps_mv = ps_mv_start + u1_sub_mb_num;

                    /* Storing Info for partitions, writing only once */
                    /* (done on the second pass, uc_lx == 1)          */
                    if(uc_lx)
                    {
                        ps_part->u1_is_direct = (!i1_pred);
                        ps_part->u1_pred_mode = i1_pred;
                        ps_part->u1_sub_mb_num = u1_sub_mb_num;
                        ps_part->u1_partheight = u1_mb_part_ht;
                        ps_part->u1_partwidth = u1_mb_part_wd;
                        /* Increment partition Index */
                        u1_p_idx++;
                        ps_part++;
                    }

                    /* A residual is present only when this partition uses */
                    /* the list handled in the current pass                */
                    if(i1_pred & u1_pred_mode)
                    {
                        WORD16 i2_mvx, i2_mvy;

//inlining ih264d_sev
                        {
                            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
                            UWORD32 u4_word, u4_ldz, u4_abs_val;

                            /***************************************************************/
                            /* Find leading zeros in next 32 bits                          */
                            /***************************************************************/
                            NEXTBITS_32(u4_word, u4_bitstream_offset,
                                        pu4_bitstrm_buf);
                            u4_ldz = CLZ(u4_word);

                            /* Flush the ps_bitstrm */
                            u4_bitstream_offset += (u4_ldz + 1);

                            /* Read the suffix from the ps_bitstrm */
                            u4_word = 0;
                            if(u4_ldz)
                                GETBITS(u4_word, u4_bitstream_offset,
                                        pu4_bitstrm_buf, u4_ldz);

                            *pu4_bitstrm_ofst = u4_bitstream_offset;
                            u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;

                            /* Lowest suffix bit selects the sign */
                            if(u4_word & 0x1)
                                i2_mvx = (-(WORD32)u4_abs_val);
                            else
                                i2_mvx = (u4_abs_val);
                        }
//inlinined ih264d_sev

//inlining ih264d_sev
                        {
                            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
                            UWORD32 u4_word, u4_ldz, u4_abs_val;

                            /***************************************************************/
                            /* Find leading zeros in next 32 bits                          */
                            /***************************************************************/
                            NEXTBITS_32(u4_word, u4_bitstream_offset,
                                        pu4_bitstrm_buf);
                            u4_ldz = CLZ(u4_word);

                            /* Flush the ps_bitstrm */
                            u4_bitstream_offset += (u4_ldz + 1);

                            /* Read the suffix from the ps_bitstrm */
                            u4_word = 0;
                            if(u4_ldz)
                                GETBITS(u4_word, u4_bitstream_offset,
                                        pu4_bitstrm_buf, u4_ldz);

                            *pu4_bitstrm_ofst = u4_bitstream_offset;
                            u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;

                            if(u4_word & 0x1)
                                i2_mvy = (-(WORD32)u4_abs_val);
                            else
                                i2_mvy = (u4_abs_val);
                        }
//inlinined ih264d_sev

                        /* Storing Mv residuals */
                        ps_mv->i2_mv[u1_b2] = i2_mvx;
                        ps_mv->i2_mv[u1_b2 + 1] = i2_mvy;
                    }
                }
            }
        }
        /* write back to the scratch partition info */
        ps_dec->ps_part = ps_part;
        ps_parse_mb_data->u1_num_part = u1_sub_mb ? u1_p_idx : u1_num_mb_part;

    }
    return OK;
}

/*!
 **************************************************************************
 * \if Function name : ih264d_parse_bmb_non_direct_cabac \endif
 *
 * \brief
 *    Implements sub_mb_pred() of 7.3.5.2. & mb_pred() of 7.3.5.1
 *
 * \return
 *    OK on success, otherwise an error code.
 *
 **************************************************************************
 */

/*
 * CABAC counterpart of the non-direct B macroblock prediction parse:
 * sub-MB types (B_8x8 only), reference indices for both lists and motion
 * vector differences.
 *
 * u1_mb_num selects the MB's 16 entries in ps_dec->ps_mv_cur.
 * u1_num_mbsNby2 selects the entry of ps_dec->ps_parse_mb_data to fill.
 *
 * Returns OK on success, ERROR_SUB_MB_TYPE when a decoded sub_mb_type is
 * larger than 12, or the non-OK status of ih264d_parse_ref_idx_cabac().
 */
WORD32 ih264d_parse_bmb_non_direct_cabac(dec_struct_t * ps_dec,
                                       dec_mb_info_t * ps_cur_mb_info,
                                       UWORD8 u1_mb_num,
                                       UWORD8 u1_num_mbsNby2)
{
    /* Loads from ps_dec */
    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
    ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
    parse_pmbarams_t * ps_parse_mb_data = ps_dec->ps_parse_mb_data
                    + u1_num_mbsNby2;

    /* table pointer loads */
    const UWORD8 * pu1_sub_mb_pred_modes = (UWORD8 *)(gau1_ih264d_submb_pred_modes)
                    + 4;
    const UWORD8 (*pu1_mb_pred_modes)[32] =
                    (const UWORD8 (*)[32])gau1_ih264d_mb_pred_modes;
    const UWORD8 *pu1_num_mb_part = (const UWORD8 *)gau1_ih264d_num_mb_part;
    const UWORD8 *pu1_sub_mb_mc_mode = (UWORD8 *)(gau1_ih264d_submb_mc_mode) + 4;

    const UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
    UWORD8 * pu1_col_info = ps_parse_mb_data->u1_col_info;
    WORD8 *pi1_ref_idx_l0 = &ps_parse_mb_data->i1_ref_idx[0][0];
    WORD8 *pi1_ref_idx_l1 = &ps_parse_mb_data->i1_ref_idx[1][0];
    /* Per-list value (num_ref_idx_active << mbaff_field) - 1, set below */
    UWORD8 u1_dec_ref_l0, u1_dec_ref_l1;

    UWORD8 u1_num_mb_part, u1_mb_mc_mode, u1_sub_mb, u1_mbpred_mode = 5
                    + u1_mb_type;
    /* One byte per partition, first partition in bits 31..24 */
    UWORD32 u4_mb_mc_mode = 0, u4_mb_pred_mode = 0;
    WORD32 ret;

    p_curr_ctxt->u1_mb_type = CAB_NON_BD16x16;
    /* 1 exactly when the MB type is B_8x8 */
    u1_sub_mb = !(u1_mb_type ^ B_8x8);

    {
        UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
        UWORD8 *pu1_num_ref_idx_lx_active =
                        ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active;
        UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
        UWORD8 u1_mbaff_field = (u1_mbaff & uc_field);
        u1_dec_ref_l0 = (pu1_num_ref_idx_lx_active[0] << u1_mbaff_field) - 1;
        u1_dec_ref_l1 = (pu1_num_ref_idx_lx_active[1] << u1_mbaff_field) - 1;
    }

    if(u1_sub_mb)
    {
        const UWORD8 u1_colz = ((PRED_8x8) << 6);
        UWORD8 uc_i;
        u1_mb_mc_mode = 0;
        u1_num_mb_part = 4;
        /* Reading the subMB type */
        for(uc_i = 0; uc_i < 4; uc_i++)
        {
            UWORD8 u1_sub_mb_mode, u1_subMbPredModes;
            u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
                            1, ps_cab_env, ps_bitstrm,
                            ps_dec->p_sub_mb_type_t);

            if(u1_sub_mb_mode > 12)
                return ERROR_SUB_MB_TYPE;

            u1_subMbPredModes = pu1_sub_mb_pred_modes[u1_sub_mb_mode];
            u4_mb_mc_mode = (u4_mb_mc_mode << 8) | pu1_sub_mb_mc_mode[u1_sub_mb_mode];
            u4_mb_pred_mode = (u4_mb_pred_mode << 8) | u1_subMbPredModes;
            /* Seed value per partition: the list's u1_dec_ref_lX when the */
            /* list is used, -1 when it is not                             */
            *pi1_ref_idx_l0++ =
                            (u1_subMbPredModes & PRED_L0) ? u1_dec_ref_l0 : -1;
            *pi1_ref_idx_l1++ =
                            (u1_subMbPredModes & PRED_L1) ? u1_dec_ref_l1 : -1;
            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
            /* Storing collocated Mb and SubMb mode information */
            *pu1_col_info++ =
                            (u1_colz | (pu1_sub_mb_mc_mode[u1_sub_mb_mode] << 4));
            if(u1_sub_mb_mode != B_DIRECT_8x8)
            {
                if(u1_sub_mb_mode > B_BI_8x8)
                {
                    ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
                }
            }
            else if(!ps_dec->s_high_profile.u1_direct_8x8_inference_flag)
            {
                ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
            }
        }
        /* Rewind both pointers to the first of the four entries just written */
        pi1_ref_idx_l0 -= 4;
        pi1_ref_idx_l1 -= 4;
    }
    else
    {
        UWORD8 u1_mb_pred_mode_part0 = pu1_mb_pred_modes[0][u1_mbpred_mode];
        UWORD8 u1_mb_pred_mode_part1 = pu1_mb_pred_modes[1][u1_mbpred_mode];
        u1_mb_mc_mode = ps_cur_mb_info->u1_mb_mc_mode;
        u1_num_mb_part = pu1_num_mb_part[u1_mb_mc_mode];
        /* Storing collocated Mb and SubMb mode information */
        *pu1_col_info++ = (u1_mb_mc_mode << 6);
        if(u1_mb_mc_mode)
            *pu1_col_info++ = (u1_mb_mc_mode << 6);
        u4_mb_mc_mode = u1_mb_mc_mode | (u1_mb_mc_mode << 8);
        u4_mb_mc_mode <<= 16;
        u4_mb_pred_mode = ((u1_mb_pred_mode_part0 << 8) | u1_mb_pred_mode_part1) << 16;

        /* The ++ / -- pairs write entries 0 and 1 and leave each pointer */
        /* back on entry 0                                                */
        *pi1_ref_idx_l0++ = (u1_mb_pred_mode_part0 & PRED_L0) ? u1_dec_ref_l0 : -1;
        *pi1_ref_idx_l0-- = (u1_mb_pred_mode_part1 & PRED_L0) ? u1_dec_ref_l0 : -1;
        *pi1_ref_idx_l1++ = (u1_mb_pred_mode_part0 & PRED_L1) ? u1_dec_ref_l1 : -1;
        *pi1_ref_idx_l1-- = (u1_mb_pred_mode_part1 & PRED_L1) ? u1_dec_ref_l1 : -1;
    }
    /* Reference indices: list 0 (second argument 0), then list 1 (2) */
    {
        WORD8 *pi1_lft_cxt = ps_dec->pi1_left_ref_idx_ctxt_inc;
        WORD8 *pi1_top_cxt = p_curr_ctxt->i1_ref_idx;

        ret = ih264d_parse_ref_idx_cabac(u1_num_mb_part, 0, u1_dec_ref_l0,
                                   u1_mb_mc_mode, pi1_ref_idx_l0, pi1_lft_cxt,
                                   pi1_top_cxt, ps_cab_env, ps_bitstrm,
                                   ps_dec->p_ref_idx_t);
        if(ret != OK)
            return ret;

        ret = ih264d_parse_ref_idx_cabac(u1_num_mb_part, 2, u1_dec_ref_l1,
                                   u1_mb_mc_mode, pi1_ref_idx_l1, pi1_lft_cxt,
                                   pi1_top_cxt, ps_cab_env, ps_bitstrm,
                                   ps_dec->p_ref_idx_t);
        if(ret != OK)
            return ret;
    }
    /* Read MotionVectors */
    {
        const UWORD8 *pu1_top_left_sub_mb_indx;
        UWORD8 uc_j, uc_lx;
        UWORD8 u1_mb_part_wd, u1_mb_part_ht;

        const UWORD8 *pu1_sub_mb_indx_mod =
                        (const UWORD8 *)gau1_ih264d_submb_indx_mod
                                        + (u1_sub_mb * 6);
        const UWORD8 *pu1_sub_mb_partw = (const UWORD8 *)gau1_ih264d_submb_partw;
        const UWORD8 *pu1_sub_mb_parth = (const UWORD8 *)gau1_ih264d_submb_parth;
        const UWORD8 *pu1_num_sub_mb_part =
                        (const UWORD8 *)gau1_ih264d_num_submb_part;
        const UWORD8 *pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
        const UWORD8 *pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;

        UWORD8 u1_p_idx = 0;
        UWORD8 u1_num_submb_part;
        parse_part_params_t *ps_part;
        /* Initialisations */
        mv_pred_t *ps_mv_start = ps_dec->ps_mv_cur + (u1_mb_num << 4);
        ps_part = ps_dec->ps_part;

        /* Default initialization for non subMb case */
        u1_mb_part_wd = pu1_mb_partw[u1_mb_mc_mode];
        u1_mb_part_ht = pu1_mb_parth[u1_mb_mc_mode];
        u1_num_submb_part = 1;

        /* Decoding the MV for the subMB */
        /* Pass uc_lx == 0 handles PRED_L0, pass uc_lx == 1 handles PRED_L1 */
        for(uc_lx = 0; uc_lx < 2; uc_lx++)
        {
            UWORD8 u1_sub_mb_num = 0;
            UWORD32 u4_mb_pred_mode_tmp = u4_mb_pred_mode;
            UWORD32 u4_mb_mc_mode_tmp = u4_mb_mc_mode;
            UWORD8 u1_mb_mc_mode_1, u1_pred_mode, uc_i;
            /* Nibbles 0, 2, 8, 10 consumed top-down: one start index per */
            /* partition in the B_8x8 case                                */
            UWORD16 u2_sub_mb_num = 0x028A;
            UWORD8 u1_b2 = uc_lx << 1;
            u1_pred_mode = (uc_lx) ? PRED_L1 : PRED_L0;
            /* Default for Cabac */
            pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode << 1);
            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
            {

                /* Take this partition's bytes from the top of the packed words */
                WORD8 i1_pred = (UWORD8)(u4_mb_pred_mode_tmp >> 24);
                u1_mb_mc_mode_1 = (UWORD8)(u4_mb_mc_mode_tmp >> 24);
                u4_mb_pred_mode_tmp <<= 8;
                u4_mb_mc_mode_tmp <<= 8;

                /* subMb prediction mode */
                if(u1_sub_mb)
                {
                    u1_mb_part_wd = pu1_sub_mb_partw[u1_mb_mc_mode_1];
                    u1_mb_part_ht = pu1_sub_mb_parth[u1_mb_mc_mode_1];
                    u1_sub_mb_num = u2_sub_mb_num >> 12;
                    pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode_1 << 1);
                    u1_num_submb_part = pu1_num_sub_mb_part[u1_mb_mc_mode_1];
                    u2_sub_mb_num = u2_sub_mb_num << 4;
                }

                for(uc_j = 0; uc_j < u1_num_submb_part;
                                uc_j++, pu1_top_left_sub_mb_indx++)
                {
                    mv_pred_t *ps_mv;
                    u1_sub_mb_num = u1_sub_mb_num + *pu1_top_left_sub_mb_indx;
                    ps_mv = ps_mv_start + u1_sub_mb_num;

                    /* Storing Info for partitions, writing only once */
                    /* (done on the second pass, uc_lx == 1)          */
                    if(uc_lx)
                    {
                        ps_part->u1_is_direct = (!i1_pred);
                        ps_part->u1_pred_mode = i1_pred;
                        ps_part->u1_sub_mb_num = u1_sub_mb_num;
                        ps_part->u1_partheight = u1_mb_part_ht;
                        ps_part->u1_partwidth = u1_mb_part_wd;

                        /* Increment partition Index */
                        u1_p_idx++;
                        ps_part++;
                    }

                    /* Called for every partition; the fifth argument is */
                    /* non-zero only when the partition uses this list   */
                    ih264d_get_mvd_cabac(u1_sub_mb_num, u1_b2, u1_mb_part_wd,
                                         u1_mb_part_ht,
                                         (UWORD8)(i1_pred & u1_pred_mode), ps_dec,
                                         ps_mv);
                }
            }
        }
        /* write back to the scratch partition info */

        ps_dec->ps_part = ps_part;
        ps_parse_mb_data->u1_num_part = u1_sub_mb ? u1_p_idx : u1_num_mb_part;

    }

    return OK;
}

/*!
 **************************************************************************
 * \if Function name : ih264d_parse_bmb_cabac \endif
 *
 * \brief
 *    This function parses CABAC syntax of a B MB.
+ *
+ * \return
+ *    0 on Success and Error code otherwise
+ *
+ * \note
+ *    Parsing order in this function: motion data first (delegated to
+ *    ih264d_parse_bmb_non_direct_cabac(), or for B_DIRECT a single
+ *    PART_DIRECT_16x16 partition is queued and the left/current MV and
+ *    ref-idx context-increment stores are zeroed), then
+ *    coded_block_pattern, the optional transform_size_8x8_flag, the
+ *    optional mb_qp_delta and finally the residual.
+ *    ERROR_CBP is returned when the decoded CBP is above 47, and
+ *    ERROR_EOB_TERMINATE_T when EXCEED_OFFSET(ps_dec->ps_bitstrm) is true
+ *    after the residual has been parsed. Errors from the helpers that
+ *    return a status are passed through unchanged.
+ **************************************************************************
+ */
+WORD32 ih264d_parse_bmb_cabac(dec_struct_t * ps_dec,
+                              dec_mb_info_t * ps_cur_mb_info,
+                              UWORD8 u1_mb_num,
+                              UWORD8 u1_num_mbsNby2)
+{
+    UWORD8 u1_cbp;
+    deblk_mb_t * ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_mb_num;
+    const UWORD8 *puc_mb_mc_mode = (const UWORD8 *)gau1_ih264d_mb_mc_mode;
+    UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+    ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
+
+    WORD32 ret;
+    UWORD8 u1_Bdirect_tranform_read = 1; /* replaced by direct_8x8_inference_flag for B_DIRECT */
+    ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 1; /* may be cleared by the non-direct parser */
+
+    ps_cur_mb_info->u1_mb_mc_mode = puc_mb_mc_mode[5 + u1_mb_type]; /* NOTE(review): the offset 5 presumably skips the non-B entries of the table - confirm */
+
+    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
+
+    ps_cur_deblk_mb->u1_mb_type |= D_B_SLICE;
+    if(u1_mb_type != B_DIRECT)
+    {
+        ret = ih264d_parse_bmb_non_direct_cabac(ps_dec, ps_cur_mb_info, u1_mb_num,
+                                                u1_num_mbsNby2);
+        if(ret != OK)
+            return ret;
+    }
+    else
+    {
+
+        /************ STORING PARTITION INFO ***********/
+        parse_part_params_t * ps_part_info;
+        ps_part_info = ps_dec->ps_part;
+        ps_part_info->u1_is_direct = PART_DIRECT_16x16;
+        ps_part_info->u1_sub_mb_num = 0;
+        ps_dec->ps_part++; /* one scratch partition entry consumed by this MB */
+        p_curr_ctxt->u1_mb_type = CAB_BD16x16;
+
+        /* No mvd / ref_idx is coded for a direct MB: zero the context stores */
+        MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
+        memset(ps_dec->pi1_left_ref_idx_ctxt_inc, 0, 4);
+        MEMSET_16BYTES(p_curr_ctxt->u1_mv, 0);
+        memset(p_curr_ctxt->i1_ref_idx, 0, 4);
+
+        /* check whether transform8x8 u4_flag to be read or not */
+        u1_Bdirect_tranform_read =
+                        ps_dec->s_high_profile.u1_direct_8x8_inference_flag;
+    }
+
+    /* Read the Coded block pattern */
+    u1_cbp = (WORD8)ih264d_parse_ctx_cbp_cabac(ps_dec);
+    p_curr_ctxt->u1_cbp = u1_cbp;
+    ps_cur_mb_info->u1_cbp = u1_cbp;
+
+    if(u1_cbp > 47)
+        return ERROR_CBP;
+
+    COPYTHECONTEXT("coded_block_pattern", u1_cbp);
+
+    ps_cur_mb_info->u1_tran_form8x8 = 0;
+    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+    /* transform_size_8x8_flag is read only when 8x8 transform is enabled, */
+    /* some luma CBP bit is set, no partition is smaller than 8x8 and the  */
+    /* B_DIRECT inference condition captured above allows it               */
+    if((ps_dec->s_high_profile.u1_transform8x8_present) && (u1_cbp & (0xf))
+                    && (ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag)
+                    && (u1_Bdirect_tranform_read))
+    {
+        ps_cur_mb_info->u1_tran_form8x8 = ih264d_parse_transform8x8flag_cabac(
+                        ps_dec, ps_cur_mb_info);
+        COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
+
+        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
+        p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8;
+    }
+    else
+    {
+        p_curr_ctxt->u1_transform8x8_ctxt = 0;
+    }
+
+    /* Context bookkeeping for an inter MB */
+    p_curr_ctxt->u1_intra_chroma_pred_mode = 0;
+    p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE; /* clears bit 0 of the current-MB DC csbp */
+    ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6; /* keeps only bits 1 and 2 of the left DC csbp */
+
+    /* Read mb_qp_delta */
+    if(u1_cbp)
+    {
+        WORD8 c_temp;
+        ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &c_temp);
+        if(ret != OK)
+            return ret;
+        COPYTHECONTEXT("mb_qp_delta", c_temp);
+        if(c_temp)
+        {
+            ret = ih264d_update_qp(ps_dec, c_temp);
+            if(ret != OK)
+                return ret;
+        }
+    }
+    else
+        ps_dec->i1_prev_mb_qp_delta = 0;
+
+    ih264d_parse_residual4x4_cabac(ps_dec, ps_cur_mb_info, 0); /* NOTE(review): no status is checked here, unlike the CAVLC path - confirm the callee cannot fail */
+    if(EXCEED_OFFSET(ps_dec->ps_bitstrm))
+        return ERROR_EOB_TERMINATE_T;
+    return OK;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_bmb_cavlc \endif
+ *
+ * \brief
+ *    This function parses CAVLC syntax of a B MB.
+ *
+ * \return
+ *    0 on Success and Error code otherwise
+ *
+ * \note
+ *    coded_block_pattern (ue(v)) and mb_qp_delta (se(v)) are decoded with
+ *    inlined Exp-Golomb reads on the raw bitstream buffer. The coded CBP
+ *    index is rejected with ERROR_CBP when above 47 and is then mapped
+ *    through gau1_ih264d_cbp_inter. mb_qp_delta outside [-26, 25] yields
+ *    ERROR_INV_RANGE_QP_T. When the CBP is zero no residual is parsed and
+ *    ih264d_update_nnz_for_skipmb() is called instead.
+ **************************************************************************
+ */
+WORD32 ih264d_parse_bmb_cavlc(dec_struct_t * ps_dec,
+                              dec_mb_info_t * ps_cur_mb_info,
+                              UWORD8 u1_mb_num,
+                              UWORD8 u1_num_mbsNby2)
+{
+    UWORD32 u4_cbp;
+    deblk_mb_t * ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_mb_num;
+    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+    UWORD32 * pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+    const UWORD8 *puc_mb_mc_mode = (const UWORD8 *)gau1_ih264d_mb_mc_mode;
+    UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+
+    WORD32 ret;
+    UWORD8 u1_Bdirect_tranform_read = 1; /* replaced by direct_8x8_inference_flag for B_DIRECT */
+    ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 1;
+    ps_cur_mb_info->u1_tran_form8x8 = 0;
+    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
+
+    ps_cur_mb_info->u1_mb_mc_mode = puc_mb_mc_mode[5 + u1_mb_type];
+
+    ps_cur_deblk_mb->u1_mb_type |= D_B_SLICE;
+    if(u1_mb_type != B_DIRECT)
+    {
+        ret = ih264d_parse_bmb_non_direct_cavlc(ps_dec, ps_cur_mb_info, u1_mb_num,
+                                                u1_num_mbsNby2);
+        if(ret != OK)
+            return ret;
+    }
+    else
+    {
+        /************ STORING PARTITION INFO ***********/
+        parse_part_params_t * ps_part_info;
+        ps_part_info = ps_dec->ps_part;
+        ps_part_info->u1_is_direct = PART_DIRECT_16x16;
+        ps_part_info->u1_sub_mb_num = 0;
+        ps_dec->ps_part++;
+        /* check whether transform8x8 u4_flag to be read or not */
+        u1_Bdirect_tranform_read =
+                        ps_dec->s_high_profile.u1_direct_8x8_inference_flag;
+    }
+
+    /* Read the Coded block pattern */
+    {
+        const UWORD8 * puc_CbpInter = gau1_ih264d_cbp_inter;
+//Inlined ih264d_uev
+        UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+        UWORD32 u4_word, u4_ldz;
+
+        /***************************************************************/
+        /* Find leading zeros in next 32 bits                          */
+        /***************************************************************/
+        NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+        u4_ldz = CLZ(u4_word);
+        /* Flush the ps_bitstrm */
+        u4_bitstream_offset += (u4_ldz + 1);
+        /* Read the suffix from the ps_bitstrm */
+        u4_word = 0;
+        if(u4_ldz)
+            GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
+        *pu4_bitstrm_ofst = u4_bitstream_offset;
+        u4_cbp = ((1 << u4_ldz) + u4_word - 1); /* NOTE(review): undefined shift if CLZ() can return 32 for an all-zero word - confirm */
+//Inlined ih264d_uev
+        if(u4_cbp > 47)
+            return ERROR_CBP;
+        u4_cbp = puc_CbpInter[u4_cbp]; /* coded index -> actual CBP value */
+
+        if((ps_dec->s_high_profile.u1_transform8x8_present) && (u4_cbp & (0xf))
+                        && (ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag)
+                        && (u1_Bdirect_tranform_read))
+        {
+            ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm);
+            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
+            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
+        }
+
+    }
+
+    COPYTHECONTEXT("coded_block_pattern", u4_cbp);
+    ps_cur_mb_info->u1_cbp = u4_cbp;
+
+    /* Read mb_qp_delta */
+    if(u4_cbp)
+    {
+        WORD32 i_temp;
+//inlining ih264d_sev
+
+        UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+        UWORD32 u4_word, u4_ldz, u4_abs_val;
+
+        /***************************************************************/
+        /* Find leading zeros in next 32 bits                          */
+        /***************************************************************/
+        NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+        u4_ldz = CLZ(u4_word);
+
+        /* Flush the ps_bitstrm */
+        u4_bitstream_offset += (u4_ldz + 1);
+
+        /* Read the suffix from the ps_bitstrm */
+        u4_word = 0;
+        if(u4_ldz)
+            GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
+
+        *pu4_bitstrm_ofst = u4_bitstream_offset;
+        u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1; /* NOTE(review): same shift-by-32 concern as for the CBP read above - confirm */
+
+        /* An odd suffix selects the negative value, an even one the positive */
+        if(u4_word & 0x1)
+            i_temp = (-(WORD32)u4_abs_val);
+        else
+            i_temp = (u4_abs_val);
+
+        if(i_temp < -26 || i_temp > 25)
+            return ERROR_INV_RANGE_QP_T;
+//inlinined ih264d_sev
+        COPYTHECONTEXT("mb_qp_delta", i_temp);
+        if(i_temp)
+        {
+            ret = ih264d_update_qp(ps_dec, (WORD8)i_temp);
+            if(ret != OK)
+                return ret;
+        }
+
+        ret = ih264d_parse_residual4x4_cavlc(ps_dec, ps_cur_mb_info, 0);
+        if(ret != OK)
+            return ret;
+        if(EXCEED_OFFSET(ps_bitstrm))
+            return ERROR_EOB_TERMINATE_T;
+    }
+    else
+    {
+        ps_dec->i1_prev_mb_qp_delta = 0;
+        ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
+    }
+
+    return OK;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_mv_pred_ref_tfr_nby2_bmb \endif
+ *
+ * \brief
+ *    For the B-slice MBs with index u1_mb_idx .. (u1_num_mbs - 1):
+ *    - non-intra MBs: when u1_wted_bipred_idc is non-zero, selects the
+ *      weight/offset table entry for up to four blocks; then, for every
+ *      partition, either calls the slice's pf_decodeDirect handler or
+ *      runs pf_mvpred, adds the predictor to the MV values already held
+ *      in ps_mv_cur, queues one packed prediction entry per used list via
+ *      ih264d_fill_pred_info() and replicates the MV and colocated-zero
+ *      info with ih264d_rep_mv_colz().
+ *    - intra MBs: replicates a zero MV with reference index -1.
+ *    - when u4_num_cores < 3 and deblocking is not disabled, calls
+ *      pf_compute_bs for the MB.
+ *
+ * \return
+ *    OK on success, otherwise the error returned by pf_decodeDirect
+ **************************************************************************
+ */
+WORD32 ih264d_mv_pred_ref_tfr_nby2_bmb(dec_struct_t * ps_dec,
+                                     UWORD8 u1_mb_idx,
+                                     UWORD8 u1_num_mbs)
+{
+    parse_pmbarams_t * ps_mb_part_info;
+    parse_part_params_t * ps_part;
+    mv_pred_t *ps_mv_nmb, *ps_mv_nmb_start, *ps_mv_ntop, *ps_mv_ntop_start;
+    pic_buffer_t * ps_ref_frame;
+    UWORD8 u1_direct_mode_width;
+    UWORD8 i, j;
+    dec_mb_info_t * ps_cur_mb_info;
+    const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+    UWORD8 u1_field;
+    WORD32 ret = 0;
+
+    /* Rewind by 16 per MB handled here; the loop adds SUB_BLK_SIZE back per MB */
+    ps_dec->i4_submb_ofst -= (u1_num_mbs - u1_mb_idx) << 4;
+    ps_mb_part_info = ps_dec->ps_parse_mb_data;
+    ps_part = ps_dec->ps_parse_part_params;
+
+    /* N/2 Mb MvPred and Transfer Setup Loop */
+    for(i = u1_mb_idx; i < u1_num_mbs; i++, ps_mb_part_info++)
+    {
+        UWORD8 u1_colz = 0;
+        ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
+        /* Restore the slice scratch MbX and MbY context */
+        ps_cur_mb_info = ps_dec->ps_nmb_info + i;
+        ps_dec->u2_wait_id = i;
+
+        u1_field = ps_cur_mb_info->u1_mb_field_decodingflag;
+
+        ps_mv_nmb_start = ps_dec->ps_mv_cur + (i << 4); /* 16 mv_pred_t entries per MB */
+        ps_dec->u2_mbx = ps_cur_mb_info->u2_mbx;
+        ps_dec->u2_mby = ps_cur_mb_info->u2_mby;
+        ps_dec->u1_currB_type = 0;
+        ps_dec->u2_mv_2mb[i & 0x1] = 0;
+
+        /* Look for MV Prediction and Reference Transfer in Non-I Mbs */
+        if(!ps_mb_part_info->u1_isI_mb)
+        {
+            UWORD8 u1_blk_no;
+            WORD16 i1_ref_idx, i1_ref_idx1;
+            UWORD8 u1_pred_mode;
+            UWORD8 u1_sub_mb_x, u1_sub_mb_y, u1_sub_mb_num;
+            UWORD8 u1_lx, u1_lx_start, u1_lxend, u1_tmp_lx;
+            UWORD8 u1_num_part, u1_num_ref, u1_wd, u1_ht;
+            UWORD32 *pu4_wt_offst;
+            UWORD8 u1_scale_ref, u4_bot_mb;
+            deblk_mb_t * ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + i;
+            WORD8 (*pi1_ref_idx)[MAX_REFIDX_INFO_PER_MB] =
+                            ps_mb_part_info->i1_ref_idx;
+            WORD8 *pi1_ref_idx0 = pi1_ref_idx[0],
+                            *pi1_ref_idx1 = pi1_ref_idx[1];
+            UWORD32 **ppu4_wt_ofst = ps_mb_part_info->pu4_wt_offst;
+
+            /* MB Level initialisations */
+            ps_dec->u4_num_pmbair = i >> u1_mbaff;
+            ps_dec->u1_mb_idx_mv = i;
+
+            /* CHANGED CODE */
+            ps_mv_ntop_start = ps_mv_nmb_start
+                            - (ps_dec->u2_frm_wd_in_mbs << (4 + u1_mbaff)) + 12;
+
+            u1_num_part = ps_mb_part_info->u1_num_part;
+            ps_cur_deblk_mb->u1_mb_type |= (u1_num_part > 1) << 1;
+            u1_direct_mode_width = (1 == ps_mb_part_info->u1_num_part) ? 16 : 8;
+
+
+            ps_cur_mb_info->u4_pred_info_pkd_idx = ps_dec->u4_pred_info_pkd_idx;
+            ps_cur_mb_info->u1_num_pred_parts = 0;
+
+            /****************************************************/
+            /* weighted u4_ofst pointer calculations, this loop */
+            /* runs maximum 4 times, even in direct cases       */
+            /****************************************************/
+            u1_scale_ref = u1_mbaff & ps_cur_mb_info->u1_mb_field_decodingflag;
+            u4_bot_mb = 1 - ps_cur_mb_info->u1_topmb;
+            if(ps_dec->ps_cur_pps->u1_wted_bipred_idc)
+            {
+                u1_num_ref = MIN(u1_num_part, 4);
+                if(PART_DIRECT_16x16 != ps_part->u1_is_direct)
+                {
+                    for(u1_blk_no = 0; u1_blk_no < u1_num_ref; u1_blk_no++)
+                    {
+                        /* Table index = L0 index * num_ref_idx_l1_active + L1 index, */
+                        /* with negative (unused) indices treated as 0                */
+                        i1_ref_idx = MAX(pi1_ref_idx0[u1_blk_no], 0);
+                        if(u1_scale_ref)
+                            i1_ref_idx >>= 1;
+                        i1_ref_idx *=
+                                        ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1];
+                        if(u1_scale_ref)
+                            i1_ref_idx +=
+                                            (MAX(pi1_ref_idx1[u1_blk_no], 0)
+                                                            >> 1);
+                        else
+                            i1_ref_idx += MAX(pi1_ref_idx1[u1_blk_no], 0);
+                        pu4_wt_offst = (UWORD32*)&ps_dec->pu4_wt_ofsts[2
+                                        * X3(i1_ref_idx)];
+
+                        if(pi1_ref_idx0[u1_blk_no] < 0)
+                            pu4_wt_offst += 1;
+
+                        ppu4_wt_ofst[u1_blk_no] = pu4_wt_offst;
+                        if(u1_scale_ref
+                                        && (ps_dec->ps_cur_pps->u1_wted_bipred_idc
+                                                        == 2))
+                        {
+                            /* Field MB with implicit weights: index the MBAFF matrix instead */
+                            i1_ref_idx = MAX(pi1_ref_idx0[u1_blk_no], 0);
+                            i1_ref_idx *=
+                                            (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
+                                                            << 1);
+                            i1_ref_idx += MAX(pi1_ref_idx1[u1_blk_no], 0);
+                            if(u4_bot_mb)
+                            {
+                                i1_ref_idx +=
+                                                (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
+                                                                << 1)
+                                                                * (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
+                                                                                << 1);
+                            }
+                            pu4_wt_offst = (UWORD32*)&ps_dec->pu4_mbaff_wt_mat[2
+                                            * X3(i1_ref_idx)];
+                            ppu4_wt_ofst[u1_blk_no] = pu4_wt_offst;
+                        }
+                    }
+                }
+            }
+
+            /**************************************************/
+            /* Loop on Partitions                             */
+            /* direct mode is reflected as a single partition */
+            /**************************************************/
+            ps_dec->u4_dma_buf_idx = 0;
+            for(j = 0; j < u1_num_part; j++, ps_part++)
+            {
+                u1_sub_mb_num = ps_part->u1_sub_mb_num;
+                ps_dec->u1_sub_mb_num = u1_sub_mb_num;
+
+                if(PART_NOT_DIRECT != ps_part->u1_is_direct)
+                {
+                    /**************************************************/
+                    /* Direct Mode, Call DecodeSpatial/TemporalDirect */
+                    /* only (those will in turn call FormMbPartInfo)  */
+                    /**************************************************/
+                    ret = ps_dec->ps_cur_slice->pf_decodeDirect(ps_dec,
+                                                                u1_direct_mode_width,
+                                                                ps_cur_mb_info, i);
+                    if(ret != OK)
+                        return ret;
+                    ps_cur_deblk_mb->u1_mb_type |= (ps_dec->u1_currB_type << 1);
+
+                }
+                else
+                {
+                    mv_pred_t s_mvPred;
+                    /**************************************************/
+                    /* Non Direct Mode, Call Motion Vector Predictor  */
+                    /* and FormMbpartInfo                             */
+                    /**************************************************/
+                    u1_sub_mb_x = u1_sub_mb_num & 0x03;
+                    u1_sub_mb_y = u1_sub_mb_num >> 2;
+                    u1_blk_no =
+                                    (u1_num_part < 4) ?
+                                                    j :
+                                                    (((u1_sub_mb_y >> 1) << 1)
+                                                                    + (u1_sub_mb_x
+                                                                                    >> 1));
+
+                    ps_mv_ntop = ps_mv_ntop_start + u1_sub_mb_x;
+                    ps_mv_nmb = ps_mv_nmb_start + u1_sub_mb_num;
+
+                    u1_pred_mode = ps_part->u1_pred_mode;
+                    u1_wd = ps_part->u1_partwidth;
+                    u1_ht = ps_part->u1_partheight;
+
+                    /* [u1_lx_start, u1_lxend) is the range of lists used; the MV */
+                    /* of the unused list is zeroed for single-list prediction    */
+                    u1_lx_start = 0;
+                    u1_lxend = 2;
+                    if( PRED_L0 == u1_pred_mode)
+                    {
+                        s_mvPred.i2_mv[2] = 0;
+                        s_mvPred.i2_mv[3] = 0;
+                        u1_lxend = 1;
+                    }
+                    if( PRED_L1 == u1_pred_mode)
+                    {
+                        s_mvPred.i2_mv[0] = 0;
+                        s_mvPred.i2_mv[1] = 0;
+                        u1_lx_start = 1;
+                    }
+
+                    /* Populate the colpic info and reference frames */
+                    s_mvPred.i1_ref_frame[0] = pi1_ref_idx0[u1_blk_no];
+                    s_mvPred.i1_ref_frame[1] = pi1_ref_idx1[u1_blk_no];
+
+                    ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb, ps_mv_ntop,
+                                      &s_mvPred, u1_sub_mb_num, u1_wd,
+                                      u1_lx_start, u1_lxend,
+                                      ps_cur_mb_info->u1_mb_mc_mode);
+
+                    /**********************************************************/
+                    /* Loop on number of predictors, 1 Each for Forw Backw    */
+                    /* Loop 2 times for BiDirect mode                         */
+                    /**********************************************************/
+                    for(u1_lx = u1_lx_start; u1_lx < u1_lxend; u1_lx++)
+                    {
+                        WORD16 i2_mv_x, i2_mv_y;
+
+                        /********************************************************/
+                        /* Predict Mv                                           */
+                        /* Add Mv Residuals and store back                      */
+                        /********************************************************/
+                        i1_ref_idx = s_mvPred.i1_ref_frame[u1_lx];
+                        u1_tmp_lx = (u1_lx << 1);
+
+                        i2_mv_x = ps_mv_nmb->i2_mv[u1_tmp_lx];
+                        i2_mv_y = ps_mv_nmb->i2_mv[u1_tmp_lx + 1];
+
+                        i2_mv_x += s_mvPred.i2_mv[u1_tmp_lx];
+                        i2_mv_y += s_mvPred.i2_mv[u1_tmp_lx + 1];
+                        s_mvPred.i2_mv[u1_tmp_lx] = i2_mv_x;
+                        s_mvPred.i2_mv[u1_tmp_lx + 1] = i2_mv_y;
+
+                        /********************************************************/
+                        /* Transfer setup call                                  */
+                        /* convert RefIdx if it is MbAff                        */
+                        /* Pass Weight Offset and refFrame                      */
+                        /********************************************************/
+                        i1_ref_idx1 = i1_ref_idx >> u1_scale_ref;
+                        if(u1_scale_ref && ((i1_ref_idx & 0x01) != u4_bot_mb))
+                            i1_ref_idx1 += MAX_REF_BUFS;
+                        ps_ref_frame =
+                                        ps_dec->ps_ref_pic_buf_lx[u1_lx][i1_ref_idx1]; /* NOTE(review): used below without a NULL check - confirm the list is always populated */
+
+                        /* Storing Colocated-Zero u4_flag */
+                        if(u1_lx == u1_lx_start)
+                        {
+                            /* Fill colocated info in MvPred structure */
+                            s_mvPred.u1_col_ref_pic_idx =
+                                            ps_ref_frame->u1_mv_buf_id;
+                            s_mvPred.u1_pic_type = ps_ref_frame->u1_pic_type;
+
+                            /* Calculating colocated zero information */
+                            u1_colz =
+                                            (u1_field << 1)
+                                                            | ((i1_ref_idx == 0)
+                                                                            && (ABS(i2_mv_x)
+                                                                                            <= 1)
+                                                                            && (ABS(i2_mv_y)
+                                                                                            <= 1));
+                            u1_colz |= ps_mb_part_info->u1_col_info[u1_blk_no];
+                        }
+
+                        pu4_wt_offst = ppu4_wt_ofst[u1_blk_no];
+                        {
+                            pred_info_pkd_t *ps_pred_pkd;
+                            WORD16 i2_mv[2];
+
+                            i2_mv[0] = i2_mv_x;
+                            i2_mv[1] = i2_mv_y;
+
+                            ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+                            ih264d_fill_pred_info(i2_mv,u1_wd,u1_ht,u1_sub_mb_num,u1_pred_mode,
+                                            ps_pred_pkd,ps_ref_frame->u1_pic_buf_id,i1_ref_idx,pu4_wt_offst,
+                                            ps_ref_frame->u1_pic_type);
+                            ps_dec->u4_pred_info_pkd_idx++;
+                            ps_cur_mb_info->u1_num_pred_parts++;
+
+
+                        }
+
+                    }
+                    ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb,
+                                       u1_sub_mb_num, u1_colz, u1_ht,
+                                       u1_wd);
+                }
+            }
+
+        }
+        else
+        {
+            /* Set zero values in case of Intra Mbs */
+            mv_pred_t s_mvPred =
+                {
+                    { 0, 0, 0, 0 },
+                      { -1, -1 }, 0, 0};
+            /* Storing colocated zero information */
+            ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
+                               (UWORD8)(u1_field << 1), 4, 4);
+        }
+
+        /*if num _cores is set to 3 ,compute bs will be done in another thread*/
+        if(ps_dec->u4_num_cores < 3)
+        {
+            if(ps_dec->u4_app_disable_deblk_frm == 0)
+                ps_dec->pf_compute_bs(ps_dec, ps_cur_mb_info,
+                                     (UWORD16)(i >> u1_mbaff));
+        }
+    }
+    return OK;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_get_implicit_weights \endif
+ *
+ * \brief
+ *    Calculates Implicit Weights.
+ * + * \return + * None + * + ************************************************************************** + */ +void ih264d_get_implicit_weights(dec_struct_t *ps_dec) +{ + UWORD32 *pu4_iwt_ofst; + UWORD8 i, j; + struct pic_buffer_t *ps_pic_buff0, *ps_pic_buff1; + WORD16 i2_dist_scale_factor; + WORD16 i16_tb, i16_td, i16_tx; + UWORD32 u4_poc0, u4_poc1; + UWORD32 ui_temp0, ui_temp1; + UWORD8 uc_num_ref_idx_l0_active, uc_num_ref_idx_l1_active; + + pu4_iwt_ofst = ps_dec->pu4_wts_ofsts_mat; + uc_num_ref_idx_l0_active = + ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]; + uc_num_ref_idx_l1_active = + ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]; + + for(i = 0; i < uc_num_ref_idx_l0_active; i++) + { + ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][i]; + u4_poc0 = ps_pic_buff0->i4_avg_poc; + for(j = 0; j < uc_num_ref_idx_l1_active; j++) + { + ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][j]; + u4_poc1 = ps_pic_buff1->i4_avg_poc; + + if(u4_poc1 != u4_poc0) + { + i16_tb = ps_dec->ps_cur_pic->i4_poc - u4_poc0; + i16_tb = CLIP3(-128, 127, i16_tb); + i16_td = u4_poc1 - u4_poc0; + i16_td = CLIP3(-128, 127, i16_td); + i16_tx = (16384 + ABS(SIGN_POW2_DIV(i16_td, 1))) / i16_td; + i2_dist_scale_factor = CLIP3(-1024, 1023, + (((i16_tb * i16_tx) + 32) >> 6)); + + if(/*((u4_poc1 - u4_poc0) == 0) ||*/ + (!(ps_pic_buff1->u1_is_short && ps_pic_buff0->u1_is_short)) + || ((i2_dist_scale_factor >> 2) < -64) + || ((i2_dist_scale_factor >> 2) > 128)) + { + /* same for forward and backward, wt=32 and Offset = 0 */ + ui_temp0 = 0x00000020; + ui_temp1 = 0x00000020; + } + else + { + ui_temp0 = 64 - (i2_dist_scale_factor >> 2); + ui_temp1 = (i2_dist_scale_factor >> 2); + } + } + else + { + ui_temp0 = 0x00000020; + ui_temp1 = 0x00000020; + } + pu4_iwt_ofst[0] = pu4_iwt_ofst[2] = pu4_iwt_ofst[4] = ui_temp0; + pu4_iwt_ofst[1] = pu4_iwt_ofst[3] = pu4_iwt_ofst[5] = ui_temp1; + pu4_iwt_ofst += 6; + } + } + if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag) + { + UWORD8 k; + WORD32 i4_cur_poc = 
ps_dec->ps_cur_pic->i4_top_field_order_cnt; + UWORD32* pu4_wt_mat = ps_dec->pu4_mbaff_wt_mat; + /* Form the Implicit Weighted prediction matrix for field MBs also */ + for(k = 0; k < 2; k++) + { + for(i = 0; i < (uc_num_ref_idx_l0_active << 1); i++) + { + UWORD16 u2_l0_idx; + + /*u2_l0_idx = (i >= uc_num_ref_idx_l0_active) + ?(MAX_REF_BUFS + i - uc_num_ref_idx_l0_active) : (i) ;*/ + + u2_l0_idx = i >> 1; + if((i & 0x01) != k) + { + u2_l0_idx += MAX_REF_BUFS; + } + ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][u2_l0_idx]; + u4_poc0 = ps_pic_buff0->i4_poc; + for(j = 0; j < (uc_num_ref_idx_l1_active << 1); j++) + { + UWORD16 u2_l1_idx; + /*u2_l1_idx = (j >= uc_num_ref_idx_l1_active) + ? (MAX_REF_BUFS + j - uc_num_ref_idx_l1_active ) : (j) ;*/ + + u2_l1_idx = j >> 1; + if((j & 0x01) != k) + { + u2_l1_idx += MAX_REF_BUFS; + } + ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][u2_l1_idx]; + u4_poc1 = ps_pic_buff1->i4_poc; + if(u4_poc1 != u4_poc0) + { + i16_tb = i4_cur_poc - u4_poc0; + i16_tb = CLIP3(-128, 127, i16_tb); + i16_td = u4_poc1 - u4_poc0; + i16_td = CLIP3(-128, 127, i16_td); + i16_tx = (16384 + ABS(SIGN_POW2_DIV(i16_td, 1))) + / i16_td; + i2_dist_scale_factor = CLIP3( + -1024, 1023, + (((i16_tb * i16_tx) + 32) >> 6)); + + if(/*((u4_poc1 - u4_poc0) == 0) ||*/ + (!(ps_pic_buff1->u1_is_short && ps_pic_buff0->u1_is_short)) + || ((i2_dist_scale_factor >> 2) < -64) + || ((i2_dist_scale_factor >> 2) > 128)) + { + /* same for forward and backward, wt=32 and Offset = 0 */ + ui_temp0 = 0x00000020; + ui_temp1 = 0x00000020; + } + else + { + ui_temp0 = 64 - (i2_dist_scale_factor >> 2); + ui_temp1 = (i2_dist_scale_factor >> 2); + } + } + else + { + ui_temp0 = 0x00000020; + ui_temp1 = 0x00000020; + } + /* Store in the weight matrix */ + *pu4_wt_mat++ = ui_temp0; + *pu4_wt_mat++ = ui_temp1; + *pu4_wt_mat++ = ui_temp0; + *pu4_wt_mat++ = ui_temp1; + *pu4_wt_mat++ = ui_temp0; + *pu4_wt_mat++ = ui_temp1; + + } + } + i4_cur_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt; + } + } +} + 
+/*! + ************************************************************************** + * \if Function name : ih264d_decode_bslice \endif + * + * \brief + * Decodes a B Slice + * + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +WORD32 ih264d_parse_bslice(dec_struct_t * ps_dec, UWORD16 u2_first_mb_in_slice) +{ + dec_pic_params_t * ps_pps = ps_dec->ps_cur_pps; + dec_slice_params_t * ps_slice = ps_dec->ps_cur_slice; + dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm; + UWORD8 u1_ref_idx_re_flag_lx; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst; + + UWORD32 u4_temp, ui_temp1; + WORD32 i_temp; + WORD32 ret; + + /*--------------------------------------------------------------------*/ + /* Read remaining contents of the slice header */ + /*--------------------------------------------------------------------*/ + { + WORD8 *pi1_buf; + WORD16 *pi2_mv = ps_dec->s_default_mv_pred.i2_mv; + WORD32 *pi4_mv = (WORD32*)pi2_mv; + WORD16 *pi16_refFrame; + pi1_buf = ps_dec->s_default_mv_pred.i1_ref_frame; + pi16_refFrame = (WORD16*)pi1_buf; + *pi4_mv = 0; + *(pi4_mv + 1) = 0; + *pi16_refFrame = OUT_OF_RANGE_REF; + ps_dec->s_default_mv_pred.u1_col_ref_pic_idx = (UWORD8)-1; + ps_dec->s_default_mv_pred.u1_pic_type = (UWORD8)-1; + } + + ps_slice->u1_num_ref_idx_active_override_flag = ih264d_get_bit_h264( + ps_bitstrm); + COPYTHECONTEXT("SH: num_ref_idx_override_flag", + ps_slice->u1_num_ref_idx_active_override_flag); + + u4_temp = ps_dec->ps_cur_pps->u1_num_ref_idx_lx_active[0]; + ui_temp1 = ps_dec->ps_cur_pps->u1_num_ref_idx_lx_active[1]; + if(ps_slice->u1_num_ref_idx_active_override_flag) + { + u4_temp = 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + COPYTHECONTEXT("SH: num_ref_idx_l0_active_minus1", + u4_temp - 1); + ui_temp1 = 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + COPYTHECONTEXT("SH: num_ref_idx_l1_active_minus1", + 
ui_temp1 - 1); + } + + { + UWORD8 u1_max_ref_idx = MAX_FRAMES; + if(ps_slice->u1_field_pic_flag) + { + u1_max_ref_idx = MAX_FRAMES << 1; + } + if((u4_temp > u1_max_ref_idx) || (ui_temp1 > u1_max_ref_idx)) + { + return ERROR_NUM_REF; + } + ps_slice->u1_num_ref_idx_lx_active[0] = u4_temp; + ps_slice->u1_num_ref_idx_lx_active[1] = ui_temp1; + } + /* Initialize the Reference list once in Picture if the slice type */ + /* of first slice is between 5 to 9 defined in table 7.3 of standard */ + /* If picture contains both P & B slices then Initialize the Reference*/ + /* List only when it switches from P to B and B to P */ + + { + UWORD8 init_idx_flg = (ps_dec->u1_pr_sl_type + != ps_dec->ps_cur_slice->u1_slice_type); + if(ps_dec->u1_first_pb_nal_in_pic + || (init_idx_flg & !ps_dec->u1_sl_typ_5_9) + || ps_dec->u1_num_ref_idx_lx_active_prev + != ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]) + ih264d_init_ref_idx_lx_b(ps_dec); + if(ps_dec->u1_first_pb_nal_in_pic & ps_dec->u1_sl_typ_5_9) + ps_dec->u1_first_pb_nal_in_pic = 0; + } + /* Store the value for future slices in the same picture */ + ps_dec->u1_num_ref_idx_lx_active_prev = + ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]; + + u1_ref_idx_re_flag_lx = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("SH: ref_pic_list_reordering_flag_l0",u1_ref_idx_re_flag_lx); + + /* Modified temporarily */ + if(u1_ref_idx_re_flag_lx) + { + WORD8 ret; + ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_mod_dpb[0]; + ret = ih264d_ref_idx_reordering(ps_dec, 0); + if(ret == -1) + return ERROR_REFIDX_ORDER_T; + } + else + ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_init_dpb[0]; + + u1_ref_idx_re_flag_lx = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("SH: ref_pic_list_reordering_flag_l1",u1_ref_idx_re_flag_lx); + + /* Modified temporarily */ + if(u1_ref_idx_re_flag_lx) + { + WORD8 ret; + ps_dec->ps_ref_pic_buf_lx[1] = ps_dec->ps_dpb_mgr->ps_mod_dpb[1]; + ret = ih264d_ref_idx_reordering(ps_dec, 1); + if(ret == -1) + 
return ERROR_REFIDX_ORDER_T; + } + else + ps_dec->ps_ref_pic_buf_lx[1] = ps_dec->ps_dpb_mgr->ps_init_dpb[1]; + + /* Create refIdx to POC mapping */ + { + void **ppv_map_ref_idx_to_poc_lx; + WORD8 idx; + struct pic_buffer_t *ps_pic; + + ppv_map_ref_idx_to_poc_lx = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L0; + ppv_map_ref_idx_to_poc_lx[0] = 0; + ppv_map_ref_idx_to_poc_lx++; + for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]; + idx++) + { + ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx]; + ppv_map_ref_idx_to_poc_lx[idx] = (ps_pic->pu1_buf1); + } + + ppv_map_ref_idx_to_poc_lx = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L1; + + ppv_map_ref_idx_to_poc_lx[0] = 0; + ppv_map_ref_idx_to_poc_lx++; + for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]; + idx++) + { + ps_pic = ps_dec->ps_ref_pic_buf_lx[1][idx]; + ppv_map_ref_idx_to_poc_lx[idx] = (ps_pic->pu1_buf1); + } + + if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag) + { + void **ppv_map_ref_idx_to_poc_lx_t, **ppv_map_ref_idx_to_poc_lx_b; + + ppv_map_ref_idx_to_poc_lx_t = ps_dec->ppv_map_ref_idx_to_poc + + TOP_LIST_FLD_L0; + ppv_map_ref_idx_to_poc_lx_b = ps_dec->ppv_map_ref_idx_to_poc + + BOT_LIST_FLD_L0; + + ppv_map_ref_idx_to_poc_lx_t[0] = 0; + ppv_map_ref_idx_to_poc_lx_t++; + ppv_map_ref_idx_to_poc_lx_b[0] = 0; + ppv_map_ref_idx_to_poc_lx_b++; + for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]; + idx++) + { + ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx]; + ppv_map_ref_idx_to_poc_lx_t[0] = (ps_pic->pu1_buf1); + ppv_map_ref_idx_to_poc_lx_b[1] = (ps_pic->pu1_buf1); + + ppv_map_ref_idx_to_poc_lx_b[0] = (ps_pic->pu1_buf1) + 1; + ppv_map_ref_idx_to_poc_lx_t[1] = (ps_pic->pu1_buf1) + 1; + + ppv_map_ref_idx_to_poc_lx_t += 2; + ppv_map_ref_idx_to_poc_lx_b += 2; + } + + ppv_map_ref_idx_to_poc_lx_t = ps_dec->ppv_map_ref_idx_to_poc + + TOP_LIST_FLD_L1; + ppv_map_ref_idx_to_poc_lx_b = ps_dec->ppv_map_ref_idx_to_poc + + BOT_LIST_FLD_L1; + + ppv_map_ref_idx_to_poc_lx_t[0] = 0; + 
ppv_map_ref_idx_to_poc_lx_t++; + ppv_map_ref_idx_to_poc_lx_b[0] = 0; + ppv_map_ref_idx_to_poc_lx_b++; + for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]; + idx++) + { + UWORD8 u1_tmp_idx = idx << 1; + ps_pic = ps_dec->ps_ref_pic_buf_lx[1][idx]; + ppv_map_ref_idx_to_poc_lx_t[u1_tmp_idx] = (ps_pic->pu1_buf1); + ppv_map_ref_idx_to_poc_lx_b[u1_tmp_idx + 1] = (ps_pic->pu1_buf1); + + ppv_map_ref_idx_to_poc_lx_b[u1_tmp_idx] = (ps_pic->pu1_buf1) + 1; + ppv_map_ref_idx_to_poc_lx_t[u1_tmp_idx + 1] = (ps_pic->pu1_buf1) + 1; + + } + } + + if(ps_dec->u4_num_cores >= 3) + { + WORD32 num_entries; + WORD32 size; + + num_entries = MIN(MAX_FRAMES, ps_dec->u4_num_ref_frames_at_init); + num_entries = 2 * ((2 * num_entries) + 1); + + size = num_entries * sizeof(void *); + size += PAD_MAP_IDX_POC * sizeof(void *); + + memcpy((void *)ps_dec->ps_parse_cur_slice->ppv_map_ref_idx_to_poc, + ps_dec->ppv_map_ref_idx_to_poc, + size); + } + + } + + if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag + && (ps_dec->ps_cur_slice->u1_field_pic_flag == 0)) + { + ih264d_convert_frm_mbaff_list(ps_dec); + } + + if(ps_pps->u1_wted_bipred_idc == 1) + { + ret = ih264d_parse_pred_weight_table(ps_slice, ps_bitstrm); + if(ret != OK) + return ret; + ih264d_form_pred_weight_matrix(ps_dec); + ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat; + } + else if(ps_pps->u1_wted_bipred_idc == 2) + { + /* Implicit Weighted prediction */ + ps_slice->u2_log2Y_crwd = 0x0505; + ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat; + ih264d_get_implicit_weights(ps_dec); + } + else + ps_dec->ps_cur_slice->u2_log2Y_crwd = 0; + + ps_dec->ps_parse_cur_slice->u2_log2Y_crwd = + ps_dec->ps_cur_slice->u2_log2Y_crwd; + + /* G050 */ + if(ps_slice->u1_nal_ref_idc != 0) + { + if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read) + ps_dec->u4_bitoffset = ih264d_read_mmco_commands(ps_dec); + else + ps_bitstrm->u4_ofst += ps_dec->u4_bitoffset; + } + /* G050 */ + + if(ps_pps->u1_entropy_coding_mode == CABAC) + { + u4_temp = 
ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp > MAX_CABAC_INIT_IDC) + { + return ERROR_INV_SLICE_HDR_T; + } + ps_slice->u1_cabac_init_idc = u4_temp; + COPYTHECONTEXT("SH: cabac_init_idc",ps_slice->u1_cabac_init_idc); + } + + /* Read slice_qp_delta */ + i_temp = ps_pps->u1_pic_init_qp + + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if((i_temp < 0) || (i_temp > 51)) + { + return ERROR_INV_RANGE_QP_T; + } + ps_slice->u1_slice_qp = i_temp; + COPYTHECONTEXT("SH: slice_qp_delta", + (WORD8)(ps_slice->u1_slice_qp - ps_pps->u1_pic_init_qp)); + + if(ps_pps->u1_deblocking_filter_parameters_present_flag == 1) + { + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp > SLICE_BOUNDARY_DBLK_DISABLED) + { + return ERROR_INV_SLICE_HDR_T; + } COPYTHECONTEXT("SH: disable_deblocking_filter_idc", u4_temp); + ps_slice->u1_disable_dblk_filter_idc = u4_temp; + if(u4_temp != 1) + { + i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) + << 1; + if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF)) + { + return ERROR_INV_SLICE_HDR_T; + } + ps_slice->i1_slice_alpha_c0_offset = i_temp; + COPYTHECONTEXT("SH: slice_alpha_c0_offset_div2", + ps_slice->i1_slice_alpha_c0_offset >> 1); + + i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf) + << 1; + if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF)) + { + return ERROR_INV_SLICE_HDR_T; + } + ps_slice->i1_slice_beta_offset = i_temp; + COPYTHECONTEXT("SH: slice_beta_offset_div2", + ps_slice->i1_slice_beta_offset >> 1); + + } + else + { + ps_slice->i1_slice_alpha_c0_offset = 0; + ps_slice->i1_slice_beta_offset = 0; + } + } + else + { + ps_slice->u1_disable_dblk_filter_idc = 0; + ps_slice->i1_slice_alpha_c0_offset = 0; + ps_slice->i1_slice_beta_offset = 0; + } + + + /*set slice header cone to 2 ,to indicate correct header*/ + DATA_SYNC(); + + ps_dec->ps_parse_cur_slice->slice_header_done = 2; + + if(ps_pps->u1_entropy_coding_mode) + { + SWITCHOFFTRACE; SWITCHONTRACECABAC; + 
ps_dec->pf_parse_inter_slice = ih264d_parse_inter_slice_data_cabac; + ps_dec->pf_parse_inter_mb = ih264d_parse_bmb_cabac; + ih264d_init_cabac_contexts(B_SLICE, ps_dec); + + if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag) + ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_mbaff; + else + ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_nonmbaff; + } + else + { + SWITCHONTRACE; SWITCHOFFTRACECABAC; + ps_dec->pf_parse_inter_slice = ih264d_parse_inter_slice_data_cavlc; + ps_dec->pf_parse_inter_mb = ih264d_parse_bmb_cavlc; + if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag) + ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_mbaff; + else + ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_nonmbaff; + } + + ret = ih264d_cal_col_pic(ps_dec); + if(ret != OK) + return ret; + ps_dec->u1_B = 1; + ps_dec->pf_mvpred_ref_tfr_nby2mb = ih264d_mv_pred_ref_tfr_nby2_bmb; + ret = ps_dec->pf_parse_inter_slice(ps_dec, ps_slice, u2_first_mb_in_slice); + if(ret != OK) + return ret; + return OK; +} + diff --git a/decoder/ih264d_parse_cabac.c b/decoder/ih264d_parse_cabac.c new file mode 100755 index 0000000..9d58f33 --- /dev/null +++ b/decoder/ih264d_parse_cabac.c @@ -0,0 +1,1607 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! + *************************************************************************** + * \file ih264d_parse_cabac.c + * + * \brief + * This file contains cabac Residual decoding routines. + * + * \date + * 20/03/2003 + * + * \author NS + *************************************************************************** + */ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_defs.h" +#include "ih264d_structs.h" + +#include "ih264d_cabac.h" +#include "ih264d_bitstrm.h" +#include "ih264d_parse_mb_header.h" +#include "ih264d_debug.h" +#include "ih264d_tables.h" +#include "ih264d_error_handler.h" +#include "ih264d_parse_cabac.h" +#include "ih264d_parse_slice.h" +#include "ih264d_tables.h" +#include "ih264d_mb_utils.h" +#include "ih264d_utils.h" + +/*! + ******************************************************************************** + * \if Function name : ih264d_read_coeff4x4_cabac \endif + * + * \brief This function encodes residual_block_cabac as defined in 7.3.5.3.2. + * + * \return + * Returns the index of last significant coeff. 
+ * + ******************************************************************************** + */ + +UWORD8 ih264d_read_coeff4x4_cabac(dec_bit_stream_t *ps_bitstrm, + UWORD32 u4_ctxcat, + bin_ctxt_model_t *ps_ctxt_sig_coeff, + dec_struct_t *ps_dec, /*!< pointer to access global variables*/ + bin_ctxt_model_t *ps_ctxt_coded) +{ + + decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env; + UWORD32 u4_coded_flag; + UWORD32 u4_offset, *pu4_buffer; + UWORD32 u4_code_int_range, u4_code_int_val_ofst; + tu_sblk4x4_coeff_data_t *ps_tu_4x4; + WORD16 *pi2_coeff_data; + WORD32 num_sig_coeffs = 0; + + /*loading from strcuctures*/ + + ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data; + ps_tu_4x4->u2_sig_coeff_map = 0; + pi2_coeff_data = &ps_tu_4x4->ai2_level[0]; + + u4_offset = ps_bitstrm->u4_ofst; + pu4_buffer = ps_bitstrm->pu4_buffer; + + u4_code_int_range = ps_cab_env->u4_code_int_range; + u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst; + + { + + /*inilined DecodeDecision_onebin begins*/ + + { + + UWORD32 u4_qnt_int_range, u4_int_range_lps; + UWORD32 u4_symbol, u1_mps_state; + + UWORD32 table_lookup; + const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table; + UWORD32 u4_clz; + + u1_mps_state = (ps_ctxt_coded->u1_mps_state); + u4_clz = CLZ(u4_code_int_range); + u4_qnt_int_range = u4_code_int_range << u4_clz; + u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3; + table_lookup = + pu4_table[(u1_mps_state << 2) + u4_qnt_int_range]; + u4_int_range_lps = table_lookup & 0xff; + u4_int_range_lps = u4_int_range_lps << (23 - u4_clz); + u4_code_int_range = u4_code_int_range - u4_int_range_lps; + u4_symbol = ((u1_mps_state >> 6) & 0x1); + u1_mps_state = (table_lookup >> 8) & 0x7F; + + CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol, + u4_int_range_lps, u1_mps_state, table_lookup) + + if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8) + { + + RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, + u4_offset, pu4_buffer) + } + + 
ps_ctxt_coded->u1_mps_state = u1_mps_state; + u4_coded_flag = u4_symbol; + + /*inilined DecodeDecision_onebin ends*/ + + } + + } + + if(u4_coded_flag) + { + + { + bin_ctxt_model_t *p_binCtxt_last, *p_binCtxt_last_org; + UWORD32 uc_last_coeff_idx; + UWORD32 uc_bin; + UWORD32 i; + WORD32 first_coeff_offset = 0; + + if((u4_ctxcat == CHROMA_AC_CTXCAT) || (u4_ctxcat == LUMA_AC_CTXCAT)) + { + first_coeff_offset = 1; + } + + i = 0; + if(u4_ctxcat == CHROMA_DC_CTXCAT) + { + uc_last_coeff_idx = 3; + } + else + { + UWORD32 u4_start; + u4_start = (u4_ctxcat & 1) + (u4_ctxcat >> 2); + uc_last_coeff_idx = 15 - u4_start; + } + p_binCtxt_last_org = ps_ctxt_sig_coeff + + LAST_COEFF_CTXT_MINUS_SIG_COEFF_CTXT; + + do + { + + /*inilined DecodeDecision_onebin begins*/ + { + + UWORD32 u4_qnt_int_range, u4_int_range_lps; + UWORD32 u4_symbol, u1_mps_state; + UWORD32 table_lookup; + const UWORD32 *pu4_table = + (const UWORD32 *)ps_cab_env->cabac_table; + UWORD32 u4_clz; + + u1_mps_state = (ps_ctxt_sig_coeff->u1_mps_state); + + u4_clz = CLZ(u4_code_int_range); + + u4_qnt_int_range = u4_code_int_range << u4_clz; + u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3; + + table_lookup = pu4_table[(u1_mps_state << 2) + + u4_qnt_int_range]; + + u4_int_range_lps = table_lookup & 0xff; + + u4_int_range_lps = u4_int_range_lps << (23 - u4_clz); + u4_code_int_range = u4_code_int_range - u4_int_range_lps; + u4_symbol = ((u1_mps_state >> 6) & 0x1); + u1_mps_state = (table_lookup >> 8) & 0x7F; + + CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, + u4_symbol, u4_int_range_lps, u1_mps_state, + table_lookup) + + if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_14) + { + + UWORD32 read_bits, u4_clz; + u4_clz = CLZ(u4_code_int_range); + NEXTBITS(read_bits, (u4_offset + 23), pu4_buffer, + u4_clz) + FLUSHBITS(u4_offset, (u4_clz)) + u4_code_int_range = u4_code_int_range << u4_clz; + u4_code_int_val_ofst = (u4_code_int_val_ofst << u4_clz) + | read_bits; + } + + INC_BIN_COUNT( + ps_cab_env) + + 
ps_ctxt_sig_coeff->u1_mps_state = u1_mps_state; + uc_bin = u4_symbol; + + } + /*incrementing pointer to point to the context of the next bin*/ + ps_ctxt_sig_coeff++; + + /*inilined DecodeDecision_onebin ends*/ + + if(uc_bin) + { + num_sig_coeffs++; + SET_BIT(ps_tu_4x4->u2_sig_coeff_map, (i + first_coeff_offset)); + + p_binCtxt_last = p_binCtxt_last_org + i; + + /*inilined DecodeDecision_onebin begins*/ + + { + + UWORD32 u4_qnt_int_range, u4_int_range_lps; + UWORD32 u4_symbol, u1_mps_state; + UWORD32 table_lookup; + const UWORD32 *pu4_table = + (const UWORD32 *)ps_cab_env->cabac_table; + UWORD32 u4_clz; + + u1_mps_state = (p_binCtxt_last->u1_mps_state); + + u4_clz = CLZ(u4_code_int_range); + u4_qnt_int_range = u4_code_int_range << u4_clz; + u4_qnt_int_range = (u4_qnt_int_range >> 29) + & 0x3; + + table_lookup = pu4_table[(u1_mps_state << 2) + + u4_qnt_int_range]; + u4_int_range_lps = table_lookup & 0xff; + + u4_int_range_lps = u4_int_range_lps + << (23 - u4_clz); + + u4_code_int_range = u4_code_int_range + - u4_int_range_lps; + u4_symbol = ((u1_mps_state >> 6) & 0x1); + u1_mps_state = (table_lookup >> 8) & 0x7F; + + CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, + u4_symbol, u4_int_range_lps, + u1_mps_state, table_lookup) + + INC_BIN_COUNT(ps_cab_env) + + p_binCtxt_last->u1_mps_state = u1_mps_state; + uc_bin = u4_symbol; + + } + + /*inilined DecodeDecision_onebin ends*/ + if(uc_bin == 1) + goto label_read_levels; + + } + + i = i + 1; + + } + while(i < uc_last_coeff_idx); + + num_sig_coeffs++; + SET_BIT(ps_tu_4x4->u2_sig_coeff_map, (i + first_coeff_offset)); + + label_read_levels: ; + + } + + /// VALUE of No of Coeff in BLOCK = i + 1 for second case else i; + + /* Decode coeff_abs_level_minus1 and coeff_sign_flag */ + { + + WORD32 i2_abs_lvl; + UWORD32 u1_abs_level_equal1 = 1, u1_abs_level_gt1 = 0; + + UWORD32 u4_ctx_inc; + UWORD32 ui_prefix; + bin_ctxt_model_t *p_ctxt_abs_level; + + + p_ctxt_abs_level = ps_dec->p_coeff_abs_level_minus1_t[u4_ctxcat]; + 
u4_ctx_inc = ((0x51)); + + /*****************************************************/ + /* Main Loop runs for no. of Significant coefficient */ + /*****************************************************/ + + + do + { + + { + INC_SYM_COUNT(&(ps_dec.s_cab_dec_env)); + + /*****************************************************/ + /* inilining a modified ih264d_decode_bins_unary */ + /*****************************************************/ + + { + UWORD32 u4_value; + UWORD32 u4_symbol; + bin_ctxt_model_t *ps_bin_ctxt; + UWORD32 u4_ctx_Inc; + + u4_value = 0; + + u4_ctx_Inc = u4_ctx_inc & 0xf; + ps_bin_ctxt = p_ctxt_abs_level + u4_ctx_Inc; + + do + { + + { + + UWORD32 u4_qnt_int_range, + u4_int_range_lps; + UWORD32 u1_mps_state; + UWORD32 table_lookup; + const UWORD32 *pu4_table = + (const UWORD32 *)ps_cab_env->cabac_table; + UWORD32 u4_clz; + + u1_mps_state = (ps_bin_ctxt->u1_mps_state); + u4_clz = CLZ(u4_code_int_range); + u4_qnt_int_range = u4_code_int_range + << u4_clz; + u4_qnt_int_range = (u4_qnt_int_range + >> 29) & 0x3; + table_lookup = pu4_table[(u1_mps_state << 2) + + u4_qnt_int_range]; + u4_int_range_lps = table_lookup & 0xff; + + u4_int_range_lps = u4_int_range_lps + << (23 - u4_clz); + u4_code_int_range = u4_code_int_range + - u4_int_range_lps; + u4_symbol = ((u1_mps_state >> 6) & 0x1); + u1_mps_state = (table_lookup >> 8) & 0x7F; + + CHECK_IF_LPS(u4_code_int_range, + u4_code_int_val_ofst, u4_symbol, + u4_int_range_lps, u1_mps_state, + table_lookup) + + if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9) + { + + RENORM_RANGE_OFFSET(u4_code_int_range, + u4_code_int_val_ofst, + u4_offset, pu4_buffer) + } + + INC_BIN_COUNT(ps_cab_env); + + ps_bin_ctxt->u1_mps_state = u1_mps_state; + } + + INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env); + + u4_value++; + ps_bin_ctxt = p_ctxt_abs_level + (u4_ctx_inc >> 4); + + } + while(u4_symbol && (u4_value < UCOFF_LEVEL)); + + ui_prefix = u4_value - 1 + u4_symbol; + + } + + if(ui_prefix == UCOFF_LEVEL) + { + UWORD32 ui16_sufS = 0; 
+ UWORD32 u1_max_bins; + UWORD32 u4_value; + + i2_abs_lvl = UCOFF_LEVEL; + /*inlining ih264d_decode_bypass_bins_unary begins*/ + + { + UWORD32 uc_bin; + UWORD32 bits_to_flush; + UWORD32 max_bits = 32; + + bits_to_flush = 0; + /*renormalize to ensure there 23 bits more in the u4_code_int_val_ofst*/ + { + UWORD32 u4_clz, read_bits; + + u4_clz = CLZ(u4_code_int_range); + FLUSHBITS(u4_offset, u4_clz) + NEXTBITS(read_bits, u4_offset, pu4_buffer, 23) + u4_code_int_range = u4_code_int_range << u4_clz; + u4_code_int_val_ofst = (u4_code_int_val_ofst + << u4_clz) | read_bits; + + } + + do + { + bits_to_flush++; + + u4_code_int_range = u4_code_int_range >> 1; + + if(u4_code_int_val_ofst >= u4_code_int_range) + { + /* S=1 */ + uc_bin = 1; + u4_code_int_val_ofst -= u4_code_int_range; + } + else + { + /* S=0 */ + uc_bin = 0; + } + + INC_BIN_COUNT( + ps_cab_env);INC_BYPASS_BINS(ps_cab_env); + + } + while(uc_bin && (bits_to_flush < max_bits)); + + u4_value = (bits_to_flush - 1); + + } + /*inlining ih264d_decode_bypass_bins_unary ends*/ + + ui16_sufS = (1 << u4_value); + u1_max_bins = u4_value; + + if(u4_value > 0) + { + + /*inline bypassbins_flc begins*/ + + if(u4_value > 10) + { + UWORD32 u4_clz, read_bits; + + u4_clz = CLZ(u4_code_int_range); + FLUSHBITS(u4_offset, u4_clz) + NEXTBITS(read_bits, u4_offset, pu4_buffer, 23) + u4_code_int_range = u4_code_int_range << u4_clz; + u4_code_int_val_ofst = (u4_code_int_val_ofst + << u4_clz) | read_bits; + } + + { + UWORD32 ui_bins; + UWORD32 uc_bin; + UWORD32 bits_to_flush; + + ui_bins = 0; + bits_to_flush = 0; + + do + { + bits_to_flush++; + + u4_code_int_range = u4_code_int_range >> 1; + + if(u4_code_int_val_ofst + >= u4_code_int_range) + { + /* S=1 */ + uc_bin = 1; + u4_code_int_val_ofst -= + u4_code_int_range; + } + else + { + /* S=0 */ + uc_bin = 0; + } + + INC_BIN_COUNT( + ps_cab_env);INC_BYPASS_BINS(ps_cab_env); + + ui_bins = ((ui_bins << 1) | uc_bin); + + } + while(bits_to_flush < u1_max_bins); + + u4_value = ui_bins; + } + + 
/*inline bypassbins_flc ends*/ + + } + + //Value of K + ui16_sufS += u4_value; + i2_abs_lvl += ui16_sufS; + + } + else + i2_abs_lvl = 1 + ui_prefix; + + if(i2_abs_lvl > 1) + { + u1_abs_level_gt1++; + } + if(!u1_abs_level_gt1) + { + u1_abs_level_equal1++; + u4_ctx_inc = (5 << 4) + MIN(u1_abs_level_equal1, 4); + } + else + u4_ctx_inc = (5 + MIN(u1_abs_level_gt1, 4)) << 4; + + /*u4_ctx_inc = g_table_temp[u1_abs_level_gt1][u1_abs_level_equal1];*/ + + /* encode coeff_sign_flag[i] */ + + { + u4_code_int_range = u4_code_int_range >> 1; + + if(u4_code_int_val_ofst >= (u4_code_int_range)) + { + /* S=1 */ + u4_code_int_val_ofst -= u4_code_int_range; + i2_abs_lvl = (-i2_abs_lvl); + } + + } + num_sig_coeffs--; + *pi2_coeff_data++ = i2_abs_lvl; + } + } + while(num_sig_coeffs > 0); + } + } + + if(u4_coded_flag) + { + WORD32 offset; + offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4; + offset = ALIGN4(offset); + ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset); + } + + + /*updating structures*/ + ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst; + ps_cab_env->u4_code_int_range = u4_code_int_range; + ps_bitstrm->u4_ofst = u4_offset; + return (u4_coded_flag); +} +/*! + ******************************************************************************** + * \if Function name : ih264d_read_coeff8x8_cabac \endif + * + * \brief This function encodes residual_block_cabac as defined in 7.3.5.3.2. + when transform_8x8_flag = 1 + * + * \return + * Returns the index of last significant coeff. 
+ * + ******************************************************************************** + */ + +void ih264d_read_coeff8x8_cabac(dec_bit_stream_t *ps_bitstrm, + dec_struct_t *ps_dec, /*!< pointer to access global variables*/ + dec_mb_info_t *ps_cur_mb_info) +{ + decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env; + UWORD32 u4_offset, *pu4_buffer; + UWORD32 u4_code_int_range, u4_code_int_val_ofst; + + /* High profile related declarations */ + UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld; + const UWORD8 *pu1_lastcoeff_context_inc = + (UWORD8 *)gau1_ih264d_lastcoeff_context_inc; + const UWORD8 *pu1_sigcoeff_context_inc; + bin_ctxt_model_t *ps_ctxt_sig_coeff; + WORD32 num_sig_coeffs = 0; + tu_blk8x8_coeff_data_t *ps_tu_8x8; + WORD16 *pi2_coeff_data; + + /*loading from strcuctures*/ + + ps_tu_8x8 = (tu_blk8x8_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data; + ps_tu_8x8->au4_sig_coeff_map[0] = 0; + ps_tu_8x8->au4_sig_coeff_map[1] = 0; + pi2_coeff_data = &ps_tu_8x8->ai2_level[0]; + + + if(!u1_field_coding_flag) + { + pu1_sigcoeff_context_inc = + (UWORD8 *)gau1_ih264d_sigcoeff_context_inc_frame; + + /*******************************************************************/ + /* last coefficient context is derived from significant coeff u4_flag */ + /* only significant coefficient matrix need to be initialized */ + /*******************************************************************/ + ps_ctxt_sig_coeff = ps_dec->s_high_profile.ps_sigcoeff_8x8_frame; + } + else + { + pu1_sigcoeff_context_inc = + (UWORD8 *)gau1_ih264d_sigcoeff_context_inc_field; + + /*******************************************************************/ + /* last coefficient context is derived from significant coeff u4_flag */ + /* only significant coefficient matrix need to be initialized */ + /*******************************************************************/ + ps_ctxt_sig_coeff = ps_dec->s_high_profile.ps_sigcoeff_8x8_field; + } + + /*loading from strcuctures*/ + + u4_offset = 
ps_bitstrm->u4_ofst; + pu4_buffer = ps_bitstrm->pu4_buffer; + + u4_code_int_range = ps_cab_env->u4_code_int_range; + u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst; + + { + { + bin_ctxt_model_t *p_binCtxt_last, *p_binCtxt_last_org, + *p_ctxt_sig_coeff_org; + UWORD32 uc_last_coeff_idx; + UWORD32 uc_bin; + UWORD32 i; + + i = 0; + + uc_last_coeff_idx = 63; + + p_binCtxt_last_org = ps_ctxt_sig_coeff + + LAST_COEFF_CTXT_MINUS_SIG_COEFF_CTXT_8X8; + + p_ctxt_sig_coeff_org = ps_ctxt_sig_coeff; + + do + { + /*inilined DecodeDecision_onebin begins*/ + { + UWORD32 u4_qnt_int_range, u4_int_range_lps; + UWORD32 u4_symbol, u1_mps_state; + UWORD32 table_lookup; + const UWORD32 *pu4_table = + (const UWORD32 *)ps_cab_env->cabac_table; + UWORD32 u4_clz; + + u1_mps_state = (ps_ctxt_sig_coeff->u1_mps_state); + + u4_clz = CLZ(u4_code_int_range); + + u4_qnt_int_range = u4_code_int_range << u4_clz; + u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3; + + table_lookup = pu4_table[(u1_mps_state << 2) + + u4_qnt_int_range]; + + u4_int_range_lps = table_lookup & 0xff; + + u4_int_range_lps = u4_int_range_lps << (23 - u4_clz); + u4_code_int_range = u4_code_int_range - u4_int_range_lps; + u4_symbol = ((u1_mps_state >> 6) & 0x1); + u1_mps_state = (table_lookup >> 8) & 0x7F; + + CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, + u4_symbol, u4_int_range_lps, u1_mps_state, + table_lookup) + + if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_14) + { + UWORD32 read_bits, u4_clz; + u4_clz = CLZ(u4_code_int_range); + NEXTBITS(read_bits, (u4_offset + 23), pu4_buffer, + u4_clz) + FLUSHBITS(u4_offset, (u4_clz)) + u4_code_int_range = u4_code_int_range << u4_clz; + u4_code_int_val_ofst = (u4_code_int_val_ofst << u4_clz) + | read_bits; + } + + ps_ctxt_sig_coeff->u1_mps_state = u1_mps_state; + uc_bin = u4_symbol; + } + /*incrementing pointer to point to the context of the next bin*/ + ps_ctxt_sig_coeff = p_ctxt_sig_coeff_org + + pu1_sigcoeff_context_inc[i + 1]; + + /*inilined DecodeDecision_onebin 
ends*/ + if(uc_bin) + { + num_sig_coeffs++; + SET_BIT(ps_tu_8x8->au4_sig_coeff_map[i>31], (i > 31 ? i - 32:i)); + + p_binCtxt_last = p_binCtxt_last_org + + pu1_lastcoeff_context_inc[i]; + + /*inilined DecodeDecision_onebin begins*/ + + { + UWORD32 u4_qnt_int_range, u4_int_range_lps; + UWORD32 u4_symbol, u1_mps_state; + UWORD32 table_lookup; + const UWORD32 *pu4_table = + (const UWORD32 *)ps_cab_env->cabac_table; + UWORD32 u4_clz; + + u1_mps_state = (p_binCtxt_last->u1_mps_state); + + u4_clz = CLZ(u4_code_int_range); + u4_qnt_int_range = u4_code_int_range << u4_clz; + u4_qnt_int_range = (u4_qnt_int_range >> 29) + & 0x3; + + table_lookup = pu4_table[(u1_mps_state << 2) + + u4_qnt_int_range]; + u4_int_range_lps = table_lookup & 0xff; + + u4_int_range_lps = u4_int_range_lps + << (23 - u4_clz); + + u4_code_int_range = u4_code_int_range + - u4_int_range_lps; + u4_symbol = ((u1_mps_state >> 6) & 0x1); + u1_mps_state = (table_lookup >> 8) & 0x7F; + + CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, + u4_symbol, u4_int_range_lps, + u1_mps_state, table_lookup) + + p_binCtxt_last->u1_mps_state = u1_mps_state; + uc_bin = u4_symbol; + } + + /*inilined DecodeDecision_onebin ends*/ + if(uc_bin == 1) + goto label_read_levels; + + } + + i = i + 1; + + } + while(i < uc_last_coeff_idx); + + num_sig_coeffs++; + SET_BIT(ps_tu_8x8->au4_sig_coeff_map[i>31], (i > 31 ? i - 32:i)); + + label_read_levels: ; + } + + /// VALUE of No of Coeff in BLOCK = i + 1 for second case else i; + + /* Decode coeff_abs_level_minus1 and coeff_sign_flag */ + { + WORD32 i2_abs_lvl; + UWORD32 u1_abs_level_equal1 = 1, u1_abs_level_gt1 = 0; + + UWORD32 u4_ctx_inc; + UWORD32 ui_prefix; + bin_ctxt_model_t *p_ctxt_abs_level; + + p_ctxt_abs_level = + ps_dec->p_coeff_abs_level_minus1_t[LUMA_8X8_CTXCAT]; + u4_ctx_inc = ((0x51)); + + /*****************************************************/ + /* Main Loop runs for no. 
of Significant coefficient */ + /*****************************************************/ + do + { + { + + /*****************************************************/ + /* inilining a modified ih264d_decode_bins_unary */ + /*****************************************************/ + + { + UWORD32 u4_value; + UWORD32 u4_symbol; + bin_ctxt_model_t *ps_bin_ctxt; + UWORD32 u4_ctx_Inc; + u4_value = 0; + + u4_ctx_Inc = u4_ctx_inc & 0xf; + ps_bin_ctxt = p_ctxt_abs_level + u4_ctx_Inc; + + do + { + { + UWORD32 u4_qnt_int_range, + u4_int_range_lps; + UWORD32 u1_mps_state; + UWORD32 table_lookup; + const UWORD32 *pu4_table = + (const UWORD32 *)ps_cab_env->cabac_table; + UWORD32 u4_clz; + + u1_mps_state = (ps_bin_ctxt->u1_mps_state); + u4_clz = CLZ(u4_code_int_range); + u4_qnt_int_range = u4_code_int_range + << u4_clz; + u4_qnt_int_range = (u4_qnt_int_range + >> 29) & 0x3; + table_lookup = pu4_table[(u1_mps_state << 2) + + u4_qnt_int_range]; + u4_int_range_lps = table_lookup & 0xff; + + u4_int_range_lps = u4_int_range_lps + << (23 - u4_clz); + u4_code_int_range = u4_code_int_range + - u4_int_range_lps; + u4_symbol = ((u1_mps_state >> 6) & 0x1); + u1_mps_state = (table_lookup >> 8) & 0x7F; + + CHECK_IF_LPS(u4_code_int_range, + u4_code_int_val_ofst, u4_symbol, + u4_int_range_lps, u1_mps_state, + table_lookup) + + if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9) + { + + RENORM_RANGE_OFFSET(u4_code_int_range, + u4_code_int_val_ofst, + u4_offset, pu4_buffer) + } + + ps_bin_ctxt->u1_mps_state = u1_mps_state; + } + + u4_value++; + ps_bin_ctxt = p_ctxt_abs_level + (u4_ctx_inc >> 4); + + } + while(u4_symbol && (u4_value < UCOFF_LEVEL)); + + ui_prefix = u4_value - 1 + u4_symbol; + } + + if(ui_prefix == UCOFF_LEVEL) + { + UWORD32 ui16_sufS = 0; + UWORD32 u1_max_bins; + UWORD32 u4_value; + + i2_abs_lvl = UCOFF_LEVEL; + /*inlining ih264d_decode_bypass_bins_unary begins*/ + + { + UWORD32 uc_bin; + UWORD32 bits_to_flush; + UWORD32 max_bits = 32; + + bits_to_flush = 0; + /*renormalize to ensure there 23 
bits more in the u4_code_int_val_ofst*/ + { + UWORD32 u4_clz, read_bits; + + u4_clz = CLZ(u4_code_int_range); + FLUSHBITS(u4_offset, u4_clz) + NEXTBITS(read_bits, u4_offset, pu4_buffer, 23) + u4_code_int_range = u4_code_int_range << u4_clz; + u4_code_int_val_ofst = (u4_code_int_val_ofst + << u4_clz) | read_bits; + } + + do + { + bits_to_flush++; + + u4_code_int_range = u4_code_int_range >> 1; + + if(u4_code_int_val_ofst >= u4_code_int_range) + { + /* S=1 */ + uc_bin = 1; + u4_code_int_val_ofst -= u4_code_int_range; + } + else + { + /* S=0 */ + uc_bin = 0; + } + + } + while(uc_bin && (bits_to_flush < max_bits)); + + u4_value = (bits_to_flush - 1); + } + /*inlining ih264d_decode_bypass_bins_unary ends*/ + + ui16_sufS = (1 << u4_value); + u1_max_bins = u4_value; + + if(u4_value > 0) + { + /*inline bypassbins_flc begins*/ + + if(u4_value > 10) + { + UWORD32 u4_clz, read_bits; + + u4_clz = CLZ(u4_code_int_range); + FLUSHBITS(u4_offset, u4_clz) + NEXTBITS(read_bits, u4_offset, pu4_buffer, 23) + u4_code_int_range = u4_code_int_range << u4_clz; + u4_code_int_val_ofst = (u4_code_int_val_ofst + << u4_clz) | read_bits; + } + + { + UWORD32 ui_bins; + UWORD32 uc_bin; + UWORD32 bits_to_flush; + + ui_bins = 0; + bits_to_flush = 0; + + do + { + bits_to_flush++; + + u4_code_int_range = u4_code_int_range >> 1; + + if(u4_code_int_val_ofst + >= u4_code_int_range) + { + /* S=1 */ + uc_bin = 1; + u4_code_int_val_ofst -= + u4_code_int_range; + } + else + { + /* S=0 */ + uc_bin = 0; + } + + ui_bins = ((ui_bins << 1) | uc_bin); + + } + while(bits_to_flush < u1_max_bins); + + u4_value = ui_bins; + } + /*inline bypassbins_flc ends*/ + } + + //Value of K + ui16_sufS += u4_value; + i2_abs_lvl += ui16_sufS; + } + else + { + i2_abs_lvl = 1 + ui_prefix; + } + + if(i2_abs_lvl > 1) + { + u1_abs_level_gt1++; + } + if(!u1_abs_level_gt1) + { + u1_abs_level_equal1++; + u4_ctx_inc = (5 << 4) + MIN(u1_abs_level_equal1, 4); + } + else + { + u4_ctx_inc = (5 + MIN(u1_abs_level_gt1, 4)) << 4; + } + + 
/*u4_ctx_inc = g_table_temp[u1_abs_level_gt1][u1_abs_level_equal1];*/ + + /* encode coeff_sign_flag[i] */ + + { + u4_code_int_range = u4_code_int_range >> 1; + + if(u4_code_int_val_ofst >= (u4_code_int_range)) + { + /* S=1 */ + u4_code_int_val_ofst -= u4_code_int_range; + i2_abs_lvl = (-i2_abs_lvl); + } + } + + *pi2_coeff_data++ = i2_abs_lvl; + num_sig_coeffs--; + } + } + while(num_sig_coeffs > 0); + } + } + + { + WORD32 offset; + offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_8x8; + offset = ALIGN4(offset); + ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset); + } + + /*updating structures*/ + ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst; + ps_cab_env->u4_code_int_range = u4_code_int_range; + ps_bitstrm->u4_ofst = u4_offset; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_cabac_parse_8x8block */ +/* */ +/* Description : This function does the residual parsing of 4 subblocks */ +/* in a 8x8 block. */ +/* */ +/* Inputs : pi2_coeff_block : pointer to residual block where */ +/* decoded and inverse scan coefficients are updated */ +/* */ +/* u4_sub_block_strd : indicates the number of sublocks */ +/* in a row. It is 4 for luma and 2 for chroma. */ +/* */ +/* u4_ctx_cat : inidicates context category for residual */ +/* decoding. */ +/* */ +/* ps_dec : pointer to Decstruct (decoder context) */ +/* */ +/* pu1_top_nnz : top nnz pointer */ +/* */ +/* pu1_left_nnz : left nnz pointer */ +/* */ +/* Globals : No */ +/* Processing : Parsing for four subblocks in unrolled, top and left nnz */ +/* are updated on the fly. 
csbp is set in accordance to */
/*                  decoded numcoeff for the subblock index in raster order  */
/*                                                                           */
/*  Outputs       : The updated nnzs and the csbp of the current block; the  */
/*                  coefficients themselves are written by                   */
/*                  ih264d_read_coeff4x4_cabac through ps_dec                */
/*                                                                           */
/*  Returns       : Returns the coded sub block pattern csbp for the block   */
/*                                                                           */
/*  Issues        : <List any issues or problems with this function>         */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
/*         09 10 2008   Jay          Draft                                   */
/*                                                                           */
/*****************************************************************************/
UWORD32 ih264d_cabac_parse_8x8block(WORD16 *pi2_coeff_block,
                                    UWORD32 u4_sub_block_strd,
                                    UWORD32 u4_ctx_cat,
                                    dec_struct_t * ps_dec,
                                    UWORD8 *pu1_top_nnz,
                                    UWORD8 *pu1_left_nnz)
{
    UWORD32 u4_ctxinc, u4_subblock_coded;
    UWORD32 u4_top0, u4_top1;
    UWORD32 u4_csbp = 0;
    UWORD32 u4_idx = 0;
    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
    bin_ctxt_model_t * const ps_cbf = ps_dec->p_cbf_t[u4_ctx_cat];
    bin_ctxt_model_t *ps_src_bin_ctxt;
    bin_ctxt_model_t * const ps_sig_coeff_flag =
                    ps_dec->p_significant_coeff_flag_t[u4_ctx_cat];

    /*
     * The coefficient pointer is kept for interface compatibility only.
     * It is never dereferenced here, and it is deliberately not advanced
     * either, because a caller may pass NULL and pointer arithmetic on a
     * null pointer is undefined.
     */
    (void)pi2_coeff_block;

    /*------------------------------------------------------*/
    /* Residual 4x4 decoding: SubBlock 0                    */
    /*------------------------------------------------------*/
    /* coded_block_flag ctxInc = 2 * (top coded) + (left coded) */
    u4_ctxinc = ((!!pu1_top_nnz[0]) << 1) + (!!pu1_left_nnz[0]);

    ps_src_bin_ctxt = ps_cbf + u4_ctxinc;

    u4_top0 = ih264d_read_coeff4x4_cabac(ps_bitstrm,
                                         u4_ctx_cat, ps_sig_coeff_flag, ps_dec,
                                         ps_src_bin_ctxt);

    INSERT_BIT(u4_csbp, u4_idx, u4_top0);

    /*------------------------------------------------------*/
    /* Residual 4x4 decoding: SubBlock 1                    */
    /*------------------------------------------------------*/
    u4_idx++;
    /* left neighbour is sub-block 0 decoded just above */
    u4_ctxinc = ((!!pu1_top_nnz[1]) << 1) + u4_top0;

    ps_src_bin_ctxt = ps_cbf + u4_ctxinc;

    u4_top1 = ih264d_read_coeff4x4_cabac(ps_bitstrm,
                                         u4_ctx_cat, ps_sig_coeff_flag, ps_dec,
                                         ps_src_bin_ctxt);

    INSERT_BIT(u4_csbp, u4_idx, u4_top1);
    pu1_left_nnz[0] = u4_top1;

    /*------------------------------------------------------*/
    /* Residual 4x4 decoding: SubBlock 2                    */
    /*------------------------------------------------------*/
    /* move to the first sub-block of the next row in raster order */
    u4_idx += (u4_sub_block_strd - 1);
    /* top neighbour is sub-block 0 */
    u4_ctxinc = (u4_top0 << 1) + (!!pu1_left_nnz[1]);

    ps_src_bin_ctxt = ps_cbf + u4_ctxinc;

    u4_subblock_coded = ih264d_read_coeff4x4_cabac(ps_bitstrm, u4_ctx_cat,
                                                   ps_sig_coeff_flag, ps_dec,
                                                   ps_src_bin_ctxt);

    INSERT_BIT(u4_csbp, u4_idx, u4_subblock_coded);
    pu1_top_nnz[0] = u4_subblock_coded;

    /*------------------------------------------------------*/
    /* Residual 4x4 decoding: SubBlock 3                    */
    /*------------------------------------------------------*/
    u4_idx++;
    /* top neighbour is sub-block 1, left neighbour is sub-block 2 */
    u4_ctxinc = (u4_top1 << 1) + u4_subblock_coded;

    ps_src_bin_ctxt = ps_cbf + u4_ctxinc;

    u4_subblock_coded = ih264d_read_coeff4x4_cabac(ps_bitstrm, u4_ctx_cat,
                                                   ps_sig_coeff_flag, ps_dec,
                                                   ps_src_bin_ctxt);

    INSERT_BIT(u4_csbp, u4_idx, u4_subblock_coded);
    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_subblock_coded;

    return (u4_csbp);
}

/*!
 **************************************************************************
 * \if Function name : ih264d_parse_residual4x4_cabac \endif
 *
 * \brief
 *    This function parses CABAC syntax of a Luma and Chroma AC Residuals.
+ * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ + +WORD32 ih264d_parse_residual4x4_cabac(dec_struct_t * ps_dec, + dec_mb_info_t *ps_cur_mb_info, + UWORD8 u1_offset) +{ + UWORD8 u1_cbp = ps_cur_mb_info->u1_cbp; + UWORD16 ui16_csbp = 0; + WORD16 *pi2_residual_buf; + UWORD8 uc_ctx_cat; + UWORD8 *pu1_top_nnz = ps_cur_mb_info->ps_curmb->pu1_nnz_y; + UWORD8 *pu1_left_nnz = ps_dec->pu1_left_nnz_y; + UWORD8 *pu1_top_nnz_uv = ps_cur_mb_info->ps_curmb->pu1_nnz_uv; + ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info; + ctxt_inc_mb_info_t *ps_top_ctxt = ps_dec->p_top_ctxt_mb_info; + dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm; + UWORD32 u4_nbr_avail = ps_dec->u1_mb_ngbr_availablity; + WORD16 *pi2_coeff_block = NULL; + bin_ctxt_model_t *ps_src_bin_ctxt; + + UWORD8 u1_top_dc_csbp = (ps_top_ctxt->u1_yuv_dc_csbp) >> 1; + UWORD8 u1_left_dc_csbp = (ps_dec->pu1_left_yuv_dc_csbp[0]) >> 1; + + + if(!(u4_nbr_avail & TOP_MB_AVAILABLE_MASK)) + { + if(p_curr_ctxt->u1_mb_type & CAB_INTRA_MASK) + { + *(UWORD32 *)pu1_top_nnz = 0; + u1_top_dc_csbp = 0; + *(UWORD32 *)pu1_top_nnz_uv = 0; + } + else + { + *(UWORD32 *)pu1_top_nnz = 0x01010101; + u1_top_dc_csbp = 0x3; + *(UWORD32 *)pu1_top_nnz_uv = 0x01010101; + } + } + else + { + UWORD32 *pu4_buf; + UWORD8 *pu1_buf; + pu1_buf = ps_cur_mb_info->ps_top_mb->pu1_nnz_y; + pu4_buf = (UWORD32 *)pu1_buf; + *(UWORD32 *)(pu1_top_nnz) = *pu4_buf; + + pu1_buf = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv; + pu4_buf = (UWORD32 *)pu1_buf; + *(UWORD32 *)(pu1_top_nnz_uv) = *pu4_buf; + + } + + if(!(u4_nbr_avail & LEFT_MB_AVAILABLE_MASK)) + { + if(p_curr_ctxt->u1_mb_type & CAB_INTRA_MASK) + { + UWORD32 *pu4_buf; + UWORD8 *pu1_buf; + *(UWORD32 *)pu1_left_nnz = 0; + u1_left_dc_csbp = 0; + pu1_buf = ps_dec->pu1_left_nnz_uv; + pu4_buf = (UWORD32 *)pu1_buf; + *pu4_buf = 0; + } + else + { + UWORD32 *pu4_buf; + UWORD8 *pu1_buf; + *(UWORD32 *)pu1_left_nnz = 
0x01010101; + u1_left_dc_csbp = 0x3; + pu1_buf = ps_dec->pu1_left_nnz_uv; + pu4_buf = (UWORD32 *)pu1_buf; + *pu4_buf = 0x01010101; + } + } + + uc_ctx_cat = u1_offset ? LUMA_AC_CTXCAT : LUMA_4X4_CTXCAT; + + ps_cur_mb_info->u1_qp_div6 = ps_dec->u1_qp_y_div6; + ps_cur_mb_info->u1_qpc_div6 = ps_dec->u1_qp_u_div6; + ps_cur_mb_info->u1_qp_rem6 = ps_dec->u1_qp_y_rem6; + ps_cur_mb_info->u1_qpc_rem6 = ps_dec->u1_qp_u_rem6; + // CHECK_THIS + ps_cur_mb_info->u1_qpcr_div6 = ps_dec->u1_qp_v_div6; + ps_cur_mb_info->u1_qpcr_rem6 = ps_dec->u1_qp_v_rem6; + + if(u1_cbp & 0x0f) + { + if(ps_cur_mb_info->u1_tran_form8x8 == 0) + { + /*******************************************************************/ + /* Block 0 residual decoding, check cbp and proceed (subblock = 0) */ + /*******************************************************************/ + if(!(u1_cbp & 0x1)) + { + *(UWORD16 *)(pu1_top_nnz) = 0; + *(UWORD16 *)(pu1_left_nnz) = 0; + } + else + { + ui16_csbp = ih264d_cabac_parse_8x8block(pi2_coeff_block, 4, + uc_ctx_cat, ps_dec, + pu1_top_nnz, + pu1_left_nnz); + } + + /*******************************************************************/ + /* Block 1 residual decoding, check cbp and proceed (subblock = 2) */ + /*******************************************************************/ + pi2_coeff_block += (2 * NUM_COEFFS_IN_4x4BLK); + if(!(u1_cbp & 0x2)) + { + *(UWORD16 *)(pu1_top_nnz + 2) = 0; + *(UWORD16 *)(pu1_left_nnz) = 0; + } + else + { + UWORD32 u4_temp = ih264d_cabac_parse_8x8block(pi2_coeff_block, + 4, uc_ctx_cat, + ps_dec, + (pu1_top_nnz + 2), + pu1_left_nnz); + ui16_csbp |= (u4_temp << 2); + } + + /*******************************************************************/ + /* Block 2 residual decoding, check cbp and proceed (subblock = 8) */ + /*******************************************************************/ + pi2_coeff_block += (6 * NUM_COEFFS_IN_4x4BLK); + if(!(u1_cbp & 0x4)) + { + *(UWORD16 *)(pu1_top_nnz) = 0; + *(UWORD16 *)(pu1_left_nnz + 2) = 0; + } + else + { + UWORD32 
u4_temp = ih264d_cabac_parse_8x8block( + pi2_coeff_block, 4, uc_ctx_cat, ps_dec, + pu1_top_nnz, (pu1_left_nnz + 2)); + ui16_csbp |= (u4_temp << 8); + } + + /*******************************************************************/ + /* Block 3 residual decoding, check cbp and proceed (subblock = 10)*/ + /*******************************************************************/ + pi2_coeff_block += (2 * NUM_COEFFS_IN_4x4BLK); + if(!(u1_cbp & 0x8)) + { + *(UWORD16 *)(pu1_top_nnz + 2) = 0; + *(UWORD16 *)(pu1_left_nnz + 2) = 0; + } + else + { + UWORD32 u4_temp = ih264d_cabac_parse_8x8block( + pi2_coeff_block, 4, uc_ctx_cat, ps_dec, + (pu1_top_nnz + 2), (pu1_left_nnz + 2)); + ui16_csbp |= (u4_temp << 10); + } + + } + else + { + ui16_csbp = 0; + + /*******************************************************************/ + /* Block 0 residual decoding, check cbp and proceed (subblock = 0) */ + /*******************************************************************/ + if(!(u1_cbp & 0x1)) + { + *(UWORD16 *)(pu1_top_nnz) = 0; + *(UWORD16 *)(pu1_left_nnz) = 0; + } + else + { + + dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm; + + ih264d_read_coeff8x8_cabac( ps_bitstrm, + ps_dec, ps_cur_mb_info); + + pu1_left_nnz[0] = 1; + pu1_left_nnz[1] = 1; + + pu1_top_nnz[0] = 1; + pu1_top_nnz[1] = 1; + + /* added to be used by BS computation module */ + ui16_csbp |= 0x0033; + } + + /*******************************************************************/ + /* Block 1 residual decoding, check cbp and proceed (subblock = 2) */ + /*******************************************************************/ + pi2_coeff_block += 64; + + if(!(u1_cbp & 0x2)) + { + *(UWORD16 *)(pu1_top_nnz + 2) = 0; + *(UWORD16 *)(pu1_left_nnz) = 0; + } + else + { + + + dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm; + + ih264d_read_coeff8x8_cabac(ps_bitstrm, + ps_dec, ps_cur_mb_info); + + pu1_left_nnz[0] = 1; + pu1_left_nnz[1] = 1; + + pu1_top_nnz[2] = 1; + pu1_top_nnz[3] = 1; + + /* added to be used by BS computation 
module */ + ui16_csbp |= 0x00CC; + + } + + /*******************************************************************/ + /* Block 2 residual decoding, check cbp and proceed (subblock = 8) */ + /*******************************************************************/ + pi2_coeff_block += 64; + if(!(u1_cbp & 0x4)) + { + *(UWORD16 *)(pu1_top_nnz) = 0; + *(UWORD16 *)(pu1_left_nnz + 2) = 0; + } + else + { + + dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm; + + ih264d_read_coeff8x8_cabac(ps_bitstrm, + ps_dec, ps_cur_mb_info); + + pu1_left_nnz[2] = 1; + pu1_left_nnz[3] = 1; + + pu1_top_nnz[0] = 1; + pu1_top_nnz[1] = 1; + + /* added to be used by BS computation module */ + ui16_csbp |= 0x3300; + } + + /*******************************************************************/ + /* Block 3 residual decoding, check cbp and proceed (subblock = 10)*/ + /*******************************************************************/ + pi2_coeff_block += 64; + + if(!(u1_cbp & 0x8)) + { + *(UWORD16 *)(pu1_top_nnz + 2) = 0; + *(UWORD16 *)(pu1_left_nnz + 2) = 0; + } + else + { + + dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm; + + ih264d_read_coeff8x8_cabac(ps_bitstrm, + ps_dec, ps_cur_mb_info); + + pu1_left_nnz[2] = 1; + pu1_left_nnz[3] = 1; + + pu1_top_nnz[2] = 1; + pu1_top_nnz[3] = 1; + + /* added to be used by BS computation module */ + ui16_csbp |= 0xCC00; + } + } + } + else + { + *(UWORD32 *)(pu1_top_nnz) = 0; + *(UWORD32 *)(pu1_left_nnz) = 0; + } + /*--------------------------------------------------------------------*/ + /* Store the last row of N values to top row */ + /*--------------------------------------------------------------------*/ + ps_cur_mb_info->u2_luma_csbp = ui16_csbp; + ps_cur_mb_info->ps_curmb->u2_luma_csbp = ui16_csbp; + { + WORD8 i; + UWORD16 u2_chroma_csbp = 0; + ps_cur_mb_info->u2_chroma_csbp = 0; + + u1_cbp >>= 4; + pu1_top_nnz = pu1_top_nnz_uv; + pu1_left_nnz = ps_dec->pu1_left_nnz_uv; + 
/*--------------------------------------------------------------------*/ + /* if Chroma Component not present OR no ac values present */ + /* Set the values of N to zero */ + /*--------------------------------------------------------------------*/ + if(u1_cbp == CBPC_ALLZERO) + { + ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x1; + *(UWORD32 *)(pu1_top_nnz) = 0; + *(UWORD32 *)(pu1_left_nnz) = 0; + p_curr_ctxt->u1_yuv_dc_csbp &= 0x1; + return (0); + } + + /*--------------------------------------------------------------------*/ + /* Decode Chroma DC values */ + /*--------------------------------------------------------------------*/ + for(i = 0; i < 2; i++) + { + UWORD8 uc_a = 1, uc_b = 1; + UWORD32 u4_ctx_inc; + UWORD8 uc_codedBlockFlag; + UWORD8 pu1_inv_scan[4] = + { 0, 1, 2, 3 }; + WORD32 u4_scale; + WORD32 i4_mb_inter_inc; + tu_sblk4x4_coeff_data_t *ps_tu_4x4 = + (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data; + WORD16 *pi2_coeff_data = + (WORD16 *)ps_dec->pv_parse_tu_coeff_data; + WORD16 ai2_dc_coef[4]; + + INC_SYM_COUNT(&(ps_dec->s_cab_dec_env)); + u4_scale = (i) ? 
+ (ps_dec->pu2_quant_scale_v[0] + << ps_dec->u1_qp_v_div6) : + (ps_dec->pu2_quant_scale_u[0] + << ps_dec->u1_qp_u_div6); + + /*--------------------------------------------------------------------*/ + /* Decode Bitstream to get the DC coeff */ + /*--------------------------------------------------------------------*/ + uc_a = (u1_left_dc_csbp >> i) & 0x01; + uc_b = (u1_top_dc_csbp >> i) & 0x01; + u4_ctx_inc = (uc_a + (uc_b << 1)); + + ps_src_bin_ctxt = (ps_dec->p_cbf_t[CHROMA_DC_CTXCAT]) + u4_ctx_inc; + + uc_codedBlockFlag = + ih264d_read_coeff4x4_cabac(ps_bitstrm, + CHROMA_DC_CTXCAT, + ps_dec->p_significant_coeff_flag_t[CHROMA_DC_CTXCAT], + ps_dec, ps_src_bin_ctxt); + + i4_mb_inter_inc = (!((ps_cur_mb_info->ps_curmb->u1_mb_type == I_4x4_MB) + || (ps_cur_mb_info->ps_curmb->u1_mb_type == I_16x16_MB))) + * 3; + + if(ps_dec->s_high_profile.u1_scaling_present) + { + u4_scale *= + ps_dec->s_high_profile.i2_scalinglist4x4[i4_mb_inter_inc + + 1 + i][0]; + + } + else + { + u4_scale <<= 4; + } + + if(uc_codedBlockFlag) + { + WORD32 i_z0, i_z1, i_z2, i_z3; + WORD32 *pi4_scale; + + SET_BIT(u1_top_dc_csbp, i); + SET_BIT(u1_left_dc_csbp, i); + + ai2_dc_coef[0] = 0; + ai2_dc_coef[1] = 0; + ai2_dc_coef[2] = 0; + ai2_dc_coef[3] = 0; + + ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4, + ai2_dc_coef, + pu1_inv_scan); + i_z0 = (ai2_dc_coef[0] + ai2_dc_coef[2]); + i_z1 = (ai2_dc_coef[0] - ai2_dc_coef[2]); + i_z2 = (ai2_dc_coef[1] - ai2_dc_coef[3]); + i_z3 = (ai2_dc_coef[1] + ai2_dc_coef[3]); + + /*-----------------------------------------------------------*/ + /* Scaling and storing the values back */ + /*-----------------------------------------------------------*/ + *pi2_coeff_data++ = ((i_z0 + i_z3) * u4_scale) >> 5; + *pi2_coeff_data++ = ((i_z0 - i_z3) * u4_scale) >> 5; + *pi2_coeff_data++ = ((i_z1 + i_z2) * u4_scale) >> 5; + *pi2_coeff_data++ = ((i_z1 - i_z2) * u4_scale) >> 5; + + ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_data; + + 
SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,(i+1)); + } + else + { + CLEARBIT(u1_top_dc_csbp, i); + CLEARBIT(u1_left_dc_csbp, i); + } + } + + /*********************************************************************/ + /* Update the DC csbp */ + /*********************************************************************/ + ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x1; + p_curr_ctxt->u1_yuv_dc_csbp &= 0x1; + ps_dec->pu1_left_yuv_dc_csbp[0] |= (u1_left_dc_csbp << 1); + p_curr_ctxt->u1_yuv_dc_csbp |= (u1_top_dc_csbp << 1); + if(u1_cbp == CBPC_ACZERO) + { + *(UWORD32 *)(pu1_top_nnz) = 0; + *(UWORD32 *)(pu1_left_nnz) = 0; + return (0); + } + /*--------------------------------------------------------------------*/ + /* Decode Chroma AC values */ + /*--------------------------------------------------------------------*/ + { + UWORD32 u4_temp; + /*****************************************************************/ + /* U Block residual decoding, check cbp and proceed (subblock=0)*/ + /*****************************************************************/ + u2_chroma_csbp = ih264d_cabac_parse_8x8block(pi2_coeff_block, 2, + CHROMA_AC_CTXCAT, + ps_dec, pu1_top_nnz, + pu1_left_nnz); + + pi2_coeff_block += MB_CHROM_SIZE; + /*****************************************************************/ + /* V Block residual decoding, check cbp and proceed (subblock=1)*/ + /*****************************************************************/ + u4_temp = ih264d_cabac_parse_8x8block(pi2_coeff_block, 2, + CHROMA_AC_CTXCAT, + ps_dec, (pu1_top_nnz + 2), + (pu1_left_nnz + 2)); + u2_chroma_csbp |= (u4_temp << 4); + } + /*********************************************************************/ + /* Update the AC csbp */ + /*********************************************************************/ + ps_cur_mb_info->u2_chroma_csbp = u2_chroma_csbp; + } + + return (0); +} + diff --git a/decoder/ih264d_parse_cabac.h b/decoder/ih264d_parse_cabac.h new file mode 100755 index 0000000..eb66e8c --- /dev/null +++ 
b/decoder/ih264d_parse_cabac.h @@ -0,0 +1,60 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*! + *************************************************************************** + * \file ih264d_parse_cabac.h + * + * \brief + * This file contains cabac Residual decoding routines. 
+ * + * \date + * 20/03/2003 + * + * \author NS + *************************************************************************** + */ +#ifndef _IH264D_PARSE_CABAC_H_ +#define _IH264D_PARSE_CABAC_H_ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" + +#define UCOFF_LEVEL 14 + + +UWORD8 ih264d_read_coeff4x4_cabac(dec_bit_stream_t *ps_bitstrm, + UWORD32 u4_ctxcat, + bin_ctxt_model_t *ps_ctxt_sig_coeff, + dec_struct_t *ps_dec, + bin_ctxt_model_t *ps_ctxt_coded); + +void ih264d_read_coeff8x8_cabac(dec_bit_stream_t *ps_bitstrm, + dec_struct_t *ps_dec, + dec_mb_info_t *ps_cur_mb_info); + +UWORD32 cabac_parse_8x8block_transform8x8_set(WORD16 *pi2_coeff_block, + dec_struct_t * ps_dec, + UWORD8 *pu1_top_nnz, + UWORD8 *pu1_left_nnz, + dec_mb_info_t *ps_cur_mb_info); + +#endif /* _IH264D_PARSE_CABAC_H_ */ diff --git a/decoder/ih264d_parse_cavlc.c b/decoder/ih264d_parse_cavlc.c new file mode 100755 index 0000000..a3f345c --- /dev/null +++ b/decoder/ih264d_parse_cavlc.c @@ -0,0 +1,2694 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! 
+ *************************************************************************** + * \file ih264d_parse_cavlc.c + * + * \brief + * This file contains UVLC related functions. + * + * \date + * 20/11/2002 + * + * \author NS + *************************************************************************** + */ + +#include <string.h> +#include <stdio.h> + +#include "ih264d_bitstrm.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_error_handler.h" +#include "ih264d_defs.h" +#include "ih264d_debug.h" +#include "ih264d_cabac.h" +#include "ih264d_structs.h" +#include "ih264d_tables.h" +#include "ih264d_tables.h" +#include "ih264d_mb_utils.h" + +void ih264d_unpack_coeff4x4_dc_4x4blk(tu_sblk4x4_coeff_data_t *ps_tu_4x4, + WORD16 *pi2_out_coeff_data, + UWORD8 *pu1_inv_scan); + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_uev */ +/* */ +/* Description : Reads the unsigned Exp Golomb codec syntax from the */ +/* ps_bitstrm as specified in section 9.1 of H264 standard */ +/* It also increases bitstream u4_ofst by the number of bits */ +/* parsed for UEV decode operation */ +/* */ +/* Inputs : bitstream base pointer and bitsream u4_ofst in bits */ +/* Globals : None */ +/* Processing : */ +/* Outputs : UEV decoded syntax element and incremented ps_bitstrm u4_ofst */ +/* Returns : UEV decoded syntax element */ +/* */ +/* Issues : Does not check if ps_bitstrm u4_ofst exceeds max ps_bitstrm i4_size */ +/* for performamce. 
Caller might have to do error resilence */ +/* check for bitstream overflow */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 19 09 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +UWORD32 ih264d_uev(UWORD32 *pu4_bitstrm_ofst, UWORD32 *pu4_bitstrm_buf) +{ + UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst; + UWORD32 u4_word, u4_ldz; + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf); + u4_ldz = CLZ(u4_word); + /* Flush the ps_bitstrm */ + u4_bitstream_offset += (u4_ldz + 1); + /* Read the suffix from the ps_bitstrm */ + u4_word = 0; + if(u4_ldz) + GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz); + *pu4_bitstrm_ofst = u4_bitstream_offset; + return ((1 << u4_ldz) + u4_word - 1); +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_sev */ +/* */ +/* Description : Reads the signed Exp Golomb codec syntax from the ps_bitstrm */ +/* as specified in section 9.1 of H264 standard. */ +/* It also increases bitstream u4_ofst by the number of bits */ +/* parsed for SEV decode operation */ +/* */ +/* Inputs : bitstream base pointer and bitsream u4_ofst in bits */ +/* Globals : None */ +/* Processing : */ +/* Outputs : SEV decoded syntax element and incremented ps_bitstrm u4_ofst */ +/* Returns : SEV decoded syntax element */ +/* */ +/* Issues : Does not check if ps_bitstrm u4_ofst exceeds max ps_bitstrm i4_size */ +/* for performamce. 
Caller might have to do error resilence */ +/* check for bitstream overflow */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 19 09 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_sev(UWORD32 *pu4_bitstrm_ofst, UWORD32 *pu4_bitstrm_buf) +{ + UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst; + UWORD32 u4_word, u4_ldz, u4_abs_val; + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf); + u4_ldz = CLZ(u4_word); + + /* Flush the ps_bitstrm */ + u4_bitstream_offset += (u4_ldz + 1); + + /* Read the suffix from the ps_bitstrm */ + u4_word = 0; + if(u4_ldz) + GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz); + + *pu4_bitstrm_ofst = u4_bitstream_offset; + u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1; + + if(u4_word & 0x1) + return (-(WORD32)u4_abs_val); + else + return (u4_abs_val); +} + +/*****************************************************************************/ +/* */ +/* Function Name : get_tev_range_1 */ +/* */ +/* Description : Reads the TEV Exp Golomb codec syntax from the ps_bitstrm */ +/* as specified in section 9.1 of H264 standard. This will */ +/* called only when the input range is 1 for TEV decode. */ +/* If range is more than 1, then UEV decode is done */ +/* */ +/* Inputs : bitstream base pointer and bitsream u4_ofst in bits */ +/* Globals : None */ +/* Processing : */ +/* Outputs : TEV decoded syntax element and incremented ps_bitstrm u4_ofst */ +/* Returns : TEV decoded syntax element */ +/* */ +/* Issues : Does not check if ps_bitstrm u4_ofst exceeds max ps_bitstrm i4_size */ +/* for performamce. 
Caller might have to do error resilence */ +/* check for bitstream overflow */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 19 09 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +UWORD32 ih264d_tev_range1(UWORD32 *pu4_bitstrm_ofst, UWORD32 *pu4_bitstrm_buf) +{ + UWORD32 u4_code; + GETBIT(u4_code, *pu4_bitstrm_ofst, pu4_bitstrm_buf); + return (!u4_code); +} + +/*! + ************************************************************************** + * \if Function name : ih264d_uvlc \endif + * + * \brief + * + * Reads the unsigned/signed/truncated integer Exp-Golomb-coded syntax element + * with the left bit first. The parsing process for this descriptor is specified + * in subclause 9.1. + * + * \param ps_bitstrm : Pointer to Bitstream Structure . + * \param u4_range : Range value in case of Truncated Exp-Golomb-code + * \param pi_bitstrm_ofst : Pointer to the local copy of Bitstream u4_ofst + * \param u1_flag : Flag indicating the case of UEV,SEV or TEV + * \param u4_bitstrm_ofst : Local copy of Bitstream u4_ofst + * \param pu4_bitstrm_buf : Pointer to the Bitstream buffer + * + * \return + * Returns Code Value. 
+ * + ************************************************************************** + */ + +WORD32 ih264d_uvlc(dec_bit_stream_t *ps_bitstrm, + UWORD32 u4_range, + UWORD32 *pi_bitstrm_ofst, + UWORD8 u1_flag, + UWORD32 u4_bitstrm_ofst, + UWORD32 *pu4_bitstrm_buf) +{ + UWORD32 word, word2, cur_bit, cur_word, code_val, code_num, clz; + + SWITCHOFFTRACE; + cur_bit = u4_bitstrm_ofst & 0x1F; + cur_word = u4_bitstrm_ofst >> 5; + word = pu4_bitstrm_buf[cur_word]; + word2 = pu4_bitstrm_buf[cur_word + 1]; + + if(cur_bit != 0) + { + word <<= cur_bit; + word2 >>= (32 - cur_bit); + word |= word2; + } + + if(u1_flag == TEV && u4_range == 1) + { + word >>= 31; + word = 1 - word; + (*pi_bitstrm_ofst)++; + ps_bitstrm->u4_ofst = *pi_bitstrm_ofst; + return (WORD32)word; + } + + //finding clz + { + UWORD32 ui32_code, ui32_mask; + + ui32_code = word; + ui32_mask = 0x80000000; + clz = 0; + + /* DSP implements this with LMBD instruction */ + /* so there we don't need to break the loop */ + while(!(ui32_code & ui32_mask)) + { + clz++; + ui32_mask >>= 1; + if(0 == ui32_mask) + break; + } + } + + if(clz == 0) + { + *pi_bitstrm_ofst = *pi_bitstrm_ofst + (2 * clz) + 1; + ps_bitstrm->u4_ofst = *pi_bitstrm_ofst; + return 0; + } + + word <<= (clz + 1); + word >>= (32 - clz); + code_num = (1 << clz) + word - 1; + *pi_bitstrm_ofst = *pi_bitstrm_ofst + (2 * clz) + 1; + ps_bitstrm->u4_ofst = *pi_bitstrm_ofst; + + if(u1_flag == TEV || u1_flag == UEV) + return (WORD32)code_num; + + code_val = (code_num + 1) >> 1; + if(!(code_num & 0x01)) + return -((WORD32)code_val); + return (WORD32)code_val; + +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_cavlc_4x4res_block_totalcoeff_1 */ +/* */ +/* Description : This function does cavlc decoding of 4x4 block residual */ +/* coefficient when total coeff is equal to 1. The parsing */ +/* is done as defined in section 9.2.2 and 9.2.3 of the */ +/* H264 standard. 
*/ +/* */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : <Does it use any global variables?> */ +/* Processing : <Describe how the function operates - include algorithm */ +/* description> */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 25 09 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_cavlc_4x4res_block_totalcoeff_1(UWORD32 u4_isdc, + UWORD32 u4_total_coeff_trail_one, + dec_bit_stream_t *ps_bitstrm) +{ + + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst; + UWORD32 u4_trailing_ones = u4_total_coeff_trail_one & 0xFFFF; + WORD32 i2_level; + UWORD32 u4_tot_zero, u4_ldz, u4_scan_pos; + + tu_sblk4x4_coeff_data_t *ps_tu_4x4; + WORD16 *pi2_coeff_data; + dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle; + + ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data; + ps_tu_4x4->u2_sig_coeff_map = 0; + pi2_coeff_data = &ps_tu_4x4->ai2_level[0]; + + + if(u4_trailing_ones) + { + UWORD32 u4_sign; + /****************************************************************/ + /* Decode Trailing One as in section 9.2.2 */ + /****************************************************************/ + GETBIT(u4_sign, u4_bitstream_offset, pu4_bitstrm_buf); + i2_level = u4_sign ? 
-1 : 1; + } + else + { + /****************************************************************/ + /* Decoding Level based on prefix and suffix as in 9.2.2 */ + /****************************************************************/ + UWORD32 u4_lev_suffix, u4_lev_suffix_size; + WORD32 u2_lev_code, u2_abs_value; + UWORD32 u4_lev_prefix; + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset, + pu4_bitstrm_buf); + u2_lev_code = (2 + MIN(u4_lev_prefix, 15)); + + if(14 == u4_lev_prefix) + u4_lev_suffix_size = 4; + else if(15 <= u4_lev_prefix) + { + u2_lev_code += 15; + u4_lev_suffix_size = u4_lev_prefix - 3; + } + else + u4_lev_suffix_size = 0; + + //HP_LEVEL_PREFIX + if(16 <= u4_lev_prefix) + { + u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096); + } + if(u4_lev_suffix_size) + { + GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf, + u4_lev_suffix_size); + u2_lev_code += u4_lev_suffix; + } + + u2_abs_value = (u2_lev_code + 2) >> 1; + /*********************************************************/ + /* If Level code is odd, level is negative else positive */ + /*********************************************************/ + i2_level = (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value; + + } + + /****************************************************************/ + /* Decoding total zeros as in section 9.2.3, table 9.7 */ + /****************************************************************/ + FIND_ONE_IN_STREAM_LEN(u4_ldz, u4_bitstream_offset, pu4_bitstrm_buf, 8); + + if(u4_ldz) + { + GETBIT(u4_tot_zero, u4_bitstream_offset, pu4_bitstrm_buf); + u4_tot_zero = (u4_ldz << 1) - u4_tot_zero; + } + else + u4_tot_zero = 0; + + /***********************************************************************/ + /* Inverse scan and store residual coeff. 
Update the bitstream u4_ofst */ + /***********************************************************************/ + u4_scan_pos = u4_tot_zero + u4_isdc; + if(u4_scan_pos > 15) + return -1; + + SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos); + *pi2_coeff_data++ = i2_level; + + + { + WORD32 offset; + offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4; + offset = ALIGN4(offset); + ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset); + } + + ps_bitstrm->u4_ofst = u4_bitstream_offset; + return 0; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_cavlc_4x4res_block_totalcoeff_2to10 */ +/* */ +/* Description : This function does cavlc decoding of 4x4 block residual */ +/* coefficient when total coeffs are between two and ten */ +/* inclusive. Parsing is done as defined in section 9.2.2 */ +/* and 9.2.3 the H264 standard. */ +/* */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : <Does it use any global variables?> */ +/* Processing : <Describe how the function operates - include algorithm */ +/* description> */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 25 09 2008 Jay Draft */ +/* */ +/*****************************************************************************/ + +WORD32 ih264d_cavlc_4x4res_block_totalcoeff_2to10(UWORD32 u4_isdc, + UWORD32 u4_total_coeff_trail_one, /*!<TotalCoefficients<<16+trailingones*/ + dec_bit_stream_t *ps_bitstrm) +{ + UWORD32 u4_total_zeroes; + WORD32 i; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst; + UWORD32 u4_trailing_ones = u4_total_coeff_trail_one & 0xFFFF; + UWORD32 u4_total_coeff = u4_total_coeff_trail_one >> 16; + 
WORD16 i2_level_arr[16]; + + tu_sblk4x4_coeff_data_t *ps_tu_4x4; + WORD16 *pi2_coeff_data; + dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle; + + ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data; + ps_tu_4x4->u2_sig_coeff_map = 0; + pi2_coeff_data = &ps_tu_4x4->ai2_level[0]; + + i = u4_total_coeff - 1; + + if(u4_trailing_ones) + { + /*********************************************************************/ + /* Decode Trailing Ones */ + /* read the sign of T1's and put them in level array */ + /*********************************************************************/ + UWORD32 u4_signs, u4_cnt = u4_trailing_ones; + WORD16 (*ppi2_trlone_lkup)[3] = + (WORD16 (*)[3])gai2_ih264d_trailing_one_level; + WORD16 *pi2_trlone_lkup; + + GETBITS(u4_signs, u4_bitstream_offset, pu4_bitstrm_buf, u4_cnt); + + pi2_trlone_lkup = ppi2_trlone_lkup[(1 << u4_cnt) - 2 + u4_signs]; + + while(u4_cnt--) + i2_level_arr[i--] = *pi2_trlone_lkup++; + } + + /****************************************************************/ + /* Decoding Levels Begins */ + /****************************************************************/ + if(i >= 0) + { + /****************************************************************/ + /* First level is decoded outside the loop as it has lot of */ + /* special cases. 
*/ + /****************************************************************/ + UWORD32 u4_lev_suffix, u4_suffix_len, u4_lev_suffix_size; + WORD32 u2_lev_code, u2_abs_value; + UWORD32 u4_lev_prefix; + + /***************************************************************/ + /* u4_suffix_len = 0, Find leading zeros in next 32 bits */ + /***************************************************************/ + FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset, + pu4_bitstrm_buf); + + /*********************************************************/ + /* Special decoding case when trailing ones are 3 */ + /*********************************************************/ + u2_lev_code = MIN(15, u4_lev_prefix); + + u2_lev_code += (3 == u4_trailing_ones) ? 0 : 2; + + if(14 == u4_lev_prefix) + u4_lev_suffix_size = 4; + else if(15 <= u4_lev_prefix) + { + u2_lev_code += 15; + u4_lev_suffix_size = u4_lev_prefix - 3; + } + else + u4_lev_suffix_size = 0; + + //HP_LEVEL_PREFIX + if(16 <= u4_lev_prefix) + { + u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096); + } + if(u4_lev_suffix_size) + { + GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf, + u4_lev_suffix_size); + u2_lev_code += u4_lev_suffix; + } + + u2_abs_value = (u2_lev_code + 2) >> 1; + /*********************************************************/ + /* If Level code is odd, level is negative else positive */ + /*********************************************************/ + i2_level_arr[i--] = (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value; + + u4_suffix_len = (u2_abs_value > 3) ? 
2 : 1; + + /*********************************************************/ + /* Now loop over the remaining levels */ + /*********************************************************/ + while(i >= 0) + { + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset, + pu4_bitstrm_buf); + + u4_lev_suffix_size = + (15 <= u4_lev_prefix) ? + (u4_lev_prefix - 3) : u4_suffix_len; + + /*********************************************************/ + /* Compute level code using prefix and suffix */ + /*********************************************************/ + GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf, + u4_lev_suffix_size); + u2_lev_code = (MIN(15,u4_lev_prefix) << u4_suffix_len) + + u4_lev_suffix; + + //HP_LEVEL_PREFIX + if(16 <= u4_lev_prefix) + { + u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096); + } + u2_abs_value = (u2_lev_code + 2) >> 1; + + /*********************************************************/ + /* If Level code is odd, level is negative else positive */ + /*********************************************************/ + i2_level_arr[i--] = + (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value; + + /*********************************************************/ + /* Increment suffix length if required */ + /*********************************************************/ + u4_suffix_len += + (u4_suffix_len < 6) ? 
+ (u2_abs_value + > (3 + << (u4_suffix_len + - 1))) : + 0; + } + + /****************************************************************/ + /* Decoding Levels Ends */ + /****************************************************************/ + } + + /****************************************************************/ + /* Decoding total zeros as in section 9.2.3, table 9.7 */ + /****************************************************************/ + { + UWORD32 u4_index; + const UWORD8 (*ppu1_total_zero_lkup)[64] = + (const UWORD8 (*)[64])gau1_ih264d_table_total_zero_2to10; + + NEXTBITS(u4_index, u4_bitstream_offset, pu4_bitstrm_buf, 6); + u4_total_zeroes = ppu1_total_zero_lkup[u4_total_coeff - 2][u4_index]; + + FLUSHBITS(u4_bitstream_offset, (u4_total_zeroes >> 4)); + u4_total_zeroes &= 0xf; + } + + /**************************************************************/ + /* Decode the runs and form the coefficient buffer */ + /**************************************************************/ + { + const UWORD8 *pu1_table_runbefore; + UWORD32 u4_run; + WORD32 k; + UWORD32 u4_scan_pos = u4_total_coeff + u4_total_zeroes - 1 + u4_isdc; + WORD32 u4_zeroes_left = u4_total_zeroes; + k = u4_total_coeff - 1; + + /**************************************************************/ + /* Decoding Runs Begin for zeros left > 6 */ + /**************************************************************/ + while((u4_zeroes_left > 6) && k) + { + UWORD32 u4_code; + + NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 3); + + if(u4_code != 0) + { + FLUSHBITS(u4_bitstream_offset, 3); + u4_run = (7 - u4_code); + } + else + { + + FIND_ONE_IN_STREAM_LEN(u4_code, u4_bitstream_offset, + pu4_bitstrm_buf, 11); + u4_run = (4 + u4_code); + } + + SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos); + *pi2_coeff_data++ = i2_level_arr[k--]; + u4_zeroes_left -= u4_run; + u4_scan_pos -= (u4_run + 1); + } + + /**************************************************************/ + /* Decoding Runs for 0 < zeros left <=6 */ + 
/**************************************************************/ + pu1_table_runbefore = (UWORD8 *)gau1_ih264d_table_run_before; + while((u4_zeroes_left > 0) && k) + { + UWORD32 u4_code; + NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 3); + + u4_code = pu1_table_runbefore[u4_code + (u4_zeroes_left << 3)]; + u4_run = u4_code >> 2; + + FLUSHBITS(u4_bitstream_offset, (u4_code & 0x03)); + + SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos); + *pi2_coeff_data++ = i2_level_arr[k--]; + u4_zeroes_left -= u4_run; + u4_scan_pos -= (u4_run + 1); + } + /**************************************************************/ + /* Decoding Runs End */ + /**************************************************************/ + + /**************************************************************/ + /* Copy the remaining coefficients */ + /**************************************************************/ + if(u4_zeroes_left < 0) + return -1; + while(k >= 0) + { + + SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos); + *pi2_coeff_data++ = i2_level_arr[k--]; + u4_scan_pos--; + } + } + + { + WORD32 offset; + offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4; + offset = ALIGN4(offset); + ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset); + } + + ps_bitstrm->u4_ofst = u4_bitstream_offset; + return 0; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_cavlc_4x4res_block_totalcoeff_11to16 */ +/* */ +/* Description : This function does cavlc decoding of 4x4 block residual */ +/* coefficient when total coeffs are greater than ten. */ +/* Parsing is done as defined in section 9.2.2 and 9.2.3 of */ +/* the H264 standard. 
*/ +/* */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : <Does it use any global variables?> */ +/* Processing : <Describe how the function operates - include algorithm */ +/* description> */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 25 09 2008 Jay Draft */ +/* */ +/*****************************************************************************/ + +WORD32 ih264d_cavlc_4x4res_block_totalcoeff_11to16(UWORD32 u4_isdc, + UWORD32 u4_total_coeff_trail_one, /*!<TotalCoefficients<<16+trailingones*/ + dec_bit_stream_t *ps_bitstrm ) +{ + UWORD32 u4_total_zeroes; + WORD32 i; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst; + UWORD32 u4_trailing_ones = u4_total_coeff_trail_one & 0xFFFF; + UWORD32 u4_total_coeff = u4_total_coeff_trail_one >> 16; + WORD16 i2_level_arr[16]; + + tu_sblk4x4_coeff_data_t *ps_tu_4x4; + WORD16 *pi2_coeff_data; + dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle; + + ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data; + ps_tu_4x4->u2_sig_coeff_map = 0; + pi2_coeff_data = &ps_tu_4x4->ai2_level[0]; + + i = u4_total_coeff - 1; + if(u4_trailing_ones) + { + /*********************************************************************/ + /* Decode Trailing Ones */ + /* read the sign of T1's and put them in level array */ + /*********************************************************************/ + UWORD32 u4_signs, u4_cnt = u4_trailing_ones; + WORD16 (*ppi2_trlone_lkup)[3] = + (WORD16 (*)[3])gai2_ih264d_trailing_one_level; + WORD16 *pi2_trlone_lkup; + + GETBITS(u4_signs, u4_bitstream_offset, pu4_bitstrm_buf, u4_cnt); + + pi2_trlone_lkup = ppi2_trlone_lkup[(1 << u4_cnt) - 2 + u4_signs]; + + while(u4_cnt--) + 
i2_level_arr[i--] = *pi2_trlone_lkup++; + } + + /****************************************************************/ + /* Decoding Levels Begins */ + /****************************************************************/ + if(i >= 0) + { + /****************************************************************/ + /* First level is decoded outside the loop as it has lot of */ + /* special cases. */ + /****************************************************************/ + UWORD32 u4_lev_suffix, u4_suffix_len, u4_lev_suffix_size; + UWORD16 u2_lev_code, u2_abs_value; + UWORD32 u4_lev_prefix; + + if(u4_trailing_ones < 3) + { + /*********************************************************/ + /* u4_suffix_len = 1 */ + /*********************************************************/ + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset, + pu4_bitstrm_buf); + + u4_lev_suffix_size = + (15 <= u4_lev_prefix) ? 
(u4_lev_prefix - 3) : 1; + + GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf, + u4_lev_suffix_size); + u2_lev_code = 2 + (MIN(u4_lev_prefix,15) << 1) + u4_lev_suffix; + + //HP_LEVEL_PREFIX + if(16 <= u4_lev_prefix) + { + u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096); + } + } + else + { + /*********************************************************/ + /*u4_suffix_len = 0 */ + /*********************************************************/ + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset, + pu4_bitstrm_buf); + + /*********************************************************/ + /* Special decoding case when trailing ones are 3 */ + /*********************************************************/ + u2_lev_code = MIN(15, u4_lev_prefix); + + u2_lev_code += (3 == u4_trailing_ones) ? 0 : (2); + + if(14 == u4_lev_prefix) + u4_lev_suffix_size = 4; + else if(15 <= u4_lev_prefix) + { + u2_lev_code += 15; + u4_lev_suffix_size = (u4_lev_prefix - 3); + } + else + u4_lev_suffix_size = 0; + + //HP_LEVEL_PREFIX + if(16 <= u4_lev_prefix) + { + u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096); + } + if(u4_lev_suffix_size) + { + GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf, + u4_lev_suffix_size); + u2_lev_code += u4_lev_suffix; + } + } + + u2_abs_value = (u2_lev_code + 2) >> 1; + /*********************************************************/ + /* If Level code is odd, level is negative else positive */ + /*********************************************************/ + i2_level_arr[i--] = (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value; + + u4_suffix_len = (u2_abs_value > 3) ? 
2 : 1; + + /*********************************************************/ + /* Now loop over the remaining levels */ + /*********************************************************/ + while(i >= 0) + { + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset, + pu4_bitstrm_buf); + + u4_lev_suffix_size = + (15 <= u4_lev_prefix) ? + (u4_lev_prefix - 3) : u4_suffix_len; + + /*********************************************************/ + /* Compute level code using prefix and suffix */ + /*********************************************************/ + GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf, + u4_lev_suffix_size); + u2_lev_code = (MIN(15,u4_lev_prefix) << u4_suffix_len) + + u4_lev_suffix; + + //HP_LEVEL_PREFIX + if(16 <= u4_lev_prefix) + { + u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096); + } + u2_abs_value = (u2_lev_code + 2) >> 1; + + /*********************************************************/ + /* If Level code is odd, level is negative else positive */ + /*********************************************************/ + i2_level_arr[i--] = + (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value; + + /*********************************************************/ + /* Increment suffix length if required */ + /*********************************************************/ + u4_suffix_len += + (u4_suffix_len < 6) ? 
+ (u2_abs_value + > (3 + << (u4_suffix_len + - 1))) : + 0; + } + + /****************************************************************/ + /* Decoding Levels Ends */ + /****************************************************************/ + } + + if(u4_total_coeff < (16 - u4_isdc)) + { + UWORD32 u4_index; + const UWORD8 (*ppu1_total_zero_lkup)[16] = + (const UWORD8 (*)[16])gau1_ih264d_table_total_zero_11to15; + + NEXTBITS(u4_index, u4_bitstream_offset, pu4_bitstrm_buf, 4); + u4_total_zeroes = ppu1_total_zero_lkup[u4_total_coeff - 11][u4_index]; + + FLUSHBITS(u4_bitstream_offset, (u4_total_zeroes >> 4)); + u4_total_zeroes &= 0xf; + } + else + u4_total_zeroes = 0; + + /**************************************************************/ + /* Decode the runs and form the coefficient buffer */ + /**************************************************************/ + { + const UWORD8 *pu1_table_runbefore; + UWORD32 u4_run; + WORD32 k; + UWORD32 u4_scan_pos = u4_total_coeff + u4_total_zeroes - 1 + u4_isdc; + WORD32 u4_zeroes_left = u4_total_zeroes; + k = u4_total_coeff - 1; + + /**************************************************************/ + /* Decoding Runs for 0 < zeros left <=6 */ + /**************************************************************/ + pu1_table_runbefore = (UWORD8 *)gau1_ih264d_table_run_before; + while((u4_zeroes_left > 0) && k) + { + UWORD32 u4_code; + NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 3); + + u4_code = pu1_table_runbefore[u4_code + (u4_zeroes_left << 3)]; + u4_run = u4_code >> 2; + + FLUSHBITS(u4_bitstream_offset, (u4_code & 0x03)); + SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos); + *pi2_coeff_data++ = i2_level_arr[k--]; + u4_zeroes_left -= u4_run; + u4_scan_pos -= (u4_run + 1); + } + /**************************************************************/ + /* Decoding Runs End */ + /**************************************************************/ + + /**************************************************************/ + /* Copy the remaining 
coefficients */ + /**************************************************************/ + if(u4_zeroes_left < 0) + return -1; + while(k >= 0) + { + SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos); + *pi2_coeff_data++ = i2_level_arr[k--]; + u4_scan_pos--; + } + } + + { + WORD32 offset; + offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4; + offset = ALIGN4(offset); + ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset); + } + + ps_bitstrm->u4_ofst = u4_bitstream_offset; + return 0; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_rest_of_residual_cav_chroma_dc_block */ +/* */ +/* Description : This function does the Cavlc parsing of the bitstream */ +/* for chroma dc coefficients */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : <Does it use any global variables?> */ +/* Processing : <Describe how the function operates - include algorithm */ +/* description> */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 15 09 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +void ih264d_rest_of_residual_cav_chroma_dc_block(UWORD32 u4_total_coeff_trail_one, + dec_bit_stream_t *ps_bitstrm) +{ + UWORD32 u4_total_zeroes; + WORD16 i; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst; + UWORD32 u4_trailing_ones = u4_total_coeff_trail_one & 0xFFFF; + UWORD32 u4_total_coeff = u4_total_coeff_trail_one >> 16; + WORD16 i2_level_arr[4]; + + tu_sblk4x4_coeff_data_t *ps_tu_4x4; + WORD16 *pi2_coeff_data; + dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle; + + ps_tu_4x4 = (tu_sblk4x4_coeff_data_t 
*)ps_dec->pv_parse_tu_coeff_data; + ps_tu_4x4->u2_sig_coeff_map = 0; + pi2_coeff_data = &ps_tu_4x4->ai2_level[0]; + + i = u4_total_coeff - 1; + if(u4_trailing_ones) + { + /*********************************************************************/ + /* Decode Trailing Ones */ + /* read the sign of T1's and put them in level array */ + /*********************************************************************/ + UWORD32 u4_signs, u4_cnt = u4_trailing_ones; + WORD16 (*ppi2_trlone_lkup)[3] = + (WORD16 (*)[3])gai2_ih264d_trailing_one_level; + WORD16 *pi2_trlone_lkup; + + GETBITS(u4_signs, u4_bitstream_offset, pu4_bitstrm_buf, u4_cnt); + + pi2_trlone_lkup = ppi2_trlone_lkup[(1 << u4_cnt) - 2 + u4_signs]; + + while(u4_cnt--) + i2_level_arr[i--] = *pi2_trlone_lkup++; + } + + /****************************************************************/ + /* Decoding Levels Begins */ + /****************************************************************/ + if(i >= 0) + { + /****************************************************************/ + /* First level is decoded outside the loop as it has lot of */ + /* special cases. */ + /****************************************************************/ + UWORD32 u4_lev_suffix, u4_suffix_len, u4_lev_suffix_size; + UWORD16 u2_lev_code, u2_abs_value; + UWORD32 u4_lev_prefix; + + /***************************************************************/ + /* u4_suffix_len = 0, Find leading zeros in next 32 bits */ + /***************************************************************/ + FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset, + pu4_bitstrm_buf); + + /*********************************************************/ + /* Special decoding case when trailing ones are 3 */ + /*********************************************************/ + u2_lev_code = MIN(15, u4_lev_prefix); + + u2_lev_code += (3 == u4_trailing_ones) ? 
0 : (2); + + if(14 == u4_lev_prefix) + u4_lev_suffix_size = 4; + else if(15 <= u4_lev_prefix) + { + u2_lev_code += 15; + u4_lev_suffix_size = u4_lev_prefix - 3; + } + else + u4_lev_suffix_size = 0; + + //HP_LEVEL_PREFIX + if(16 <= u4_lev_prefix) + { + u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096); + } + if(u4_lev_suffix_size) + { + GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf, + u4_lev_suffix_size); + u2_lev_code += u4_lev_suffix; + } + + u2_abs_value = (u2_lev_code + 2) >> 1; + /*********************************************************/ + /* If Level code is odd, level is negative else positive */ + /*********************************************************/ + i2_level_arr[i--] = (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value; + + u4_suffix_len = (u2_abs_value > 3) ? 2 : 1; + + /*********************************************************/ + /* Now loop over the remaining levels */ + /*********************************************************/ + while(i >= 0) + { + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset, + pu4_bitstrm_buf); + + u4_lev_suffix_size = + (15 <= u4_lev_prefix) ? 
+ (u4_lev_prefix - 3) : u4_suffix_len; + + /*********************************************************/ + /* Compute level code using prefix and suffix */ + /*********************************************************/ + GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf, + u4_lev_suffix_size); + u2_lev_code = (MIN(u4_lev_prefix,15) << u4_suffix_len) + + u4_lev_suffix; + + //HP_LEVEL_PREFIX + if(16 <= u4_lev_prefix) + { + u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096); + } + u2_abs_value = (u2_lev_code + 2) >> 1; + + /*********************************************************/ + /* If Level code is odd, level is negative else positive */ + /*********************************************************/ + i2_level_arr[i--] = + (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value; + + /*********************************************************/ + /* Increment suffix length if required */ + /*********************************************************/ + u4_suffix_len += (u2_abs_value > (3 << (u4_suffix_len - 1))); + } + + /****************************************************************/ + /* Decoding Levels Ends */ + /****************************************************************/ + } + + if(u4_total_coeff < 4) + { + UWORD32 u4_max_ldz = (4 - u4_total_coeff); + FIND_ONE_IN_STREAM_LEN(u4_total_zeroes, u4_bitstream_offset, + pu4_bitstrm_buf, u4_max_ldz); + } + else + u4_total_zeroes = 0; + + /**************************************************************/ + /* Decode the runs and form the coefficient buffer */ + /**************************************************************/ + { + const UWORD8 *pu1_table_runbefore; + UWORD32 u4_run; + UWORD32 u4_scan_pos = (u4_total_coeff + u4_total_zeroes - 1); + UWORD32 u4_zeroes_left = u4_total_zeroes; + i = u4_total_coeff - 1; + + /**************************************************************/ + /* Decoding Runs for 0 < zeros left <=6 */ + /**************************************************************/ + pu1_table_runbefore = 
(UWORD8 *)gau1_ih264d_table_run_before; + while(u4_zeroes_left && i) + { + UWORD32 u4_code; + NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 3); + + u4_code = pu1_table_runbefore[u4_code + (u4_zeroes_left << 3)]; + u4_run = u4_code >> 2; + + FLUSHBITS(u4_bitstream_offset, (u4_code & 0x03)); + SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos); + *pi2_coeff_data++ = i2_level_arr[i--]; + u4_zeroes_left -= u4_run; + u4_scan_pos -= (u4_run + 1); + } + /**************************************************************/ + /* Decoding Runs End */ + /**************************************************************/ + + /**************************************************************/ + /* Copy the remaining coefficients */ + /**************************************************************/ + while(i >= 0) + { + SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos); + *pi2_coeff_data++ = i2_level_arr[i--]; + u4_scan_pos--; + } + } + + { + WORD32 offset; + offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4; + offset = ALIGN4(offset); + ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset); + } + + ps_bitstrm->u4_ofst = u4_bitstream_offset; +} + +/*! + ************************************************************************** + * \if Function name : CavlcParsingInvScanInvQuant \endif + * + * \brief + * This function do cavlc parsing of coefficient tokens for any block + * type except chromDc and depending + * on whenther any coefficients to be parsed calls module + * RestOfResidualBlockCavlc. + * + * \return + * Returns total number of non-zero coefficients. 
+ * + ************************************************************************** + */ + +WORD32 ih264d_cavlc_parse4x4coeff_n0to7(WORD16 *pi2_coeff_block, + UWORD32 u4_isdc, /* is it a DC block */ + WORD32 u4_n, + dec_struct_t *ps_dec, + UWORD32 *pu4_total_coeff) +{ + dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst; + UWORD32 u4_code, u4_index, u4_ldz; + const UWORD16 *pu2_code = (const UWORD16*)gau2_ih264d_code_gx; + const UWORD16 *pu2_offset_num_vlc = + (const UWORD16 *)gau2_ih264d_offset_num_vlc_tab; + UWORD32 u4_offset_num_vlc = pu2_offset_num_vlc[u4_n]; + + + UNUSED(pi2_coeff_block); + *pu4_total_coeff = 0; + FIND_ONE_IN_STREAM_32(u4_ldz, u4_bitstream_offset, pu4_bitstrm_buf); + NEXTBITS(u4_index, u4_bitstream_offset, pu4_bitstrm_buf, 3); + u4_index += (u4_ldz << 3); + u4_index += u4_offset_num_vlc; + + u4_index = MIN(u4_index, 303); + u4_code = pu2_code[u4_index]; + + FLUSHBITS(u4_bitstream_offset, (u4_code & 0x03)); + ps_bitstrm->u4_ofst = u4_bitstream_offset; + *pu4_total_coeff = (u4_code >> 4); + + if(*pu4_total_coeff) + { + UWORD32 u4_trailing_ones, u4_offset, u4_total_coeff_tone; + const UWORD8 *pu1_offset = + (UWORD8 *)gau1_ih264d_total_coeff_fn_ptr_offset; + WORD32 ret; + u4_trailing_ones = ((u4_code >> 2) & 0x03); + u4_offset = pu1_offset[*pu4_total_coeff - 1]; + u4_total_coeff_tone = (*pu4_total_coeff << 16) | u4_trailing_ones; + + ret = ps_dec->pf_cavlc_4x4res_block[u4_offset](u4_isdc, + u4_total_coeff_tone, + ps_bitstrm); + if(ret != 0) + return ERROR_CAVLC_NUM_COEFF_T; + } + + return OK; +} + +WORD32 ih264d_cavlc_parse4x4coeff_n8(WORD16 *pi2_coeff_block, + UWORD32 u4_isdc, /* is it a DC block */ + WORD32 u4_n, + dec_struct_t *ps_dec, + UWORD32 *pu4_total_coeff) +{ + + dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst; + UWORD32 u4_code; + 
UNUSED(u4_n); + UNUSED(pi2_coeff_block); + GETBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 6); + ps_bitstrm->u4_ofst = u4_bitstream_offset; + *pu4_total_coeff = 0; + + if(u4_code != 3) + { + UWORD8 *pu1_offset = (UWORD8 *)gau1_ih264d_total_coeff_fn_ptr_offset; + UWORD32 u4_trailing_ones, u4_offset, u4_total_coeff_tone; + + *pu4_total_coeff = (u4_code >> 2) + 1; + u4_trailing_ones = u4_code & 0x03; + u4_offset = pu1_offset[*pu4_total_coeff - 1]; + u4_total_coeff_tone = (*pu4_total_coeff << 16) | u4_trailing_ones; + + ps_dec->pf_cavlc_4x4res_block[u4_offset](u4_isdc, + u4_total_coeff_tone, + ps_bitstrm); + } + + return OK; +} + +/*! + ************************************************************************** + * \if Function name : ih264d_cavlc_parse_chroma_dc \endif + * + * \brief + * This function do cavlc parsing of coefficient tokens chromDc block + * and depending on whenther any coefficients to be parsed calls module + * ih264d_rest_of_residual_cav_chroma_dc_block. + * + * \return + * Returns total number of non-zero coefficients. 
+ * + ************************************************************************** + */ + +void ih264d_cavlc_parse_chroma_dc(dec_mb_info_t *ps_cur_mb_info, + WORD16 *pi2_coeff_block, + dec_bit_stream_t *ps_bitstrm, + UWORD32 u4_scale_u, + UWORD32 u4_scale_v, + WORD32 i4_mb_inter_inc) +{ + UWORD32 u4_total_coeff, u4_trailing_ones, u4_total_coeff_tone, u4_code; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst; + const UWORD8 *pu1_cav_chromdc = (const UWORD8*)gau1_ih264d_cav_chromdc_vld; + UNUSED(i4_mb_inter_inc); + /******************************************************************/ + /* Chroma DC Block for U component */ + /******************************************************************/ + NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 8); + + u4_code = pu1_cav_chromdc[u4_code]; + + FLUSHBITS(u4_bitstream_offset, ((u4_code & 0x7) + 1)); + ps_bitstrm->u4_ofst = u4_bitstream_offset; + + u4_total_coeff = (u4_code >> 5); + + if(u4_total_coeff) + { + WORD32 i_z0, i_z1, i_z2, i_z3; + tu_sblk4x4_coeff_data_t *ps_tu_4x4; + dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle; + WORD16 ai2_dc_coef[4]; + UWORD8 pu1_inv_scan[4] = + { 0, 1, 2, 3 }; + WORD16 *pi2_coeff_data = + (WORD16 *)ps_dec->pv_parse_tu_coeff_data; + + ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data; + + u4_trailing_ones = ((u4_code >> 3) & 0x3); + u4_total_coeff_tone = (u4_total_coeff << 16) | u4_trailing_ones; + ih264d_rest_of_residual_cav_chroma_dc_block(u4_total_coeff_tone, + ps_bitstrm); + + ai2_dc_coef[0] = 0; + ai2_dc_coef[1] = 0; + ai2_dc_coef[2] = 0; + ai2_dc_coef[3] = 0; + + ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4, + ai2_dc_coef, + pu1_inv_scan); + /*-------------------------------------------------------------------*/ + /* Inverse 2x2 transform and scaling of chroma DC */ + /*-------------------------------------------------------------------*/ + i_z0 = (ai2_dc_coef[0] + ai2_dc_coef[2]); + i_z1 
= (ai2_dc_coef[0] - ai2_dc_coef[2]); + i_z2 = (ai2_dc_coef[1] - ai2_dc_coef[3]); + i_z3 = (ai2_dc_coef[1] + ai2_dc_coef[3]); + + /*-----------------------------------------------------------*/ + /* Scaling and storing the values back */ + /*-----------------------------------------------------------*/ + *pi2_coeff_data++ = ((i_z0 + i_z3) * u4_scale_u) >> 5; + *pi2_coeff_data++ = ((i_z0 - i_z3) * u4_scale_u) >> 5; + *pi2_coeff_data++ = ((i_z1 + i_z2) * u4_scale_u) >> 5; + *pi2_coeff_data++ = ((i_z1 - i_z2) * u4_scale_u) >> 5; + + ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_data; + + SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,1); + } + + /******************************************************************/ + /* Chroma DC Block for V component */ + /******************************************************************/ + pi2_coeff_block += 64; + u4_bitstream_offset = ps_bitstrm->u4_ofst; + + NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 8); + + u4_code = pu1_cav_chromdc[u4_code]; + + FLUSHBITS(u4_bitstream_offset, ((u4_code & 0x7) + 1)); + ps_bitstrm->u4_ofst = u4_bitstream_offset; + + u4_total_coeff = (u4_code >> 5); + + if(u4_total_coeff) + { + WORD32 i_z0, i_z1, i_z2, i_z3; + tu_sblk4x4_coeff_data_t *ps_tu_4x4; + dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle; + WORD16 ai2_dc_coef[4]; + UWORD8 pu1_inv_scan[4] = + { 0, 1, 2, 3 }; + WORD16 *pi2_coeff_data = + (WORD16 *)ps_dec->pv_parse_tu_coeff_data; + + ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data; + + u4_trailing_ones = ((u4_code >> 3) & 0x3); + u4_total_coeff_tone = (u4_total_coeff << 16) | u4_trailing_ones; + ih264d_rest_of_residual_cav_chroma_dc_block(u4_total_coeff_tone, + ps_bitstrm); + + ai2_dc_coef[0] = 0; + ai2_dc_coef[1] = 0; + ai2_dc_coef[2] = 0; + ai2_dc_coef[3] = 0; + + ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4, + ai2_dc_coef, + pu1_inv_scan); + + /*-------------------------------------------------------------------*/ + /* Inverse 2x2 
transform and scaling of chroma DC */ + /*-------------------------------------------------------------------*/ + i_z0 = (ai2_dc_coef[0] + ai2_dc_coef[2]); + i_z1 = (ai2_dc_coef[0] - ai2_dc_coef[2]); + i_z2 = (ai2_dc_coef[1] - ai2_dc_coef[3]); + i_z3 = (ai2_dc_coef[1] + ai2_dc_coef[3]); + + /*-----------------------------------------------------------*/ + /* Scaling and storing the values back */ + /*-----------------------------------------------------------*/ + *pi2_coeff_data++ = ((i_z0 + i_z3) * u4_scale_v) >> 5; + *pi2_coeff_data++ = ((i_z0 - i_z3) * u4_scale_v) >> 5; + *pi2_coeff_data++ = ((i_z1 + i_z2) * u4_scale_v) >> 5; + *pi2_coeff_data++ = ((i_z1 - i_z2) * u4_scale_v) >> 5; + + ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_data; + + SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,2); + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_pmb_ref_index_cavlc_range1 */ +/* */ +/* Description : This function does the Cavlc TEV range =1 parsing of */ +/* reference index for a P MB. Range is 1 when */ +/* num_ref_idx_active_minus1 is 0 */ +/* */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : <Does it use any global variables?> */ +/* Processing : <Describe how the function operates - include algorithm */ +/* description> */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 19 09 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +void ih264d_parse_pmb_ref_index_cavlc_range1(UWORD32 u4_num_part, /* Number of partitions in MB */ + dec_bit_stream_t *ps_bitstrm, /* Pointer to bitstream Structure. 
*/ + WORD8 *pi1_ref_idx, /* pointer to reference index array */ + UWORD32 u4_num_ref_idx_active_minus1 /* Not used for range 1 */ + ) +{ + UWORD32 u4_i; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst; + UNUSED(u4_num_ref_idx_active_minus1); + for(u4_i = 0; u4_i < u4_num_part; u4_i++) + { + UWORD32 u4_ref_idx; + u4_ref_idx = ih264d_tev_range1(pu4_bitstream_off, pu4_bitstrm_buf); + + /* Storing Reference Idx Information */ + pi1_ref_idx[u4_i] = (WORD8)u4_ref_idx; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_pmb_ref_index_cavlc */ +/* */ +/* Description : This function does the Cavlc TEV range > 1 parsing of */ +/* reference index for a P MB. */ +/* Range > 1 when num_ref_idx_active_minus1 > 0 */ +/* */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : <Does it use any global variables?> */ +/* Processing : <Describe how the function operates - include algorithm */ +/* description> */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 19 09 2008 Jay Draft */ +/* */ +/*****************************************************************************/ + +WORD32 ih264d_parse_pmb_ref_index_cavlc(UWORD32 u4_num_part, /* Number of partitions in MB */ + dec_bit_stream_t *ps_bitstrm, /* Pointer to bitstream Structure. 
*/ + WORD8 *pi1_ref_idx, /* pointer to reference index array */ + UWORD32 u4_num_ref_idx_active_minus1 /* Number of active references - 1 */ + ) +{ + UWORD32 u4_i; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst; + + for(u4_i = 0; u4_i < u4_num_part; u4_i++) + { + UWORD32 u4_ref_idx; +//Inlined ih264d_uev + UWORD32 u4_bitstream_offset = *pu4_bitstream_off; + UWORD32 u4_word, u4_ldz; + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf); + u4_ldz = CLZ(u4_word); + /* Flush the ps_bitstrm */ + u4_bitstream_offset += (u4_ldz + 1); + /* Read the suffix from the ps_bitstrm */ + u4_word = 0; + if(u4_ldz) + GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz); + *pu4_bitstream_off = u4_bitstream_offset; + u4_ref_idx = ((1 << u4_ldz) + u4_word - 1); +//Inlined ih264d_uev + + if(u4_ref_idx > u4_num_ref_idx_active_minus1) + return ERROR_REF_IDX; + + /* Storing Reference Idx Information */ + pi1_ref_idx[u4_i] = (WORD8)u4_ref_idx; + } + return OK; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_bmb_ref_index_cavlc_range1 */ +/* */ +/* Description : This function does the Cavlc TEV range =1 parsing of */ +/* reference index for a B MB. 
Range is 1 when */ +/* num_ref_idx_active_minus1 is 0 */ +/* */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : <Does it use any global variables?> */ +/* Processing : <Describe how the function operates - include algorithm */ +/* description> */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 19 09 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +void ih264d_parse_bmb_ref_index_cavlc_range1(UWORD32 u4_num_part, /* Number of partitions in MB */ + dec_bit_stream_t *ps_bitstrm, /* Pointer to bitstream Structure. */ + WORD8 *pi1_ref_idx, /* pointer to reference index array */ + UWORD32 u4_num_ref_idx_active_minus1 /* Not used for range 1 */ + ) +{ + UWORD32 u4_i; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst; + UNUSED(u4_num_ref_idx_active_minus1); + for(u4_i = 0; u4_i < u4_num_part; u4_i++) + { + if(pi1_ref_idx[u4_i] > -1) + { + UWORD32 u4_ref_idx; + + u4_ref_idx = ih264d_tev_range1(pu4_bitstream_off, pu4_bitstrm_buf); + + /* Storing Reference Idx Information */ + pi1_ref_idx[u4_i] = (WORD8)u4_ref_idx; + } + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_bmb_ref_index_cavlc */ +/* */ +/* Description : This function does the Cavlc TEV range > 1 parsing of */ +/* reference index for a B MB. 
*/ +/* Range > 1 when num_ref_idx_active_minus1 > 0 */ +/* */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : <Does it use any global variables?> */ +/* Processing : <Describe how the function operates - include algorithm */ +/* description> */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 19 09 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_parse_bmb_ref_index_cavlc(UWORD32 u4_num_part, /* Number of partitions in MB */ + dec_bit_stream_t *ps_bitstrm, /* Pointer to bitstream Structure. */ + WORD8 *pi1_ref_idx, /* pointer to reference index array */ + UWORD32 u4_num_ref_idx_active_minus1 /* Number of active references - 1 */ + ) +{ + UWORD32 u4_i; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst; + + for(u4_i = 0; u4_i < u4_num_part; u4_i++) + { + if(pi1_ref_idx[u4_i] > -1) + { + UWORD32 u4_ref_idx; +//inlining ih264d_uev + UWORD32 u4_bitstream_offset = *pu4_bitstream_off; + UWORD32 u4_word, u4_ldz; + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf); + u4_ldz = CLZ(u4_word); + /* Flush the ps_bitstrm */ + u4_bitstream_offset += (u4_ldz + 1); + /* Read the suffix from the ps_bitstrm */ + u4_word = 0; + if(u4_ldz) + GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz); + *pu4_bitstream_off = u4_bitstream_offset; + u4_ref_idx = ((1 << u4_ldz) + u4_word - 1); +//inlining ih264d_uev + if(u4_ref_idx > u4_num_ref_idx_active_minus1) + return ERROR_REF_IDX; + + /* Storing Reference Idx Information */ 
+ pi1_ref_idx[u4_i] = (WORD8)u4_ref_idx; + } + } + return OK; +} +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_cavlc_parse_8x8block_both_available */ +/* */ +/* Description : This function does the residual parsing of 4 subblocks */ +/* in a 8x8 block when both top and left are available */ +/* */ +/* Inputs : pi2_coeff_block : pointer to residual block where */ +/* decoded and inverse scan coefficients are updated */ +/* */ +/* u4_sub_block_strd : indicates the number of sublocks */ +/* in a row. It is 4 for luma and 2 for chroma. */ +/* */ +/* u4_isdc : required to indicate 4x4 parse modules if the */ +/* current Mb is I_16x16/chroma DC coded. */ +/* */ +/* ps_dec : pointer to Decstruct (decoder context) */ +/* */ +/* pu1_top_nnz : top nnz pointer */ +/* */ +/* pu1_left_nnz : left nnz pointer */ +/* */ +/* Globals : No */ +/* Processing : Parsing for four subblocks in unrolled, top and left nnz */ +/* are updated on the fly. 
csbp is set in accordance to     */
+/*                  decoded numcoeff for the subblock index in raster order  */
+/*                                                                           */
+/*                  When u1_tran_form8x8 is set, ps_dec->pu1_inv_scan is     */
+/*                  pointed at the 8x8 CAVLC scan table of each sub-block    */
+/*                  (gau1_ih264d_inv_scan_int8x8_cavlc when                  */
+/*                  u1_mb_field_decodingflag is set, otherwise               */
+/*                  gau1_ih264d_inv_scan_prog8x8_cavlc). The value it had    */
+/*                  on entry is put back before returning, also on error     */
+/*                                                                           */
+/*  Outputs       : The updated residue buffer and nnzs; the csbp of the     */
+/*                  current block is written to *pu4_csbp                    */
+/*                                                                           */
+/*  Returns       : OK on success, otherwise the error code returned by the  */
+/*                  4x4 coefficient parser                                   */
+/*                                                                           */
+/*  Issues        : <List any issues or problems with this function>         */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         09 10 2008   Jay          Draft                                   */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_cavlc_parse_8x8block_both_available(WORD16 *pi2_coeff_block,
+                                                  UWORD32 u4_sub_block_strd,
+                                                  UWORD32 u4_isdc,
+                                                  dec_struct_t * ps_dec,
+                                                  UWORD8 *pu1_top_nnz,
+                                                  UWORD8 *pu1_left_nnz,
+                                                  UWORD8 u1_tran_form8x8,
+                                                  UWORD8 u1_mb_field_decodingflag,
+                                                  UWORD32 *pu4_csbp)
+{
+    UWORD32 u4_num_coeff, u4_n, u4_subblock_coded;
+    UWORD32 u4_top0, u4_top1;
+    WORD32 (**pf_cavlc_parse4x4coeff)(WORD16 *pi2_coeff_block,
+                                      UWORD32 u4_isdc,
+                                      WORD32 u4_n,
+                                      struct _DecStruct *ps_dec,
+                                      UWORD32 *pu4_dummy) =
+                                      ps_dec->pf_cavlc_parse4x4coeff;
+    UWORD32 u4_idx = 0;
+    /* Scan table in force on entry; restored on every exit path */
+    UWORD8 *puc_temp = ps_dec->pu1_inv_scan;
+    WORD32 ret;
+
+    *pu4_csbp = 0;
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 0                    */
+    /*------------------------------------------------------*/
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0];
+    }
+    /* Context = rounded average of the top and left neighbour nnz */
+    u4_n = (pu1_top_nnz[0] + pu1_left_nnz[0] + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top0 = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 1                    */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    /* Left neighbour of sub-block 1 is sub-block 0 (u4_num_coeff) */
+    u4_n = (pu1_top_nnz[1] + u4_num_coeff + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top1 = pu1_left_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 2                    */
+    /*------------------------------------------------------*/
+    /* Move to the first sub-block of the next row (raster order) */
+    u4_idx += (u4_sub_block_strd - 1);
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2];
+    }
+    else
+    {
+        pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK);
+    }
+    /* Top neighbour of sub-block 2 is sub-block 0 (u4_top0) */
+    u4_n = (u4_top0 + pu1_left_nnz[1] + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 3                    */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    /* Top neighbour is sub-block 1 (u4_top1), left is sub-block 2 */
+    u4_n = (u4_top1 + u4_num_coeff + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+restore_inv_scan:
+    /* ret is OK here when all four sub-blocks were parsed */
+    ps_dec->pu1_inv_scan = puc_temp;
+
+    return ret;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_cavlc_parse_8x8block_left_available               */
+/*                                                                           */
+/*  Description   : This function does the residual parsing of 4 subblocks   */
+/*                  in a 8x8 block when only left is available for block     */
+/*                                                                           */
+/*  Inputs        : pi2_coeff_block : pointer to residual block where        */
+/*                  decoded and inverse scan coefficients are updated        */
+/*                                                                           */
+/*                  u4_sub_block_strd : indicates the number of subblocks    */
+/*                  in a row. It is 4 for luma and 2 for chroma.             */
+/*                                                                           */
+/*                  u4_isdc : required to indicate 4x4 parse modules if the  */
+/*                  current Mb is I_16x16/chroma DC coded.                   */
+/*                                                                           */
+/*                  ps_dec : pointer to Decstruct (decoder context)          */
+/*                                                                           */
+/*                  pu1_top_nnz : top nnz pointer                            */
+/*                                                                           */
+/*                  pu1_left_nnz : left nnz pointer                          */
+/*                                                                           */
+/*  Globals       : No                                                       */
+/*  Processing    : Parsing for four subblocks is unrolled, top and left nnz */
+/*                  are updated on the fly. 
csbp is set in accordance to     */
+/*                  decoded numcoeff for the subblock index in raster order  */
+/*                                                                           */
+/*                  When u1_tran_form8x8 is set, ps_dec->pu1_inv_scan is     */
+/*                  pointed at the 8x8 CAVLC scan table of each sub-block    */
+/*                  (gau1_ih264d_inv_scan_int8x8_cavlc when                  */
+/*                  u1_mb_field_decodingflag is set, otherwise               */
+/*                  gau1_ih264d_inv_scan_prog8x8_cavlc). The value it had    */
+/*                  on entry is put back before returning, also on error     */
+/*                                                                           */
+/*  Outputs       : The updated residue buffer and nnzs; the csbp of the     */
+/*                  current block is written to *pu4_csbp                    */
+/*                                                                           */
+/*  Returns       : OK on success, otherwise the error code returned by the  */
+/*                  4x4 coefficient parser                                   */
+/*                                                                           */
+/*  Issues        : <List any issues or problems with this function>         */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         09 10 2008   Jay          Draft                                   */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_cavlc_parse_8x8block_left_available(WORD16 *pi2_coeff_block,
+                                                  UWORD32 u4_sub_block_strd,
+                                                  UWORD32 u4_isdc,
+                                                  dec_struct_t * ps_dec,
+                                                  UWORD8 *pu1_top_nnz,
+                                                  UWORD8 *pu1_left_nnz,
+                                                  UWORD8 u1_tran_form8x8,
+                                                  UWORD8 u1_mb_field_decodingflag,
+                                                  UWORD32 *pu4_csbp)
+{
+    UWORD32 u4_num_coeff, u4_n, u4_subblock_coded;
+    UWORD32 u4_top0, u4_top1;
+    WORD32 (**pf_cavlc_parse4x4coeff)(WORD16 *pi2_coeff_block,
+                                      UWORD32 u4_isdc,
+                                      WORD32 u4_n,
+                                      struct _DecStruct *ps_dec,
+                                      UWORD32 *pu4_dummy) =
+                                      ps_dec->pf_cavlc_parse4x4coeff;
+    UWORD32 u4_idx = 0;
+    /* Scan table in force on entry; restored on every exit path */
+    UWORD8 *puc_temp = ps_dec->pu1_inv_scan;
+    WORD32 ret;
+
+    *pu4_csbp = 0;
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 0                    */
+    /*------------------------------------------------------*/
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0];
+    }
+    /* No top neighbour: the context is the left nnz alone */
+    u4_n = pu1_left_nnz[0];
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top0 = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 1                    */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    /* No top neighbour: the context is sub-block 0's count alone */
+    u4_n = u4_num_coeff;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top1 = pu1_left_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 2                    */
+    /*------------------------------------------------------*/
+    /* Move to the first sub-block of the next row (raster order) */
+    u4_idx += (u4_sub_block_strd - 1);
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2];
+    }
+    else
+    {
+        pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK);
+    }
+    /* Top neighbour of sub-block 2 is sub-block 0 (u4_top0) */
+    u4_n = (u4_top0 + pu1_left_nnz[1] + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 3                    */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    /* Top neighbour is sub-block 1 (u4_top1), left is sub-block 2 */
+    u4_n = (u4_top1 + u4_num_coeff + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+restore_inv_scan:
+    /* ret is OK here when all four sub-blocks were parsed */
+    ps_dec->pu1_inv_scan = puc_temp;
+
+    return ret;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_cavlc_parse_8x8block_top_available                */
+/*                                                                           */
+/*  Description   : This function does the residual parsing of 4 subblocks   */
+/*                  in a 8x8 block when only top is available for block      */
+/*                                                                           */
+/*  Inputs        : pi2_coeff_block : pointer to residual block where        */
+/*                  decoded and inverse scan coefficients are updated        */
+/*                                                                           */
+/*                  u4_sub_block_strd : indicates the number of subblocks    */
+/*                  in a row. It is 4 for luma and 2 for chroma.             */
+/*                                                                           */
+/*                  u4_isdc : required to indicate 4x4 parse modules if the  */
+/*                  current Mb is I_16x16/chroma DC coded.                   */
+/*                                                                           */
+/*                  ps_dec : pointer to Decstruct (decoder context)          */
+/*                                                                           */
+/*                  pu1_top_nnz : top nnz pointer                            */
+/*                                                                           */
+/*                  pu1_left_nnz : left nnz pointer                          */
+/*                                                                           */
+/*  Globals       : No                                                       */
+/*  Processing    : Parsing for four subblocks is unrolled, top and left nnz */
+/*                  are updated on the fly. 
csbp is set in accordance to     */
+/*                  decoded numcoeff for the subblock index in raster order  */
+/*                                                                           */
+/*                  When u1_tran_form8x8 is set, ps_dec->pu1_inv_scan is     */
+/*                  pointed at the 8x8 CAVLC scan table of each sub-block    */
+/*                  (gau1_ih264d_inv_scan_int8x8_cavlc when                  */
+/*                  u1_mb_field_decodingflag is set, otherwise               */
+/*                  gau1_ih264d_inv_scan_prog8x8_cavlc). The value it had    */
+/*                  on entry is put back before returning, also on error     */
+/*                                                                           */
+/*  Outputs       : The updated residue buffer and nnzs; the csbp of the     */
+/*                  current block is written to *pu4_csbp                    */
+/*                                                                           */
+/*  Returns       : OK on success, otherwise the error code returned by the  */
+/*                  4x4 coefficient parser                                   */
+/*                                                                           */
+/*  Issues        : <List any issues or problems with this function>         */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         09 10 2008   Jay          Draft                                   */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_cavlc_parse_8x8block_top_available(WORD16 *pi2_coeff_block,
+                                                 UWORD32 u4_sub_block_strd,
+                                                 UWORD32 u4_isdc,
+                                                 dec_struct_t * ps_dec,
+                                                 UWORD8 *pu1_top_nnz,
+                                                 UWORD8 *pu1_left_nnz,
+                                                 UWORD8 u1_tran_form8x8,
+                                                 UWORD8 u1_mb_field_decodingflag,
+                                                 UWORD32 *pu4_csbp)
+{
+    UWORD32 u4_num_coeff, u4_n, u4_subblock_coded;
+    UWORD32 u4_top0, u4_top1;
+    WORD32 (**pf_cavlc_parse4x4coeff)(WORD16 *pi2_coeff_block,
+                                      UWORD32 u4_isdc,
+                                      WORD32 u4_n,
+                                      struct _DecStruct *ps_dec,
+                                      UWORD32 *pu4_dummy) =
+                                      ps_dec->pf_cavlc_parse4x4coeff;
+    UWORD32 u4_idx = 0;
+    /* Scan table in force on entry; restored on every exit path */
+    UWORD8 *puc_temp = ps_dec->pu1_inv_scan;
+    WORD32 ret;
+
+    *pu4_csbp = 0;
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 0                    */
+    /*------------------------------------------------------*/
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0];
+    }
+    /* No left neighbour: the context is the top nnz alone */
+    u4_n = pu1_top_nnz[0];
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top0 = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 1                    */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    /* Left neighbour of sub-block 1 is sub-block 0 (u4_num_coeff) */
+    u4_n = (pu1_top_nnz[1] + u4_num_coeff + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top1 = pu1_left_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 2                    */
+    /*------------------------------------------------------*/
+    /* Move to the first sub-block of the next row (raster order) */
+    u4_idx += (u4_sub_block_strd - 1);
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2];
+    }
+    else
+    {
+        pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK);
+    }
+    /* No left neighbour: the context is sub-block 0's count alone */
+    u4_n = u4_top0;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 3                    */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    /* Top neighbour is sub-block 1 (u4_top1), left is sub-block 2 */
+    u4_n = (u4_top1 + u4_num_coeff + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+restore_inv_scan:
+    /* ret is OK here when all four sub-blocks were parsed */
+    ps_dec->pu1_inv_scan = puc_temp;
+
+    return ret;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_cavlc_parse_8x8block_none_available               */
+/*                                                                           */
+/*  Description   : This function does the residual parsing of 4 subblocks   */
+/*                  in a 8x8 block when none of the neighbours are available */
+/*                                                                           */
+/*  Inputs        : pi2_coeff_block : pointer to residual block where        */
+/*                  decoded and inverse scan coefficients are updated        */
+/*                                                                           */
+/*                  u4_sub_block_strd : indicates the number of subblocks    */
+/*                  in a row. It is 4 for luma and 2 for chroma.             */
+/*                                                                           */
+/*                  u4_isdc : required to indicate 4x4 parse modules if the  */
+/*                  current Mb is I_16x16/chroma DC coded.                   */
+/*                                                                           */
+/*                  ps_dec : pointer to Decstruct (decoder context)          */
+/*                                                                           */
+/*                  pu1_top_nnz : top nnz pointer                            */
+/*                                                                           */
+/*                  pu1_left_nnz : left nnz pointer                          */
+/*                                                                           */
+/*  Globals       : No                                                       */
+/*  Processing    : Parsing for four subblocks is unrolled, top and left nnz */
+/*                  are updated on the fly. 
csbp is set in accordance to */ +/* decoded numcoeff for the subblock index in raster order */ +/* */ +/* Outputs : The updated residue buffer, nnzs and csbp current block */ +/* */ +/* Returns : Returns the coded sub block pattern csbp for the block */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 09 10 2008 Jay Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_cavlc_parse_8x8block_none_available(WORD16 *pi2_coeff_block, + UWORD32 u4_sub_block_strd, + UWORD32 u4_isdc, + dec_struct_t * ps_dec, + UWORD8 *pu1_top_nnz, + UWORD8 *pu1_left_nnz, + UWORD8 u1_tran_form8x8, + UWORD8 u1_mb_field_decodingflag, + UWORD32 *pu4_csbp) +{ + UWORD32 u4_num_coeff, u4_n, u4_subblock_coded; + UWORD32 u4_top0, u4_top1; + UWORD32 *pu4_dummy; + WORD32 (**pf_cavlc_parse4x4coeff)(WORD16 *pi2_coeff_block, + UWORD32 u4_isdc, + WORD32 u4_n, + struct _DecStruct *ps_dec, + UWORD32 *pu4_dummy) = + ps_dec->pf_cavlc_parse4x4coeff; + UWORD32 u4_idx = 0; + UWORD8 *puc_temp; + WORD32 ret; + + *pu4_csbp = 0; + puc_temp = ps_dec->pu1_inv_scan; + + /*------------------------------------------------------*/ + /* Residual 4x4 decoding: SubBlock 0 */ + /*------------------------------------------------------*/ + if(u1_tran_form8x8) + { + if(!u1_mb_field_decodingflag) + { + ps_dec->pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0]; + } + else + { + ps_dec->pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0]; + } + } + ret = pf_cavlc_parse4x4coeff[0](pi2_coeff_block, u4_isdc, 0, + ps_dec, &u4_num_coeff); + if(ret != OK) + return ret; + + u4_top0 = u4_num_coeff; + u4_subblock_coded = (u4_num_coeff != 0); + INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded); + + /*------------------------------------------------------*/ + /* Residual 4x4 decoding: SubBlock 1 */ + 
/*------------------------------------------------------*/ + u4_idx++; + if(u1_tran_form8x8) + { + if(!u1_mb_field_decodingflag) + { + ps_dec->pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1]; + } + else + { + ps_dec->pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1]; + } + } + else + { + pi2_coeff_block += NUM_COEFFS_IN_4x4BLK; + } + u4_n = u4_num_coeff; + ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc, + u4_n, ps_dec, &u4_num_coeff); + if(ret != OK) + return ret; + + u4_top1 = pu1_left_nnz[0] = u4_num_coeff; + u4_subblock_coded = (u4_num_coeff != 0); + INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded); + + /*------------------------------------------------------*/ + /* Residual 4x4 decoding: SubBlock 2 */ + /*------------------------------------------------------*/ + u4_idx += (u4_sub_block_strd - 1); + if(u1_tran_form8x8) + { + if(!u1_mb_field_decodingflag) + { + ps_dec->pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2]; + } + else + { + ps_dec->pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2]; + } + } + else + { + pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK); + } + u4_n = u4_top0; + ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc, + u4_n, ps_dec, &u4_num_coeff); + if(ret != OK) + return ret; + + pu1_top_nnz[0] = u4_num_coeff; + u4_subblock_coded = (u4_num_coeff != 0); + INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded); + + /*------------------------------------------------------*/ + /* Residual 4x4 decoding: SubBlock 3 */ + /*------------------------------------------------------*/ + u4_idx++; + if(u1_tran_form8x8) + { + if(!u1_mb_field_decodingflag) + { + ps_dec->pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3]; + } + else + { + ps_dec->pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3]; + } + } + else + { + pi2_coeff_block += NUM_COEFFS_IN_4x4BLK; + } + u4_n = (u4_top1 + u4_num_coeff + 1) >> 1; + ret = 
pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc, + u4_n, ps_dec, &u4_num_coeff); + if(ret != OK) + return ret; + + pu1_top_nnz[1] = pu1_left_nnz[1] = u4_num_coeff; + u4_subblock_coded = (u4_num_coeff != 0); + INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded); + + ps_dec->pu1_inv_scan = puc_temp; + + return OK; +} + +/*! + ************************************************************************** + * \if Function name : ih264d_parse_residual4x4_cavlc \endif + * + * \brief + * This function parses CAVLC syntax of a Luma and Chroma AC Residuals. + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ + +WORD32 ih264d_parse_residual4x4_cavlc(dec_struct_t * ps_dec, + dec_mb_info_t *ps_cur_mb_info, + UWORD8 u1_offset) +{ + UWORD8 u1_cbp = ps_cur_mb_info->u1_cbp; + UWORD16 ui16_csbp = 0; + UWORD32 u4_nbr_avl; + WORD16 *pi2_residual_buf; + + UWORD8 u1_is_top_mb_avail; + UWORD8 u1_is_left_mb_avail; + + UWORD8 *pu1_top_nnz = ps_cur_mb_info->ps_curmb->pu1_nnz_y; + UWORD8 *pu1_left_nnz = ps_dec->pu1_left_nnz_y; + WORD16 *pi2_coeff_block = NULL; + UWORD32 *pu4_dummy; + WORD32 ret; + + WORD32 (**pf_cavlc_parse_8x8block)(WORD16 *pi2_coeff_block, + UWORD32 u4_sub_block_strd, + UWORD32 u4_isdc, + struct _DecStruct *ps_dec, + UWORD8 *pu1_top_nnz, + UWORD8 *pu1_left_nnz, + UWORD8 u1_tran_form8x8, + UWORD8 u1_mb_field_decodingflag, + UWORD32 *pu4_dummy) = ps_dec->pf_cavlc_parse_8x8block; + + + { + UWORD8 uc_temp = ps_dec->u1_mb_ngbr_availablity; + u1_is_top_mb_avail = BOOLEAN(uc_temp & TOP_MB_AVAILABLE_MASK); + u1_is_left_mb_avail = BOOLEAN(uc_temp & LEFT_MB_AVAILABLE_MASK); + u4_nbr_avl = (u1_is_top_mb_avail << 1) | u1_is_left_mb_avail; + } + + ps_cur_mb_info->u1_qp_div6 = ps_dec->u1_qp_y_div6; + ps_cur_mb_info->u1_qp_rem6 = ps_dec->u1_qp_y_rem6; + ps_cur_mb_info->u1_qpc_div6 = ps_dec->u1_qp_u_div6; + ps_cur_mb_info->u1_qpc_rem6 = ps_dec->u1_qp_u_rem6; + ps_cur_mb_info->u1_qpcr_div6 = 
ps_dec->u1_qp_v_div6; + ps_cur_mb_info->u1_qpcr_rem6 = ps_dec->u1_qp_v_rem6; + + if(u1_cbp & 0xf) + { + pu1_top_nnz[0] = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[0]; + pu1_top_nnz[1] = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[1]; + pu1_top_nnz[2] = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[2]; + pu1_top_nnz[3] = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[3]; + + /*******************************************************************/ + /* Block 0 residual decoding, check cbp and proceed (subblock = 0) */ + /*******************************************************************/ + if(!(u1_cbp & 0x1)) + { + *(UWORD16 *)(pu1_top_nnz) = 0; + *(UWORD16 *)(pu1_left_nnz) = 0; + + } + else + { + UWORD32 u4_temp; + ret = pf_cavlc_parse_8x8block[u4_nbr_avl]( + pi2_coeff_block, 4, u1_offset, ps_dec, pu1_top_nnz, + pu1_left_nnz, ps_cur_mb_info->u1_tran_form8x8, + ps_cur_mb_info->u1_mb_field_decodingflag, &u4_temp); + if(ret != OK) + return ret; + ui16_csbp = u4_temp; + } + + /*******************************************************************/ + /* Block 1 residual decoding, check cbp and proceed (subblock = 2) */ + /*******************************************************************/ + if(ps_cur_mb_info->u1_tran_form8x8) + { + pi2_coeff_block += 64; + } + else + { + pi2_coeff_block += (2 * NUM_COEFFS_IN_4x4BLK); + } + + if(!(u1_cbp & 0x2)) + { + *(UWORD16 *)(pu1_top_nnz + 2) = 0; + *(UWORD16 *)(pu1_left_nnz) = 0; + } + else + { + UWORD32 u4_temp = (u4_nbr_avl | 0x1); + ret = pf_cavlc_parse_8x8block[u4_temp]( + pi2_coeff_block, 4, u1_offset, ps_dec, + (pu1_top_nnz + 2), pu1_left_nnz, + ps_cur_mb_info->u1_tran_form8x8, + ps_cur_mb_info->u1_mb_field_decodingflag, &u4_temp); + if(ret != OK) + return ret; + ui16_csbp |= (u4_temp << 2); + } + + /*******************************************************************/ + /* Block 2 residual decoding, check cbp and proceed (subblock = 8) */ + /*******************************************************************/ + if(ps_cur_mb_info->u1_tran_form8x8) + { + 
pi2_coeff_block += 64; + } + else + { + pi2_coeff_block += (6 * NUM_COEFFS_IN_4x4BLK); + } + + if(!(u1_cbp & 0x4)) + { + *(UWORD16 *)(pu1_top_nnz) = 0; + *(UWORD16 *)(pu1_left_nnz + 2) = 0; + } + else + { + UWORD32 u4_temp = (u4_nbr_avl | 0x2); + ret = pf_cavlc_parse_8x8block[u4_temp]( + pi2_coeff_block, 4, u1_offset, ps_dec, pu1_top_nnz, + (pu1_left_nnz + 2), ps_cur_mb_info->u1_tran_form8x8, + ps_cur_mb_info->u1_mb_field_decodingflag, &u4_temp); + if(ret != OK) + return ret; + ui16_csbp |= (u4_temp << 8); + } + + /*******************************************************************/ + /* Block 3 residual decoding, check cbp and proceed (subblock = 10)*/ + /*******************************************************************/ + if(ps_cur_mb_info->u1_tran_form8x8) + { + pi2_coeff_block += 64; + } + else + { + pi2_coeff_block += (2 * NUM_COEFFS_IN_4x4BLK); + } + + if(!(u1_cbp & 0x8)) + { + *(UWORD16 *)(pu1_top_nnz + 2) = 0; + *(UWORD16 *)(pu1_left_nnz + 2) = 0; + } + else + { + UWORD32 u4_temp; + ret = pf_cavlc_parse_8x8block[0x3]( + pi2_coeff_block, 4, u1_offset, ps_dec, + (pu1_top_nnz + 2), (pu1_left_nnz + 2), + ps_cur_mb_info->u1_tran_form8x8, + ps_cur_mb_info->u1_mb_field_decodingflag, &u4_temp); + if(ret != OK) + return ret; + ui16_csbp |= (u4_temp << 10); + } + } + else + { + *(UWORD32 *)(pu1_top_nnz) = 0; + *(UWORD32 *)(pu1_left_nnz) = 0; + } + + ps_cur_mb_info->u2_luma_csbp = ui16_csbp; + ps_cur_mb_info->ps_curmb->u2_luma_csbp = ui16_csbp; + + { + UWORD16 u2_chroma_csbp = 0; + ps_cur_mb_info->u2_chroma_csbp = 0; + pu1_top_nnz = ps_cur_mb_info->ps_curmb->pu1_nnz_uv; + pu1_left_nnz = ps_dec->pu1_left_nnz_uv; + + u1_cbp >>= 4; + /*--------------------------------------------------------------------*/ + /* if Chroma Component not present OR no ac values present */ + /* Set the values of N to zero */ + /*--------------------------------------------------------------------*/ + if(u1_cbp == CBPC_ALLZERO || u1_cbp == CBPC_ACZERO) + { + *(UWORD32 *)(pu1_top_nnz) = 0; + 
*(UWORD32 *)(pu1_left_nnz) = 0; + } + + if(u1_cbp == CBPC_ALLZERO) + { + return (0); + } + /*--------------------------------------------------------------------*/ + /* Decode Chroma DC values */ + /*--------------------------------------------------------------------*/ + { + WORD32 u4_scale_u; + WORD32 u4_scale_v; + WORD32 i4_mb_inter_inc; + u4_scale_u = ps_dec->pu2_quant_scale_u[0] << ps_dec->u1_qp_u_div6; + u4_scale_v = ps_dec->pu2_quant_scale_v[0] << ps_dec->u1_qp_v_div6; + i4_mb_inter_inc = (!((ps_cur_mb_info->ps_curmb->u1_mb_type == I_4x4_MB) + || (ps_cur_mb_info->ps_curmb->u1_mb_type == I_16x16_MB))) + * 3; + + if(ps_dec->s_high_profile.u1_scaling_present) + { + u4_scale_u *= + ps_dec->s_high_profile.i2_scalinglist4x4[i4_mb_inter_inc + + 1][0]; + u4_scale_v *= + ps_dec->s_high_profile.i2_scalinglist4x4[i4_mb_inter_inc + + 2][0]; + + } + else + { + u4_scale_u <<= 4; + u4_scale_v <<= 4; + } + + ih264d_cavlc_parse_chroma_dc(ps_cur_mb_info,pi2_coeff_block, ps_dec->ps_bitstrm, + u4_scale_u, u4_scale_v, + i4_mb_inter_inc); + } + + if(u1_cbp == CBPC_ACZERO) + return (0); + + pu1_top_nnz[0] = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv[0]; + pu1_top_nnz[1] = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv[1]; + pu1_top_nnz[2] = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv[2]; + pu1_top_nnz[3] = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv[3]; + /*--------------------------------------------------------------------*/ + /* Decode Chroma AC values */ + /*--------------------------------------------------------------------*/ + { + UWORD32 u4_temp; + /*****************************************************************/ + /* U Block residual decoding, check cbp and proceed (subblock=0)*/ + /*****************************************************************/ + ret = pf_cavlc_parse_8x8block[u4_nbr_avl]( + pi2_coeff_block, 2, 1, ps_dec, pu1_top_nnz, + pu1_left_nnz, 0, 0, &u4_temp); + if(ret != OK) + return ret; + u2_chroma_csbp = u4_temp; + + pi2_coeff_block += MB_CHROM_SIZE; + 
/*****************************************************************/ + /* V Block residual decoding, check cbp and proceed (subblock=1)*/ + /*****************************************************************/ + ret = pf_cavlc_parse_8x8block[u4_nbr_avl](pi2_coeff_block, 2, 1, + ps_dec, + (pu1_top_nnz + 2), + (pu1_left_nnz + 2), 0, + 0, &u4_temp); + if(ret != OK) + return ret; + u2_chroma_csbp |= (u4_temp << 4); + } + + ps_cur_mb_info->u2_chroma_csbp = u2_chroma_csbp; + } + return OK; +} diff --git a/decoder/ih264d_parse_cavlc.h b/decoder/ih264d_parse_cavlc.h new file mode 100755 index 0000000..06105a3 --- /dev/null +++ b/decoder/ih264d_parse_cavlc.h @@ -0,0 +1,165 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef _IH264D_PARSE_CAVLC_H_ +#define _IH264D_PARSE_CAVLC_H_ +/*! 
+ **************************************************************************
+ * \file ih264d_parse_cavlc.h
+ *
+ * \brief
+ *    Declaration of UVLC and CAVLC functions
+ *
+ * \date
+ *    18/12/2002
+ *
+ * \author  AI
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_structs.h"
+#include "ih264d_cabac.h"
+
+enum cavlcTableNum
+{
+    tableTotalZeroOffset,
+    tableTotalZero,
+    tableRunBefore,
+    codeGx,
+    chromTab,
+    offsetNumVlcTab
+};
+
+WORD32 ih264d_uvlc(dec_bit_stream_t *ps_bitstrm,
+                   UWORD32 u4_range,
+                   UWORD32 *pi_bitstrm_ofst,
+                   UWORD8 u1_flag,
+                   UWORD32 u4_bitstrm_ofst,
+                   UWORD32 *pi_bitstrm_buf);
+
+UWORD32 ih264d_uev(UWORD32 *pu4_bitstrm_ofst, UWORD32 *pu4_bitstrm_buf);
+
+WORD32 ih264d_sev(UWORD32 *pu4_bitstrm_ofst, UWORD32 *pu4_bitstrm_buf);
+
+UWORD32 ih264d_tev_range1(UWORD32 *pu4_bitstrm_ofst,
+                          UWORD32 *pu4_bitstrm_buf);
+
+UWORD8 RestOfResidualBlockCavlc(WORD16 *pi2_coeff_block,
+                                UWORD32 u1_ofst_is_dc_max_coef_scale_fact,
+                                UWORD32 u4_total_coeff_trail_one,
+                                dec_bit_stream_t *ps_bitstrm,
+                                UWORD8 *pu1_invscan);
+
+WORD32 ih264d_cavlc_4x4res_block_totalcoeff_1( UWORD32 u4_isdc,
+                                              UWORD32 u4_total_coeff_trail_one,
+                                              dec_bit_stream_t *ps_bitstrm);
+
+WORD32 ih264d_cavlc_4x4res_block_totalcoeff_2to10(UWORD32 u4_isdc,
+                                                  UWORD32 u4_total_coeff_trail_one,
+                                                  dec_bit_stream_t *ps_bitstrm);
+
+WORD32 ih264d_cavlc_4x4res_block_totalcoeff_11to16(UWORD32 u4_isdc,
+                                                   UWORD32 u4_total_coeff_trail_one,
+                                                   dec_bit_stream_t *ps_bitstrm);
+/* 4x4 coefficient parsers. Their signature matches the entries of
+ * ps_dec->pf_cavlc_parse4x4coeff, which the 8x8 block parsers index with
+ * (u4_n > 7); presumably _n0to7 is entry 0 and _n8 is entry 1 - confirm
+ * against the table initialisation. */
+WORD32 ih264d_cavlc_parse4x4coeff_n0to7(WORD16 *pi2_coeff_block,
+                                        UWORD32 u4_isdc,
+                                        WORD32 u4_n,
+                                        dec_struct_t *ps_dec,
+                                        UWORD32 *pu4_total_coeff);
+
+WORD32 ih264d_cavlc_parse4x4coeff_n8(WORD16 *pi2_coeff_block,
+                                     UWORD32 u4_isdc,
+                                     WORD32 u4_n,
+                                     dec_struct_t *ps_dec,
+                                     UWORD32 *pu4_total_coeff);
+
+void ih264d_cavlc_parse_chroma_dc(dec_mb_info_t *ps_cur_mb_info,
+                                  WORD16 *pi2_coeff_block,
+                                  dec_bit_stream_t *ps_bitstrm,
+                                  UWORD32 u4_scale_u,
+                                  UWORD32 u4_scale_v,
+                                  WORD32 i4_mb_inter_inc);
+/* 8x8 block residual parsers, one per combination of top/left neighbour
+ * availability. All four share one signature: they return OK or the error
+ * code of the 4x4 coefficient parser, and write the coded sub block pattern
+ * of the block to *pu4_csbp. */
+WORD32 ih264d_cavlc_parse_8x8block_none_available(WORD16 *pi2_coeff_block,
+                                                  UWORD32 u4_sub_block_strd,
+                                                  UWORD32 u4_isdc,
+                                                  dec_struct_t * ps_dec,
+                                                  UWORD8 *pu1_top_nnz,
+                                                  UWORD8 *pu1_left_nnz,
+                                                  UWORD8 u1_tran_form8x8,
+                                                  UWORD8 u1_mb_field_decodingflag,
+                                                  UWORD32 *pu4_csbp);
+
+WORD32 ih264d_cavlc_parse_8x8block_left_available(WORD16 *pi2_coeff_block,
+                                                  UWORD32 u4_sub_block_strd,
+                                                  UWORD32 u4_isdc,
+                                                  dec_struct_t * ps_dec,
+                                                  UWORD8 *pu1_top_nnz,
+                                                  UWORD8 *pu1_left_nnz,
+                                                  UWORD8 u1_tran_form8x8,
+                                                  UWORD8 u1_mb_field_decodingflag,
+                                                  UWORD32 *pu4_csbp);
+
+WORD32 ih264d_cavlc_parse_8x8block_top_available(WORD16 *pi2_coeff_block,
+                                                 UWORD32 u4_sub_block_strd,
+                                                 UWORD32 u4_isdc,
+                                                 dec_struct_t * ps_dec,
+                                                 UWORD8 *pu1_top_nnz,
+                                                 UWORD8 *pu1_left_nnz,
+                                                 UWORD8 u1_tran_form8x8,
+                                                 UWORD8 u1_mb_field_decodingflag,
+                                                 UWORD32 *pu4_csbp);
+
+WORD32 ih264d_cavlc_parse_8x8block_both_available(WORD16 *pi2_coeff_block,
+                                                  UWORD32 u4_sub_block_strd,
+                                                  UWORD32 u4_isdc,
+                                                  dec_struct_t * ps_dec,
+                                                  UWORD8 *pu1_top_nnz,
+                                                  UWORD8 *pu1_left_nnz,
+                                                  UWORD8 u1_tran_form8x8,
+                                                  UWORD8 u1_mb_field_decodingflag,
+                                                  UWORD32 *pu4_csbp);
+
+WORD8 ResidualBlockChromaDC(WORD16 *pi2_level, dec_bit_stream_t *ps_bitstrm);
+
+void ih264d_parse_pmb_ref_index_cavlc_range1(UWORD32 u4_num_part,
+                                             dec_bit_stream_t *ps_bitstrm,
+                                             WORD8 *pi1_ref_idx,
+                                             UWORD32 u4_num_ref_idx_active_minus1);
+
+WORD32 ih264d_parse_pmb_ref_index_cavlc(UWORD32 u4_num_part,
+                                        dec_bit_stream_t *ps_bitstrm,
+                                        WORD8 *pi1_ref_idx,
+                                        UWORD32 u4_num_ref_idx_active_minus1);
+
+void ih264d_parse_bmb_ref_index_cavlc_range1(UWORD32 u4_num_part,
+                                             dec_bit_stream_t *ps_bitstrm,
+                                             WORD8 *pi1_ref_idx,
+                                             UWORD32 u4_num_ref_idx_active_minus1);
+
+WORD32 ih264d_parse_bmb_ref_index_cavlc(UWORD32 u4_num_part,
+                                        dec_bit_stream_t *ps_bitstrm,
+                                        WORD8 *pi1_ref_idx,
+                                        UWORD32 u4_num_ref_idx_active_minus1);
+
+#endif /* _IH264D_PARSE_CAVLC_H_ */
diff --git a/decoder/ih264d_parse_headers.c b/decoder/ih264d_parse_headers.c
new file mode 100755
index 0000000..9458d6b
--- /dev/null
+++ b/decoder/ih264d_parse_headers.c
@@ -0,0 +1,1204 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ ************************************************************************** + * \file ih264d_parse_headers.c + * + * \brief + * Contains High level syntax[above slice] parsing routines + * + * \date + * 19/12/2002 + * + * \author AI + ************************************************************************** + */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_bitstrm.h" +#include "ih264d_structs.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_defs.h" +#include "ih264d_defs.h" +#include "ih264d_defs.h" +#include "ih264d_parse_slice.h" +#include "ih264d_tables.h" +#include "ih264d_utils.h" +#include "ih264d_nal.h" +#include "ih264d_deblocking.h" + +#include "ih264d_mem_request.h" +#include "ih264d_debug.h" +#include "ih264d_error_handler.h" +#include "ih264d_mb_utils.h" +#include "ih264d_sei.h" +#include "ih264d_vui.h" +#include "ih264d_thread_parse_decode.h" +#include "ih264d_thread_compute_bs.h" +#include "ih264d_quant_scaling.h" +#include "ih264d_defs.h" +#include "ivd.h" + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_slice_partition */ +/* */ +/* Description : This function is intended to parse and decode slice part */ +/* itions. Currently it's not implemented. 
Decoder will */ +/* print a message, skips this NAL and continues */ +/* Inputs : ps_dec Decoder parameters */ +/* ps_bitstrm Bitstream */ +/* Globals : None */ +/* Processing : This functionality needs to be implemented */ +/* Outputs : None */ +/* Returns : None */ +/* */ +/* Issues : Not implemented */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 06 05 2002 NS Draft */ +/* */ +/*****************************************************************************/ + +WORD32 ih264d_parse_slice_partition(dec_struct_t * ps_dec, + dec_bit_stream_t * ps_bitstrm) +{ + H264_DEC_DEBUG_PRINT("\nSlice partition not supported"); + UNUSED(ps_dec); + UNUSED(ps_bitstrm); + return (0); +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_sei */ +/* */ +/* Description : This function is intended to parse and decode SEI */ +/* Currently it's not implemented. Decoder will print a */ +/* message, skips this NAL and continues */ +/* Inputs : ps_dec Decoder parameters */ +/* ps_bitstrm Bitstream */ +/* Globals : None */ +/* Processing : This functionality needs to be implemented */ +/* Outputs : None */ +/* Returns : None */ +/* */ +/* Issues : Not implemented */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 06 05 2002 NS Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_parse_sei(dec_struct_t * ps_dec, dec_bit_stream_t * ps_bitstrm) +{ + UNUSED(ps_dec); + UNUSED(ps_bitstrm); + return (0); +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_filler_data */ +/* */ +/* Description : This function is intended to parse and decode filler */ +/* data NAL. Currently it's not implemented. 
Decoder will */ +/* print a message, skips this NAL and continues */ +/* Inputs : ps_dec Decoder parameters */ +/* ps_bitstrm Bitstream */ +/* Globals : None */ +/* Processing : This functionality needs to be implemented */ +/* Outputs : None */ +/* Returns : None */ +/* */ +/* Issues : Not implemented */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 06 05 2002 NS Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_parse_filler_data(dec_struct_t * ps_dec, + dec_bit_stream_t * ps_bitstrm) +{ + UNUSED(ps_dec); + UNUSED(ps_bitstrm); + return (0); +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_end_of_stream */ +/* */ +/* Description : This function is intended to parse and decode end of */ +/* sequence. Currently it's not implemented. Decoder will */ +/* print a message, skips this NAL and continues */ +/* Inputs : ps_dec Decoder parameters */ +/* Globals : None */ +/* Processing : This functionality needs to be implemented */ +/* Outputs : None */ +/* Returns : None */ +/* */ +/* Issues : Not implemented */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 06 05 2002 NS Draft */ +/* */ +/*****************************************************************************/ +void ih264d_parse_end_of_stream(dec_struct_t * ps_dec) +{ + UNUSED(ps_dec); + return; +} + +/*! 
+ ************************************************************************** + * \if Function name : ih264d_parse_pps \endif + * + * \brief + * Decodes Picture Parameter set + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +WORD32 ih264d_parse_pps(dec_struct_t * ps_dec, dec_bit_stream_t * ps_bitstrm) +{ + UWORD8 uc_temp; + dec_seq_params_t * ps_sps = NULL; + dec_pic_params_t * ps_pps = NULL; + UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstrm_ofst = &ps_dec->ps_bitstrm->u4_ofst; + + /* Variables used for error resilience checks */ + UWORD32 u4_temp; + WORD32 i_temp; + + /* For High profile related syntax elements */ + UWORD8 u1_more_data_flag; + WORD32 i4_i; + + /*--------------------------------------------------------------------*/ + /* Decode pic_parameter_set_id and find corresponding pic params */ + /*--------------------------------------------------------------------*/ + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp & MASK_ERR_PIC_SET_ID) + return ERROR_INV_SPS_PPS_T; + ps_pps = ps_dec->pv_scratch_sps_pps; + *ps_pps = ps_dec->ps_pps[u4_temp]; + ps_pps->u1_pic_parameter_set_id = (WORD8)u4_temp; + COPYTHECONTEXT("PPS: pic_parameter_set_id",ps_pps->u1_pic_parameter_set_id); + + /************************************************/ + /* initilization of High profile syntax element */ + /************************************************/ + ps_pps->i4_transform_8x8_mode_flag = 0; + ps_pps->i4_pic_scaling_matrix_present_flag = 0; + + /*--------------------------------------------------------------------*/ + /* Decode seq_parameter_set_id and map it to a seq_parameter_set */ + /*--------------------------------------------------------------------*/ + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp & MASK_ERR_SEQ_SET_ID) + return ERROR_INV_SPS_PPS_T; + COPYTHECONTEXT("PPS: seq_parameter_set_id",u4_temp); 
+ ps_sps = &ps_dec->ps_sps[u4_temp]; + ps_pps->ps_sps = ps_sps; + + /*--------------------------------------------------------------------*/ + /* Decode entropy_coding_mode */ + /*--------------------------------------------------------------------*/ + ps_pps->u1_entropy_coding_mode = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("PPS: entropy_coding_mode_flag",ps_pps->u1_entropy_coding_mode); + + ps_pps->u1_pic_order_present_flag = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("PPS: pic_order_present_flag",ps_pps->u1_pic_order_present_flag); + + /*--------------------------------------------------------------------*/ + /* Decode num_slice_groups_minus1 */ + /*--------------------------------------------------------------------*/ + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf) + 1; + if(u4_temp != 1) + { + UWORD32 i4_error_code; + i4_error_code = ERROR_FEATURE_UNAVAIL; + return i4_error_code; + } + ps_pps->u1_num_slice_groups = u4_temp; + COPYTHECONTEXT("PPS: num_slice_groups_minus1",ps_pps->u1_num_slice_groups -1); + + /*--------------------------------------------------------------------*/ + /* Other parameter set values */ + /*--------------------------------------------------------------------*/ + u4_temp = 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp > H264_MAX_REF_IDX) + return ERROR_REF_IDX; + ps_pps->u1_num_ref_idx_lx_active[0] = u4_temp; + COPYTHECONTEXT("PPS: num_ref_idx_l0_active_minus1", + ps_pps->u1_num_ref_idx_lx_active[0] - 1); + + u4_temp = 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp > H264_MAX_REF_IDX) + return ERROR_REF_IDX; + ps_pps->u1_num_ref_idx_lx_active[1] = u4_temp; + COPYTHECONTEXT("PPS: num_ref_idx_l1_active_minus1", + ps_pps->u1_num_ref_idx_lx_active[1] - 1); + + ps_pps->u1_wted_pred_flag = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("PPS: weighted prediction u4_flag",ps_pps->u1_wted_pred_flag); + uc_temp = ih264d_get_bits_h264(ps_bitstrm, 2); + COPYTHECONTEXT("PPS: 
weighted_bipred_idc",uc_temp); + ps_pps->u1_wted_bipred_idc = uc_temp; + + if(ps_pps->u1_wted_bipred_idc > MAX_WEIGHT_BIPRED_IDC) + return ERROR_INV_SPS_PPS_T; + + i_temp = 26 + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + + if((i_temp < 0) || (i_temp > 51)) + return ERROR_INV_RANGE_QP_T; + + ps_pps->u1_pic_init_qp = i_temp; + COPYTHECONTEXT("PPS: pic_init_qp_minus26",ps_pps->u1_pic_init_qp - 26); + + i_temp = 26 + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + + if((i_temp < 0) || (i_temp > 51)) + return ERROR_INV_RANGE_QP_T; + + ps_pps->u1_pic_init_qs = i_temp; + COPYTHECONTEXT("PPS: pic_init_qs_minus26",ps_pps->u1_pic_init_qs - 26); + + i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if((i_temp < -12) || (i_temp > 12)) + return ERROR_INV_RANGE_QP_T; + ps_pps->i1_chroma_qp_index_offset = i_temp; + COPYTHECONTEXT("PPS: chroma_qp_index_offset",ps_pps->i1_chroma_qp_index_offset); + + /***************************************************************************/ + /* initialize second_chroma_qp_index_offset to i1_chroma_qp_index_offset if */ + /* second_chroma_qp_index_offset is not present in bit-ps_bitstrm */ + /***************************************************************************/ + ps_pps->i1_second_chroma_qp_index_offset = + ps_pps->i1_chroma_qp_index_offset; + + ps_pps->u1_deblocking_filter_parameters_present_flag = ih264d_get_bit_h264( + ps_bitstrm); + COPYTHECONTEXT("PPS: deblocking_filter_control_present_flag", + ps_pps->u1_deblocking_filter_parameters_present_flag); + ps_pps->u1_constrained_intra_pred_flag = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("PPS: constrained_intra_pred_flag", + ps_pps->u1_constrained_intra_pred_flag); + ps_pps->u1_redundant_pic_cnt_present_flag = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("PPS: redundant_pic_cnt_present_flag", + ps_pps->u1_redundant_pic_cnt_present_flag); + + /* High profile related syntax elements */ + u1_more_data_flag = MORE_RBSP_DATA(ps_bitstrm); + if(u1_more_data_flag && 
(ps_pps->ps_sps->u1_profile_idc == HIGH_PROFILE_IDC)) + { + /* read transform_8x8_mode_flag */ + ps_pps->i4_transform_8x8_mode_flag = (WORD32)ih264d_get_bit_h264( + ps_bitstrm); + + /* read pic_scaling_matrix_present_flag */ + ps_pps->i4_pic_scaling_matrix_present_flag = + (WORD32)ih264d_get_bit_h264(ps_bitstrm); + + if(ps_pps->i4_pic_scaling_matrix_present_flag) + { + /* read the scaling matrices */ + for(i4_i = 0; + i4_i + < (6 + + (ps_pps->i4_transform_8x8_mode_flag + << 1)); + i4_i++) + { + ps_pps->u1_pic_scaling_list_present_flag[i4_i] = + ih264d_get_bit_h264(ps_bitstrm); + + if(ps_pps->u1_pic_scaling_list_present_flag[i4_i]) + { + if(i4_i < 6) + { + ih264d_scaling_list( + ps_pps->i2_pic_scalinglist4x4[i4_i], + 16, + &ps_pps->u1_pic_use_default_scaling_matrix_flag[i4_i], + ps_bitstrm); + } + else + { + ih264d_scaling_list( + ps_pps->i2_pic_scalinglist8x8[i4_i - 6], + 64, + &ps_pps->u1_pic_use_default_scaling_matrix_flag[i4_i], + ps_bitstrm); + } + } + } + } + + /* read second_chroma_qp_index_offset syntax element */ + ps_pps->i1_second_chroma_qp_index_offset = ih264d_sev( + pu4_bitstrm_ofst, pu4_bitstrm_buf); + + if((ps_pps->i1_second_chroma_qp_index_offset + 12) > 24) + return ERROR_INV_RANGE_QP_T; + } + + ps_pps->u1_is_valid = TRUE; + ps_dec->ps_pps[ps_pps->u1_pic_parameter_set_id] = *ps_pps; + return OK; +} + +/*! 
+ ************************************************************************** + * \if Function name : ih264d_parse_sps \endif + * + * \brief + * Decodes Sequence parameter set from the bitstream + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +UWORD32 ih264d_correct_level_idc(UWORD32 u4_level_idc, UWORD32 u4_total_mbs) +{ + UWORD32 u4_max_mbs_allowed; + + switch(u4_level_idc) + { + case H264_LEVEL_1_0: + u4_max_mbs_allowed = MAX_MBS_LEVEL_10; + break; + case H264_LEVEL_1_1: + u4_max_mbs_allowed = MAX_MBS_LEVEL_11; + break; + case H264_LEVEL_1_2: + u4_max_mbs_allowed = MAX_MBS_LEVEL_12; + break; + case H264_LEVEL_1_3: + u4_max_mbs_allowed = MAX_MBS_LEVEL_13; + break; + case H264_LEVEL_2_0: + u4_max_mbs_allowed = MAX_MBS_LEVEL_20; + break; + case H264_LEVEL_2_1: + u4_max_mbs_allowed = MAX_MBS_LEVEL_21; + break; + case H264_LEVEL_2_2: + u4_max_mbs_allowed = MAX_MBS_LEVEL_22; + break; + case H264_LEVEL_3_0: + u4_max_mbs_allowed = MAX_MBS_LEVEL_30; + break; + case H264_LEVEL_3_1: + u4_max_mbs_allowed = MAX_MBS_LEVEL_31; + break; + case H264_LEVEL_3_2: + u4_max_mbs_allowed = MAX_MBS_LEVEL_32; + break; + case H264_LEVEL_4_0: + u4_max_mbs_allowed = MAX_MBS_LEVEL_40; + break; + case H264_LEVEL_4_1: + u4_max_mbs_allowed = MAX_MBS_LEVEL_41; + break; + case H264_LEVEL_4_2: + u4_max_mbs_allowed = MAX_MBS_LEVEL_42; + break; + case H264_LEVEL_5_0: + u4_max_mbs_allowed = MAX_MBS_LEVEL_50; + break; + case H264_LEVEL_5_1: + default: + u4_max_mbs_allowed = MAX_MBS_LEVEL_51; + break; + + } + + /*correct of the level is incorrect*/ + if(u4_total_mbs > u4_max_mbs_allowed) + { + if(u4_total_mbs > MAX_MBS_LEVEL_50) + u4_level_idc = H264_LEVEL_5_1; + else if(u4_total_mbs > MAX_MBS_LEVEL_42) + u4_level_idc = H264_LEVEL_5_0; + else if(u4_total_mbs > MAX_MBS_LEVEL_41) + u4_level_idc = H264_LEVEL_4_2; + else if(u4_total_mbs > MAX_MBS_LEVEL_40) + u4_level_idc = H264_LEVEL_4_1; + else if(u4_total_mbs > 
MAX_MBS_LEVEL_32) + u4_level_idc = H264_LEVEL_4_0; + else if(u4_total_mbs > MAX_MBS_LEVEL_31) + u4_level_idc = H264_LEVEL_3_2; + else if(u4_total_mbs > MAX_MBS_LEVEL_30) + u4_level_idc = H264_LEVEL_3_1; + else if(u4_total_mbs > MAX_MBS_LEVEL_21) + u4_level_idc = H264_LEVEL_3_0; + else if(u4_total_mbs > MAX_MBS_LEVEL_20) + u4_level_idc = H264_LEVEL_2_1; + else if(u4_total_mbs > MAX_MBS_LEVEL_10) + u4_level_idc = H264_LEVEL_2_0; + } + + return (u4_level_idc); + +} +WORD32 ih264d_parse_sps(dec_struct_t *ps_dec, dec_bit_stream_t *ps_bitstrm) +{ + UWORD8 i; + dec_seq_params_t *ps_seq = NULL; + UWORD8 u1_profile_idc, u1_level_idc, u1_seq_parameter_set_id; + UWORD16 i2_max_frm_num; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst; + UWORD8 u1_frm, uc_constraint_set0_flag, uc_constraint_set1_flag; + + UWORD32 u4_temp; + WORD32 pic_height_in_map_units_minus1 = 0; + UWORD32 u2_pic_wd = 0; + UWORD32 u2_pic_ht = 0; + UWORD32 u2_frm_wd_y = 0; + UWORD32 u2_frm_ht_y = 0; + UWORD32 u2_frm_wd_uv = 0; + UWORD32 u2_frm_ht_uv = 0; + UWORD32 u2_crop_offset_y = 0; + UWORD32 u2_crop_offset_uv = 0; + WORD32 ret; + + /* High profile related syntax element */ + WORD32 i4_i; + /* G050 */ + UWORD8 u1_frame_cropping_flag, u1_frame_cropping_rect_left_ofst, + u1_frame_cropping_rect_right_ofst, + u1_frame_cropping_rect_top_ofst, + u1_frame_cropping_rect_bottom_ofst; + /* G050 */ + /*--------------------------------------------------------------------*/ + /* Decode seq_parameter_set_id and profile and level values */ + /*--------------------------------------------------------------------*/ + SWITCHONTRACE; + u1_profile_idc = ih264d_get_bits_h264(ps_bitstrm, 8); + COPYTHECONTEXT("SPS: profile_idc",u1_profile_idc); + + /* G050 */ + uc_constraint_set0_flag = ih264d_get_bit_h264(ps_bitstrm); + uc_constraint_set1_flag = ih264d_get_bit_h264(ps_bitstrm); + ih264d_get_bit_h264(ps_bitstrm); + + /*****************************************************/ + 
/* Read 5 bits for uc_constraint_set3_flag (1 bit) */ + /* and reserved_zero_4bits (4 bits) - Sushant */ + /*****************************************************/ + ih264d_get_bits_h264(ps_bitstrm, 5); + /* G050 */ + + /* Check whether particular profile is suported or not */ + /* Check whether particular profile is suported or not */ + if((u1_profile_idc != MAIN_PROFILE_IDC) && + + (u1_profile_idc != BASE_PROFILE_IDC) && + + (u1_profile_idc != HIGH_PROFILE_IDC) + + ) + { + + if((uc_constraint_set1_flag != 1) && (uc_constraint_set0_flag != 1)) + { + if(NULL != ps_dec) + { + UWORD32 i4_error_code; + i4_error_code = ERROR_FEATURE_UNAVAIL; + return i4_error_code; + } + else + { + return (ERROR_FEATURE_UNAVAIL); + } + } + } + + u1_level_idc = ih264d_get_bits_h264(ps_bitstrm, 8); + + /* + if(ps_dec->u4_level_at_init < u1_level_idc) + { + UWORD32 i4_error_code; + H264_DEC_DEBUG_PRINT("\nstream has the level more than the one which is set during init\n"); + i4_error_code = ERROR_ACTUAL_LEVEL_GREATER_THAN_INIT ; + return i4_error_code; + * Here instead of flagging the error, we could have ignored this error + * and went ahead for further decoding, but we are not doing + * so because, at least one header should be healthy to do the + * decoding, and moreover, it may help to avoid the crashes in the erroneous + * streams. 
+ * + + } + */ + COPYTHECONTEXT("SPS: u4_level_idc",u1_level_idc); + + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp & MASK_ERR_SEQ_SET_ID) + return ERROR_INV_SPS_PPS_T; + u1_seq_parameter_set_id = u4_temp; + COPYTHECONTEXT("SPS: seq_parameter_set_id", + u1_seq_parameter_set_id); + + /*--------------------------------------------------------------------*/ + /* Find an seq param entry in seqparam array of decStruct */ + /*--------------------------------------------------------------------*/ + + ps_seq = ps_dec->pv_scratch_sps_pps; + *ps_seq = ps_dec->ps_sps[u1_seq_parameter_set_id]; + ps_seq->u1_profile_idc = u1_profile_idc; + ps_seq->u1_level_idc = u1_level_idc; + ps_seq->u1_seq_parameter_set_id = u1_seq_parameter_set_id; + + /*******************************************************************/ + /* Initializations for high profile - Sushant */ + /*******************************************************************/ + ps_seq->i4_chroma_format_idc = 1; + ps_seq->i4_bit_depth_luma_minus8 = 0; + ps_seq->i4_bit_depth_chroma_minus8 = 0; + ps_seq->i4_qpprime_y_zero_transform_bypass_flag = 0; + ps_seq->i4_seq_scaling_matrix_present_flag = 0; + if(u1_profile_idc == HIGH_PROFILE_IDC) + { + + /* reading chroma_format_idc */ + ps_seq->i4_chroma_format_idc = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + + /* Monochrome is not supported */ + if(ps_seq->i4_chroma_format_idc != 1) + { + return ERROR_INV_SPS_PPS_T; + } + + /* reading bit_depth_luma_minus8 */ + ps_seq->i4_bit_depth_luma_minus8 = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + + if(ps_seq->i4_bit_depth_luma_minus8 != 0) + { + return ERROR_INV_SPS_PPS_T; + } + + /* reading bit_depth_chroma_minus8 */ + ps_seq->i4_bit_depth_chroma_minus8 = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + + if(ps_seq->i4_bit_depth_chroma_minus8 != 0) + { + return ERROR_INV_SPS_PPS_T; + } + + /* reading qpprime_y_zero_transform_bypass_flag */ + ps_seq->i4_qpprime_y_zero_transform_bypass_flag = + 
(WORD32)ih264d_get_bit_h264(ps_bitstrm); + + if(ps_seq->i4_qpprime_y_zero_transform_bypass_flag != 0) + { + return ERROR_INV_SPS_PPS_T; + } + + /* reading seq_scaling_matrix_present_flag */ + ps_seq->i4_seq_scaling_matrix_present_flag = + (WORD32)ih264d_get_bit_h264(ps_bitstrm); + + if(ps_seq->i4_seq_scaling_matrix_present_flag) + { + for(i4_i = 0; i4_i < 8; i4_i++) + { + ps_seq->u1_seq_scaling_list_present_flag[i4_i] = + ih264d_get_bit_h264(ps_bitstrm); + + /* initialize u1_use_default_scaling_matrix_flag[i4_i] to zero */ + /* before calling scaling list */ + ps_seq->u1_use_default_scaling_matrix_flag[i4_i] = 0; + + if(ps_seq->u1_seq_scaling_list_present_flag[i4_i]) + { + if(i4_i < 6) + { + ih264d_scaling_list( + ps_seq->i2_scalinglist4x4[i4_i], + 16, + &ps_seq->u1_use_default_scaling_matrix_flag[i4_i], + ps_bitstrm); + } + else + { + ih264d_scaling_list( + ps_seq->i2_scalinglist8x8[i4_i - 6], + 64, + &ps_seq->u1_use_default_scaling_matrix_flag[i4_i], + ps_bitstrm); + } + } + } + } + } + /*--------------------------------------------------------------------*/ + /* Decode MaxFrameNum */ + /*--------------------------------------------------------------------*/ + u4_temp = 4 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp > MAX_BITS_IN_FRAME_NUM) + { + return ERROR_INV_SPS_PPS_T; + } + ps_seq->u1_bits_in_frm_num = u4_temp; + COPYTHECONTEXT("SPS: log2_max_frame_num_minus4", + (ps_seq->u1_bits_in_frm_num - 4)); + + i2_max_frm_num = (1 << (ps_seq->u1_bits_in_frm_num)); + ps_seq->u2_u4_max_pic_num_minus1 = i2_max_frm_num - 1; + /*--------------------------------------------------------------------*/ + /* Decode picture order count and related values */ + /*--------------------------------------------------------------------*/ + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + + if(u4_temp > MAX_PIC_ORDER_CNT_TYPE) + { + return ERROR_INV_POC_TYPE_T; + } + ps_seq->u1_pic_order_cnt_type = u4_temp; + COPYTHECONTEXT("SPS: 
pic_order_cnt_type",ps_seq->u1_pic_order_cnt_type); + + ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle = 1; + if(ps_seq->u1_pic_order_cnt_type == 0) + { + u4_temp = 4 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp > MAX_BITS_IN_POC_LSB) + { + return ERROR_INV_SPS_PPS_T; + } + ps_seq->u1_log2_max_pic_order_cnt_lsb_minus = u4_temp; + ps_seq->i4_max_pic_order_cntLsb = (1 << u4_temp); + COPYTHECONTEXT("SPS: log2_max_pic_order_cnt_lsb_minus4",(u4_temp - 4)); + } + else if(ps_seq->u1_pic_order_cnt_type == 1) + { + ps_seq->u1_delta_pic_order_always_zero_flag = ih264d_get_bit_h264( + ps_bitstrm); + COPYTHECONTEXT("SPS: delta_pic_order_always_zero_flag", + ps_seq->u1_delta_pic_order_always_zero_flag); + + ps_seq->i4_ofst_for_non_ref_pic = ih264d_sev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + COPYTHECONTEXT("SPS: offset_for_non_ref_pic", + ps_seq->i4_ofst_for_non_ref_pic); + + ps_seq->i4_ofst_for_top_to_bottom_field = ih264d_sev( + pu4_bitstrm_ofst, pu4_bitstrm_buf); + COPYTHECONTEXT("SPS: offset_for_top_to_bottom_field", + ps_seq->i4_ofst_for_top_to_bottom_field); + + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp > 255) + return ERROR_INV_SPS_PPS_T; + ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle = u4_temp; + COPYTHECONTEXT("SPS: num_ref_frames_in_pic_order_cnt_cycle", + ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle); + + for(i = 0; i < ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle; i++) + { + ps_seq->i4_ofst_for_ref_frame[i] = ih264d_sev( + pu4_bitstrm_ofst, pu4_bitstrm_buf); + COPYTHECONTEXT("SPS: offset_for_ref_frame", + ps_seq->i4_ofst_for_ref_frame[i]); + } + } + + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + + if((u4_temp > H264_MAX_REF_PICS)) + { + return ERROR_NUM_REF; + } + ps_seq->u1_num_ref_frames = u4_temp; + COPYTHECONTEXT("SPS: num_ref_frames",ps_seq->u1_num_ref_frames); + + ps_seq->u1_gaps_in_frame_num_value_allowed_flag = ih264d_get_bit_h264( + ps_bitstrm); + COPYTHECONTEXT("SPS: 
gaps_in_frame_num_value_allowed_flag", + ps_seq->u1_gaps_in_frame_num_value_allowed_flag); + + /*--------------------------------------------------------------------*/ + /* Decode FrameWidth and FrameHeight and related values */ + /*--------------------------------------------------------------------*/ + ps_seq->u2_frm_wd_in_mbs = 1 + + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + COPYTHECONTEXT("SPS: pic_width_in_mbs_minus1", + ps_seq->u2_frm_wd_in_mbs - 1); + u2_pic_wd = (ps_seq->u2_frm_wd_in_mbs << 4); + + pic_height_in_map_units_minus1 = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_seq->u2_frm_ht_in_mbs = 1 + pic_height_in_map_units_minus1; + + u2_pic_ht = (ps_seq->u2_frm_ht_in_mbs << 4); + + /*--------------------------------------------------------------------*/ + /* Get the value of MaxMbAddress and Number of bits needed for it */ + /*--------------------------------------------------------------------*/ + ps_seq->u2_max_mb_addr = (ps_seq->u2_frm_wd_in_mbs + * ps_seq->u2_frm_ht_in_mbs) - 1; + + ps_seq->u2_total_num_of_mbs = ps_seq->u2_max_mb_addr + 1; + + ps_seq->u1_level_idc = ih264d_correct_level_idc( + u1_level_idc, ps_seq->u2_total_num_of_mbs); + + u1_frm = ih264d_get_bit_h264(ps_bitstrm); + ps_seq->u1_frame_mbs_only_flag = u1_frm; + + COPYTHECONTEXT("SPS: frame_mbs_only_flag", u1_frm); + + if(!u1_frm) + { + u2_pic_ht <<= 1; + ps_seq->u1_mb_aff_flag = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("SPS: mb_adaptive_frame_field_flag", + ps_seq->u1_mb_aff_flag); + + } + else + ps_seq->u1_mb_aff_flag = 0; + + { + WORD32 frame_height_in_mbs = (2 - ps_seq->u1_frame_mbs_only_flag) + * (pic_height_in_map_units_minus1 + 1); + UWORD32 wdth = (ps_seq->u2_frm_wd_in_mbs) << 4; + UWORD32 hght = (frame_height_in_mbs) << 4; + + if((u2_pic_wd < H264_MIN_FRAME_WIDTH) + || (u2_pic_wd > ps_dec->u4_width_at_init)) + { + ivd_video_decode_op_t *ps_out; + /*set width and height in decode output structure*/ + ps_out = (ivd_video_decode_op_t *)ps_dec->pv_dec_out; 
+ ps_out->u4_pic_wd = u2_pic_wd; + ps_out->u4_pic_ht = u2_pic_ht; + + return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED; + } + + if((u2_pic_ht < H264_MIN_FRAME_HEIGHT) + || (((0 != ps_seq->u1_frame_mbs_only_flag) + && (u2_pic_ht * u2_pic_wd + > ps_dec->u4_height_at_init + * ps_dec->u4_width_at_init)) + || ((0 == ps_seq->u1_frame_mbs_only_flag) + && (ALIGN32(u2_pic_ht) + * u2_pic_wd + > ALIGN32(ps_dec->u4_height_at_init) + * ps_dec->u4_width_at_init)))) + { + ivd_video_decode_op_t *ps_out; + /*set width and height in decode output structure*/ + ps_out = (ivd_video_decode_op_t *)ps_dec->pv_dec_out; + ps_out->u4_pic_wd = u2_pic_wd; + ps_out->u4_pic_ht = u2_pic_ht; + + return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED; + } + + + + + } + + ps_seq->u1_direct_8x8_inference_flag = ih264d_get_bit_h264(ps_bitstrm); + + COPYTHECONTEXT("SPS: direct_8x8_inference_flag", + ps_seq->u1_direct_8x8_inference_flag); + + /* G050 */ + u1_frame_cropping_flag = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("SPS: frame_cropping_flag",u1_frame_cropping_flag); + + if(u1_frame_cropping_flag) + { + u1_frame_cropping_rect_left_ofst = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + COPYTHECONTEXT("SPS: frame_cropping_rect_left_offset", + u1_frame_cropping_rect_left_ofst); + u1_frame_cropping_rect_right_ofst = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + COPYTHECONTEXT("SPS: frame_cropping_rect_right_offset", + u1_frame_cropping_rect_right_ofst); + u1_frame_cropping_rect_top_ofst = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + COPYTHECONTEXT("SPS: frame_cropping_rect_top_offset", + u1_frame_cropping_rect_top_ofst); + u1_frame_cropping_rect_bottom_ofst = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + COPYTHECONTEXT("SPS: frame_cropping_rect_bottom_offset", + u1_frame_cropping_rect_bottom_ofst); + } + /* G050 */ + + ps_seq->u1_vui_parameters_present_flag = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("SPS: vui_parameters_present_flag", + ps_seq->u1_vui_parameters_present_flag); + 
+ u2_frm_wd_y = u2_pic_wd + (UWORD8)(PAD_LEN_Y_H << 1); + if(1 == ps_dec->u4_share_disp_buf) + { + if(ps_dec->u4_app_disp_width > u2_frm_wd_y) + u2_frm_wd_y = ps_dec->u4_app_disp_width; + } + + u2_frm_ht_y = u2_pic_ht + (UWORD8)(PAD_LEN_Y_V << 2); + u2_frm_wd_uv = u2_pic_wd + (UWORD8)(PAD_LEN_UV_H << 2); + u2_frm_wd_uv = MAX(u2_frm_wd_uv, u2_frm_wd_y); + + u2_frm_ht_uv = (u2_pic_ht >> 1) + (UWORD8)(PAD_LEN_UV_V << 2); + u2_frm_ht_uv = MAX(u2_frm_ht_uv, (u2_frm_ht_y >> 1)); + + + /* Calculate display picture width, height and start u4_ofst from YUV420 */ + /* pictute buffers as per cropping information parsed above */ + { + UWORD16 u2_rgt_ofst = 0; + UWORD16 u2_lft_ofst = 0; + UWORD16 u2_top_ofst = 0; + UWORD16 u2_btm_ofst = 0; + UWORD8 u1_frm_mbs_flag; + UWORD8 u1_vert_mult_factor; + WORD32 i4_cropped_ht, i4_cropped_wd; + + if(u1_frame_cropping_flag) + { + /* Calculate right and left u4_ofst for cropped picture */ + u2_rgt_ofst = u1_frame_cropping_rect_right_ofst << 1; + u2_lft_ofst = u1_frame_cropping_rect_left_ofst << 1; + + /* Know frame MBs only u4_flag */ + u1_frm_mbs_flag = (1 == ps_seq->u1_frame_mbs_only_flag); + + /* Simplify the vertical u4_ofst calculation from field/frame */ + u1_vert_mult_factor = (2 - u1_frm_mbs_flag); + + /* Calculate bottom and top u4_ofst for cropped picture */ + u2_btm_ofst = (u1_frame_cropping_rect_bottom_ofst + << u1_vert_mult_factor); + u2_top_ofst = (u1_frame_cropping_rect_top_ofst + << u1_vert_mult_factor); + } + + /* Calculate u4_ofst from start of YUV 420 picture buffer to start of*/ + /* cropped picture buffer */ + u2_crop_offset_y = (u2_frm_wd_y * u2_top_ofst) + (u2_lft_ofst); + u2_crop_offset_uv = (u2_frm_wd_uv * (u2_top_ofst >> 1)) + + (u2_lft_ofst >> 1) * YUV420SP_FACTOR; + /* Calculate the display picture width and height based on crop */ + /* information */ + i4_cropped_ht = u2_pic_ht - (u2_btm_ofst + u2_top_ofst); + i4_cropped_wd = u2_pic_wd - (u2_rgt_ofst + u2_lft_ofst); + + if((i4_cropped_ht < MB_SIZE) || 
(i4_cropped_wd < MB_SIZE)) + { + return ERROR_INV_SPS_PPS_T; + } + + if((3 == ps_dec->i4_header_decoded) && (ps_dec->u2_pic_wd != u2_pic_wd)) + { + ps_dec->u1_res_changed = 1; + return IVD_RES_CHANGED; + } + if((3 == ps_dec->i4_header_decoded) && (ps_dec->u2_pic_ht != u2_pic_ht)) + { + ps_dec->u1_res_changed = 1; + return IVD_RES_CHANGED; + } + + ps_dec->u2_disp_height = i4_cropped_ht; + + ps_dec->u2_disp_width = i4_cropped_wd; + + } + + ps_seq->u1_is_valid = TRUE; + + if(1 == ps_seq->u1_vui_parameters_present_flag) + { + ret = ih264d_parse_vui_parametres(&ps_seq->s_vui, ps_bitstrm); + if(ret != OK) + return ret; + } + + /* + * Code Add to check for display width. + * This has to be at the end of the SPS parsing, so everything gets + * parsed and the error will not affect decoding. + * */ + if((0 != ps_dec->u4_app_disp_width) + && (ps_dec->u4_app_disp_width < ps_dec->u2_pic_wd)) + { + ps_dec->u4_app_disp_width = ps_dec->u2_pic_wd; + return ERROR_DISP_WIDTH_RESET_TO_PIC_WIDTH; + } + + + + ps_dec->u2_pic_wd = u2_pic_wd; + ps_dec->u2_pic_ht = u2_pic_ht; + + /* Added temporarily to give pic height and width as display height */ + /* and width in case some cropping errors occur` */ + /*ps_dec->u2_disp_height = ps_dec->u2_pic_ht; + ps_dec->u2_disp_width = ps_dec->u2_pic_wd;*/ + + /* Determining the Width and Height of Frame from that of Picture */ + + ps_dec->u2_frm_wd_y = u2_frm_wd_y; + ps_dec->u2_frm_ht_y = u2_frm_ht_y; + + ps_dec->u2_frm_wd_uv = u2_frm_wd_uv; + ps_dec->u2_frm_ht_uv = u2_frm_ht_uv; + ps_dec->s_pad_mgr.u1_pad_len_y_v = (UWORD8)(PAD_LEN_Y_V << (1 - u1_frm)); + ps_dec->s_pad_mgr.u1_pad_len_cr_v = (UWORD8)(PAD_LEN_UV_V << (1 - u1_frm)); + + ps_dec->u2_frm_wd_in_mbs = ps_seq->u2_frm_wd_in_mbs; + ps_dec->u2_frm_ht_in_mbs = ps_seq->u2_frm_ht_in_mbs; + + ps_dec->u2_crop_offset_y = u2_crop_offset_y; + ps_dec->u2_crop_offset_uv = u2_crop_offset_uv; + + ps_dec->ps_sps[u1_seq_parameter_set_id] = *ps_seq; + + return OK; +} + +/*! 
+ ************************************************************************** + * \if Function name : ih264d_parse_end_of_sequence \endif + * + * \brief + * Decodes End of Sequence. + * + * \param ps_bitstrm : Pointer to bit ps_bitstrm containing the NAL unit + * + * \return + * 0 on Success and error code otherwise + ************************************************************************** + */ +WORD32 ih264d_parse_end_of_sequence(dec_struct_t * ps_dec) +{ + WORD32 ret; + + ret = ih264d_end_of_pic_processing(ps_dec); + return ret; +} + +/*! + ************************************************************************** + * \if Function name : AcessUnitDelimiterRbsp \endif + * + * \brief + * Decodes AcessUnitDelimiterRbsp. + * + * \param ps_bitstrm : Pointer to bit ps_bitstrm containing the NAL unit + * + * \return + * 0 on Success and error code otherwise + ************************************************************************** + */ + +WORD32 ih264d_access_unit_delimiter_rbsp(dec_struct_t * ps_dec) +{ + UWORD8 u1_primary_pic_type; + u1_primary_pic_type = ih264d_get_bits_h264(ps_dec->ps_bitstrm, 3); + switch(u1_primary_pic_type) + { + case I_PIC: + case SI_PIC: + case ISI_PIC: + ps_dec->ps_dec_err_status->u1_pic_aud_i = PIC_TYPE_I; + break; + default: + ps_dec->ps_dec_err_status->u1_pic_aud_i = PIC_TYPE_UNKNOWN; + } + return (0); +} +/*! 
+ ************************************************************************** + * \if Function name : ih264d_parse_nal_unit \endif + * + * \brief + * Decodes NAL unit + * + * \return + * 0 on Success and error code otherwise + ************************************************************************** + */ + +WORD32 ih264d_parse_nal_unit(iv_obj_t *dec_hdl, + ivd_video_decode_op_t *ps_dec_op, + UWORD8 *pu1_buf, + UWORD32 u4_length) +{ /* Reads the first byte of the NAL unit in pu1_buf and dispatches on its nal_unit_type; when pu1_buf is NULL or u4_length is 0 nothing is parsed and OK is returned */ + + dec_bit_stream_t *ps_bitstrm; + + + dec_struct_t *ps_dec = (dec_struct_t *)dec_hdl->pv_codec_handle; + ivd_video_decode_ip_t *ps_dec_in = + (ivd_video_decode_ip_t *)ps_dec->pv_dec_in; + dec_slice_params_t * ps_cur_slice = ps_dec->ps_cur_slice; /* NOTE(review): ps_dec_in and ps_cur_slice are not referenced by name in the rest of this function */ + UWORD8 u1_first_byte, u1_nal_ref_idc; + UWORD8 u1_nal_unit_type; + WORD32 i_status = OK; + ps_bitstrm = ps_dec->ps_bitstrm; + + if(pu1_buf) + { + if(u4_length) + { + ps_dec_op->u4_frame_decoded_flag = 0; + ih264d_process_nal_unit(ps_dec->ps_bitstrm, pu1_buf, + u4_length); + + SWITCHOFFTRACE; + u1_first_byte = ih264d_get_bits_h264(ps_bitstrm, 8); + + if(NAL_FORBIDDEN_BIT(u1_first_byte)) + { + H264_DEC_DEBUG_PRINT("\nForbidden bit set in Nal Unit, Let's try\n"); /* only reported; parsing continues */ + } + u1_nal_unit_type = NAL_UNIT_TYPE(u1_first_byte); + ps_dec->u1_nal_unit_type = u1_nal_unit_type; + u1_nal_ref_idc = (UWORD8)(NAL_REF_IDC(u1_first_byte)); + //Skip all NALUs if SPS and PPS are not decoded + switch(u1_nal_unit_type) + { + case SLICE_DATA_PARTITION_A_NAL: + case SLICE_DATA_PARTITION_B_NAL: + case SLICE_DATA_PARTITION_C_NAL: + if(!ps_dec->i4_decode_header) + ih264d_parse_slice_partition(ps_dec, ps_bitstrm); /* result, if any, is not checked */ + + break; + + case IDR_SLICE_NAL: + case SLICE_NAL: + + /* ! */ + DEBUG_THREADS_PRINTF("Decoding a slice NAL\n"); + if(!ps_dec->i4_decode_header) + { + if(ps_dec->i4_header_decoded == 3) /* both bit 0 (set in the SEQ_PARAM_NAL case) and bit 1 (set in the PIC_PARAM_NAL case) must be set */ + { + /* ! 
*/ + ps_dec->u4_slice_start_code_found = 1; + + ih264d_rbsp_to_sodb(ps_dec->ps_bitstrm); + + i_status = ih264d_parse_decode_slice( + (UWORD8)(u1_nal_unit_type + == IDR_SLICE_NAL), + u1_nal_ref_idc, ps_dec); + + if(i_status != OK) + return i_status; + } + else + { + H264_DEC_DEBUG_PRINT( + "\nSlice NAL Supplied but no header has been supplied\n"); + } + } + break; + + case SEI_NAL: + if(!ps_dec->i4_decode_header) + { + ih264d_rbsp_to_sodb(ps_dec->ps_bitstrm); + i_status = ih264d_parse_sei_message(ps_dec, ps_bitstrm); + if(i_status != OK) + return i_status; + ih264d_parse_sei(ps_dec, ps_bitstrm); /* NOTE(review): a second SEI parse call on the same bitstream whose result is not checked; confirm both calls are intended */ + } + break; + case SEQ_PARAM_NAL: + /* ! */ + ih264d_rbsp_to_sodb(ps_dec->ps_bitstrm); + i_status = ih264d_parse_sps(ps_dec, ps_bitstrm); + if(i_status == ERROR_INV_SPS_PPS_T) + return i_status; /* any other non-zero status leaves bit 0 clear and is returned at the end of the function */ + if(!i_status) + ps_dec->i4_header_decoded |= 0x1; + break; + + case PIC_PARAM_NAL: + /* ! */ + ih264d_rbsp_to_sodb(ps_dec->ps_bitstrm); + i_status = ih264d_parse_pps(ps_dec, ps_bitstrm); + if(i_status == ERROR_INV_SPS_PPS_T) + return i_status; /* same handling as for the SPS, with bit 1 */ + if(!i_status) + ps_dec->i4_header_decoded |= 0x2; + break; + case ACCESS_UNIT_DELIMITER_RBSP: + if(!ps_dec->i4_decode_header) + { + ih264d_access_unit_delimiter_rbsp(ps_dec); + } + break; + //Let us ignore the END_OF_SEQ_RBSP NAL and decode even after this NAL + case END_OF_STREAM_RBSP: + if(!ps_dec->i4_decode_header) + { + ih264d_parse_end_of_stream(ps_dec); + } + break; + case FILLER_DATA_NAL: + if(!ps_dec->i4_decode_header) + { + ih264d_parse_filler_data(ps_dec, ps_bitstrm); + } + break; + default: + H264_DEC_DEBUG_PRINT("\nUnknown NAL type %d\n", u1_nal_unit_type); + break; + } + + } + + } + + return i_status; + +} + diff --git a/decoder/ih264d_parse_headers.h b/decoder/ih264d_parse_headers.h new file mode 100755 index 0000000..3c829e7 --- /dev/null +++ b/decoder/ih264d_parse_headers.h @@ -0,0 +1,46 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source
Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef _IH264D_PARSE_HEADERS_H_ +#define _IH264D_PARSE_HEADERS_H_ +/*! +************************************************************************** +* \file ih264d_parse_headers.h +* +* \brief +* Contains declarations high level syntax[above slice] +* parsing routines +* +* \date +* 19/12/2002 +* +* \author AI +************************************************************************** +*/ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_bitstrm.h" +#include "ih264d_structs.h" +WORD32 ih264d_parse_nal_unit(iv_obj_t *dec_hdl, + ivd_video_decode_op_t *ps_dec_op, + UWORD8 *pu1_buf, + UWORD32 u4_length); + +#endif /* _IH264D_PARSE_HEADERS_H_ */ diff --git a/decoder/ih264d_parse_islice.c b/decoder/ih264d_parse_islice.c new file mode 100755 index 0000000..7851a0b --- /dev/null +++ b/decoder/ih264d_parse_islice.c @@ -0,0 +1,1479 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*! + ************************************************************************** + * \file ih264d_parse_islice.c + * + * \brief + * Contains routines that decode a I slice type + * + * Detailed_description + * + * \date + * 07/07/2003 + * + * \author NS + ************************************************************************** + */ +#include "ih264d_error_handler.h" +#include "ih264d_debug.h" +#include <string.h> +#include "ih264d_bitstrm.h" +#include "ih264d_defs.h" +#include "ih264d_debug.h" +#include "ih264d_tables.h" +#include "ih264d_structs.h" +#include "ih264d_defs.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_mb_utils.h" +#include "ih264d_deblocking.h" +#include "ih264d_cabac.h" +#include "ih264d_parse_cabac.h" +#include "ih264d_parse_mb_header.h" +#include "ih264d_parse_slice.h" +#include "ih264d_process_pslice.h" +#include "ih264d_process_intra_mb.h" +#include "ih264d_parse_islice.h" +#include "ih264d_error_handler.h" +#include "ih264d_mvpred.h" +#include "ih264d_defs.h" +#include "ih264d_thread_parse_decode.h" +#include "ithread.h" +#include "ih264d_parse_mb_header.h" +#include "assert.h" +#include "ih264d_utils.h" +#include "ih264d_format_conv.h" + +void ih264d_init_cabac_contexts(UWORD8 u1_slice_type, dec_struct_t * ps_dec); + +void ih264d_itrans_recon_luma_dc(dec_struct_t *ps_dec, + WORD16* pi2_src, + WORD16* pi2_coeff_block, + const UWORD16 
*pu2_weigh_mat); + + + +/*! + ************************************************************************** + * \if Function name : ih264d_parse_imb_cavlc \endif + * + * \brief + * Parses the CAVLC syntax of an intra MB (I_4x4 when u1_mb_type is I_4x4_MB, + * otherwise I_16x16). For I_16x16 the luma DC block is also parsed here and + * passed through pf_ihadamard_scaling_4x4. + * + * \return + * OK on success, otherwise an error code + ************************************************************************** + */ +WORD32 ih264d_parse_imb_cavlc(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num, + UWORD8 u1_mb_type) +{ + WORD32 i4_delta_qp; + UWORD32 u4_temp; + UWORD32 ui_is_top_mb_available; + UWORD32 ui_is_left_mb_available; + UWORD32 u4_cbp; + UWORD32 u4_offset; + UWORD32 *pu4_bitstrm_buf; + WORD32 ret; + + dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst; + UNUSED(u1_mb_num); + ps_cur_mb_info->u1_tran_form8x8 = 0; + ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0; + + ps_cur_mb_info->u1_yuv_dc_block_flag = 0; + + u4_temp = ps_dec->u1_mb_ngbr_availablity; + ui_is_top_mb_available = BOOLEAN(u4_temp & TOP_MB_AVAILABLE_MASK); + ui_is_left_mb_available = BOOLEAN(u4_temp & LEFT_MB_AVAILABLE_MASK); + + pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + + if(u1_mb_type == I_4x4_MB) + { + ps_cur_mb_info->ps_curmb->u1_mb_type = I_4x4_MB; + u4_offset = 0; /* passed on to ih264d_parse_residual4x4_cavlc at the end of the function */ + + /*--------------------------------------------------------------------*/ + /* Read transform_size_8x8_flag if present */ + /*--------------------------------------------------------------------*/ + if(ps_dec->s_high_profile.u1_transform8x8_present) + { + ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8); + ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8; + } + + /*--------------------------------------------------------------------*/ + /* Read
the IntraPrediction modes for LUMA */ + /*--------------------------------------------------------------------*/ + if (!ps_cur_mb_info->u1_tran_form8x8) + { + ih264d_read_intra_pred_modes(ps_dec, + ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data), + ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+16), + ps_cur_mb_info->u1_tran_form8x8); + UWORD8 *pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data; + pu1_temp += 32; /* advance the parse buffer by 32 bytes; the two areas passed above start at offsets 0 and 16 */ + ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp; + } + else + { + ih264d_read_intra_pred_modes(ps_dec, + ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data), + ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+4), + ps_cur_mb_info->u1_tran_form8x8); + UWORD8 *pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data; + pu1_temp += 8; /* 8x8 transform case: areas at offsets 0 and 4, buffer advanced by 8 bytes */ + ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp; + } + /*--------------------------------------------------------------------*/ + /* Read the IntraPrediction mode for CHROMA */ + /*--------------------------------------------------------------------*/ +//Inlined ih264d_uev + { + UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst; + UWORD32 u4_word, u4_ldz, u4_temp; + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf); + u4_ldz = CLZ(u4_word); + /* Flush the ps_bitstrm */ + u4_bitstream_offset += (u4_ldz + 1); + /* Read the suffix from the ps_bitstrm */ + u4_word = 0; + if(u4_ldz) + { + GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, + u4_ldz); + } + *pu4_bitstrm_ofst = u4_bitstream_offset; + u4_temp = ((1 << u4_ldz) + u4_word - 1); /* ue(v) value: 2^u4_ldz - 1 + suffix; this inner u4_temp shadows the function-level one. NOTE(review): u4_ldz is not range-checked before the shift; confirm CLZ keeps it below 32 */ + if(u4_temp > 3) + { + return ERROR_CHROMA_PRED_MODE; + } + ps_cur_mb_info->u1_chroma_pred_mode = u4_temp; + COPYTHECONTEXT("intra_chroma_pred_mode", ps_cur_mb_info->u1_chroma_pred_mode); + } + /*--------------------------------------------------------------------*/ + /* Read the Coded block pattern */ + 
/*--------------------------------------------------------------------*/ + { + UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst; + UWORD32 u4_word, u4_ldz; + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf); + u4_ldz = CLZ(u4_word); + /* Flush the ps_bitstrm */ + u4_bitstream_offset += (u4_ldz + 1); + /* Read the suffix from the ps_bitstrm */ + u4_word = 0; + if(u4_ldz) + { + GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, + u4_ldz); + } + *pu4_bitstrm_ofst = u4_bitstream_offset; + u4_cbp = ((1 << u4_ldz) + u4_word - 1); + } + if(u4_cbp > 47) + { + return ERROR_CBP; + } + + u4_cbp = gau1_ih264d_cbp_table[u4_cbp][0]; /* code number (0..47, checked above) mapped through column 0 of the table */ + COPYTHECONTEXT("coded_block_pattern", u1_cbp); /* NOTE(review): u1_cbp (and i1_delta_qp further down) are not declared in this function; presumably COPYTHECONTEXT discards its arguments in this build */ + ps_cur_mb_info->u1_cbp = u4_cbp; + + /*--------------------------------------------------------------------*/ + /* Read mb_qp_delta */ + /*--------------------------------------------------------------------*/ + if(ps_cur_mb_info->u1_cbp) + { + UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst; + UWORD32 u4_word, u4_ldz, u4_abs_val; + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf); + u4_ldz = CLZ(u4_word); + + /* Flush the ps_bitstrm */ + u4_bitstream_offset += (u4_ldz + 1); + + /* Read the suffix from the ps_bitstrm */ + u4_word = 0; + if(u4_ldz) + { + GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, + u4_ldz); + } + + *pu4_bitstrm_ofst = u4_bitstream_offset; + u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1; /* signed Exp-Golomb: magnitude computed here, sign taken from the low suffix bit tested next */ + + if(u4_word & 0x1) + { + i4_delta_qp = (-(WORD32)u4_abs_val); + } + else + { + i4_delta_qp = (u4_abs_val); + } + + if((i4_delta_qp < -26) || (i4_delta_qp > 25)) + { + return ERROR_INV_RANGE_QP_T; + } + + 
COPYTHECONTEXT("mb_qp_delta", i1_delta_qp); + if(i4_delta_qp != 0) + { + ret = ih264d_update_qp(ps_dec, (WORD8)i4_delta_qp); + if(ret != OK) + return ret; + } + } + + } + else + { + u4_offset = 1; /* passed on to ih264d_parse_residual4x4_cavlc below (0 in the I_4x4 branch) */ + ps_cur_mb_info->ps_curmb->u1_mb_type = I_16x16_MB; + /*-------------------------------------------------------------------*/ + /* Read the IntraPrediction mode for CHROMA */ + /*-------------------------------------------------------------------*/ + { + UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst; + UWORD32 u4_word, u4_ldz; + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf); + u4_ldz = CLZ(u4_word); + /* Flush the ps_bitstrm */ + u4_bitstream_offset += (u4_ldz + 1); + /* Read the suffix from the ps_bitstrm */ + u4_word = 0; + if(u4_ldz) + { + GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, + u4_ldz); + } + *pu4_bitstrm_ofst = u4_bitstream_offset; + u4_temp = ((1 << u4_ldz) + u4_word - 1); + +//Inlined ih264d_uev + + if(u4_temp > 3) + { + return ERROR_CHROMA_PRED_MODE; + } + ps_cur_mb_info->u1_chroma_pred_mode = u4_temp; + COPYTHECONTEXT("intra_chroma_pred_mode", ps_cur_mb_info->u1_chroma_pred_mode); + } + /*-------------------------------------------------------------------*/ + /* Read the Coded block pattern */ + /*-------------------------------------------------------------------*/ + u4_cbp = gau1_ih264d_cbp_tab[(u1_mb_type - 1) >> 2]; /* I_16x16: the CBP comes from a table lookup on u1_mb_type; nothing is read from the bitstream for it */ + ps_cur_mb_info->u1_cbp = u4_cbp; + + /*-------------------------------------------------------------------*/ + /* Read mb_qp_delta (read unconditionally here; the I_4x4 branch reads it only when the CBP is non-zero) */ + /*-------------------------------------------------------------------*/ + { + UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst; + UWORD32 u4_word, u4_ldz, u4_abs_val; + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + 
/***************************************************************/ + NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf); + u4_ldz = CLZ(u4_word); + + /* Flush the ps_bitstrm */ + u4_bitstream_offset += (u4_ldz + 1); + + /* Read the suffix from the ps_bitstrm */ + u4_word = 0; + if(u4_ldz) + GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, + u4_ldz); + + *pu4_bitstrm_ofst = u4_bitstream_offset; + u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1; + + if(u4_word & 0x1) + i4_delta_qp = (-(WORD32)u4_abs_val); + else + i4_delta_qp = (u4_abs_val); + + if((i4_delta_qp < -26) || (i4_delta_qp > 25)) + return ERROR_INV_RANGE_QP_T; + + } +//Inlined ih264d_sev + COPYTHECONTEXT("Delta quant", i1_delta_qp); + + if(i4_delta_qp != 0) + { + ret = ih264d_update_qp(ps_dec, (WORD8)i4_delta_qp); + if(ret != OK) + return ret; + } + + { + WORD16 i_scaleFactor; + UWORD32 ui_N = 0; /* stays 0 when neither neighbour is available */ + WORD16 *pi2_scale_matrix_ptr; + /*******************************************************************/ + /* for luma DC coefficients the scaling is done during the parsing */ + /* to preserve the precision */ + /*******************************************************************/ + if(ps_dec->s_high_profile.u1_scaling_present) + { + pi2_scale_matrix_ptr = + ps_dec->s_high_profile.i2_scalinglist4x4[0]; + } + else + { + i_scaleFactor = 16; /* no scaling list present: point at a single local value of 16 */ + pi2_scale_matrix_ptr = &i_scaleFactor; + } + + /*---------------------------------------------------------------*/ + /* Decode DC coefficients */ + /*---------------------------------------------------------------*/ + /*---------------------------------------------------------------*/ + /* Calculation of N */ + /*---------------------------------------------------------------*/ + if(ui_is_left_mb_available) + { + + if(ui_is_top_mb_available) + { + ui_N = ((ps_cur_mb_info->ps_top_mb->pu1_nnz_y[0] + + ps_dec->pu1_left_nnz_y[0] + 1) >> 1); /* rounded average of the top and left values */ + } + else + { + ui_N = ps_dec->pu1_left_nnz_y[0]; + } + } + else if(ui_is_top_mb_available) + { + ui_N = 
ps_cur_mb_info->ps_top_mb->pu1_nnz_y[0]; + } + + { + WORD16 pi2_dc_coef[16]; + WORD32 pi4_tmp[16]; + tu_sblk4x4_coeff_data_t *ps_tu_4x4 = + (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data; + WORD16 *pi2_coeff_block = + (WORD16 *)ps_dec->pv_parse_tu_coeff_data; + ps_tu_4x4->u2_sig_coeff_map = 0; + UWORD32 u4_num_coeff; + + ret = ps_dec->pf_cavlc_parse4x4coeff[(ui_N > 7)](pi2_dc_coef, 0, ui_N, + ps_dec, &u4_num_coeff); /* parser index 1 when ui_N > 7, index 0 otherwise */ + if(ret != OK) + return ret; + + if(EXCEED_OFFSET(ps_bitstrm)) + return ERROR_EOB_TERMINATE_T; + if(ps_tu_4x4->u2_sig_coeff_map) + { + memset(pi2_dc_coef,0,sizeof(pi2_dc_coef)); + ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4, + pi2_dc_coef, + ps_dec->pu1_inv_scan); + + PROFILE_DISABLE_IQ_IT_RECON() + ps_dec->pf_ihadamard_scaling_4x4(pi2_dc_coef, + pi2_coeff_block, + ps_dec->pu2_quant_scale_y, + (UWORD16 *)pi2_scale_matrix_ptr, + ps_dec->u1_qp_y_div6, + pi4_tmp); + pi2_coeff_block += 16; /* advance the parse buffer by the 16 WORD16 entries of the DC block */ + ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_block; + SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,0); /* bit 0 flags a coded luma DC block */ + } + + } + } + } + + + if(u4_cbp) + { + + ret = ih264d_parse_residual4x4_cavlc(ps_dec, ps_cur_mb_info, + (UWORD8)u4_offset); + if(ret != OK) + return ret; + if(EXCEED_OFFSET(ps_bitstrm)) + return ERROR_EOB_TERMINATE_T; + + /* Store Left Mb NNZ and TOP chroma NNZ */ + } + else + { /* no residual to parse: copy the current QP div6 and rem6 values into the MB info and update nnz through the skip MB helper */ + ps_cur_mb_info->u1_qp_div6 = ps_dec->u1_qp_y_div6; + ps_cur_mb_info->u1_qpc_div6 = ps_dec->u1_qp_u_div6; + ps_cur_mb_info->u1_qpcr_div6 = ps_dec->u1_qp_v_div6; + ps_cur_mb_info->u1_qp_rem6 = ps_dec->u1_qp_y_rem6; + ps_cur_mb_info->u1_qpc_rem6 = ps_dec->u1_qp_u_rem6; + ps_cur_mb_info->u1_qpcr_rem6 = ps_dec->u1_qp_v_rem6; + ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC); + } + + return OK; +} + +/*! + ************************************************************************** + * \if Function name : ParseIMbCab \endif + * + * \brief + * This function parses CABAC syntax of a I MB. If 16x16 Luma DC transform + * is also done here.
Transformed Luma DC values are copied in their + * 0th pixel location of corrosponding CoeffBlock. + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +WORD32 ih264d_parse_imb_cabac(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_type) +{ /* Parses the CABAC syntax of one intra MB: I_4x4 when u1_mb_type is I_4x4_MB, otherwise I_16x16 */ + WORD8 i1_delta_qp; + UWORD8 u1_cbp; + UWORD8 u1_offset; + /* Variables for handling Cabac contexts */ + ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info; + ctxt_inc_mb_info_t *ps_left_ctxt = ps_dec->p_left_ctxt_mb_info; + dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm; + bin_ctxt_model_t *p_bin_ctxt; + + UWORD8 u1_intra_chrom_pred_mode; + UWORD8 u1_dc_block_flag = 0; /* only changed in the I_16x16 branch */ + WORD32 ret; + + ps_cur_mb_info->u1_yuv_dc_block_flag = 0; + + if(ps_left_ctxt == ps_dec->ps_def_ctxt_mb_info) + { + ps_dec->pu1_left_yuv_dc_csbp[0] = 0xf; /* left context is the default one: set the low four bits */ + } + + if(ps_dec->ps_cur_slice->u1_slice_type != I_SLICE) + { /* intra MB in a non-I slice: clear the left and current mv and ref-idx context data */ + WORD32 *pi4_buf; + WORD8 *pi1_buf; + MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0); + *((UWORD32 *)ps_dec->pi1_left_ref_idx_ctxt_inc) = 0; + MEMSET_16BYTES(p_curr_ctxt->u1_mv, 0); + pi1_buf = p_curr_ctxt->i1_ref_idx; + pi4_buf = (WORD32 *)pi1_buf; + *pi4_buf = 0; + } + + if(u1_mb_type == I_4x4_MB) + { + ps_cur_mb_info->ps_curmb->u1_mb_type = I_4x4_MB; + p_curr_ctxt->u1_mb_type = CAB_I4x4; + u1_offset = 0; + + ps_cur_mb_info->u1_tran_form8x8 = 0; + ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0; + + /*--------------------------------------------------------------------*/ + /* Read transform_size_8x8_flag if present */ + /*--------------------------------------------------------------------*/ + if(ps_dec->s_high_profile.u1_transform8x8_present) + { + ps_cur_mb_info->u1_tran_form8x8 = ih264d_parse_transform8x8flag_cabac( + ps_dec, ps_cur_mb_info); + COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8); + p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8; + 
ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8; + } + else + { + p_curr_ctxt->u1_transform8x8_ctxt = 0; + } + + /*--------------------------------------------------------------------*/ + /* Read the IntraPrediction modes for LUMA */ + /*--------------------------------------------------------------------*/ + if (!ps_cur_mb_info->u1_tran_form8x8) + { + ih264d_read_intra_pred_modes_cabac( + ps_dec, + ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data), + ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+16), + ps_cur_mb_info->u1_tran_form8x8); + UWORD8 *pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data; + pu1_temp += 32; /* advance the parse buffer by 32 bytes; the two areas passed above start at offsets 0 and 16 */ + ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp; + } + else + { + ih264d_read_intra_pred_modes_cabac( + ps_dec, + ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data), + ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+4), + ps_cur_mb_info->u1_tran_form8x8); + UWORD8 *pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data; + pu1_temp += 8; /* 8x8 transform case: areas at offsets 0 and 4, buffer advanced by 8 bytes */ + ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp; + } + /*--------------------------------------------------------------------*/ + /* Read the IntraPrediction mode for CHROMA */ + /*--------------------------------------------------------------------*/ + u1_intra_chrom_pred_mode = ih264d_parse_chroma_pred_mode_cabac(ps_dec); /* NOTE(review): unlike the I_16x16 branch below, the result is not checked against 3 here */ + COPYTHECONTEXT("intra_chroma_pred_mode", u1_intra_chrom_pred_mode); + p_curr_ctxt->u1_intra_chroma_pred_mode = ps_cur_mb_info->u1_chroma_pred_mode = + u1_intra_chrom_pred_mode; + + /*--------------------------------------------------------------------*/ + /* Read the Coded block pattern */ + /*--------------------------------------------------------------------*/ + u1_cbp = ih264d_parse_ctx_cbp_cabac(ps_dec); + COPYTHECONTEXT("coded_block_pattern", u1_cbp); + ps_cur_mb_info->u1_cbp = u1_cbp; + p_curr_ctxt->u1_cbp = u1_cbp; + + /*--------------------------------------------------------------------*/ + /* Read mb_qp_delta */ + 
/*--------------------------------------------------------------------*/ + if(ps_cur_mb_info->u1_cbp) + { + ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &i1_delta_qp); + if(ret != OK) + return ret; + COPYTHECONTEXT("mb_qp_delta", i1_delta_qp); + if(i1_delta_qp != 0) + { + ret = ih264d_update_qp(ps_dec, i1_delta_qp); + if(ret != OK) + return ret; + } + } + else + ps_dec->i1_prev_mb_qp_delta = 0; + p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE; /* not part of the else above: runs for every I_4x4 MB and clears bit 0 */ + } + else + { + u1_offset = 1; + ps_cur_mb_info->ps_curmb->u1_mb_type = I_16x16_MB; + p_curr_ctxt->u1_mb_type = CAB_I16x16; + ps_cur_mb_info->u1_tran_form8x8 = 0; + p_curr_ctxt->u1_transform8x8_ctxt = 0; + ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0; + /*--------------------------------------------------------------------*/ + /* Read the IntraPrediction mode for CHROMA */ + /*--------------------------------------------------------------------*/ + u1_intra_chrom_pred_mode = ih264d_parse_chroma_pred_mode_cabac(ps_dec); + if(u1_intra_chrom_pred_mode > 3) + return ERROR_CHROMA_PRED_MODE; + + COPYTHECONTEXT("Chroma intra_chroma_pred_mode pred mode", u1_intra_chrom_pred_mode); + p_curr_ctxt->u1_intra_chroma_pred_mode = ps_cur_mb_info->u1_chroma_pred_mode = + u1_intra_chrom_pred_mode; + + /*--------------------------------------------------------------------*/ + /* Read the Coded block pattern */ + /*--------------------------------------------------------------------*/ + u1_cbp = gau1_ih264d_cbp_tab[(u1_mb_type - 1) >> 2]; /* I_16x16: the CBP is looked up from u1_mb_type rather than parsed */ + ps_cur_mb_info->u1_cbp = u1_cbp; + p_curr_ctxt->u1_cbp = u1_cbp; + + /*--------------------------------------------------------------------*/ + /* Read mb_qp_delta */ + /*--------------------------------------------------------------------*/ + ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &i1_delta_qp); /* read unconditionally here; the I_4x4 branch reads it only when the CBP is non-zero */ + if(ret != OK) + return ret; + COPYTHECONTEXT("mb_qp_delta", i1_delta_qp); + if(i1_delta_qp != 0) + { + ret = ih264d_update_qp(ps_dec, i1_delta_qp); + if(ret != OK) + return ret; + } + + { + WORD16 i_scaleFactor; + 
WORD16* pi2_scale_matrix_ptr; + /*******************************************************************/ + /* for luma DC coefficients the scaling is done during the parsing */ + /* to preserve the precision */ + /*******************************************************************/ + if(ps_dec->s_high_profile.u1_scaling_present) + { + pi2_scale_matrix_ptr = + ps_dec->s_high_profile.i2_scalinglist4x4[0]; + + } + else + { + i_scaleFactor = 16; /* no scaling list present: point at a single local value of 16 */ + pi2_scale_matrix_ptr = &i_scaleFactor; + } + { + ctxt_inc_mb_info_t *ps_top_ctxt = ps_dec->p_top_ctxt_mb_info; + UWORD8 uc_a, uc_b; + UWORD32 u4_ctx_inc; + + INC_SYM_COUNT(&(ps_dec->s_cab_dec_env)); + + /* if MbAddrN not available then CondTermN = 1 */ + uc_b = ((ps_top_ctxt->u1_yuv_dc_csbp) & 0x01); + + /* if MbAddrN not available then CondTermN = 1 */ + uc_a = ((ps_dec->pu1_left_yuv_dc_csbp[0]) & 0x01); + + u4_ctx_inc = (uc_a + (uc_b << 1)); /* context increment: left DC flag plus twice the top DC flag */ + + { + WORD16 pi2_dc_coef[16]; + tu_sblk4x4_coeff_data_t *ps_tu_4x4 = + (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data; + WORD16 *pi2_coeff_block = + (WORD16 *)ps_dec->pv_parse_tu_coeff_data; + + p_bin_ctxt = (ps_dec->p_cbf_t[LUMA_DC_CTXCAT]) + u4_ctx_inc; + + u1_dc_block_flag = + ih264d_read_coeff4x4_cabac(ps_bitstrm, + LUMA_DC_CTXCAT, + ps_dec->p_significant_coeff_flag_t[LUMA_DC_CTXCAT], + ps_dec, p_bin_ctxt); + + /* Store coded_block_flag */ + p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE; + p_curr_ctxt->u1_yuv_dc_csbp |= u1_dc_block_flag; + if(u1_dc_block_flag) + { + WORD32 pi4_tmp[16]; + memset(pi2_dc_coef,0,sizeof(pi2_dc_coef)); + ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4, + pi2_dc_coef, + ps_dec->pu1_inv_scan); + + PROFILE_DISABLE_IQ_IT_RECON() + ps_dec->pf_ihadamard_scaling_4x4(pi2_dc_coef, + pi2_coeff_block, + ps_dec->pu2_quant_scale_y, + (UWORD16 *)pi2_scale_matrix_ptr, + ps_dec->u1_qp_y_div6, + pi4_tmp); + pi2_coeff_block += 16; /* advance the parse buffer by the 16 WORD16 entries of the DC block */ + ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_block; + SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,0); + } + + } + + } + } + } + + 
ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6; /* keep bits 1 and 2; the next statement puts the luma DC flag of this MB (still 0 for I_4x4) into bit 0 */ + ps_dec->pu1_left_yuv_dc_csbp[0] |= u1_dc_block_flag; + + ih264d_parse_residual4x4_cabac(ps_dec, ps_cur_mb_info, u1_offset); /* result, if any, is not checked */ + if(EXCEED_OFFSET(ps_bitstrm)) + return ERROR_EOB_TERMINATE_T; + return OK; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_islice_data_cavlc */ +/* */ +/* Description : This function parses CAVLC syntax of an intra (I) slice on */ +/* N MB basis. */ +/* */ +/* Inputs : ps_dec */ +/* ps_slice */ +/* u2_first_mb_in_slice */ +/* */ +/* Processing : 1. After parsing syntax for N MBs those N MBs are */ +/* decoded till the end of slice. */ +/* */ +/* Returns : OK on success, otherwise an error code */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 24 06 2005 ARNY Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_parse_islice_data_cavlc(dec_struct_t * ps_dec, + dec_slice_params_t * ps_slice, + UWORD16 u2_first_mb_in_slice) +{ + UWORD8 uc_more_data_flag; + UWORD8 u1_num_mbs, u1_mb_idx; + dec_mb_info_t *ps_cur_mb_info; + deblk_mb_t *ps_cur_deblk_mb; + dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD16 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs; + WORD16 i2_cur_mb_addr; + UWORD8 u1_mbaff; + UWORD8 u1_num_mbs_next, u1_end_of_row, u1_tfr_n_mb; + WORD32 ret; + + ps_dec->u1_qp = ps_slice->u1_slice_qp; + ret = ih264d_update_qp(ps_dec, 0); /* zero delta: presumably refreshes the derived QP state from the slice QP set just above - confirm */ + if(ret != OK) + return ret; + u1_mbaff = ps_slice->u1_mbaff_frame_flag; + + /* initializations */ + u1_mb_idx = ps_dec->u1_mb_idx; + u1_num_mbs = u1_mb_idx; + + uc_more_data_flag = 1; + i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff; /* shifted left by the MBAFF flag */ + + do + { + UWORD8 u1_mb_type; + + if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u2_max_mb_addr) + { + 
break; /* stop when the address passes u2_max_mb_addr of the current SPS */ + } + + ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs; + ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff); + + ps_cur_mb_info->u1_end_of_slice = 0; + + /***************************************************************/ + /* Get the required information for decoding of MB */ + /* mb_x, mb_y , neighbour availablity, */ + /***************************************************************/ + ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, 0); + + /***************************************************************/ + /* Set the deblocking parameters for this MB */ + /***************************************************************/ + ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs; + + if(ps_dec->u4_app_disable_deblk_frm == 0) + ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice, + ps_dec->u1_mb_ngbr_availablity, + ps_dec->u1_cur_mb_fld_dec_flag); + + ps_cur_deblk_mb->u1_mb_type = ps_cur_deblk_mb->u1_mb_type | D_INTRA_MB; /* every MB parsed by this function is marked intra for deblocking */ + + /**************************************************************/ + /* Macroblock Layer Begins, Decode the u1_mb_type */ + /**************************************************************/ +//Inlined ih264d_uev + { + UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst; + UWORD32 u4_word, u4_ldz, u4_temp; + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf); + u4_ldz = CLZ(u4_word); + /* Flush the ps_bitstrm */ + u4_bitstream_offset += (u4_ldz + 1); + /* Read the suffix from the ps_bitstrm */ + u4_word = 0; + if(u4_ldz) + GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, + u4_ldz); + *pu4_bitstrm_ofst = u4_bitstream_offset; + u4_temp = ((1 << u4_ldz) + u4_word - 1); + if(u4_temp > 25) + return ERROR_MB_TYPE; /* 25 is the largest accepted value and is handled as I_PCM below */ + u1_mb_type = u4_temp; + + } +//Inlined ih264d_uev + ps_cur_mb_info->u1_mb_type = u1_mb_type; + COPYTHECONTEXT("u1_mb_type", 
u1_mb_type); + + /**************************************************************/ + /* Parse Macroblock data */ + /**************************************************************/ + if(25 == u1_mb_type) + { + /* I_PCM_MB */ + ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB; + ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs); + if(ret != OK) + return ret; + ps_cur_deblk_mb->u1_mb_qp = 0; /* I_PCM: the deblocking QP is recorded as 0 */ + } + else + { + ret = ih264d_parse_imb_cavlc(ps_dec, ps_cur_mb_info, u1_num_mbs, u1_mb_type); + if(ret != OK) + return ret; + ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp; + } + + if(u1_mbaff) + { + ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info); + } + /**************************************************************/ + /* Get next Macroblock address */ + /**************************************************************/ + + i2_cur_mb_addr++; + uc_more_data_flag = MORE_RBSP_DATA(ps_bitstrm); + + /* Store the colocated information */ + { + mv_pred_t *ps_mv_nmb_start = ps_dec->ps_mv_cur + (u1_num_mbs << 4); /* 16 mv_pred_t entries per MB */ + + mv_pred_t s_mvPred = + { + { 0, 0, 0, 0 }, + { -1, -1 }, 0, 0}; /* presumably zero MVs with reference indices of -1 for an intra MB - confirm against mv_pred_t */ + ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0, + (UWORD8)(ps_dec->u1_cur_mb_fld_dec_flag << 1), 4, + 4); + } + + /* if num_cores is 3 or more, compute bs is not called here (per the original note it is done in another thread) */ + if(ps_dec->u4_num_cores < 3) + { + if(ps_dec->u4_app_disable_deblk_frm == 0) + ps_dec->pf_compute_bs(ps_dec, ps_cur_mb_info, + (UWORD16)(u1_num_mbs >> u1_mbaff)); + } + u1_num_mbs++; + ps_dec->u2_total_mbs_coded++; + + /****************************************************************/ + /* Check for End Of Row */ + /****************************************************************/ + u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1; + u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01))); /* with MBAFF the row only ends after an even number of MBs */ + u1_tfr_n_mb = (u1_num_mbs == ps_dec->u1_recon_mb_grp) || u1_end_of_row + || (!uc_more_data_flag); /* flush when the recon group is full, the row ends or the slice data is exhausted */ + ps_cur_mb_info->u1_end_of_slice = (!uc_more_data_flag); + + /*H264_DEC_DEBUG_PRINT("Pic: %d 
Mb_X=%d Mb_Y=%d", + ps_slice->i4_poc >> ps_slice->u1_field_pic_flag, + ps_dec->u2_mbx,ps_dec->u2_mby + (1 - ps_cur_mb_info->u1_topmb)); + H264_DEC_DEBUG_PRINT("u1_tfr_n_mb || (!uc_more_data_flag): %d", u1_tfr_n_mb || (!uc_more_data_flag));*/ + if(u1_tfr_n_mb || (!uc_more_data_flag)) + { /* the second operand is already included in u1_tfr_n_mb */ + + if(ps_dec->u1_separate_parse) + { + ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, + u1_num_mbs_next, u1_tfr_n_mb, u1_end_of_row); + ps_dec->ps_nmb_info += u1_num_mbs; /* separate-parse mode: move the MB info base past the batch just handed over */ + } + else + { + ret = ih264d_decode_recon_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs, + u1_num_mbs_next, u1_tfr_n_mb, + u1_end_of_row); + if(ret != OK) + return ret; + } + + if(u1_tfr_n_mb) + u1_num_mbs = 0; + u1_mb_idx = u1_num_mbs; + ps_dec->u1_mb_idx = u1_num_mbs; + + } + } + while(uc_more_data_flag); + + if(ps_dec->u1_separate_parse) + { + ps_dec->ps_parse_cur_slice->end_of_slice = 1; + ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr + - (u2_first_mb_in_slice << u1_mbaff); /* number of MB addresses consumed by this slice */ + } + return OK; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_islice_data_cabac */ +/* */ +/* Description : This function parses CABAC syntax of an intra (I) slice on */ +/* N MB basis. */ +/* */ +/* Inputs : ps_dec */ +/* ps_slice */ +/* u2_first_mb_in_slice */ +/* */ +/* Processing : 1. After parsing syntax for N MBs those N MBs are */ +/* decoded till the end of slice.
*/
+/*                                                                           */
+/*  Returns       : OK when the slice has been parsed to its end.            */
+/*                  ERROR_MB_ADDRESS_T when the next macroblock address is   */
+/*                  beyond ps_cur_sps->u2_max_mb_addr, ERROR_MB_TYPE when    */
+/*                  the decoded mb_type exceeds 25, or the error code        */
+/*                  returned by ih264d_update_qp, the CABAC engine set-up,   */
+/*                  the macroblock parsers or ih264d_decode_recon_tfr_nmb.   */
+/*                                                                           */
+/*  Issues        : <List any issues or problems with this function>         */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         24 06 2005   ARNY            Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_parse_islice_data_cabac(dec_struct_t * ps_dec,
+                                      dec_slice_params_t * ps_slice,
+                                      UWORD16 u2_first_mb_in_slice)
+{
+    UWORD8 uc_more_data_flag;
+    UWORD8 u1_num_mbs, u1_mb_idx;
+    dec_mb_info_t *ps_cur_mb_info;
+    deblk_mb_t *ps_cur_deblk_mb;
+
+    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+    UWORD16 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
+    WORD16 i2_cur_mb_addr;
+    UWORD8 u1_mbaff;
+    UWORD8 u1_num_mbs_next, u1_end_of_row, u1_tfr_n_mb;
+    WORD32 ret;
+
+    ps_dec->u1_qp = ps_slice->u1_slice_qp;
+    ret = ih264d_update_qp(ps_dec, 0);
+    if(ret != 0)
+        return ret;
+    u1_mbaff = ps_slice->u1_mbaff_frame_flag;
+
+    /* CABAC slice data starts on a byte boundary: round the bit offset up */
+    if(ps_bitstrm->u4_ofst & 0x07)
+    {
+        ps_bitstrm->u4_ofst += 8;
+        ps_bitstrm->u4_ofst &= 0xFFFFFFF8;
+    }
+    ret = ih264d_init_cabac_dec_envirnoment(&(ps_dec->s_cab_dec_env), ps_bitstrm);
+    if(ret != OK)
+        return ret;
+    ih264d_init_cabac_contexts(I_SLICE, ps_dec);
+
+    ps_dec->i1_prev_mb_qp_delta = 0;
+
+    /* initializations */
+    u1_mb_idx = ps_dec->u1_mb_idx;
+    u1_num_mbs = u1_mb_idx;
+
+    uc_more_data_flag = 1;
+    i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff;
+    do
+    {
+        UWORD16 u2_mbx;
+        {
+            UWORD8 u1_mb_type;
+
+            /***************************************************************/
+            /* Reject an out-of-range macroblock address BEFORE anything   */
+            /* is parsed for it. Checking only after the parse (as was     */
+            /* done previously) lets a corrupt stream fill the per-MB      */
+            /* info, deblocking and motion vector entries for a            */
+            /* macroblock that lies outside the picture.                   */
+            /***************************************************************/
+            if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u2_max_mb_addr)
+                return ERROR_MB_ADDRESS_T;
+
+            ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
+            ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
+
+            ps_cur_mb_info->u1_end_of_slice = 0;
+
+            /***************************************************************/
+            /* Get the required information for decoding of MB             */
+            /* mb_x, mb_y , neighbour availablity,                         */
+            /***************************************************************/
+            ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, 0);
+            u2_mbx = ps_dec->u2_mbx;
+
+            /*********************************************************************/
+            /* initialize u1_tran_form8x8 to zero to avoid uninitialized accesses */
+            /*********************************************************************/
+            ps_cur_mb_info->u1_tran_form8x8 = 0;
+            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+            /***************************************************************/
+            /* Set the deblocking parameters for this MB                   */
+            /***************************************************************/
+            ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
+            if(ps_dec->u4_app_disable_deblk_frm == 0)
+                ih264d_set_deblocking_parameters(
+                                ps_cur_deblk_mb, ps_slice,
+                                ps_dec->u1_mb_ngbr_availablity,
+                                ps_dec->u1_cur_mb_fld_dec_flag);
+
+            ps_cur_deblk_mb->u1_mb_type = ps_cur_deblk_mb->u1_mb_type
+                            | D_INTRA_MB;
+
+            /* Macroblock Layer Begins */
+            /* Decode the u1_mb_type */
+            u1_mb_type = ih264d_parse_mb_type_intra_cabac(0, ps_dec);
+            if(u1_mb_type > 25)
+                return ERROR_MB_TYPE;
+            ps_cur_mb_info->u1_mb_type = u1_mb_type;
+            COPYTHECONTEXT("u1_mb_type", u1_mb_type);
+
+            /* Parse Macroblock Data */
+            if(25 == u1_mb_type)
+            {
+                /* I_PCM_MB */
+                ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
+                ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs);
+                if(ret != OK)
+                    return ret;
+                ps_cur_deblk_mb->u1_mb_qp = 0;
+            }
+            else
+            {
+                ret = ih264d_parse_imb_cabac(ps_dec, ps_cur_mb_info, u1_mb_type);
+                if(ret != OK)
+                    return ret;
+                ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
+            }
+
+            if(u1_mbaff)
+            {
+                ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
+            }
+            /* Next macroblock information */
+            i2_cur_mb_addr++;
+
+            /* The top MB of an MBAFF pair is always followed by its       */
+            /* bottom MB, so end_of_slice is decoded only otherwise.       */
+            if(ps_cur_mb_info->u1_topmb && u1_mbaff)
+                uc_more_data_flag = 1;
+            else
+            {
+                uc_more_data_flag = ih264d_decode_terminate(&ps_dec->s_cab_dec_env,
+                                                          ps_bitstrm);
+                uc_more_data_flag = !uc_more_data_flag;
+                COPYTHECONTEXT("Decode Sliceterm",!uc_more_data_flag);
+            }
+            /* Store the colocated information */
+            {
+
+                mv_pred_t *ps_mv_nmb_start = ps_dec->ps_mv_cur + (u1_num_mbs << 4);
+                mv_pred_t s_mvPred =
+                    {
+                        { 0, 0, 0, 0 },
+                          { -1, -1 }, 0, 0};
+                ih264d_rep_mv_colz(
+                                ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
+                                (UWORD8)(ps_dec->u1_cur_mb_fld_dec_flag << 1),
+                                4, 4);
+            }
+            /*if num _cores is set to 3,compute bs will be done in another thread*/
+            if(ps_dec->u4_num_cores < 3)
+            {
+                if(ps_dec->u4_app_disable_deblk_frm == 0)
+                    ps_dec->pf_compute_bs(ps_dec, ps_cur_mb_info,
+                                          (UWORD16)(u1_num_mbs >> u1_mbaff));
+            }
+            u1_num_mbs++;
+            ps_dec->u2_total_mbs_coded++;
+
+        }
+
+        /****************************************************************/
+        /* Check for End Of Row                                         */
+        /****************************************************************/
+        u1_num_mbs_next = i2_pic_wdin_mbs - u2_mbx - 1;
+        u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01)));
+        u1_tfr_n_mb = (u1_num_mbs == ps_dec->u1_recon_mb_grp) || u1_end_of_row
+                        || (!uc_more_data_flag);
+        ps_cur_mb_info->u1_end_of_slice = (!uc_more_data_flag);
+
+        if(u1_tfr_n_mb || (!uc_more_data_flag))
+        {
+
+
+            if(ps_dec->u1_separate_parse)
+            {
+                ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+                                     u1_num_mbs_next, u1_tfr_n_mb, u1_end_of_row);
+                ps_dec->ps_nmb_info += u1_num_mbs;
+            }
+            else
+            {
+                ret = ih264d_decode_recon_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+                                                  u1_num_mbs_next, u1_tfr_n_mb,
+                                                  u1_end_of_row);
+                if(ret != OK)
+                    return ret;
+            }
+
+            /* A transferred group restarts the N-MB window at index 0 */
+            if(u1_tfr_n_mb)
+                u1_num_mbs = 0;
+            u1_mb_idx = u1_num_mbs;
+            ps_dec->u1_mb_idx = u1_num_mbs;
+
+        }
+    }
+    while(uc_more_data_flag);
+
+    if(ps_dec->u1_separate_parse)
+    {
+        ps_dec->ps_parse_cur_slice->end_of_slice = 1;
+        ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr
+                        - (u2_first_mb_in_slice << u1_mbaff);
+    }
+    return OK;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_ipcm_mb                                     */
+/*                                                                           */
+/*  Description   : This function decodes the pixel values of I_PCM Mb.
*/
+/*                                                                           */
+/*  Inputs        : ps_dec, ps_cur_mb_info and mb number                     */
+/*                                                                           */
+/*  Description   : This function reads the luma and chroma pixels directly  */
+/*                  from the bitstream when the mbtype is I_PCM and stores   */
+/*                  them in recon buffer. If the entropy coding mode is      */
+/*                  cabac, decoding engine is re-initialized. The nnzs and   */
+/*                  cabac contexts are appropriately modified.               */
+/*  Returns       : OK, or the error code returned by                        */
+/*                  ih264d_init_cabac_dec_envirnoment when the CABAC engine  */
+/*                  cannot be re-initialized after the PCM samples.          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         13 07 2002   Jay                                                  */
+/*                                                                           */
+/*****************************************************************************/
+
+WORD32 ih264d_parse_ipcm_mb(dec_struct_t * ps_dec,
+                            dec_mb_info_t *ps_cur_mb_info,
+                            UWORD8 u1_mbNum)
+{
+    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+    UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+    UWORD8 *pu1_y, *pu1_u, *pu1_v;
+    WORD32 ret;
+
+    UWORD32 u4_rec_width_y, u4_rec_width_uv;
+    UWORD32 u1_num_mb_pair;
+    UWORD8 u1_x, u1_y;
+    /* CHANGED CODE */
+    tfr_ctxt_t *ps_frame_buf;
+    UWORD8 u1_mb_field_decoding_flag;
+    /* CHANGED CODE */
+
+    if(ps_dec->u1_separate_parse)
+    {
+        ps_frame_buf = &ps_dec->s_tran_addrecon_parse;
+    }
+    else
+    {
+        ps_frame_buf = &ps_dec->s_tran_addrecon;
+    }
+    /* align bistream to byte boundary. */
+    /* pcm_alignment_zero_bit discarded */
+    /* For XX GotoByteBoundary */
+    if(ps_bitstrm->u4_ofst & 0x07)
+    {
+        ps_bitstrm->u4_ofst += 8;
+        ps_bitstrm->u4_ofst &= 0xFFFFFFF8;
+    }
+
+    /* Store left Nnz as 16 for each 4x4 blk.                               */
+    /* The four NNZ bytes are filled with memset rather than through a      */
+    /* UWORD8* cast to UWORD32*: the cast store violates strict aliasing    */
+    /* and may be misaligned, while the bytes written are identical         */
+    /* (0x10 in each of the 4 bytes, independent of endianness).            */
+    memset(ps_dec->pu1_left_nnz_y, 0x10, 4);
+    memset(ps_cur_mb_info->ps_curmb->pu1_nnz_y, 0x10, 4);
+    memset(ps_cur_mb_info->ps_curmb->pu1_nnz_uv, 0x10, 4);
+    memset(ps_dec->pu1_left_nnz_uv, 0x10, 4);
+    ps_cur_mb_info->u1_cbp = 0xff;
+
+    ps_dec->i1_prev_mb_qp_delta = 0;
+    /* Get neighbour MB's */
+    u1_num_mb_pair = (u1_mbNum >> u1_mbaff);
+
+    /*****************************************************************************/
+    /* calculate the RECON buffer YUV pointers for the PCM data                  */
+    /*****************************************************************************/
+    /* CHANGED CODE */
+    u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
+    pu1_y = ps_frame_buf->pu1_dest_y + (u1_num_mb_pair << 4);
+    pu1_u = ps_frame_buf->pu1_dest_u + (u1_num_mb_pair << 4);
+    pu1_v = pu1_u + 1;
+
+    /* A field macroblock steps over every other frame row */
+    u4_rec_width_y = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag;
+    u4_rec_width_uv = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
+    /* CHANGED CODE */
+
+    if(u1_mbaff)
+    {
+        UWORD8 u1_top_mb;
+
+        u1_top_mb = ps_cur_mb_info->u1_topmb;
+
+        /* Bottom MB of the pair: move to its first row */
+        /* NOTE(review): the chroma offsets use the same shifts as luma    */
+        /* although only 8 chroma rows are written per MB - confirm        */
+        /* against how u2_frm_wd_uv is defined.                            */
+        if(u1_top_mb == 0)
+        {
+            pu1_y += (u1_mb_field_decoding_flag ?
+                            (u4_rec_width_y >> 1) : (u4_rec_width_y << 4));
+            pu1_u += (u1_mb_field_decoding_flag ?
+                            (u4_rec_width_uv) : (u4_rec_width_uv << 4));
+            pu1_v = pu1_u + 1;
+        }
+    }
+
+    /* Read Luma samples */
+    for(u1_y = 0; u1_y < 16; u1_y++)
+    {
+        for(u1_x = 0; u1_x < 16; u1_x++)
+            pu1_y[u1_x] = ih264d_get_bits_h264(ps_bitstrm, 8);
+
+        pu1_y += u4_rec_width_y;
+    }
+
+    /* Read Chroma samples: all Cb samples come first, then all Cr samples, */
+    /* each written with a stride of YUV420SP_FACTOR bytes                  */
+    for(u1_y = 0; u1_y < 8; u1_y++)
+    {
+        for(u1_x = 0; u1_x < 8; u1_x++)
+            pu1_u[u1_x * YUV420SP_FACTOR] = ih264d_get_bits_h264(ps_bitstrm, 8);
+
+        pu1_u += u4_rec_width_uv;
+    }
+
+    for(u1_y = 0; u1_y < 8; u1_y++)
+    {
+        for(u1_x = 0; u1_x < 8; u1_x++)
+            pu1_v[u1_x * YUV420SP_FACTOR] = ih264d_get_bits_h264(ps_bitstrm, 8);
+
+        pu1_v += u4_rec_width_uv;
+    }
+
+    if(CABAC == ps_dec->ps_cur_pps->u1_entropy_coding_mode)
+    {
+        ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
+        /* Re-initialize the cabac decoding engine. */
+        ret = ih264d_init_cabac_dec_envirnoment(&(ps_dec->s_cab_dec_env), ps_bitstrm);
+        if(ret != OK)
+            return ret;
+        /* update the cabac contexts */
+        p_curr_ctxt->u1_mb_type = CAB_I_PCM;
+        p_curr_ctxt->u1_cbp = 47;
+        p_curr_ctxt->u1_intra_chroma_pred_mode = 0;
+        p_curr_ctxt->u1_transform8x8_ctxt = 0;
+        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+        /* In CABAC mode the NNZ entries are overwritten with 1 per byte */
+        memset(ps_dec->pu1_left_nnz_y, 0x01, 4);
+        memset(ps_cur_mb_info->ps_curmb->pu1_nnz_y, 0x01, 4);
+        memset(ps_cur_mb_info->ps_curmb->pu1_nnz_uv, 0x01, 4);
+        memset(ps_dec->pu1_left_nnz_uv, 0x01, 4);
+
+        p_curr_ctxt->u1_yuv_dc_csbp = 0x7;
+        ps_dec->pu1_left_yuv_dc_csbp[0] = 0x7;
+        if(ps_dec->ps_cur_slice->u1_slice_type != I_SLICE)
+        {
+
+            MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
+            memset(ps_dec->pi1_left_ref_idx_ctxt_inc, 0, 4);
+            MEMSET_16BYTES(p_curr_ctxt->u1_mv, 0);
+            memset(p_curr_ctxt->i1_ref_idx, 0, 4);
+
+        }
+    }
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_islice \endif
+ *
+ * \brief
+ *    Parses the remainder of an I slice header (dec_ref_pic_marking,
+ *    slice_qp_delta and the deblocking filter parameters), selects the
+ *    CABAC or CAVLC get-mb-info routine and parses the slice data.
+ *
+ * \param ps_dec                 Decoder context
+ * \param u2_first_mb_in_slice   Address of the first macroblock of the slice
+ *
+ * \return
+ *    0 on Success and Error code otherwise (ERROR_INV_RANGE_QP_T,
+ *    ERROR_INV_SLICE_HDR_T or the code returned by the slice data parser)
+ **************************************************************************
+ */
+WORD32 ih264d_parse_islice(dec_struct_t *ps_dec,
+                           UWORD16 u2_first_mb_in_slice)
+{
+    dec_pic_params_t * ps_pps = ps_dec->ps_cur_pps;
+    dec_slice_params_t * ps_slice = ps_dec->ps_cur_slice;
+    UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstrm_ofst = &ps_dec->ps_bitstrm->u4_ofst;
+    UWORD32 u4_temp;
+    WORD32 i_temp;
+    WORD32 ret;
+
+    /*--------------------------------------------------------------------*/
+    /* Read remaining contents of the slice header                        */
+    /*--------------------------------------------------------------------*/
+    /* dec_ref_pic_marking function */
+    /* G050 */
+    if(ps_slice->u1_nal_ref_idc != 0)
+    {
+        /* Either parse the MMCO commands now, or skip the bits they       */
+        /* occupied when they were read earlier                            */
+        if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read)
+            ps_dec->u4_bitoffset = ih264d_read_mmco_commands(
+                            ps_dec);
+        else
+            ps_dec->ps_bitstrm->u4_ofst += ps_dec->u4_bitoffset;
+    }
+    /* G050 */
+
+    /* Read slice_qp_delta */
+    i_temp = ps_pps->u1_pic_init_qp
+                    + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+    if((i_temp < 0) || (i_temp > 51))
+        return ERROR_INV_RANGE_QP_T;
+    ps_slice->u1_slice_qp = i_temp;
+    COPYTHECONTEXT("SH: slice_qp_delta",
+                    ps_slice->u1_slice_qp - ps_pps->u1_pic_init_qp);
+
+    if(ps_pps->u1_deblocking_filter_parameters_present_flag == 1)
+    {
+        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+        COPYTHECONTEXT("SH: disable_deblocking_filter_idc", u4_temp);
+
+        if(u4_temp > SLICE_BOUNDARY_DBLK_DISABLED)
+        {
+            return ERROR_INV_SLICE_HDR_T;
+        }
+        ps_slice->u1_disable_dblk_filter_idc = u4_temp;
+        if(u4_temp != 1)
+        {
+            /* The bitstream carries the offsets divided by two. The      */
+            /* div2 value is range-checked BEFORE it is doubled: doubling */
+            /* with "<< 1" is undefined for negative values and can       */
+            /* overflow for a corrupt stream. The accepted set matches    */
+            /* the old check on the doubled value, assuming               */
+            /* MIN_DBLK_FIL_OFF <= 0 <= MAX_DBLK_FIL_OFF - TODO confirm.  */
+            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+            if((i_temp < ((MIN_DBLK_FIL_OFF) / 2))
+                            || (i_temp > ((MAX_DBLK_FIL_OFF) / 2)))
+            {
+                return ERROR_INV_SLICE_HDR_T;
+            }
+            ps_slice->i1_slice_alpha_c0_offset = i_temp * 2;
+            COPYTHECONTEXT("SH: slice_alpha_c0_offset_div2",
+                            ps_slice->i1_slice_alpha_c0_offset >> 1);
+
+            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+            if((i_temp < ((MIN_DBLK_FIL_OFF) / 2))
+                            || (i_temp > ((MAX_DBLK_FIL_OFF) / 2)))
+            {
+                return ERROR_INV_SLICE_HDR_T;
+            }
+            ps_slice->i1_slice_beta_offset = i_temp * 2;
+            COPYTHECONTEXT("SH: slice_beta_offset_div2",
+                            ps_slice->i1_slice_beta_offset >> 1);
+
+        }
+        else
+        {
+            ps_slice->i1_slice_alpha_c0_offset = 0;
+            ps_slice->i1_slice_beta_offset = 0;
+        }
+    }
+    else
+    {
+        ps_slice->u1_disable_dblk_filter_idc = 0;
+        ps_slice->i1_slice_alpha_c0_offset = 0;
+        ps_slice->i1_slice_beta_offset = 0;
+    }
+
+    /* Initialization to check if number of motion vector per 2 Mbs */
+    /* are exceeding the range or not */
+    ps_dec->u2_mv_2mb[0] = 0;
+    ps_dec->u2_mv_2mb[1] = 0;
+
+
+    /* set slice_header_done to 2, to indicate a correct header */
+    DATA_SYNC();
+    ps_dec->ps_parse_cur_slice->slice_header_done = 2;
+
+    if(ps_pps->u1_entropy_coding_mode)
+    {
+        SWITCHOFFTRACE; SWITCHONTRACECABAC;
+        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
+        {
+            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_mbaff;
+        }
+        else
+            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_nonmbaff;
+
+        ret = ih264d_parse_islice_data_cabac(ps_dec, ps_slice,
+                                             u2_first_mb_in_slice);
+        if(ret != OK)
+            return ret;
+        SWITCHONTRACE; SWITCHOFFTRACECABAC;
+        if(ps_dec->ps_parse_cur_slice->u2_error_flag == 1)
+            return 0;
+
+    }
+    else
+    {
+        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
+        {
+            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_mbaff;
+        }
+        else
+            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_nonmbaff;
+        ret = ih264d_parse_islice_data_cavlc(ps_dec, ps_slice,
+                                             u2_first_mb_in_slice);
+        if(ret != OK)
+            return ret;
+    }
+
+    return OK;
+}
diff --git a/decoder/ih264d_parse_islice.h b/decoder/ih264d_parse_islice.h
new file mode 100755
index 0000000..6a43d7b
--- /dev/null
+++ b/decoder/ih264d_parse_islice.h
@@ -0,0 +1,113 @@
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*! + ************************************************************************** + * \file ih264d_parse_islice.h + * + * \brief + * Contains routines that decode a I slice type + * + * Detailed_description + * + * \date + * 07/07/2003 + * + * \author NS + ************************************************************************** + */ + +#ifndef _IH264D_PARSE_ISLICE_H_ +#define _IH264D_PARSE_ISLICE_H_ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_tables.h" + +WORD32 ih264d_parse_residual4x4_cavlc(dec_struct_t * ps_dec, + dec_mb_info_t *ps_cur_mb_info, + UWORD8 u1_offset); +WORD32 ih264d_parse_residual4x4_cabac(dec_struct_t * ps_dec, + dec_mb_info_t *ps_cur_mb_info, + UWORD8 u1_offset); +WORD32 ih264d_parse_imb_cavlc(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num, + UWORD8 u1_mb_type); +WORD32 ih264d_parse_imb_cabac(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_type); + +WORD32 ih264d_parse_islice_data_cavlc(dec_struct_t * ps_dec, + dec_slice_params_t * ps_slice, + UWORD16 
u2_first_mb_in_slice); +WORD32 ih264d_parse_islice_data_cabac(dec_struct_t * ps_dec, + dec_slice_params_t * ps_slice, + UWORD16 u2_first_mb_in_slice); +WORD32 ih264d_parse_pmb_cavlc(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num, + UWORD8 u1_num_mbsNby2); +WORD32 ih264d_parse_pmb_cabac(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num, + UWORD8 u1_num_mbsNby2); + +WORD32 ih264d_parse_bmb_non_direct_cavlc(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num, + UWORD8 u1_mbNumModNBy2); + +WORD32 ih264d_parse_bmb_non_direct_cabac(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num, + UWORD8 u1_mbNumModNBy2); + +WORD32 ih264d_parse_bmb_cavlc(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num, + UWORD8 u1_num_mbsNby2); + +WORD32 ih264d_parse_bmb_cabac(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num, + UWORD8 u1_num_mbsNby2); + +WORD32 ih264d_parse_inter_slice_data_cavlc(dec_struct_t * ps_dec, + dec_slice_params_t * ps_slice, + UWORD16 u2_first_mb_in_slice); + +WORD32 ih264d_parse_inter_slice_data_cabac(dec_struct_t * ps_dec, + dec_slice_params_t * ps_slice, + UWORD16 u2_first_mb_in_slice); + +WORD32 ParseBMb(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num, + UWORD8 u1_num_mbsNby2); + +WORD32 ih264d_parse_ipcm_mb(dec_struct_t * ps_dec, + dec_mb_info_t *ps_cur_mb_info, + UWORD8 u1_mbNum); +WORD32 ih264d_parse_islice(dec_struct_t *ps_dec, + UWORD16 u2_first_mb_in_slice); + +#endif /* _IH264D_PARSE_ISLICE_H_ */ diff --git a/decoder/ih264d_parse_mb_header.c b/decoder/ih264d_parse_mb_header.c new file mode 100755 index 0000000..f30ad67 --- /dev/null +++ b/decoder/ih264d_parse_mb_header.c @@ -0,0 +1,1397 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 
2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! + *************************************************************************** + * \file ih264d_parse_mb_header.c + * + * \brief + * This file contains context identifier encoding routines. + * + * \date + * 04/02/2003 + * + * \author NS + *************************************************************************** + */ +#include <string.h> +#include "ih264d_structs.h" +#include "ih264d_bitstrm.h" +#include "ih264d_cabac.h" +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_defs.h" +#include "ih264d_error_handler.h" +#include "ih264d_tables.h" +#include "ih264d_debug.h" +#include "ih264d_defs.h" +#include "ih264d_defs.h" +#include "ih264d_mb_utils.h" +#include "ih264d_parse_mb_header.h" +#include "ih264d_defs.h" + +/*! < CtxtInc index 0 - CtxMbTypeI, CtxMbTypeSISuffix + index 1 - CtxMbTypePSuffix, CtxMbTypeBSuffix + */ + + + +/*! + ************************************************************************** + * \if Function name : ih264d_parse_mb_type_intra_cabac \endif + * + * \brief + * This function decodes MB type using CABAC entropy coding mode. + * + * \return + * MBType. 
+ *
+ **************************************************************************
+ */
+UWORD8 ih264d_parse_mb_type_intra_cabac(UWORD8 u1_inter,
+                                        struct _DecStruct * ps_dec)
+{
+    /*
+     * Decodes the intra mb_type bin string. u1_inter == 0 selects the
+     * I-slice form, where bin 0 uses a context derived from the left and
+     * top neighbours; otherwise the intra suffix form used inside inter
+     * slices is decoded from an offset context table.
+     * Result: 0 when bin 0 is 0, 25 for I_PCM, otherwise 1 plus the
+     * offsets selected by the remaining bins.
+     */
+    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+    bin_ctxt_model_t *ps_ctxt_tbl = ps_dec->p_mb_type_t;
+    UWORD32 u4_ctx = 0;
+    WORD8 i1_type, i1_bins;
+
+    if(u1_inter)
+    {
+        /* suffix contexts sit after the prefix ones of the P/B table */
+        ps_ctxt_tbl = ps_ctxt_tbl + 3 + (ps_dec->u1_B << 1);
+    }
+    else
+    {
+        ctxt_inc_mb_info_t * ps_left = ps_dec->p_left_ctxt_mb_info;
+        ctxt_inc_mb_info_t * ps_top = ps_dec->p_top_ctxt_mb_info;
+
+        /* an available neighbour that is not I4x4 adds one each */
+        if(ps_left != ps_dec->ps_def_ctxt_mb_info)
+            u4_ctx += ((ps_left->u1_mb_type != CAB_I4x4) ? 1 : 0);
+        if(ps_top != ps_dec->ps_def_ctxt_mb_info)
+            u4_ctx += ((ps_top->u1_mb_type != CAB_I4x4) ? 1 : 0);
+    }
+
+    /* b0 */
+    i1_type = (UWORD8)ih264d_decode_bin(u4_ctx, ps_ctxt_tbl, ps_bitstrm,
+                                        ps_cab_env);
+    if(!i1_type)
+        return (i1_type);
+
+    /* b1 is the terminate bin: set means I_PCM */
+    i1_bins = ih264d_decode_terminate(ps_cab_env, ps_bitstrm);
+    if(i1_bins != 0)
+    {
+        i1_type = 25;
+        return (i1_type);
+    }
+
+    /* I16x16: b2 and b3 are read together */
+    u4_ctx = (u1_inter) ? 0x021 : 0x043;
+    i1_bins = ih264d_decode_bins(2, u4_ctx, ps_ctxt_tbl, ps_bitstrm,
+                                 ps_cab_env);
+
+    if(i1_bins & 0x02)
+        i1_type += 12;
+
+    if(i1_bins & 0x01)
+    {
+        i1_type += 4;
+        /* b3 = 1: three more bins follow */
+        u4_ctx = (u1_inter) ? 0x0332 : 0x0765;
+        i1_bins = (UWORD8)ih264d_decode_bins(3, u4_ctx, ps_ctxt_tbl,
+                                             ps_bitstrm, ps_cab_env);
+    }
+    else
+    {
+        /* b3 = 0: two more bins follow */
+        u4_ctx = (u1_inter) ? 0x033 : 0x076;
+        i1_bins = (UWORD8)ih264d_decode_bins(2, u4_ctx, ps_ctxt_tbl,
+                                             ps_bitstrm, ps_cab_env);
+    }
+    i1_type += i1_bins;
+
+    return (i1_type);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_mb_type_cabac \endif
+ *
+ * \brief
+ *    This function decodes MB type using CABAC entropy coding mode.
+ *    The bin string that is read depends on the slice type of the current
+ *    slice (SI, P or B). Intra macroblocks inside these slices are decoded
+ *    through ih264d_parse_mb_type_intra_cabac and offset by 1 (SI),
+ *    5 (P) or 23 (B).
+ *
+ * \param ps_dec  Decoder context; supplies the slice type, the CABAC
+ *                engine, the bitstream and the left/top neighbour contexts.
+ *
+ * \return
+ *    MBType. 0 is returned for any slice type other than SI, P and B,
+ *    because no bins are decoded in that case.
+ *
+ **************************************************************************
+ */
+UWORD32 ih264d_parse_mb_type_cabac(struct _DecStruct * ps_dec)
+{
+    const UWORD8 uc_slice_type = ps_dec->ps_cur_slice->u1_slice_type;
+    decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
+    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
+    ctxt_inc_mb_info_t *ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
+    ctxt_inc_mb_info_t *ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
+    WORD8 c_ctxt_inc;
+    bin_ctxt_model_t *ps_mb_bin_ctxt = ps_dec->p_mb_type_t;
+    WORD8 u1_mb_type = 0, u1_bin;
+    UWORD32 u4_cxt_inc;
+
+    INC_SYM_COUNT(ps_cab_env);
+
+    c_ctxt_inc = 0;
+
+    if(uc_slice_type == SI_SLICE)
+    {
+        /* b0 */
+        /* context increment: one for each available neighbour that is  */
+        /* not an SI4x4 macroblock                                       */
+        if(ps_left_ctxt != ps_dec->ps_def_ctxt_mb_info)
+            c_ctxt_inc += ((ps_left_ctxt->u1_mb_type != CAB_SI4x4) ? 1 : 0);
+        if(ps_top_ctxt != ps_dec->ps_def_ctxt_mb_info)
+            c_ctxt_inc += ((ps_top_ctxt->u1_mb_type != CAB_SI4x4) ? 1 : 0);
+
+        u4_cxt_inc = c_ctxt_inc;
+        u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
+                                       ps_cab_env);
+        if(u1_bin == 0)
+        {
+            /* SI MB */
+            u1_mb_type = 0;
+        }
+        else
+        {
+            /* intra MB types follow the single SI type, hence the +1 */
+            u1_mb_type = 1 + ih264d_parse_mb_type_intra_cabac(0, ps_dec);
+        }
+    }
+    else if(uc_slice_type == P_SLICE)
+    {
+        /* P Slice */
+        /* b0 */
+        u4_cxt_inc = 0;
+        u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
+                                       ps_cab_env);
+        if(!u1_bin)
+        {
+            /* Inter MB types */
+            /* b1 */
+            u4_cxt_inc = 0x01;
+            u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
+                                           ps_bitstrm, ps_cab_env);
+            /* b2 : its context depends on the value of b1 */
+            u4_cxt_inc = u1_bin + 2;
+            u1_mb_type = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
+                                               ps_bitstrm, ps_cab_env);
+            /* (b1,b2) read as a 2-bit number: 0 stays 0, any other value */
+            /* v is mapped to 4 - v                                        */
+            u1_mb_type = (u1_bin << 1) + u1_mb_type;
+            if(u1_mb_type)
+                u1_mb_type = 4 - u1_mb_type;
+        }
+        else
+        {
+            /* Intra Prefix 1 found */
+            /* Intra MB type */
+            u1_mb_type = 5 + ih264d_parse_mb_type_intra_cabac(1, ps_dec);
+        }
+    }
+    else if(uc_slice_type == B_SLICE)
+    {
+        WORD8 a, b;
+        /* B Slice */
+        /* b0 */
+        /* a = b = 0, if B slice and MB is a SKIP or B_DIRECT16x16 */
+        a = 0;
+        b = 0;
+        u1_mb_type = 0;
+        if(ps_left_ctxt != ps_dec->ps_def_ctxt_mb_info)
+            a = ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16);
+        if(ps_top_ctxt != ps_dec->ps_def_ctxt_mb_info)
+            b = ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16);
+
+        u4_cxt_inc = a + b;
+
+        u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
+                                       ps_cab_env);
+
+        /* b0 == 0 leaves u1_mb_type at 0 */
+        if(u1_bin)
+        {
+
+            /* b1 */
+            u4_cxt_inc = 0x03;
+            u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
+                                           ps_bitstrm, ps_cab_env);
+
+            if(!u1_bin)
+            {
+                /* b2 */
+                /* prefix "10": mb_type is 1 or 2 */
+                u4_cxt_inc = 0x05;
+                u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
+                                               ps_bitstrm, ps_cab_env);
+
+                u1_mb_type = u1_bin + 1;
+            }
+            else
+            {
+                u1_mb_type = 3;
+                /* b2 */
+                u4_cxt_inc = 0x04;
+                u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
+                                               ps_bitstrm, ps_cab_env);
+
+                if(u1_bin)
+                {
+                    /* prefix "111": base value becomes 11 */
+                    u1_mb_type += 8;
+                    /* b3 */
+                    u4_cxt_inc = 0x05;
+                    u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
+                                                   ps_bitstrm, ps_cab_env);
+
+                    if(!u1_bin)
+                    {
+                        /* prefix "1110": 12 plus the next three bins */
+                        u1_mb_type++;
+                        /* b4, b5, b6 */
+                        u4_cxt_inc = 0x0555;
+                        u1_bin = (UWORD8)ih264d_decode_bins(3, u4_cxt_inc,
+                                                        ps_mb_bin_ctxt,
+                                                        ps_bitstrm,
+                                                        ps_cab_env);
+
+
+
+                        u1_mb_type += u1_bin;
+                    }
+                    else
+                    {
+                        /* b4 */
+                        u4_cxt_inc = 0x05;
+                        u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc,
+                                                       ps_mb_bin_ctxt,
+                                                       ps_bitstrm,
+                                                       ps_cab_env);
+
+                        if(u1_bin)
+                        {
+                            /* b5 */
+                            /* prefix "11111": result is 11 or 22 */
+                            u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc,
+                                                           ps_mb_bin_ctxt,
+                                                           ps_bitstrm,
+                                                           ps_cab_env);
+
+                            u1_mb_type += (u1_bin ? 11 : 0);
+                        }
+                        else
+                        {
+                            u1_mb_type = 20;
+                            /* b5 */
+                            u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc,
+                                                           ps_mb_bin_ctxt,
+                                                           ps_bitstrm,
+                                                           ps_cab_env);
+
+                            if(!u1_bin)
+                            {
+                                /* b6 */
+                                /* prefix "111100": result is 20 or 21 */
+                                u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc,
+                                                               ps_mb_bin_ctxt,
+                                                               ps_bitstrm,
+                                                               ps_cab_env);
+
+                                u1_mb_type += u1_bin;
+                            }
+                            else
+                            {
+                                /* Intra Prefix 111101 found */
+                                /* Intra MB type */
+                                u1_mb_type =
+                                                23
+                                                                + ih264d_parse_mb_type_intra_cabac(
+                                                                                1,
+                                                                                ps_dec);
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    /* b3, b4, b5 */
+                    /* prefix "110": 3 plus the next three bins */
+                    u4_cxt_inc = 0x0555;
+                    u1_bin = (UWORD8)ih264d_decode_bins(3, u4_cxt_inc,
+                                                    ps_mb_bin_ctxt, ps_bitstrm,
+                                                    ps_cab_env);
+
+
+
+
+                    u1_mb_type += u1_bin;
+                }
+            }
+        }
+    }
+    return ((UWORD32)u1_mb_type);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_submb_type_cabac \endif
+ *
+ * \brief
+ *    This function decodes sub-MB type using CABAC entropy coding mode.
+ *
+ * \return
+ *    Sub-MB type.
+ *
+ **************************************************************************
+ */
+UWORD32 ih264d_parse_submb_type_cabac(const UWORD8 u1_slc_type_b,
+                                      decoding_envirnoment_t * ps_cab_env,
+                                      dec_bit_stream_t * ps_bitstrm,
+                                      bin_ctxt_model_t * ps_sub_mb_cxt)
+{
+    /*
+     * u1_slc_type_b selects the B-slice bin string (non-zero) or the
+     * P-slice one (zero). Each branch returns as soon as the bins read so
+     * far identify the sub-MB type.
+     */
+    WORD8 i1_type = 0;
+    WORD8 i1_bin;
+
+    INC_SYM_COUNT(ps_cab_env);
+
+    /* b0 */
+    i1_bin = (UWORD8)ih264d_decode_bin(0, ps_sub_mb_cxt, ps_bitstrm,
+                                       ps_cab_env);
+
+    /* type 0 when b0 differs from the slice-type flag */
+    if(u1_slc_type_b ^ i1_bin)
+        return 0;
+
+    if(!u1_slc_type_b)
+    {
+        /* P Slice */
+        i1_type = 1;
+        i1_bin = (UWORD8)ih264d_decode_bin(1, ps_sub_mb_cxt, ps_bitstrm,
+                                           ps_cab_env);
+        if(i1_bin != 1)
+            return i1_type;
+
+        i1_bin = (UWORD8)ih264d_decode_bin(2, ps_sub_mb_cxt, ps_bitstrm,
+                                           ps_cab_env);
+        i1_type = (2 + (!i1_bin));
+        return i1_type;
+    }
+
+    /* B Slice */
+
+    /* b1 */
+    i1_bin = (UWORD8)ih264d_decode_bin(1, ps_sub_mb_cxt, ps_bitstrm,
+                                       ps_cab_env);
+    if(!i1_bin)
+    {
+        /* b2 */
+        i1_bin = (UWORD8)ih264d_decode_bin(3, ps_sub_mb_cxt, ps_bitstrm,
+                                           ps_cab_env);
+        return (1 + i1_bin);
+    }
+
+    /* b2 */
+    i1_bin = (UWORD8)ih264d_decode_bin(2, ps_sub_mb_cxt, ps_bitstrm,
+                                       ps_cab_env);
+    if(!i1_bin)
+    {
+        /* b3, b4 */
+        i1_bin = (UWORD8)ih264d_decode_bins(2, 0x33, ps_sub_mb_cxt,
+                                            ps_bitstrm, ps_cab_env);
+        return (3 + i1_bin);
+    }
+
+    /* b3 */
+    i1_bin = (UWORD8)ih264d_decode_bin(3, ps_sub_mb_cxt, ps_bitstrm,
+                                       ps_cab_env);
+    i1_type = 7 + (i1_bin << 2);
+
+    /* b4: a single bin when b3 was set, two bins otherwise */
+    if(i1_bin)
+    {
+        i1_bin = ih264d_decode_bin(3, ps_sub_mb_cxt, ps_bitstrm,
+                                   ps_cab_env);
+    }
+    else
+    {
+        i1_bin = (UWORD8)ih264d_decode_bins(2, 0x33, ps_sub_mb_cxt,
+                                            ps_bitstrm, ps_cab_env);
+    }
+
+    return (i1_type + i1_bin);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_ref_idx_cabac \endif
+ *
+ * \brief
+ *    This function decodes Reference Index using CABAC entropy coding mode.
+ *
+ * \return
+ *    OK, or ERROR_REF_IDX when a decoded index is negative or larger than
+ *    u1_max_ref_minus1
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_parse_ref_idx_cabac(const UWORD8 u1_num_part,
+                                  const UWORD8 u1_b2,
+                                  const UWORD8 u1_max_ref_minus1,
+                                  const UWORD8 u1_mb_mode,
+                                  WORD8 * pi1_ref_idx,
+                                  WORD8 * const pi1_lft_cxt,
+                                  WORD8 * const pi1_top_cxt,
+                                  decoding_envirnoment_t * const ps_cab_env,
+                                  dec_bit_stream_t * const ps_bitstrm,
+                                  bin_ctxt_model_t * const ps_ref_cxt)
+{
+    UWORD8 u1_blk = 0;
+    UWORD8 u1_part;
+
+    for(u1_part = 0; u1_part < u1_num_part; u1_part++)
+    {
+        /* slots of the left / top context arrays used by this partition */
+        const UWORD8 u1_lft = ((u1_blk & 0x02) >> 1) + u1_b2;
+        const UWORD8 u1_top = (u1_blk & 0x01) + u1_b2;
+        WORD8 i1_ref = pi1_ref_idx[u1_part];
+
+        /* only entries holding a positive value are decoded */
+        if(i1_ref > 0)
+        {
+            const UWORD8 u1_lft_set = pi1_lft_cxt[u1_lft] > 0;
+            const UWORD8 u1_top_set = pi1_top_cxt[u1_top] > 0;
+            UWORD32 u4_ctx = u1_lft_set + (u1_top_set << 1);
+
+            u4_ctx = (u4_ctx | 0x55540);
+
+            i1_ref = (WORD8)ih264d_decode_bins_unary(32, u4_ctx,
+                                                     ps_ref_cxt, ps_bitstrm,
+                                                     ps_cab_env);
+
+            if((i1_ref > u1_max_ref_minus1) || (i1_ref < 0))
+            {
+                return ERROR_REF_IDX;
+            }
+
+            pi1_ref_idx[u1_part] = i1_ref;
+
+            INC_SYM_COUNT(ps_cab_env);
+
+        }
+
+        /* Storing Reference Idx Information */
+        pi1_lft_cxt[u1_lft] = i1_ref;
+        pi1_top_cxt[u1_top] = i1_ref;
+        u1_blk = u1_blk + 1 + (u1_mb_mode & 0x01);
+    }
+
+    /* fewer than four partitions: replicate the first slot as selected   */
+    /* by the two low bits of u1_mb_mode                                  */
+    if(u1_num_part != 4)
+    {
+        pi1_lft_cxt[(!(u1_mb_mode & 0x1)) + u1_b2] = pi1_lft_cxt[u1_b2];
+        pi1_top_cxt[(!(u1_mb_mode & 0x2)) + u1_b2] = pi1_top_cxt[u1_b2];
+    }
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_mb_qp_delta_cabac \endif
+ *
+ * \brief
+ *    This function decodes MB Qp delta using CABAC entropy coding mode.
+ *
+ *    The decoded value is written to *pi1_mb_qp_delta even when it is out
+ *    of range; ps_dec->i1_prev_mb_qp_delta is updated only on success.
+ *
+ * \return
+ *    OK on success, ERROR_INV_RANGE_QP_T when the decoded delta lies
+ *    outside -26..+25
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_parse_mb_qp_delta_cabac(struct _DecStruct * ps_dec,
+                                      WORD8 *pi1_mb_qp_delta)
+{
+    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+    bin_ctxt_model_t *ps_mb_qp_delta_ctxt = ps_dec->p_mb_qp_delta_t;
+    UWORD32 u4_cxt_inc;
+    UWORD8 u1_code_num;
+    WORD8 i1_delta;
+
+    INC_SYM_COUNT(ps_cab_env);
+
+    /* First-bin context is 1 when the previous MB had a non-zero delta */
+    u4_cxt_inc = (ps_dec->i1_prev_mb_qp_delta != 0);
+    u4_cxt_inc = (u4_cxt_inc | 0x33320);
+
+    /* max number of bins = 53,
+     since Range for MbQpDelta= -26 to +25 inclusive, UNARY code */
+    u1_code_num = ih264d_decode_bins_unary(32, u4_cxt_inc, ps_mb_qp_delta_ctxt,
+                                           ps_bitstrm, ps_cab_env);
+    if(u1_code_num == 32)
+    {
+        /* Read remaining 21 bins */
+        u1_code_num += (UWORD8)ih264d_decode_bins_unary(21, 0x33333,
+                                                        ps_mb_qp_delta_ctxt,
+                                                        ps_bitstrm,
+                                                        ps_cab_env);
+    }
+
+    /* Table 9.3: If code_num is even Syntax Element has -ve value */
+    i1_delta = (u1_code_num + 1) >> 1;
+    if(!(u1_code_num & 0x01))
+    {
+        i1_delta = -i1_delta;
+    }
+    *pi1_mb_qp_delta = i1_delta;
+
+    /* Range of MbQpDelta= -26 to +25 inclusive */
+    if((i1_delta < -26) || (i1_delta > 25))
+    {
+        return ERROR_INV_RANGE_QP_T;
+    }
+
+    ps_dec->i1_prev_mb_qp_delta = i1_delta;
+    return OK;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_chroma_pred_mode_cabac \endif
+ *
+ * \brief
+ *    This function decodes Chroma Pred mode using CABAC entropy coding mode.
+ *
+ * \return
+ *    The decoded chroma prediction mode (truncated unary, Cmax = 3)
+ *
+ **************************************************************************
+ */
+WORD8 ih264d_parse_chroma_pred_mode_cabac(struct _DecStruct * ps_dec)
+{
+    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+    ctxt_inc_mb_info_t * ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
+    ctxt_inc_mb_info_t * ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
+    UWORD32 u4_cxt_inc = 0;
+
+    INC_SYM_COUNT(ps_cab_env);
+
+    /* First-bin context: number of neighbours whose stored mode is non-zero */
+    if(ps_left_ctxt->u1_intra_chroma_pred_mode != 0)
+    {
+        u4_cxt_inc++;
+    }
+    if(ps_top_ctxt->u1_intra_chroma_pred_mode != 0)
+    {
+        u4_cxt_inc++;
+    }
+
+    /* Binarization is TU and Cmax=3 */
+    return (WORD8)ih264d_decode_bins_tunary(3, u4_cxt_inc | 0x330,
+                                            ps_dec->p_intra_chroma_pred_mode_t,
+                                            ps_bitstrm, ps_cab_env);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_transform8x8flag_cabac                      */
+/*                                                                           */
+/*  Description   : Decodes the transform 8x8 flag as one CABAC decision     */
+/*                  bin. The context increment is the sum of the left and    */
+/*                  top u1_transform8x8_ctxt values; a neighbour that is     */
+/*                  not flagged available contributes 0.                     */
+/*  Inputs        : ps_dec         - decoder state                           */
+/*                  ps_cur_mb_info - supplies the neighbour availability     */
+/*                                                                           */
+/*  Returns       : the decoded bin                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*      DD MM YYYY   Author(s)       Changes (Describe the changes made)     */
+/*                   Rajasekhar      Creation                                */
+/*                                                                           */
+/*****************************************************************************/
+UWORD8 ih264d_parse_transform8x8flag_cabac(struct _DecStruct * ps_dec,
+                                           dec_mb_info_t * ps_cur_mb_info)
+{
+    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+    const UWORD8 u1_avail = ps_cur_mb_info->u1_mb_ngbr_availablity;
+
+    /* The neighbour contexts are read only when the neighbour is available */
+    const WORD8 i1_left = (u1_avail & LEFT_MB_AVAILABLE_MASK) ?
+                    ps_dec->p_left_ctxt_mb_info->u1_transform8x8_ctxt : 0;
+    const WORD8 i1_top = (u1_avail & TOP_MB_AVAILABLE_MASK) ?
+                    ps_dec->p_top_ctxt_mb_info->u1_transform8x8_ctxt : 0;
+    const UWORD32 u4_cxt_inc = i1_left + i1_top;
+
+    /* Binarization is FLC */
+    return (UWORD8)ih264d_decode_bin(
+                    u4_cxt_inc, ps_dec->s_high_profile.ps_transform8x8_flag,
+                    ps_bitstrm, ps_cab_env);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_read_intra_pred_modes_cabac \endif
+ *
+ * \brief
+ *    Reads the intra pred mode related values of an I4x4 MB from the
+ *    bitstream.
+ *
+ *    For each block (16 when u1_tran_form8x8 is 0, otherwise 4) the prev
+ *    intra pred mode flag is decoded and stored in
+ *    pu1_prev_intra4x4_pred_mode_flag. If the flag is 0, three more bins
+ *    are decoded, least significant bit first, and stored in
+ *    pu1_rem_intra4x4_pred_mode; otherwise -1 (0xFF once narrowed to
+ *    UWORD8) is stored there.
+ *
+ * \return
+ *    Always 0
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_read_intra_pred_modes_cabac(dec_struct_t * ps_dec,
+                                          UWORD8 * pu1_prev_intra4x4_pred_mode_flag,
+                                          UWORD8 * pu1_rem_intra4x4_pred_mode,
+                                          UWORD8 u1_tran_form8x8)
+{
+    const UWORD32 u4_num_blks = (0 == u1_tran_form8x8) ? 16 : 4;
+    UWORD32 u4_blk;
+    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+    bin_ctxt_model_t *ps_ctxt_ipred_luma_mpm, *ps_ctx_ipred_luma_rm;
+    WORD32 i4_rem_intra4x4_pred_mode;
+    UWORD32 u4_prev_intra4x4_pred_mode_flag;
+    UWORD32 u4_code_int_range, u4_code_int_val_ofst;
+    const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
+
+    ps_ctxt_ipred_luma_mpm = ps_dec->p_prev_intra4x4_pred_mode_flag_t;
+    ps_ctx_ipred_luma_rm = ps_dec->p_rem_intra4x4_pred_mode_t;
+    SWITCHOFFTRACE;
+
+    /* Work on local copies of the arithmetic decoder state */
+    u4_code_int_range = ps_cab_env->u4_code_int_range;
+    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+
+    for(u4_blk = 0; u4_blk < u4_num_blks; u4_blk++)
+    {
+        DECODE_ONE_BIN_MACRO(ps_ctxt_ipred_luma_mpm, u4_code_int_range,
+                             u4_code_int_val_ofst, pu4_table, ps_bitstrm,
+                             u4_prev_intra4x4_pred_mode_flag)
+        pu1_prev_intra4x4_pred_mode_flag[u4_blk] =
+                        u4_prev_intra4x4_pred_mode_flag;
+
+        i4_rem_intra4x4_pred_mode = -1;
+        if(!u4_prev_intra4x4_pred_mode_flag)
+        {
+            /*inlining DecodeDecisionBins_FLC*/
+            UWORD32 u4_value = 0;
+            UWORD32 u4_symbol;
+            UWORD32 u4_bit;
+
+            /* Three bins, the first decoded bin being bit 0 of the value */
+            for(u4_bit = 0; u4_bit < 3; u4_bit++)
+            {
+                DECODE_ONE_BIN_MACRO(ps_ctx_ipred_luma_rm, u4_code_int_range,
+                                     u4_code_int_val_ofst, pu4_table,
+                                     ps_bitstrm, u4_symbol)
+
+                INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
+
+                u4_value = u4_value | (u4_symbol << u4_bit);
+            }
+
+            i4_rem_intra4x4_pred_mode = (u4_value);
+        }
+
+        pu1_rem_intra4x4_pred_mode[u4_blk] = i4_rem_intra4x4_pred_mode;
+
+        COPYTHECONTEXT("intra4x4_pred_mode", i4_rem_intra4x4_pred_mode);
+    }
+
+    /* Publish the updated arithmetic decoder state */
+    ps_cab_env->u4_code_int_range = u4_code_int_range;
+    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+
+    return (0);
+
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_ctx_cbp_cabac \endif
+ *
+ * \brief
+ *    This function decodes CtxCbpLuma and CtxCbpChroma (CBP of a Macroblock).
+ *    using CABAC entropy coding mode.
+ *
+ * \return
+ *    CBP of a MB.
+ *
+ **************************************************************************
+ */
+UWORD32 ih264d_parse_ctx_cbp_cabac(struct _DecStruct * ps_dec)
+{
+    /*
+     * Layout of the returned value, as assembled below:
+     *   bits 0..3 : the four luma bins, first decoded bin in bit 0
+     *   bits 4..5 : the chroma value (0, 1 or 2)
+     * The arithmetic decoder state and the bitstream offset are copied into
+     * locals, updated by the inlined decode steps and written back just
+     * before returning.
+     */
+
+    UWORD32 u4_cxt_inc;
+    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+    ctxt_inc_mb_info_t * ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
+    ctxt_inc_mb_info_t * ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
+    bin_ctxt_model_t *ps_ctxt_cbp_luma = ps_dec->p_cbp_luma_t, *ps_bin_ctxt;
+    WORD8 c_Cbp;
+    UWORD32 u4_code_int_range, u4_code_int_val_ofst;
+    UWORD32 u4_offset, *pu4_buffer;
+    const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
+
+    INC_SYM_COUNT(ps_cab_env);
+
+
+
+    /* CBP Luma, FL, Cmax = 15, L = 4 */
+    /* Bin 0 context: +2 if bit 2 of the top MB's cbp is clear, */
+    /*                +1 if bit 1 of the left MB's cbp is clear */
+    u4_cxt_inc = (!((ps_top_ctxt->u1_cbp >> 2) & 0x01)) << 1;
+    u4_cxt_inc += !((ps_left_ctxt->u1_cbp >> 1) & 0x01);
+
+    u4_offset = ps_bitstrm->u4_ofst;
+    pu4_buffer = ps_bitstrm->pu4_buffer;
+
+    u4_code_int_range = ps_cab_env->u4_code_int_range;
+    u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+    /* Renormalize once up front so that 23 more bits are available in */
+    /* u4_code_int_val_ofst; the four luma bins below are then decoded */
+    /* without any further renormalization                             */
+    {
+        UWORD32 u4_clz, read_bits;
+
+        u4_clz = CLZ(u4_code_int_range);
+        FLUSHBITS(u4_offset, u4_clz)
+        NEXTBITS(read_bits, u4_offset, pu4_buffer, 23)
+        u4_code_int_range = u4_code_int_range << u4_clz;
+        u4_code_int_val_ofst = (u4_code_int_val_ofst << u4_clz) | read_bits;
+    }
+
+    ps_bin_ctxt = ps_ctxt_cbp_luma + u4_cxt_inc;
+
+    /*inlining DecodeDecision_onebin without renorm*/
+    /* Luma bin 0 */
+    {
+
+        UWORD32 u4_qnt_int_range, u4_int_range_lps;
+        UWORD32 u4_symbol, u1_mps_state;
+        UWORD32 table_lookup;
+        UWORD32 u4_clz;
+
+        u1_mps_state = (ps_bin_ctxt->u1_mps_state);
+
+        /* Two bits below the leading one of the range select the table column */
+        u4_clz = CLZ(u4_code_int_range);
+        u4_qnt_int_range = u4_code_int_range << u4_clz;
+        u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+
+        /* Low byte of the table entry is the LPS sub-range */
+        table_lookup = pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
+        u4_int_range_lps = table_lookup & 0xff;
+
+        /* Scale the LPS sub-range to the current position of the range */
+        u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+        u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+
+        /* Bit 6 of the stored state is taken as the symbol */
+        u4_symbol = ((u1_mps_state >> 6) & 0x1);
+
+        /* Next state for the MPS case: bits 8..14 of the table entry */
+        u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+        /* Presumably replaces symbol, range, offset and state when the */
+        /* offset falls in the LPS sub-range - see the macro definition */
+        CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
+                     u4_int_range_lps, u1_mps_state, table_lookup)
+
+        INC_BIN_COUNT(ps_cab_env);
+
+        ps_bin_ctxt->u1_mps_state = u1_mps_state;
+
+        c_Cbp = u4_symbol;
+
+    }
+
+    /* Bin 1 context: +2 if bit 3 of the top MB's cbp is clear, */
+    /*                +1 if bin 0 just decoded is 0             */
+    u4_cxt_inc = (!((ps_top_ctxt->u1_cbp >> 3) & 0x01)) << 1;
+    u4_cxt_inc += !(c_Cbp & 0x01);
+    ps_bin_ctxt = ps_ctxt_cbp_luma + u4_cxt_inc;
+    /*inlining DecodeDecision_onebin without renorm*/
+    /* Luma bin 1 : same steps as bin 0 */
+
+    {
+
+        UWORD32 u4_qnt_int_range, u4_int_range_lps;
+        UWORD32 u4_symbol, u1_mps_state;
+        UWORD32 table_lookup;
+        UWORD32 u4_clz;
+
+        u1_mps_state = (ps_bin_ctxt->u1_mps_state);
+
+        u4_clz = CLZ(u4_code_int_range);
+        u4_qnt_int_range = u4_code_int_range << u4_clz;
+        u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+
+        table_lookup = pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
+        u4_int_range_lps = table_lookup & 0xff;
+
+        u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+        u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+
+        u4_symbol = ((u1_mps_state >> 6) & 0x1);
+
+        /*if mps*/
+        u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+        CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
+                     u4_int_range_lps, u1_mps_state, table_lookup)
+
+        INC_BIN_COUNT(ps_cab_env);
+
+        ps_bin_ctxt->u1_mps_state = u1_mps_state;
+
+        c_Cbp |= u4_symbol << 1;
+
+    }
+
+    /* Bin 2 context: +2 if bin 0 is 0, */
+    /*                +1 if bit 3 of the left MB's cbp is clear */
+    u4_cxt_inc = (!(c_Cbp & 0x01)) << 1;
+    u4_cxt_inc += !((ps_left_ctxt->u1_cbp >> 3) & 0x01);
+    ps_bin_ctxt = ps_ctxt_cbp_luma + u4_cxt_inc;
+    /*inlining DecodeDecision_onebin without renorm*/
+    /* Luma bin 2 : same steps as bin 0 */
+
+    {
+
+        UWORD32 u4_qnt_int_range, u4_int_range_lps;
+        UWORD32 u4_symbol, u1_mps_state;
+        UWORD32 table_lookup;
+        UWORD32 u4_clz;
+
+        u1_mps_state = (ps_bin_ctxt->u1_mps_state);
+
+        u4_clz = CLZ(u4_code_int_range);
+        u4_qnt_int_range = u4_code_int_range << u4_clz;
+        u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+
+        table_lookup = pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
+        u4_int_range_lps = table_lookup & 0xff;
+
+        u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+        u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+
+        u4_symbol = ((u1_mps_state >> 6) & 0x1);
+
+        /*if mps*/
+        u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+        CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
+                     u4_int_range_lps, u1_mps_state, table_lookup)
+
+        INC_BIN_COUNT(ps_cab_env);
+
+        ps_bin_ctxt->u1_mps_state = u1_mps_state;
+
+        c_Cbp |= u4_symbol << 2;
+
+    }
+
+    /* Bin 3 context: +2 if bin 1 is 0, +1 if bin 2 is 0 */
+    u4_cxt_inc = (!((c_Cbp >> 1) & 0x01)) << 1;
+    u4_cxt_inc += !((c_Cbp >> 2) & 0x01);
+    ps_bin_ctxt = ps_ctxt_cbp_luma + u4_cxt_inc;
+    /*inlining DecodeDecision_onebin without renorm*/
+    /* Luma bin 3 : same steps as bin 0 */
+
+    {
+
+        UWORD32 u4_qnt_int_range, u4_int_range_lps;
+        UWORD32 u4_symbol, u1_mps_state;
+        UWORD32 table_lookup;
+        UWORD32 u4_clz;
+
+        u1_mps_state = (ps_bin_ctxt->u1_mps_state);
+
+        u4_clz = CLZ(u4_code_int_range);
+        u4_qnt_int_range = u4_code_int_range << u4_clz;
+        u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+
+        table_lookup = pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
+        u4_int_range_lps = table_lookup & 0xff;
+
+        u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+        u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+
+        u4_symbol = ((u1_mps_state >> 6) & 0x1);
+
+        /*if mps*/
+        u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+        CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
+                     u4_int_range_lps, u1_mps_state, table_lookup)
+
+        INC_BIN_COUNT(ps_cab_env);
+
+        ps_bin_ctxt->u1_mps_state = u1_mps_state;
+
+        c_Cbp |= u4_symbol << 3;
+
+    }
+
+    /* The luma bins skipped renormalization; catch up before chroma */
+    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)
+    {
+
+        RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
+                            pu4_buffer)
+
+    }
+
+    {
+        /* This u4_cxt_inc shadows the function-level variable of that name */
+        UWORD32 u4_cxt_inc;
+        WORD8 a, b, c, d;
+        bin_ctxt_model_t *p_CtxtCbpChroma = ps_dec->p_cbp_chroma_t;
+
+        /* CBP Chroma, TU, Cmax = 2 */
+        a = 0;
+        b = 0;
+        c = 0;
+        d = 0;
+
+        /* Top neighbour: a feeds the first bin's context, c the second's */
+        {
+            a = (ps_top_ctxt->u1_cbp > 15) ? 2 : 0;
+            c = (ps_top_ctxt->u1_cbp > 31) ? 2 : 0;
+        }
+
+        /* Left neighbour: b feeds the first bin's context, d the second's */
+        {
+            b = (ps_left_ctxt->u1_cbp > 15) ? 1 : 0;
+            d = (ps_left_ctxt->u1_cbp > 31) ? 1 : 0;
+        }
+        /* Nibble 0 = context of the first bin, nibble 1 = second bin (4..7) */
+        u4_cxt_inc = a + b;
+        u4_cxt_inc = (u4_cxt_inc | ((4 + c + d) << 4));
+
+        /*inlining ih264d_decode_bins_tunary */
+
+        {
+
+            UWORD8 u1_max_bins = 2;
+            UWORD32 u4_ctx_inc = u4_cxt_inc;
+
+            UWORD32 u4_value;
+            UWORD32 u4_symbol;
+            UWORD8 u4_ctx_Inc;
+            bin_ctxt_model_t *ps_bin_ctxt;
+            u4_value = 0;
+
+            do
+            {
+                /* Consume one context nibble per bin */
+                u4_ctx_Inc = u4_ctx_inc & 0xF;
+                u4_ctx_inc = u4_ctx_inc >> 4;
+
+                ps_bin_ctxt = p_CtxtCbpChroma + u4_ctx_Inc;
+                /*inlining DecodeDecision_onebin*/
+                /* Same steps as the luma bins, but renormalizing per bin */
+                {
+
+                    UWORD32 u4_qnt_int_range, u4_int_range_lps;
+
+                    UWORD32 u1_mps_state;
+                    UWORD32 table_lookup;
+                    UWORD32 u4_clz;
+
+                    u1_mps_state = (ps_bin_ctxt->u1_mps_state);
+
+                    u4_clz = CLZ(u4_code_int_range);
+                    u4_qnt_int_range = u4_code_int_range << u4_clz;
+                    u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+
+                    table_lookup = pu4_table[(u1_mps_state << 2)
+                                    + u4_qnt_int_range];
+                    u4_int_range_lps = table_lookup & 0xff;
+
+                    u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+                    u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+
+                    u4_symbol = ((u1_mps_state >> 6) & 0x1);
+
+                    /*if mps*/
+                    u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+                    CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst,
+                                 u4_symbol, u4_int_range_lps, u1_mps_state,
+                                 table_lookup)
+
+                    if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)
+                    {
+                        RENORM_RANGE_OFFSET(u4_code_int_range,
+                                            u4_code_int_val_ofst, u4_offset,
+                                            pu4_buffer)
+                    }
+                    ps_bin_ctxt->u1_mps_state = u1_mps_state;
+                }
+
+                INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(
+                                ps_cab_env);
+
+                u4_value++;
+            }
+            /* Bitwise & on a comparison result and the decoded symbol */
+            while((u4_value < u1_max_bins) & (u4_symbol));
+
+            /* Bins read minus one, plus the last symbol: 0, 1 or 2 */
+            u4_value = u4_value - 1 + u4_symbol;
+
+            a = (u4_value);
+
+        }
+
+        /* Chroma value goes above the four luma bits */
+        c_Cbp = (c_Cbp | (a << 4));
+    }
+
+    /* Write the locally advanced bitstream offset and decoder state back */
+    ps_bitstrm->u4_ofst = u4_offset;
+
+    ps_cab_env->u4_code_int_range = u4_code_int_range;
+    ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+
+    return (c_Cbp);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_get_mvd_cabac \endif
+ *
+ * \brief
+ *    This function decodes Horz and Vert mvd_l0 and mvd_l1 using CABAC entropy
+ *    coding mode as defined in 9.3.2.3.
+ *
+ *    u1_sub_mb  : low two bits select the column used for the top context,
+ *                 the remaining bits the row used for the left context
+ *    u1_b2      : index into the context rows and into ps_mv->i2_mv
+ *    u1_part_wd,
+ *    u1_part_ht : number of top / left context entries to update
+ *    u1_dec_mvd : when 0 nothing is decoded, ps_mv is left untouched and
+ *                 zeros are stored in the contexts
+ *
+ * \return
+ *    None
+ *
+ **************************************************************************
+ */
+void ih264d_get_mvd_cabac(UWORD8 u1_sub_mb,
+                          UWORD8 u1_b2,
+                          UWORD8 u1_part_wd,
+                          UWORD8 u1_part_ht,
+                          UWORD8 u1_dec_mvd,
+                          dec_struct_t *ps_dec,
+                          mv_pred_t *ps_mv)
+{
+    UWORD8 u1_abs_mvd_x = 0, u1_abs_mvd_y = 0;
+    UWORD8 u1_sub_mb_x, u1_sub_mb_y;
+    UWORD8 *pu1_top_mv_ctxt, *pu1_lft_mv_ctxt;
+    WORD16 *pi2_mv;
+
+    u1_sub_mb_x = (UWORD8)(u1_sub_mb & 0x03);
+    u1_sub_mb_y = (UWORD8)(u1_sub_mb >> 2);
+    pu1_top_mv_ctxt = &ps_dec->ps_curr_ctxt_mb_info->u1_mv[u1_sub_mb_x][u1_b2];
+    pu1_lft_mv_ctxt = &ps_dec->pu1_left_mv_ctxt_inc[u1_sub_mb_y][u1_b2];
+    pi2_mv = &ps_mv->i2_mv[u1_b2];
+
+    if(u1_dec_mvd)
+    {
+        WORD16 i2_mv_x, i2_mv_y;
+        /* Despite the i2_ prefix this is a 32-bit sum of two context bytes */
+        WORD32 i2_temp;
+        {
+            decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+            dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+            UWORD16 u2_abs_mvd_x_a, u2_abs_mvd_x_b, u2_abs_mvd_y_a,
+                            u2_abs_mvd_y_b;
+
+            /* Entry [0] holds the stored |mvd x|, entry [1] the stored |mvd y| */
+            u2_abs_mvd_x_b = (UWORD16)pu1_top_mv_ctxt[0];
+            u2_abs_mvd_y_b = (UWORD16)pu1_top_mv_ctxt[1];
+            u2_abs_mvd_x_a = (UWORD16)pu1_lft_mv_ctxt[0];
+            u2_abs_mvd_y_a = (UWORD16)pu1_lft_mv_ctxt[1];
+
+            /* The x component is decoded before the y component */
+            i2_temp = u2_abs_mvd_x_a + u2_abs_mvd_x_b;
+
+            i2_mv_x = ih264d_parse_mvd_cabac(ps_bitstrm, ps_cab_env,
+                                             ps_dec->p_mvd_x_t, i2_temp);
+
+            i2_temp = u2_abs_mvd_y_a + u2_abs_mvd_y_b;
+
+            i2_mv_y = ih264d_parse_mvd_cabac(ps_bitstrm, ps_cab_env,
+                                             ps_dec->p_mvd_y_t, i2_temp);
+        }
+
+        /***********************************************************************/
+        /* Store the abs_mvd_values in cabac contexts                          */
+        /* The following code can be easily optimized if mvX, mvY clip values  */
+        /* are packed in 16 bits followed by memcpy                            */
+        /***********************************************************************/
+        u1_abs_mvd_x = CLIP3(0, 127, ABS(i2_mv_x));
+        u1_abs_mvd_y = CLIP3(0, 127, ABS(i2_mv_y));
+
+        COPYTHECONTEXT("MVD", i2_mv_x);COPYTHECONTEXT("MVD", i2_mv_y);
+
+        /* Storing Mv residuals */
+        pi2_mv[0] = i2_mv_x;
+        pi2_mv[1] = i2_mv_y;
+    }
+
+    /***************************************************************/
+    /* Store abs_mvd_values cabac contexts                         */
+    /***************************************************************/
+#ifndef ARM
+    {
+        /* Consecutive context entries are 4 bytes apart */
+        UWORD8 u1_i;
+        for(u1_i = 0; u1_i < u1_part_wd; u1_i++, pu1_top_mv_ctxt += 4)
+        {
+            pu1_top_mv_ctxt[0] = u1_abs_mvd_x;
+            pu1_top_mv_ctxt[1] = u1_abs_mvd_y;
+        }
+
+        for(u1_i = 0; u1_i < u1_part_ht; u1_i++, pu1_lft_mv_ctxt += 4)
+        {
+            pu1_lft_mv_ctxt[0] = u1_abs_mvd_x;
+            pu1_lft_mv_ctxt[1] = u1_abs_mvd_y;
+        }
+    }
+#else
+    /* Optimising the loop, with Little-Endian Assumption */
+    /* NOTE(review): the UWORD8 pointers are accessed through UWORD16 */
+    /* pointers, which needs 2-byte alignment of the context entries; */
+    /* confirm for every u1_b2 value the callers pass.                */
+    /* NOTE(review): the unrolled stores write 1, 2 or 4 entries. A   */
+    /* width or height of 0 or 3 also ends up writing 4 entries,      */
+    /* unlike the loop version above; presumably only 1, 2 and 4      */
+    /* occur - confirm.                                               */
+    {
+        UWORD16 *pu2_top_cxt = (UWORD16 *)pu1_top_mv_ctxt;
+        UWORD16 *pu2_lft_cxt = (UWORD16 *)pu1_lft_mv_ctxt;
+        /* x in the low byte, y in the high byte */
+        UWORD16 u2_pack_mvd = (UWORD16)((u1_abs_mvd_y << 8) | u1_abs_mvd_x);
+        UWORD8 u1_wd = u1_part_wd, u1_ht = u1_part_ht;
+
+        /* First top entry is always written */
+        u1_wd--;
+        *pu2_top_cxt = u2_pack_mvd;
+        pu2_top_cxt += 2;
+        if(u1_wd)
+        {
+            u1_wd--;
+            *pu2_top_cxt = u2_pack_mvd;
+            pu2_top_cxt += 2;
+        }
+        /* Anything still left is treated as two more entries */
+        if(u1_wd)
+        {
+            *pu2_top_cxt = u2_pack_mvd;
+            pu2_top_cxt += 2;
+            *pu2_top_cxt = u2_pack_mvd;
+        }
+        /* Same pattern for the left entries */
+        u1_ht--;
+        *pu2_lft_cxt = u2_pack_mvd;
+        pu2_lft_cxt += 2;
+        if(u1_ht)
+        {
+            u1_ht--;
+            *pu2_lft_cxt = u2_pack_mvd;
+            pu2_lft_cxt += 2;
+        }
+        if(u1_ht)
+        {
+            *pu2_lft_cxt = u2_pack_mvd;
+            pu2_lft_cxt += 2;
+            *pu2_lft_cxt = u2_pack_mvd;
+        }
+    }
+#endif
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_mvd_cabac                                   */
+/*                                                                           */
+/*  Description   : This cabac function decodes the mvd in a given           */
+/*                  direction ( x or y ) as defined in 9.3.2.3.              */
+/*                                                                           */
+/*  Inputs        : 1. pointer to Bitstream                                  */
+/*                  2. pointer to cabac decoding environment                 */
+/*                  3.
pointer to Mvd context */
+/*                  4. i4_temp : sum of two neighbouring absolute mvd        */
+/*                     values (ih264d_get_mvd_cabac passes left + top);      */
+/*                     it only selects the context of the first bin          */
+/*                                                                           */
+/*  Processing    : see section 9.3.2.3 of the standard                      */
+/*                  Up to 9 context-coded prefix bins, then, if all nine     */
+/*                  are set, a bypass-coded suffix, then a bypass-coded      */
+/*                  sign bit for any non-zero value                          */
+/*                                                                           */
+/*  Outputs       : i2_mvd                                                   */
+/*  Returns       : i2_mvd                                                   */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*      DD MM YYYY   Author(s)       Changes (Describe the changes made)     */
+/*      16 06 2005   Jay             Draft                                   */
+/*                                                                           */
+/*****************************************************************************/
+WORD16 ih264d_parse_mvd_cabac(dec_bit_stream_t * ps_bitstrm,
+                              decoding_envirnoment_t * ps_cab_env,
+                              bin_ctxt_model_t * p_ctxt_mvd,
+                              UWORD32 i4_temp)
+
+{
+    WORD8 k;
+    WORD16 i2_suf;
+    WORD16 i2_mvd;
+    UWORD16 u2_abs_mvd;
+    UWORD32 u4_ctx_inc;
+    UWORD32 u4_prefix;
+    const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
+    UWORD32 u4_code_int_range, u4_code_int_val_ofst;
+
+    /* if mvd < 9                                                    */
+    /* mvd = Prefix                                                  */
+    /* else                                                          */
+    /* mvd = Prefix + Suffix                                         */
+    /* decode sign bit                                               */
+    /* Prefix TU decoding Cmax =Ucoff and Suffix 3rd order Exp-Golomb */
+
+    /* First-bin context: 0 for a sum below 3, 2 above 32, else 1 */
+    u2_abs_mvd = (UWORD16)i4_temp;
+    u4_ctx_inc = 1;
+
+    if(u2_abs_mvd < 3)
+        u4_ctx_inc = 0;
+    else if(u2_abs_mvd > 32)
+        u4_ctx_inc = 2;
+
+    /* One nibble per bin: bins 1..4 use contexts 3, 4, 5 and 6 */
+    u4_ctx_inc = (u4_ctx_inc | 0x65430);
+
+    /*inlining modified version of ih264d_decode_bins_unary*/
+
+    {
+        UWORD8 u1_max_bins = 9;
+        UWORD32 u4_value;
+        UWORD32 u4_symbol;
+        bin_ctxt_model_t *ps_bin_ctxt;
+        UWORD32 u4_ctx_Inc;
+
+        u4_value = 0;
+        /* Work on local copies of the arithmetic decoder state */
+        u4_code_int_range = ps_cab_env->u4_code_int_range;
+        u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+
+        /* First five bins, each with its own context nibble */
+        do
+        {
+            u4_ctx_Inc = u4_ctx_inc & 0xf;
+            u4_ctx_inc = u4_ctx_inc >> 4;
+
+            ps_bin_ctxt = p_ctxt_mvd + u4_ctx_Inc;
+
+            DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range,
+                                 u4_code_int_val_ofst, pu4_table, ps_bitstrm,
+                                 u4_symbol)
+
+            INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
+
+            u4_value++;
+
+        }
+        while(u4_symbol && u4_value < 5);
+
+        /* All remaining prefix bins share context 6 */
+        ps_bin_ctxt = p_ctxt_mvd + 6;
+
+        if(u4_symbol && (u4_value < u1_max_bins))
+        {
+
+            do
+            {
+
+                DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range,
+                                     u4_code_int_val_ofst, pu4_table,
+                                     ps_bitstrm, u4_symbol)
+
+                INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
+                u4_value++;
+            }
+            while(u4_symbol && (u4_value < u1_max_bins));
+
+        }
+
+        /* Store the state back: the bypass helpers below take ps_cab_env */
+        ps_cab_env->u4_code_int_range = u4_code_int_range;
+        ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+        /* Bins read minus one, plus the last symbol: 0..9 */
+        u4_value = u4_value - 1 + u4_symbol;
+        u4_prefix = (u4_value);
+    }
+
+    i2_mvd = u4_prefix;
+
+    if(i2_mvd == 9)
+    {
+        /* Read Suffix */
+        /* NOTE(review): k comes straight from the bitstream and the   */
+        /* sums are kept in 16-bit variables; confirm that             */
+        /* ih264d_decode_bypass_bins_unary bounds its result.          */
+        k = ih264d_decode_bypass_bins_unary(ps_cab_env, ps_bitstrm);
+        /* Escape offset ((1 << k) - 1) * 8, then k + 3 literal bits */
+        i2_suf = (1 << k) - 1;
+        k = k + 3;
+        i2_suf = (i2_suf << 3);
+        i2_mvd += i2_suf;
+        i2_suf = ih264d_decode_bypass_bins(ps_cab_env, k, ps_bitstrm);
+        i2_mvd += i2_suf;
+    }
+    /* Read Sign bit */
+    /* A zero mvd carries no sign bit */
+    if(!i2_mvd)
+        return (i2_mvd);
+
+    else
+    {
+        /* These locals shadow the function-level variables of the same name */
+        UWORD32 u4_code_int_val_ofst, u4_code_int_range;
+
+        u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+        u4_code_int_range = ps_cab_env->u4_code_int_range;
+
+        if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
+        {
+            UWORD32 *pu4_buffer, u4_offset;
+
+            pu4_buffer = ps_bitstrm->pu4_buffer;
+            u4_offset = ps_bitstrm->u4_ofst;
+
+            RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst,
+                                u4_offset, pu4_buffer)
+            ps_bitstrm->u4_ofst = u4_offset;
+        }
+
+        /* Inlined bypass decode: halve the range and compare the offset */
+        u4_code_int_range = u4_code_int_range >> 1;
+
+        if(u4_code_int_val_ofst >= u4_code_int_range)
+        {
+            /* S=1 */
+            u4_code_int_val_ofst -= u4_code_int_range;
+            i2_mvd = (-i2_mvd);
+        }
+
+        ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+        ps_cab_env->u4_code_int_range = u4_code_int_range;
+
+        return (i2_mvd);
+
+    }
+}
diff --git a/decoder/ih264d_parse_mb_header.h b/decoder/ih264d_parse_mb_header.h
new file mode 100755
index 0000000..63067b9
--- /dev/null
+++ b/decoder/ih264d_parse_mb_header.h
@@ -0,0 +1,88 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ *
you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! + *************************************************************************** + * \file ih264d_parse_mb_header.h + * + * \brief + * This file contains context identifier decoding routines. + * + * \date + * 04/02/2003 + * + * \author NS + *************************************************************************** + */ +#ifndef _IH264D_PARSE_MB_HEADER_H_ +#define _IH264D_PARSE_MB_HEADER_H_ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_structs.h" +#include "ih264d_cabac.h" + +WORD32 ih264d_read_intra_pred_modes_cabac(dec_struct_t * ps_dec, + UWORD8 * pu1_prev_intra4x4_pred_mode_flag, + UWORD8 * pu1_rem_intra4x4_pred_mode, + UWORD8 u1_tran_form8x8); + +UWORD32 ih264d_parse_mb_type_cabac(struct _DecStruct * ps_dec); +UWORD8 ih264d_parse_mb_type_intra_cabac(UWORD8 u1_inter, + struct _DecStruct * ps_dec); + +UWORD32 ih264d_parse_submb_type_cabac(const UWORD8 u1_slc_type_p, + decoding_envirnoment_t * ps_cab_env, + dec_bit_stream_t * ps_bitstrm, + bin_ctxt_model_t * ps_sub_mb_cxt); +WORD32 ih264d_parse_ref_idx_cabac(const UWORD8 u1_num_part, + const UWORD8 u1_b2, + const UWORD8 u1_max_ref_minus1, + const UWORD8 u1_mb_mode, + WORD8 * pi1_ref_idx, + WORD8 * const pi1_lft_cxt, + WORD8 * const pi1_top_cxt, + decoding_envirnoment_t * const ps_cab_env, + 
dec_bit_stream_t * const ps_bitstrm, + bin_ctxt_model_t * const ps_ref_cxt); + +WORD32 ih264d_parse_mb_qp_delta_cabac(struct _DecStruct * ps_dec, + WORD8 *pi1_mb_qp_delta); +WORD8 ih264d_parse_chroma_pred_mode_cabac(struct _DecStruct * ps_dec); + +UWORD32 ih264d_parse_ctx_cbp_cabac(struct _DecStruct * ps_dec); + +UWORD8 ih264d_parse_transform8x8flag_cabac(struct _DecStruct * ps_dec, + dec_mb_info_t * ps_cur_mb_info); + +void ih264d_get_mvd_cabac(UWORD8 u1_sub_mb, + UWORD8 u1_b2, + UWORD8 u1_part_wd, + UWORD8 u1_part_ht, + UWORD8 u1_dec_mvd, + dec_struct_t *ps_dec, + mv_pred_t *ps_mv); + +WORD16 ih264d_parse_mvd_cabac(dec_bit_stream_t * ps_bitstrm, + decoding_envirnoment_t * ps_cab_env, + bin_ctxt_model_t * p_ctxt_mvd, + UWORD32 temp); + +#endif /* _IH264D_PARSE_MB_HEADER_H_ */ diff --git a/decoder/ih264d_parse_pslice.c b/decoder/ih264d_parse_pslice.c new file mode 100755 index 0000000..67d1405 --- /dev/null +++ b/decoder/ih264d_parse_pslice.c @@ -0,0 +1,1760 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*! 
+ ************************************************************************** + * \file ih264d_parse_pslice.c + * + * \brief + * Contains routines that decode a P slice type + * + * Detailed_description + * + * \date + * 07/07/2003 + * + * \author NS + ************************************************************************** + */ + +#include <string.h> +#include "ih264d_bitstrm.h" +#include "ih264d_defs.h" +#include "ih264d_debug.h" +#include "ih264d_tables.h" +#include "ih264d_structs.h" +#include "ih264d_defs.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_mb_utils.h" +#include "ih264d_parse_slice.h" +#include "ih264d_mvpred.h" +#include "ih264d_parse_islice.h" +#include "ih264d_process_intra_mb.h" +#include "ih264d_inter_pred.h" +#include "ih264d_process_pslice.h" +#include "ih264d_deblocking.h" +#include "ih264d_cabac.h" +#include "ih264d_parse_mb_header.h" +#include "ih264d_error_handler.h" +#include "ih264d_defs.h" +#include "ih264d_format_conv.h" +#include "ih264d_quant_scaling.h" +#include "ih264d_thread_parse_decode.h" +#include "ih264d_process_bslice.h" +#include "ithread.h" +#include "ih264d_utils.h" +#include "ih264d_format_conv.h" + +void ih264d_init_cabac_contexts(UWORD8 u1_slice_type, dec_struct_t * ps_dec); +void ih264d_deblock_mb_level(dec_struct_t *ps_dec, + dec_mb_info_t *ps_cur_mb_info, + UWORD32 nmb_index); + +/*! + ************************************************************************** + * \if Function name : ih264d_parse_pmb_cavlc \endif + * + * \brief + * This function parses CAVLC syntax of a P MB.
+ *
+ * \param ps_dec          Decoder state. The bitstream (ps_bitstrm), the current
+ *                        slice, the partition cursor (ps_part) and the MV
+ *                        buffer (ps_mv_cur) are all reached through it.
+ * \param ps_cur_mb_info  Current MB info. u1_mb_type is read; u1_cbp,
+ *                        u1_tran_form8x8, u1_mb_mc_mode and
+ *                        u1_yuv_dc_block_flag are written.
+ * \param u1_mb_num       MB index; MV residuals are stored starting at
+ *                        ps_mv_cur + (u1_mb_num << 4).
+ * \param u1_num_mbsNby2  Index of this MB's entry in ps_dec->ps_parse_mb_data.
+ *
+ * \return
+ *    OK on success. ERROR_SUB_MB_TYPE, ERROR_CBP, ERROR_INV_RANGE_QP_T or
+ *    ERROR_EOB_TERMINATE_T on malformed data, or the error code returned by
+ *    a callee (ref-index parsing, QP update, residual parsing).
+ **************************************************************************
+ */
+WORD32 ih264d_parse_pmb_cavlc(dec_struct_t * ps_dec,
+                              dec_mb_info_t * ps_cur_mb_info,
+                              UWORD8 u1_mb_num,
+                              UWORD8 u1_num_mbsNby2)
+{
+    UWORD32 u1_num_mb_part;
+    UWORD32 uc_sub_mb;
+    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+    UWORD32 * const pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+
+    parse_pmbarams_t * ps_parse_mb_data = ps_dec->ps_parse_mb_data
+                    + u1_num_mbsNby2;
+    WORD8 * pi1_ref_idx = ps_parse_mb_data->i1_ref_idx[0];
+    const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+    const UWORD8 * pu1_num_mb_part = (const UWORD8 *)gau1_ih264d_num_mb_part;
+    UWORD8 * pu1_col_info = ps_parse_mb_data->u1_col_info;
+
+    UWORD32 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+    /* Four sub-MB modes, one per byte, first sub-MB ends up in the top byte */
+    UWORD32 u4_sum_mb_mode_pack = 0;
+    WORD32 ret;
+
+    /* Stays 1 only while every sub-MB parsed so far is P_L0_8x8; it gates */
+    /* the read of transform_size_8x8_flag further down.                   */
+    UWORD8 u1_no_submb_part_size_lt8x8_flag = 1;
+    ps_cur_mb_info->u1_tran_form8x8 = 0;
+    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
+
+    ps_cur_mb_info->u1_mb_mc_mode = u1_mb_type;
+    uc_sub_mb = ((u1_mb_type == PRED_8x8) | (u1_mb_type == PRED_8x8R0));
+
+    /* Reading the subMB type */
+    if(uc_sub_mb)
+    {
+        WORD32 i;
+        UWORD8 u1_colz = (PRED_8x8 << 6);
+
+        for(i = 0; i < 4; i++)
+        {
+            UWORD32 ui_sub_mb_mode;
+
+            /* Start of inlined ih264d_uev: count the leading zeros, skip */
+            /* them plus the marker bit, then read that many suffix bits. */
+            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+            UWORD32 u4_word, u4_ldz;
+
+            /***************************************************************/
+            /* Find leading zeros in next 32 bits                          */
+            /***************************************************************/
+            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+            u4_ldz = CLZ(u4_word);
+            /* Flush the ps_bitstrm */
+            u4_bitstream_offset += (u4_ldz + 1);
+            /* Read the suffix from the ps_bitstrm */
+            u4_word = 0;
+            if(u4_ldz)
+                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+                        u4_ldz);
+            *pu4_bitstrm_ofst = u4_bitstream_offset;
+            ui_sub_mb_mode = ((1 << u4_ldz) + u4_word - 1);
+            /* End of inlined ih264d_uev */
+
+            if(ui_sub_mb_mode > 3)
+            {
+                return ERROR_SUB_MB_TYPE;
+            }
+            else
+            {
+                u4_sum_mb_mode_pack = (u4_sum_mb_mode_pack << 8) | ui_sub_mb_mode;
+                /* Storing collocated information */
+                *pu1_col_info++ = u1_colz | (UWORD8)(ui_sub_mb_mode << 4);
+
+                COPYTHECONTEXT("sub_mb_type", ui_sub_mb_mode);
+            }
+
+            /* check if Motion compensation is done below 8x8 */
+            if(ui_sub_mb_mode != P_L0_8x8)
+            {
+                u1_no_submb_part_size_lt8x8_flag = 0;
+            }
+        }
+
+        /* An 8x8-partitioned MB always has four partitions */
+        u1_num_mb_part = 4;
+    }
+    else
+    {
+        /* One collocated-info byte, plus a second one when u1_mb_type != 0 */
+        *pu1_col_info++ = (u1_mb_type << 6);
+        if(u1_mb_type)
+            *pu1_col_info++ = (u1_mb_type << 6);
+        u1_num_mb_part = pu1_num_mb_part[u1_mb_type];
+
+    }
+
+    /* Decoding reference index 0: For simple profile the following   */
+    /* conditions are always true (mb_field_decoding_flag == 0);      */
+    /* (MbPartPredMode != PredL1)                                     */
+
+    {
+
+        UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
+        /* The active count is doubled when both the MBAFF flag and the */
+        /* MB field-decoding flag are set                               */
+        UWORD8 uc_num_ref_idx_l0_active_minus1 =
+                        (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
+                                        << (u1_mbaff & uc_field)) - 1;
+
+        if((uc_num_ref_idx_l0_active_minus1 > 0) & (u1_mb_type != PRED_8x8R0))
+        {
+            if(1 == uc_num_ref_idx_l0_active_minus1)
+                ih264d_parse_pmb_ref_index_cavlc_range1(
+                                u1_num_mb_part, ps_bitstrm, pi1_ref_idx,
+                                uc_num_ref_idx_l0_active_minus1);
+            else
+            {
+                ret = ih264d_parse_pmb_ref_index_cavlc(
+                                u1_num_mb_part, ps_bitstrm, pi1_ref_idx,
+                                uc_num_ref_idx_l0_active_minus1);
+                if(ret != OK)
+                    return ret;
+            }
+        }
+        else
+        {
+            /* When there exists only a single frame to predict from */
+            UWORD8 uc_i;
+            for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
+                /* Storing Reference Idx Information */
+                pi1_ref_idx[uc_i] = 0;
+        }
+    }
+
+    {
+        UWORD8 u1_p_idx, uc_i;
+        parse_part_params_t * ps_part = ps_dec->ps_part;
+        UWORD8 u1_sub_mb_mode, u1_num_subpart, u1_mb_part_width, u1_mb_part_height;
+        UWORD8 u1_sub_mb_num;
+        const UWORD8 * pu1_top_left_sub_mb_indx;
+        mv_pred_t * ps_mv, *ps_mv_start = ps_dec->ps_mv_cur + (u1_mb_num << 4);
+        /* Loading the table pointers */
+        const UWORD8 * pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
+        const UWORD8 * pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
+        const UWORD8 * pu1_sub_mb_indx_mod =
+                        (const UWORD8 *)(gau1_ih264d_submb_indx_mod)
+                                        + (uc_sub_mb * 6);
+        const UWORD8 * pu1_sub_mb_partw = (const UWORD8 *)gau1_ih264d_submb_partw;
+        const UWORD8 * pu1_sub_mb_parth = (const UWORD8 *)gau1_ih264d_submb_parth;
+        const UWORD8 * pu1_num_sub_mb_part =
+                        (const UWORD8 *)gau1_ih264d_num_submb_part;
+
+        /* Packed nibbles 0, 2, 8, 10: the value loaded into u1_sub_mb_num */
+        /* at the start of each of the four 8x8 partitions                 */
+        UWORD16 u2_sub_mb_num = 0x028A;
+
+        /*********************************************************/
+        /* default initialisations for condition (uc_sub_mb == 0) */
+        /* i.e. all are subpartitions of 8x8                     */
+        /*********************************************************/
+        u1_sub_mb_mode = 0;
+        u1_num_subpart = 1;
+        u1_mb_part_width = pu1_mb_partw[u1_mb_type];
+        u1_mb_part_height = pu1_mb_parth[u1_mb_type];
+        pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_type << 1);
+        u1_sub_mb_num = 0;
+
+        /* Loop on number of partitions */
+        for(uc_i = 0, u1_p_idx = 0; uc_i < u1_num_mb_part; uc_i++)
+        {
+            UWORD8 uc_j;
+            if(uc_sub_mb)
+            {
+                /* Pop the next sub-MB mode (top byte) and its starting */
+                /* sub-MB number (top nibble) from the packed words     */
+                u1_sub_mb_mode = u4_sum_mb_mode_pack >> 24;
+                u1_num_subpart = pu1_num_sub_mb_part[u1_sub_mb_mode];
+                u1_mb_part_width = pu1_sub_mb_partw[u1_sub_mb_mode];
+                u1_mb_part_height = pu1_sub_mb_parth[u1_sub_mb_mode];
+                pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_sub_mb_mode << 1);
+                u1_sub_mb_num = u2_sub_mb_num >> 12;
+                u4_sum_mb_mode_pack <<= 8;
+                u2_sub_mb_num <<= 4;
+            }
+
+            /* Loop on Number of sub-partitions */
+            for(uc_j = 0; uc_j < u1_num_subpart; uc_j++, pu1_top_left_sub_mb_indx++)
+            {
+                WORD16 i2_mvx, i2_mvy;
+                u1_sub_mb_num += *pu1_top_left_sub_mb_indx;
+                ps_mv = ps_mv_start + u1_sub_mb_num;
+
+                /* Reading the differential Mv from the bitstream.        */
+                /* The x component uses an inlined copy of ih264d_sev;    */
+                /* the y component below calls ih264d_sev directly.       */
+                {
+                    UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+                    UWORD32 u4_word, u4_ldz, u4_abs_val;
+
+                    /***************************************************************/
+                    /* Find leading zeros in next 32 bits                          */
+                    /***************************************************************/
+                    NEXTBITS_32(u4_word, u4_bitstream_offset,
+                                pu4_bitstrm_buf);
+                    u4_ldz = CLZ(u4_word);
+
+                    /* Flush the ps_bitstrm */
+                    u4_bitstream_offset += (u4_ldz + 1);
+
+                    /* Read the suffix from the ps_bitstrm */
+                    u4_word = 0;
+                    if(u4_ldz)
+                        GETBITS(u4_word, u4_bitstream_offset,
+                                pu4_bitstrm_buf, u4_ldz);
+
+                    *pu4_bitstrm_ofst = u4_bitstream_offset;
+                    u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
+
+                    /* Lowest suffix bit selects the sign */
+                    if(u4_word & 0x1)
+                        i2_mvx = (-(WORD32)u4_abs_val);
+                    else
+                        i2_mvx = (u4_abs_val);
+                }
+                /* End of inlined ih264d_sev */
+                COPYTHECONTEXT("MVD", i2_mvx);
+                i2_mvy = ih264d_sev(pu4_bitstrm_ofst,
+                                     pu4_bitstrm_buf);
+                COPYTHECONTEXT("MVD", i2_mvy);
+
+                /* Storing Info for partitions */
+                ps_part->u1_is_direct = PART_NOT_DIRECT;
+                ps_part->u1_sub_mb_num = u1_sub_mb_num;
+                ps_part->u1_partheight = u1_mb_part_height;
+                ps_part->u1_partwidth = u1_mb_part_width;
+
+                /* Storing Mv residuals */
+                ps_mv->i2_mv[0] = i2_mvx;
+                ps_mv->i2_mv[1] = i2_mvy;
+
+                /* Increment partition Index */
+                u1_p_idx++;
+                ps_part++;
+            }
+        }
+        ps_parse_mb_data->u1_num_part = u1_p_idx;
+        ps_dec->ps_part = ps_part;
+    }
+
+    {
+        UWORD32 u4_cbp;
+
+        /* Read the Coded block pattern (same inlined unsigned read as above) */
+        UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+        UWORD32 u4_word, u4_ldz;
+
+        /***************************************************************/
+        /* Find leading zeros in next 32 bits                          */
+        /***************************************************************/
+        NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+        u4_ldz = CLZ(u4_word);
+        /* Flush the ps_bitstrm */
+        u4_bitstream_offset += (u4_ldz + 1);
+        /* Read the suffix from the ps_bitstrm */
+        u4_word = 0;
+        if(u4_ldz)
+            GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
+        *pu4_bitstrm_ofst = u4_bitstream_offset;
+        u4_cbp = ((1 << u4_ldz) + u4_word - 1);
+
+        if(u4_cbp > 47)
+            return ERROR_CBP;
+
+        /* Map the coded index to the actual pattern via the inter CBP table */
+        u4_cbp = *((UWORD8*)gau1_ih264d_cbp_inter + u4_cbp);
+        COPYTHECONTEXT("coded_block_pattern", u4_cbp);
+        ps_cur_mb_info->u1_cbp = u4_cbp;
+
+        /* Read the transform8x8 u4_flag if present */
+        if((ps_dec->s_high_profile.u1_transform8x8_present) && (u4_cbp & 0xf)
+                        && u1_no_submb_part_size_lt8x8_flag)
+        {
+            ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm);
+            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
+            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
+        }
+
+        /* Read mb_qp_delta */
+        if(u4_cbp)
+        {
+            WORD32 i_temp;
+
+            /* Start of inlined ih264d_sev */
+            UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+            UWORD32 u4_word, u4_ldz, u4_abs_val;
+
+            /***************************************************************/
+            /* Find leading zeros in next 32 bits                          */
+            /***************************************************************/
+            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+            u4_ldz = CLZ(u4_word);
+
+            /* Flush the ps_bitstrm */
+            u4_bitstream_offset += (u4_ldz + 1);
+
+            /* Read the suffix from the ps_bitstrm */
+            u4_word = 0;
+            if(u4_ldz)
+                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+                        u4_ldz);
+
+            *pu4_bitstrm_ofst = u4_bitstream_offset;
+            u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
+
+            if(u4_word & 0x1)
+                i_temp = (-(WORD32)u4_abs_val);
+            else
+                i_temp = (u4_abs_val);
+
+            /* Reject deltas outside [-26, 25] */
+            if((i_temp < -26) || (i_temp > 25))
+                return ERROR_INV_RANGE_QP_T;
+            /* End of inlined ih264d_sev */
+
+            COPYTHECONTEXT("mb_qp_delta", i_temp);
+            if(i_temp)
+            {
+                ret = ih264d_update_qp(ps_dec, (WORD8)i_temp);
+                if(ret != OK)
+                    return ret;
+            }
+
+            ret = ih264d_parse_residual4x4_cavlc(ps_dec, ps_cur_mb_info, 0);
+            if(ret != OK)
+                return ret;
+            if(EXCEED_OFFSET(ps_bitstrm))
+                return ERROR_EOB_TERMINATE_T;
+        }
+        else
+        {
+            /* No coded residual: clear the nnz bookkeeping instead */
+            ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
+        }
+
+
+
+    }
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_pmb_cabac \endif
+ *
+ * \brief
+ *    This function parses CABAC syntax of a P MB: sub-MB types (for 8x8
+ *    partitioned MBs), reference indices, motion vector differences, coded
+ *    block pattern, the optional transform_size_8x8_flag, mb_qp_delta and
+ *    finally the residual.
+ *
+ * \param ps_dec          Decoder state. The CABAC environment (s_cab_dec_env),
+ *                        the current context MB info (ps_curr_ctxt_mb_info),
+ *                        the partition cursor (ps_part) and the MV buffer
+ *                        (ps_mv_cur) are all reached through it.
+ * \param ps_cur_mb_info  Current MB info. u1_mb_type is read; u1_cbp,
+ *                        u1_tran_form8x8, u1_mb_mc_mode and
+ *                        u1_yuv_dc_block_flag are written.
+ * \param u1_mb_num       MB index; MVs are addressed from
+ *                        ps_mv_cur + (u1_mb_num << 4).
+ * \param u1_num_mbsNby2  Index of this MB's entry in ps_dec->ps_parse_mb_data.
+ *
+ * \return
+ *    OK on success. ERROR_SUB_MB_TYPE, ERROR_CBP or ERROR_EOB_TERMINATE_T on
+ *    malformed data, or the error code returned by a callee.
+ **************************************************************************
+ */
+WORD32 ih264d_parse_pmb_cabac(dec_struct_t * ps_dec,
+                              dec_mb_info_t * ps_cur_mb_info,
+                              UWORD8 u1_mb_num,
+                              UWORD8 u1_num_mbsNby2)
+{
+    UWORD32 u1_num_mb_part;
+    UWORD32 uc_sub_mb;
+    parse_pmbarams_t * ps_parse_mb_data = ps_dec->ps_parse_mb_data
+                    + u1_num_mbsNby2;
+    WORD8 * pi1_ref_idx = ps_parse_mb_data->i1_ref_idx[0];
+    const UWORD8 * pu1_num_mb_part = (const UWORD8 *)gau1_ih264d_num_mb_part;
+    const UWORD32 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+    UWORD8 * pu1_col_info = ps_parse_mb_data->u1_col_info;
+    UWORD32 u1_mb_mc_mode = u1_mb_type;
+    ctxt_inc_mb_info_t * p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
+    decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+    /* Four sub-MB modes, one per byte, first sub-MB ends up in the top byte */
+    UWORD32 u4_sub_mb_pack = 0;
+    WORD32 ret;
+
+    /* Stays 1 only while every sub-MB parsed so far is P_L0_8x8; it gates */
+    /* the read of transform_size_8x8_flag further down.                   */
+    UWORD8 u1_no_submb_part_size_lt8x8_flag = 1;
+    ps_cur_mb_info->u1_tran_form8x8 = 0;
+    ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+    ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
+
+    p_curr_ctxt->u1_mb_type = CAB_P;
+    ps_cur_mb_info->u1_mb_mc_mode = u1_mb_type;
+    uc_sub_mb = ((u1_mb_type == PRED_8x8) | (u1_mb_type == PRED_8x8R0));
+
+    /* Reading the subMB type */
+    if(uc_sub_mb)
+    {
+
+        UWORD8 u1_colz = (PRED_8x8 << 6);
+        u1_mb_mc_mode = 0;
+
+        /* The four blocks below are identical: one per 8x8 sub-MB, in order */
+        {
+            UWORD8 u1_sub_mb_mode;
+            u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
+                            0, ps_cab_env, ps_bitstrm,
+                            ps_dec->p_sub_mb_type_t);
+            if(u1_sub_mb_mode > 3)
+                return ERROR_SUB_MB_TYPE;
+
+            u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
+            /* Storing collocated information */
+            *pu1_col_info++ = u1_colz | ((UWORD8)(u1_sub_mb_mode << 4));
+            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
+            /* check if Motion compensation is done below 8x8 */
+            if(u1_sub_mb_mode != P_L0_8x8)
+            {
+                u1_no_submb_part_size_lt8x8_flag = 0;
+            }
+        }
+        {
+            UWORD8 u1_sub_mb_mode;
+            u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
+                            0, ps_cab_env, ps_bitstrm,
+                            ps_dec->p_sub_mb_type_t);
+            if(u1_sub_mb_mode > 3)
+                return ERROR_SUB_MB_TYPE;
+
+            u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
+            /* Storing collocated information */
+            *pu1_col_info++ = u1_colz | ((UWORD8)(u1_sub_mb_mode << 4));
+            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
+            /* check if Motion compensation is done below 8x8 */
+            if(u1_sub_mb_mode != P_L0_8x8)
+            {
+                u1_no_submb_part_size_lt8x8_flag = 0;
+            }
+        }
+        {
+            UWORD8 u1_sub_mb_mode;
+            u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
+                            0, ps_cab_env, ps_bitstrm,
+                            ps_dec->p_sub_mb_type_t);
+            if(u1_sub_mb_mode > 3)
+                return ERROR_SUB_MB_TYPE;
+
+            u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
+            /* Storing collocated information */
+            *pu1_col_info++ = u1_colz | ((UWORD8)(u1_sub_mb_mode << 4));
+            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
+            /* check if Motion compensation is done below 8x8 */
+            if(u1_sub_mb_mode != P_L0_8x8)
+            {
+                u1_no_submb_part_size_lt8x8_flag = 0;
+            }
+        }
+        {
+            UWORD8 u1_sub_mb_mode;
+            u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
+                            0, ps_cab_env, ps_bitstrm,
+                            ps_dec->p_sub_mb_type_t);
+            if(u1_sub_mb_mode > 3)
+                return ERROR_SUB_MB_TYPE;
+
+            u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
+            /* Storing collocated information */
+            *pu1_col_info++ = u1_colz | ((UWORD8)(u1_sub_mb_mode << 4));
+            COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
+            /* check if Motion compensation is done below 8x8 */
+            if(u1_sub_mb_mode != P_L0_8x8)
+            {
+                u1_no_submb_part_size_lt8x8_flag = 0;
+            }
+        }
+        u1_num_mb_part = 4;
+    }
+    else
+    {
+        u1_num_mb_part = pu1_num_mb_part[u1_mb_type];
+        /* Storing collocated Mb and SubMb mode information */
+        *pu1_col_info++ = (u1_mb_type << 6);
+        if(u1_mb_type)
+            *pu1_col_info++ = (u1_mb_type << 6);
+    }
+    /* Decoding reference index 0: For simple profile the following   */
+    /* conditions are always true (mb_field_decoding_flag == 0);      */
+    /* (MbPartPredMode != PredL1)                                     */
+    {
+        WORD8 * pi1_top_ref_idx_ctx_inc_arr = p_curr_ctxt->i1_ref_idx;
+        WORD8 * pi1_left_ref_idx_ctxt_inc = ps_dec->pi1_left_ref_idx_ctxt_inc;
+        UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+        UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
+        /* The active count is doubled when both the MBAFF flag and the */
+        /* MB field-decoding flag are set                               */
+        UWORD8 uc_num_ref_idx_l0_active_minus1 =
+                        (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
+                                        << (u1_mbaff & uc_field)) - 1;
+
+        if((uc_num_ref_idx_l0_active_minus1 > 0) & (u1_mb_type != PRED_8x8R0))
+        {
+            /* force the routine to decode ref idx for each partition */
+            /* (all four ref-idx bytes are pre-set to 1 in one store) */
+            *((UWORD32 *)pi1_ref_idx) = 0x01010101;
+            ret = ih264d_parse_ref_idx_cabac(u1_num_mb_part, 0,
+                                             uc_num_ref_idx_l0_active_minus1,
+                                             u1_mb_mc_mode, pi1_ref_idx,
+                                             pi1_left_ref_idx_ctxt_inc,
+                                             pi1_top_ref_idx_ctx_inc_arr, ps_cab_env,
+                                             ps_bitstrm, ps_dec->p_ref_idx_t);
+            if(ret != OK)
+                return ret;
+        }
+        else
+        {
+            /* When there exists only a single frame to predict from */
+            pi1_left_ref_idx_ctxt_inc[0] = 0;
+            pi1_left_ref_idx_ctxt_inc[1] = 0;
+            pi1_top_ref_idx_ctx_inc_arr[0] = 0;
+            pi1_top_ref_idx_ctx_inc_arr[1] = 0;
+            *((UWORD32 *)pi1_ref_idx) = 0;
+        }
+    }
+
+    {
+        UWORD8 u1_p_idx, uc_i;
+        parse_part_params_t * ps_part = ps_dec->ps_part;
+        UWORD8 u1_sub_mb_mode, u1_num_subpart, u1_mb_part_width, u1_mb_part_height;
+        UWORD8 u1_sub_mb_num;
+        const UWORD8 * pu1_top_left_sub_mb_indx;
+        mv_pred_t *ps_mv_start = ps_dec->ps_mv_cur + (u1_mb_num << 4);
+        /* Packed nibbles 0, 2, 8, 10: the value loaded into u1_sub_mb_num */
+        /* at the start of each of the four 8x8 partitions                 */
+        UWORD16 u2_sub_mb_num_pack = 0x028A;
+
+        /* Loading the table pointers */
+        const UWORD8 * pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
+        const UWORD8 * pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
+        const UWORD8 * pu1_sub_mb_indx_mod =
+                        (const UWORD8 *)(gau1_ih264d_submb_indx_mod)
+                                        + (uc_sub_mb * 6);
+        const UWORD8 * pu1_sub_mb_partw = (const UWORD8 *)gau1_ih264d_submb_partw;
+        const UWORD8 * pu1_sub_mb_parth = (const UWORD8 *)gau1_ih264d_submb_parth;
+        const UWORD8 * pu1_num_sub_mb_part =
+                        (const UWORD8 *)gau1_ih264d_num_submb_part;
+
+        /*********************************************************/
+        /* default initialisations for condition (uc_sub_mb == 0) */
+        /* i.e. all are subpartitions of 8x8                     */
+        /*********************************************************/
+        u1_sub_mb_mode = 0;
+        u1_num_subpart = 1;
+        u1_mb_part_width = pu1_mb_partw[u1_mb_type];
+        u1_mb_part_height = pu1_mb_parth[u1_mb_type];
+        pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_type << 1);
+        u1_sub_mb_num = 0;
+
+        /* Loop on number of partitions */
+        for(uc_i = 0, u1_p_idx = 0; uc_i < u1_num_mb_part; uc_i++)
+        {
+            UWORD8 uc_j;
+            if(uc_sub_mb)
+            {
+                /* Pop the next sub-MB mode (top byte) and its starting */
+                /* sub-MB number (top nibble) from the packed words     */
+                u1_sub_mb_mode = u4_sub_mb_pack >> 24;
+                u1_num_subpart = pu1_num_sub_mb_part[u1_sub_mb_mode];
+                u1_mb_part_width = pu1_sub_mb_partw[u1_sub_mb_mode];
+                u1_mb_part_height = pu1_sub_mb_parth[u1_sub_mb_mode];
+                pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_sub_mb_mode << 1);
+                u1_sub_mb_num = u2_sub_mb_num_pack >> 12;
+                u4_sub_mb_pack <<= 8;
+                u2_sub_mb_num_pack <<= 4;
+            }
+            /* Loop on Number of sub-partitions */
+            for(uc_j = 0; uc_j < u1_num_subpart; uc_j++, pu1_top_left_sub_mb_indx++)
+            {
+                mv_pred_t * ps_mv;
+
+                u1_sub_mb_num += *pu1_top_left_sub_mb_indx;
+                ps_mv = ps_mv_start + u1_sub_mb_num;
+
+                /* Storing Info for partitions */
+                ps_part->u1_is_direct = PART_NOT_DIRECT;
+                ps_part->u1_sub_mb_num = u1_sub_mb_num;
+                ps_part->u1_partheight = u1_mb_part_height;
+                ps_part->u1_partwidth = u1_mb_part_width;
+
+                /* Increment partition Index */
+                u1_p_idx++;
+                ps_part++;
+
+                /* Decode the MV difference for this sub-partition into ps_mv */
+                ih264d_get_mvd_cabac(u1_sub_mb_num, 0, u1_mb_part_width,
+                                     u1_mb_part_height, 1, ps_dec, ps_mv);
+            }
+        }
+        ps_parse_mb_data->u1_num_part = u1_p_idx;
+        ps_dec->ps_part = ps_part;
+    }
+    {
+        UWORD8 u1_cbp;
+
+        /* Read the Coded block pattern */
+        u1_cbp = (WORD8)ih264d_parse_ctx_cbp_cabac(ps_dec);
+        COPYTHECONTEXT("coded_block_pattern", u1_cbp);
+        /* The context state below is updated before the range check, so  */
+        /* it is already modified when ERROR_CBP is returned.              */
+        ps_cur_mb_info->u1_cbp = u1_cbp;
+        p_curr_ctxt->u1_cbp = u1_cbp;
+        p_curr_ctxt->u1_intra_chroma_pred_mode = 0;
+        p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE;
+        ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6;
+
+        if(u1_cbp > 47)
+            return ERROR_CBP;
+
+        ps_cur_mb_info->u1_tran_form8x8 = 0;
+        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+        /* Read the transform8x8 u4_flag if present */
+        if((ps_dec->s_high_profile.u1_transform8x8_present) && (u1_cbp & 0xf)
+                        && u1_no_submb_part_size_lt8x8_flag)
+        {
+            ps_cur_mb_info->u1_tran_form8x8 = ih264d_parse_transform8x8flag_cabac(
+                            ps_dec, ps_cur_mb_info);
+            COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
+            p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8;
+            ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
+
+        }
+        else
+        {
+            p_curr_ctxt->u1_transform8x8_ctxt = 0;
+        }
+
+        /* Read mb_qp_delta */
+        if(u1_cbp)
+        {
+            WORD8 c_temp;
+            ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &c_temp);
+            if(ret != OK)
+                return ret;
+            COPYTHECONTEXT("mb_qp_delta", c_temp);
+            if(c_temp != 0)
+            {
+                ret = ih264d_update_qp(ps_dec, c_temp);
+                if(ret != OK)
+                    return ret;
+            }
+        }
+        else
+            ps_dec->i1_prev_mb_qp_delta = 0;
+
+
+
+        /* Residual parsing is called unconditionally here, also for      */
+        /* u1_cbp == 0. NOTE(review): its result is not checked, unlike   */
+        /* the CAVLC path - confirm whether it can report an error.       */
+        ih264d_parse_residual4x4_cabac(ps_dec, ps_cur_mb_info, 0);
+        if(EXCEED_OFFSET(ps_dec->ps_bitstrm))
+            return ERROR_EOB_TERMINATE_T;
+    }
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : parsePSliceData \endif
+ *
+ * \brief
+ *    This function parses CAVLC syntax of N MB's of a P slice.
+ *    1. After parsing syntax of N MB's, for those N MB's (less than N, in case
+ *       of end of slice or end of row), MB is decoded. This process is carried
+ *       for one complete MB row or till end of slice.
+ *    2. Bottom one row of current MB is copied to IntraPredLine buffers.
+ *       IntraPredLine buffers are used for Intra prediction of next row.
+ *    3. Current MB row along with previous 4 rows of Luma (and 2 of Chroma) are
+ *       deblocked.
+ *    4.
4 rows (2 for Chroma) previous row and 12 rows (6 for Chroma) are + * DMA'ed to picture buffers. + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ + +/*! + ************************************************************************** + * \if Function name : ih264d_update_nnz_for_skipmb \endif + * + * \brief + * + * \return + * None + * + ************************************************************************** + */ +void ih264d_update_nnz_for_skipmb(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_entrpy) +{ + UWORD32 *pu4_buf; + UWORD8 *pu1_buf; + UNUSED(u1_entrpy); + pu1_buf = ps_dec->pu1_left_nnz_y; + pu4_buf = (UWORD32 *)pu1_buf; + *pu4_buf = 0; + pu1_buf = ps_dec->pu1_left_nnz_uv; + pu4_buf = (UWORD32 *)pu1_buf; + *pu4_buf = 0; + pu1_buf = ps_cur_mb_info->ps_curmb->pu1_nnz_y; + pu4_buf = (UWORD32 *)pu1_buf; + *pu4_buf = 0; + pu1_buf = ps_cur_mb_info->ps_curmb->pu1_nnz_uv; + pu4_buf = (UWORD32 *)pu1_buf; + *pu4_buf = 0; + ps_cur_mb_info->ps_curmb->u2_luma_csbp = 0; + ps_cur_mb_info->u2_luma_csbp = 0; + ps_cur_mb_info->u2_chroma_csbp = 0; +} + + + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_inter_slice_data_cabac */ +/* */ +/* Description : This function parses cabac syntax of a inter slice on */ +/* N MB basis. */ +/* */ +/* Inputs : ps_dec */ +/* sliceparams */ +/* firstMbInSlice */ +/* */ +/* Processing : 1. After parsing syntax for N MBs those N MBs are */ +/* decoded till the end of slice. */ +/* 2. MV prediction and DMA happens on a N/2 MB basis. 
*/
+/*                                                                           */
+/*  Returns       : OK (0) on success; ERROR_MB_TYPE, ERROR_MB_ADDRESS_T or  */
+/*                  an error code propagated from a callee otherwise         */
+/*                                                                           */
+/*  Issues        : <List any issues or problems with this function>         */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         13 07 2002   Jay             Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_parse_inter_slice_data_cabac(dec_struct_t * ps_dec,
+                                           dec_slice_params_t * ps_slice,
+                                           UWORD16 u2_first_mb_in_slice)
+{
+    UWORD32 uc_more_data_flag;
+    WORD32 i2_cur_mb_addr;
+    UWORD32 u1_num_mbs, u1_num_mbsNby2, u1_mb_idx;
+    UWORD32 u1_mbaff;
+    UWORD32 u1_num_mbs_next, u1_end_of_row;
+    const UWORD16 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
+    UWORD32 u1_slice_end = 0;
+    UWORD32 u1_tfr_n_mb = 0;
+    UWORD32 u1_decode_nmb = 0;
+
+
+    deblk_mb_t *ps_cur_deblk_mb;
+    dec_mb_info_t *ps_cur_mb_info;
+
+    parse_pmbarams_t *ps_parse_mb_data = ps_dec->ps_parse_mb_data;
+    UWORD32 u1_inter_mb_skip_type;
+    UWORD32 u1_inter_mb_type;
+    UWORD32 u1_deblk_mb_type;
+    UWORD32 u1_mb_threshold;
+    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+    WORD32 ret;
+
+    /******************************************************/
+    /* Initialisations specific to B or P slice           */
+    /* u1_mb_threshold is the first mb_type value that is */
+    /* treated as intra: 5 for P slices, 23 for B slices. */
+    /******************************************************/
+    if(ps_slice->u1_slice_type == P_SLICE)
+    {
+        u1_inter_mb_skip_type = CAB_P_SKIP;
+        u1_inter_mb_type = P_MB;
+        u1_deblk_mb_type = D_INTER_MB;
+        u1_mb_threshold = 5;
+    }
+    else // B_SLICE
+    {
+        u1_inter_mb_skip_type = CAB_B_SKIP;
+        u1_inter_mb_type = B_MB;
+        u1_deblk_mb_type = D_B_SLICE;
+        u1_mb_threshold = 23;
+    }
+
+    /******************************************************/
+    /* Slice Level Initialisations                        */
+    /******************************************************/
+    i2_cur_mb_addr = u2_first_mb_in_slice;
+    ps_dec->u1_qp = ps_slice->u1_slice_qp;
+    ret = ih264d_update_qp(ps_dec, 0);
+    if(ret != OK)
+        return ret;
+    u1_mb_idx = ps_dec->u1_mb_idx;
+    u1_num_mbs = u1_mb_idx;
+    u1_num_mbsNby2 = 0;
+    u1_mbaff = ps_slice->u1_mbaff_frame_flag;
+    /* With MBAFF the first-MB value is doubled to get the MB address */
+    i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff;
+    uc_more_data_flag = 1;
+
+    /* Initialisations specific to cabac: round the bit offset up to the */
+    /* next multiple of 8 before the CABAC environment is initialised    */
+    if(ps_bitstrm->u4_ofst & 0x07)
+    {
+        ps_bitstrm->u4_ofst += 8;
+        ps_bitstrm->u4_ofst &= 0xFFFFFFF8;
+    }
+
+    ret = ih264d_init_cabac_dec_envirnoment(&(ps_dec->s_cab_dec_env), ps_bitstrm);
+    if(ret != OK)
+        return ret;
+
+    ps_dec->i1_prev_mb_qp_delta = 0;
+
+    while(!u1_slice_end)
+    {
+        UWORD8 u1_mb_type;
+        UWORD32 u4_mb_skip;
+
+
+        ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
+
+        ps_cur_mb_info->u1_Mux = 0;
+        ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
+        ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
+
+        ps_cur_mb_info->u1_end_of_slice = 0;
+
+        /* Storing Default partition info */
+        ps_parse_mb_data->u1_num_part = 1;
+        ps_parse_mb_data->u1_isI_mb = 0;
+
+        /***************************************************************/
+        /* Get the required information for decoding of MB             */
+        /* mb_x, mb_y , neighbour availability,                        */
+        /* The hook's return value is used as the skip flag.           */
+        /***************************************************************/
+        u4_mb_skip = ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, 1);
+
+        /*********************************************************************/
+        /* initialize u1_tran_form8x8 to zero to avoid uninitialized accesses */
+        /*********************************************************************/
+        ps_cur_mb_info->u1_tran_form8x8 = 0;
+        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+        /***************************************************************/
+        /* Set the deblocking parameters for this MB                   */
+        /***************************************************************/
+        if(ps_dec->u4_app_disable_deblk_frm == 0)
+            ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
+                                             ps_dec->u1_mb_ngbr_availablity,
+                                             ps_dec->u1_cur_mb_fld_dec_flag);
+
+        if(u4_mb_skip)
+        {
+
+            /* Set appropriate flags in ps_cur_mb_info and ps_dec */
+            memset(ps_dec->ps_curr_ctxt_mb_info, 0, sizeof(ctxt_inc_mb_info_t));
+            ps_dec->ps_curr_ctxt_mb_info->u1_mb_type = u1_inter_mb_skip_type;
+
+            MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
+
+            *((UWORD32 *)ps_dec->pi1_left_ref_idx_ctxt_inc) = 0;
+            *(ps_dec->pu1_left_yuv_dc_csbp) = 0;
+
+            ps_dec->i1_prev_mb_qp_delta = 0;
+            ps_cur_mb_info->u1_mb_type = MB_SKIP;
+            ps_cur_mb_info->u1_cbp = 0;
+
+            {
+                /* Storing Skip partition info */
+                parse_part_params_t *ps_part_info = ps_dec->ps_part;
+                ps_part_info->u1_is_direct = PART_DIRECT_16x16;
+                ps_part_info->u1_sub_mb_num = 0;
+                ps_dec->ps_part++;
+            }
+
+            /* Update Nnzs */
+            ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CABAC);
+
+            ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
+            ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
+            ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
+
+        }
+        else
+        {
+
+            /* Macroblock Layer Begins */
+            /* Decode the u1_mb_type */
+            u1_mb_type = ih264d_parse_mb_type_cabac(ps_dec);
+            ps_cur_mb_info->u1_mb_type = u1_mb_type;
+            /* Valid range: inter types [0, threshold), intra types          */
+            /* [threshold, threshold + 25), and threshold + 25 for I_PCM     */
+            if(u1_mb_type > (25 + u1_mb_threshold))
+                return ERROR_MB_TYPE;
+
+            /* Parse Macroblock Data */
+            if(u1_mb_type < u1_mb_threshold)
+            {
+                ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
+                *(ps_dec->pu1_left_yuv_dc_csbp) &= 0x6;
+
+                ret = ps_dec->pf_parse_inter_mb(ps_dec, ps_cur_mb_info, u1_num_mbs,
+                                          u1_num_mbsNby2);
+                if(ret != OK)
+                    return ret;
+                ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
+                ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
+            }
+            else
+            {
+                /* Storing Intra partition info */
+                ps_parse_mb_data->u1_num_part = 0;
+                ps_parse_mb_data->u1_isI_mb = 1;
+
+                if((25 + u1_mb_threshold) == u1_mb_type)
+                {
+                    /* I_PCM_MB */
+                    ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
+                    ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs);
+                    if(ret != OK)
+                        return ret;
+                    ps_cur_deblk_mb->u1_mb_qp = 0;
+                }
+                else
+                {
+                    if(u1_mb_type == u1_mb_threshold)
+                        ps_cur_mb_info->ps_curmb->u1_mb_type = I_4x4_MB;
+                    else
+                        ps_cur_mb_info->ps_curmb->u1_mb_type = I_16x16_MB;
+
+                    /* The intra parser receives the type relative to the */
+                    /* first intra value                                  */
+                    ret = ih264d_parse_imb_cabac(
+                                    ps_dec, ps_cur_mb_info,
+                                    (UWORD8)(u1_mb_type - u1_mb_threshold));
+                    if(ret != OK)
+                        return ret;
+                    ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
+                }
+                ps_cur_deblk_mb->u1_mb_type |= D_INTRA_MB;
+
+            }
+
+        }
+
+        if(u1_mbaff)
+        {
+            ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
+        }
+        /* Next macroblock information */
+        /* NOTE(review): this bound is tested only after the MB at          */
+        /* i2_cur_mb_addr has been parsed, whereas the CAVLC slice loop     */
+        /* tests it before touching the MB - confirm this is intended.      */
+        if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u2_max_mb_addr)
+            return ERROR_MB_ADDRESS_T;
+        i2_cur_mb_addr++;
+
+        /* After the top MB of an MBAFF pair the terminate bin is not read */
+        if(ps_cur_mb_info->u1_topmb && u1_mbaff)
+            uc_more_data_flag = 1;
+        else
+        {
+            uc_more_data_flag = ih264d_decode_terminate(&ps_dec->s_cab_dec_env,
+                                                      ps_bitstrm);
+            uc_more_data_flag = !uc_more_data_flag;
+            COPYTHECONTEXT("Decode Sliceterm",!uc_more_data_flag);
+        }
+
+        u1_num_mbs++;
+        ps_dec->u2_total_mbs_coded++;
+        u1_num_mbsNby2++;
+        ps_parse_mb_data++;
+
+        /****************************************************************/
+        /* Check for End Of Row and other flags that determine when to  */
+        /* do DMA setup for N/2-Mb, Decode for N-Mb, and Transfer for   */
+        /* N-Mb                                                         */
+        /****************************************************************/
+        u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1;
+        u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01)));
+        u1_slice_end = !uc_more_data_flag;
+        u1_tfr_n_mb = (u1_num_mbs == ps_dec->u1_recon_mb_grp) || u1_end_of_row
+                        || u1_slice_end;
+        u1_decode_nmb = u1_tfr_n_mb || u1_slice_end;
+        ps_cur_mb_info->u1_end_of_slice = u1_slice_end;
+
+        /* When a group is complete, run the MV-prediction hook over MBs */
+        /* [u1_mb_idx, u1_num_mbs) and rewind the per-group cursors      */
+        if(u1_decode_nmb)
+        {
+
+            ret = ps_dec->pf_mvpred_ref_tfr_nby2mb(ps_dec, u1_mb_idx, u1_num_mbs);
+            if(ret != OK)
+                return ret;
+            u1_num_mbsNby2 = 0;
+
+            {
+                ps_parse_mb_data = ps_dec->ps_parse_mb_data;
+                ps_dec->ps_part = ps_dec->ps_parse_part_params;
+            }
+        }
+
+        if(u1_decode_nmb)
+        {
+
+            if(ps_dec->u1_separate_parse)
+            {
+                /* Parse-only path: hand the group over and advance the */
+                /* MB info pointer past it                              */
+                ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+                                     u1_num_mbs_next, u1_tfr_n_mb, u1_end_of_row);
+                ps_dec->ps_nmb_info += u1_num_mbs;
+            }
+            else
+            {
+                ret = ih264d_decode_recon_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+                                            u1_num_mbs_next, u1_tfr_n_mb,
+                                            u1_end_of_row);
+                if(ret != OK)
+                    return ret;
+            }
+
+            if(u1_tfr_n_mb)
+                u1_num_mbs = 0;
+            u1_mb_idx = u1_num_mbs;
+            ps_dec->u1_mb_idx = u1_num_mbs;
+
+        }
+    }
+
+    if(ps_dec->u1_separate_parse)
+    {
+        ps_dec->ps_parse_cur_slice->end_of_slice = 1;
+        ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr
+                        - (u2_first_mb_in_slice << u1_mbaff);
+    }
+    return OK;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_inter_slice_data_cavlc                      */
+/*                                                                           */
+/*  Description   : This function parses cavlc syntax of a inter slice on    */
+/*                  N MB basis.                                              */
+/*                                                                           */
+/*  Inputs        : ps_dec                                                   */
+/*                  sliceparams                                              */
+/*                  firstMbInSlice                                           */
+/*                                                                           */
+/*  Processing    : 1. After parsing syntax for N MBs those N MBs are        */
+/*                     decoded till the end of slice.                        */
+/*                  2. MV prediction and DMA happens on a N/2 MB basis.
*/ +/* */ +/* Returns : 0 */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 13 07 2002 Jay Draft */ +/* */ +/*****************************************************************************/ + +WORD32 ih264d_parse_inter_slice_data_cavlc(dec_struct_t * ps_dec, + dec_slice_params_t * ps_slice, + UWORD16 u2_first_mb_in_slice) +{ + UWORD32 uc_more_data_flag; + WORD32 i2_cur_mb_addr; + UWORD32 u1_num_mbs, u1_num_mbsNby2, u1_mb_idx; + UWORD32 i2_mb_skip_run; + UWORD32 u1_read_mb_type; + + UWORD32 u1_mbaff; + UWORD32 u1_num_mbs_next, u1_end_of_row; + const UWORD32 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs; + UWORD32 u1_slice_end = 0; + UWORD32 u1_tfr_n_mb = 0; + UWORD32 u1_decode_nmb = 0; + + dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst; + deblk_mb_t *ps_cur_deblk_mb; + dec_mb_info_t *ps_cur_mb_info; + parse_pmbarams_t *ps_parse_mb_data = ps_dec->ps_parse_mb_data; + UWORD32 u1_inter_mb_type; + UWORD32 u1_deblk_mb_type; + UWORD32 u1_mb_threshold; + WORD32 ret; + + /******************************************************/ + /* Initialisations specific to B or P slice */ + /******************************************************/ + + if(ps_slice->u1_slice_type == P_SLICE) + { + u1_inter_mb_type = P_MB; + u1_deblk_mb_type = D_INTER_MB; + u1_mb_threshold = 5; + } + else // B_SLICE + { + u1_inter_mb_type = B_MB; + u1_deblk_mb_type = D_B_SLICE; + u1_mb_threshold = 23; + } + /******************************************************/ + /* Slice Level Initialisations */ + /******************************************************/ + i2_cur_mb_addr = u2_first_mb_in_slice; + ps_dec->u1_qp = ps_slice->u1_slice_qp; + ret = ih264d_update_qp(ps_dec, 0); + if(ret != OK) + return ret; + u1_mb_idx = ps_dec->u1_mb_idx; + u1_num_mbs = u1_mb_idx; + + 
u1_num_mbsNby2 = 0; + u1_mbaff = ps_slice->u1_mbaff_frame_flag; + i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff; + i2_mb_skip_run = 0; + uc_more_data_flag = 1; + u1_read_mb_type = 0; + + while(!u1_slice_end) + { + UWORD8 u1_mb_type; + + if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u2_max_mb_addr) + { + + break; + } + + + ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs; + + ps_cur_mb_info->u1_Mux = 0; + ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff); + ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs; + + ps_cur_mb_info->u1_end_of_slice = 0; + + /* Storing Default partition info */ + ps_parse_mb_data->u1_num_part = 1; + ps_parse_mb_data->u1_isI_mb = 0; + + if((!i2_mb_skip_run) && (!u1_read_mb_type)) + { + + //Inlined ih264d_uev + UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst; + UWORD32 u4_word, u4_ldz; + + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf); + + u4_ldz = CLZ(u4_word); + + /* Flush the ps_bitstrm */ + u4_bitstream_offset += (u4_ldz + 1); + /* Read the suffix from the ps_bitstrm */ + u4_word = 0; + if(u4_ldz) + { + GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, + u4_ldz); + } + *pu4_bitstrm_ofst = u4_bitstream_offset; + i2_mb_skip_run = ((1 << u4_ldz) + u4_word - 1); + //Inlined ih264d_uev + COPYTHECONTEXT("mb_skip_run", i2_mb_skip_run); + uc_more_data_flag = MORE_RBSP_DATA(ps_bitstrm); + u1_read_mb_type = uc_more_data_flag; + } + + /***************************************************************/ + /* Get the required information for decoding of MB */ + /* mb_x, mb_y , neighbour availablity, */ + /***************************************************************/ + ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, i2_mb_skip_run); + + /***************************************************************/ + /* Set the deblocking parameters for 
this MB */ + /***************************************************************/ + if(ps_dec->u4_app_disable_deblk_frm == 0) + ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice, + ps_dec->u1_mb_ngbr_availablity, + ps_dec->u1_cur_mb_fld_dec_flag); + + if(i2_mb_skip_run) + { + /* Set appropriate flags in ps_cur_mb_info and ps_dec */ + ps_dec->i1_prev_mb_qp_delta = 0; + ps_dec->u1_sub_mb_num = 0; + ps_cur_mb_info->u1_mb_type = MB_SKIP; + ps_cur_mb_info->u1_mb_mc_mode = PRED_16x16; + ps_cur_mb_info->u1_cbp = 0; + + { + /* Storing Skip partition info */ + parse_part_params_t *ps_part_info = ps_dec->ps_part; + ps_part_info->u1_is_direct = PART_DIRECT_16x16; + ps_part_info->u1_sub_mb_num = 0; + ps_dec->ps_part++; + } + + /* Update Nnzs */ + ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC); + + ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type; + ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type; + + i2_mb_skip_run--; + } + else + { + u1_read_mb_type = 0; + /**************************************************************/ + /* Macroblock Layer Begins, Decode the u1_mb_type */ + /**************************************************************/ + { + UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst; + UWORD32 u4_word, u4_ldz, u4_temp; + + + //Inlined ih264d_uev + /***************************************************************/ + /* Find leading zeros in next 32 bits */ + /***************************************************************/ + NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf); + u4_ldz = CLZ(u4_word); + /* Flush the ps_bitstrm */ + u4_bitstream_offset += (u4_ldz + 1); + /* Read the suffix from the ps_bitstrm */ + u4_word = 0; + if(u4_ldz) + GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, + u4_ldz); + *pu4_bitstrm_ofst = u4_bitstream_offset; + u4_temp = ((1 << u4_ldz) + u4_word - 1); + //Inlined ih264d_uev + if(u4_temp > (UWORD32)(25 + u1_mb_threshold)) + return ERROR_MB_TYPE; + u1_mb_type = u4_temp; + 
COPYTHECONTEXT("u1_mb_type", u1_mb_type); + } + ps_cur_mb_info->u1_mb_type = u1_mb_type; + + /**************************************************************/ + /* Parse Macroblock data */ + /**************************************************************/ + if(u1_mb_type < u1_mb_threshold) + { + ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type; + + ret = ps_dec->pf_parse_inter_mb(ps_dec, ps_cur_mb_info, u1_num_mbs, + u1_num_mbsNby2); + if(ret != OK) + return ret; + ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type; + } + else + { + /* Storing Intra partition info */ + ps_parse_mb_data->u1_num_part = 0; + ps_parse_mb_data->u1_isI_mb = 1; + + if((25 + u1_mb_threshold) == u1_mb_type) + { + /* I_PCM_MB */ + ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB; + ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs); + if(ret != OK) + return ret; + ps_dec->u1_qp = 0; + } + else + { + ret = ih264d_parse_imb_cavlc( + ps_dec, ps_cur_mb_info, u1_num_mbs, + (UWORD8)(u1_mb_type - u1_mb_threshold)); + if(ret != OK) + return ret; + } + + ps_cur_deblk_mb->u1_mb_type |= D_INTRA_MB; + } + uc_more_data_flag = MORE_RBSP_DATA(ps_bitstrm); + } + ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp; + + if(u1_mbaff) + { + ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info); + } + /**************************************************************/ + /* Get next Macroblock address */ + /**************************************************************/ + i2_cur_mb_addr++; + + u1_num_mbs++; + ps_dec->u2_total_mbs_coded++; + u1_num_mbsNby2++; + ps_parse_mb_data++; + + /****************************************************************/ + /* Check for End Of Row and other flags that determine when to */ + /* do DMA setup for N/2-Mb, Decode for N-Mb, and Transfer for */ + /* N-Mb */ + /****************************************************************/ + u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1; + u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01))); + u1_slice_end = 
(!(uc_more_data_flag || i2_mb_skip_run));
+        u1_tfr_n_mb = (u1_num_mbs == ps_dec->u1_recon_mb_grp) || u1_end_of_row
+                        || u1_slice_end;
+        u1_decode_nmb = u1_tfr_n_mb || u1_slice_end;
+        ps_cur_mb_info->u1_end_of_slice = u1_slice_end;
+
+        /*u1_dma_nby2mb   = u1_decode_nmb ||
+         (u1_num_mbsNby2 == ps_dec->u1_recon_mb_grp_pair);*/
+
+//if(u1_dma_nby2mb)
+        if(u1_decode_nmb)
+        {
+
+            ret = ps_dec->pf_mvpred_ref_tfr_nby2mb(ps_dec, u1_mb_idx, u1_num_mbs);
+            if(ret != OK)
+                return ret;
+            u1_num_mbsNby2 = 0;
+
+            {
+                ps_parse_mb_data = ps_dec->ps_parse_mb_data;
+                ps_dec->ps_part = ps_dec->ps_parse_part_params;
+            }
+        }
+
+        /*H264_DEC_DEBUG_PRINT("Pic: %d Mb_X=%d Mb_Y=%d",
+         ps_slice->i4_poc >> ps_slice->u1_field_pic_flag,
+         ps_dec->u2_mbx,ps_dec->u2_mby + (1 - ps_cur_mb_info->u1_topmb));
+         H264_DEC_DEBUG_PRINT("u1_decode_nmb: %d", u1_decode_nmb);*/
+        if(u1_decode_nmb)
+        {
+
+
+
+            if(ps_dec->u1_separate_parse)
+            {
+                ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+                                     u1_num_mbs_next, u1_tfr_n_mb, u1_end_of_row);
+                ps_dec->ps_nmb_info += u1_num_mbs;
+            }
+            else
+            {
+                ret = ih264d_decode_recon_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+                                                  u1_num_mbs_next, u1_tfr_n_mb,
+                                                  u1_end_of_row);
+                if(ret != OK)
+                    return ret;
+            }
+
+            if(u1_tfr_n_mb)
+                u1_num_mbs = 0;
+            u1_mb_idx = u1_num_mbs;
+            ps_dec->u1_mb_idx = u1_num_mbs;
+
+        }
+//ps_dec->ps_pred++;
+    }
+
+    if(ps_dec->u1_separate_parse)
+    {
+        ps_dec->ps_parse_cur_slice->end_of_slice = 1;
+        ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr
+                        - (u2_first_mb_in_slice << u1_mbaff);
+    }
+
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_pslice \endif
+ *
+ * \brief
+ *    Parses the P-slice specific remainder of the slice header and then
+ *    hands the slice data to the CABAC or CAVLC inter-slice parser.
+ *
+ *    In order, the function:
+ *      - resets the default MV predictor kept in ps_dec,
+ *      - reads num_ref_idx_active_override_flag / num_ref_idx_l0_active,
+ *      - (re)initialises and optionally reorders reference list 0,
+ *      - fills the ref_idx -> picture-buffer map used later in decoding,
+ *      - reads the prediction weight table when the PPS enables it,
+ *      - reads MMCO commands, cabac_init_idc, slice_qp_delta and the
+ *        deblocking filter controls,
+ *      - selects the entropy specific function pointers and parses the
+ *        slice data through ps_dec->pf_parse_inter_slice.
+ *
+ * \param ps_dec
+ *    Decoder context; updated in place.
+ * \param u2_first_mb_in_slice
+ *    Address of the first macroblock of this slice, forwarded unchanged to
+ *    the slice-data parser.
+ *
+ * \return
+ *    OK on success, otherwise the error code of the first failing step
+ *    (ERROR_NUM_REF, ERROR_REFIDX_ORDER_T, ERROR_INV_SLICE_HDR_T,
+ *    ERROR_INV_RANGE_QP_T or whatever a called parser returned).
+ *    Returns 0 without parsing slice data when CABAC is selected and the
+ *    current parse slice already has u2_error_flag set to 1.
+ **************************************************************************
+ */
+WORD32 ih264d_parse_pslice(dec_struct_t *ps_dec, UWORD16 u2_first_mb_in_slice)
+{
+    dec_pic_params_t * ps_pps = ps_dec->ps_cur_pps;
+    dec_slice_params_t * ps_cur_slice = ps_dec->ps_cur_slice;
+    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+    UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+    UWORD8 u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag;
+
+    UWORD32 u4_temp;
+    WORD32 i_temp;
+    WORD32 ret;
+
+    /*--------------------------------------------------------------------*/
+    /* Read remaining contents of the slice header                        */
+    /*--------------------------------------------------------------------*/
+    {
+        /* Reset the default MV predictor: both MV words to zero and the   */
+        /* reference-frame entries to OUT_OF_RANGE_REF (written through    */
+        /* wider pointers, exactly as the original code did).              */
+        WORD8 *pi1_buf;
+        WORD16 *pi2_mv = ps_dec->s_default_mv_pred.i2_mv;
+        WORD32 *pi4_mv = (WORD32*)pi2_mv;
+        WORD16 *pi16_refFrame;
+
+        pi1_buf = ps_dec->s_default_mv_pred.i1_ref_frame;
+        pi16_refFrame = (WORD16*)pi1_buf;
+        *pi4_mv = 0;
+        *(pi4_mv + 1) = 0;
+        *pi16_refFrame = OUT_OF_RANGE_REF;
+        ps_dec->s_default_mv_pred.u1_col_ref_pic_idx = (UWORD8)-1;
+        ps_dec->s_default_mv_pred.u1_pic_type = (UWORD8)-1;
+    }
+
+    ps_cur_slice->u1_num_ref_idx_active_override_flag = ih264d_get_bit_h264(
+                    ps_bitstrm);
+
+    COPYTHECONTEXT("SH: num_ref_idx_override_flag",
+                    ps_cur_slice->u1_num_ref_idx_active_override_flag);
+
+    /* Default comes from the PPS; the slice may override it. */
+    u4_temp = ps_dec->ps_cur_pps->u1_num_ref_idx_lx_active[0];
+    if(ps_cur_slice->u1_num_ref_idx_active_override_flag)
+    {
+        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf) + 1;
+    }
+
+    {
+        UWORD8 u1_max_ref_idx = MAX_FRAMES << u1_field_pic_flag;
+
+        /* A P slice needs at least one active reference; zero (from a bad */
+        /* PPS value or a wrapped "+ 1" above) is as invalid as too many.  */
+        if((u4_temp < 1) || (u4_temp > u1_max_ref_idx))
+        {
+            return ERROR_NUM_REF;
+        }
+        ps_cur_slice->u1_num_ref_idx_lx_active[0] = u4_temp;
+        COPYTHECONTEXT("SH: num_ref_idx_l0_active_minus1",
+                        ps_cur_slice->u1_num_ref_idx_lx_active[0] - 1);
+
+    }
+
+    {
+        UWORD8 uc_refIdxReFlagL0 = ih264d_get_bit_h264(ps_bitstrm);
+        COPYTHECONTEXT("SH: ref_pic_list_reordering_flag_l0",uc_refIdxReFlagL0);
+
+        /* Initialize the Reference list once in Picture if the slice type    */
+        /* of first slice is between 5 to 9 defined in table 7.3 of standard  */
+        /* If picture contains both P & B slices then Initialize the Reference*/
+        /* List only when it switches from P to B and B to P                  */
+        {
+            UWORD8 init_idx_flg = (ps_dec->u1_pr_sl_type
+                            != ps_dec->ps_cur_slice->u1_slice_type);
+            if(ps_dec->u1_first_pb_nal_in_pic
+                            || (init_idx_flg & !ps_dec->u1_sl_typ_5_9)
+                            || ps_dec->u1_num_ref_idx_lx_active_prev
+                                            != ps_cur_slice->u1_num_ref_idx_lx_active[0])
+            {
+                ih264d_init_ref_idx_lx_p(ps_dec);
+            }
+            if(ps_dec->u1_first_pb_nal_in_pic & ps_dec->u1_sl_typ_5_9)
+                ps_dec->u1_first_pb_nal_in_pic = 0;
+        }
+        /* Store the value for future slices in the same picture */
+        ps_dec->u1_num_ref_idx_lx_active_prev =
+                        ps_cur_slice->u1_num_ref_idx_lx_active[0];
+
+        /* Modified temporarily */
+        if(uc_refIdxReFlagL0)
+        {
+            /* Named differently from the function-level 'ret' so the      */
+            /* outer status variable is not shadowed.                      */
+            WORD8 i1_reorder_ret;
+            ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_mod_dpb[0];
+            i1_reorder_ret = ih264d_ref_idx_reordering(ps_dec, 0);
+            if(i1_reorder_ret == -1)
+                return ERROR_REFIDX_ORDER_T;
+            ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_mod_dpb[0];
+        }
+        else
+            ps_dec->ps_ref_pic_buf_lx[0] =
+                            ps_dec->ps_dpb_mgr->ps_init_dpb[0];
+    }
+    /* Create refIdx to POC mapping */
+    {
+        void **pui_map_ref_idx_to_poc_lx0, **pui_map_ref_idx_to_poc_lx1;
+        WORD8 idx;
+        struct pic_buffer_t *ps_pic;
+
+        /* Slot 0 of every list is reserved for ref_idx = -1; real entries */
+        /* start at slot 1, hence the pointer bump before the loop.        */
+        pui_map_ref_idx_to_poc_lx0 = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L0;
+        pui_map_ref_idx_to_poc_lx0[0] = 0; //For ref_idx = -1
+        pui_map_ref_idx_to_poc_lx0++;
+        for(idx = 0; idx < ps_cur_slice->u1_num_ref_idx_lx_active[0]; idx++)
+        {
+            ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
+            pui_map_ref_idx_to_poc_lx0[idx] = (ps_pic->pu1_buf1);
+        }
+
+        /* Bug Fix Deblocking */
+        pui_map_ref_idx_to_poc_lx1 = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L1;
+        pui_map_ref_idx_to_poc_lx1[0] = 0;
+
+        if(u1_mbaff)
+        {
+            void **ppv_map_ref_idx_to_poc_lx_t, **ppv_map_ref_idx_to_poc_lx_b;
+            void **ppv_map_ref_idx_to_poc_lx_t1, **ppv_map_ref_idx_to_poc_lx_b1;
+            ppv_map_ref_idx_to_poc_lx_t = ps_dec->ppv_map_ref_idx_to_poc
+                            + TOP_LIST_FLD_L0;
+            ppv_map_ref_idx_to_poc_lx_b = ps_dec->ppv_map_ref_idx_to_poc
+                            + BOT_LIST_FLD_L0;
+
+            ppv_map_ref_idx_to_poc_lx_t[0] = 0; //  For ref_idx = -1
+            ppv_map_ref_idx_to_poc_lx_t++;
+            ppv_map_ref_idx_to_poc_lx_b[0] = 0; // For ref_idx = -1
+            ppv_map_ref_idx_to_poc_lx_b++;
+
+            /* Each frame reference contributes two field entries; the top */
+            /* and bottom lists store the same two keys in opposite order. */
+            for(idx = 0; idx < ps_cur_slice->u1_num_ref_idx_lx_active[0]; idx++)
+            {
+                ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
+                ppv_map_ref_idx_to_poc_lx_t[0] = (ps_pic->pu1_buf1);
+                ppv_map_ref_idx_to_poc_lx_b[1] = (ps_pic->pu1_buf1);
+
+                ppv_map_ref_idx_to_poc_lx_b[0] = (ps_pic->pu1_buf1) + 1;
+                ppv_map_ref_idx_to_poc_lx_t[1] = (ps_pic->pu1_buf1) + 1;
+
+                ppv_map_ref_idx_to_poc_lx_t += 2;
+                ppv_map_ref_idx_to_poc_lx_b += 2;
+            }
+            ppv_map_ref_idx_to_poc_lx_t1 = ps_dec->ppv_map_ref_idx_to_poc
+                            + TOP_LIST_FLD_L1;
+            ppv_map_ref_idx_to_poc_lx_t1[0] = 0;
+            ppv_map_ref_idx_to_poc_lx_b1 = ps_dec->ppv_map_ref_idx_to_poc
+                            + BOT_LIST_FLD_L1;
+            ppv_map_ref_idx_to_poc_lx_b1[0] = 0;
+
+        }
+
+        if(ps_dec->u4_num_cores >= 3)
+        {
+            /* With three or more cores the map is also copied into the    */
+            /* current parse-slice structure.                              */
+            WORD32 num_entries;
+            WORD32 size;
+
+            num_entries = MIN(MAX_FRAMES, ps_dec->u4_num_ref_frames_at_init);
+            num_entries = 2 * ((2 * num_entries) + 1);
+
+            size = num_entries * sizeof(void *);
+            size += PAD_MAP_IDX_POC * sizeof(void *);
+
+            memcpy((void *)ps_dec->ps_parse_cur_slice->ppv_map_ref_idx_to_poc,
+                   ps_dec->ppv_map_ref_idx_to_poc,
+                   size);
+        }
+
+
+    }
+    if(ps_pps->u1_wted_pred_flag)
+    {
+        ret = ih264d_parse_pred_weight_table(ps_cur_slice, ps_bitstrm);
+        if(ret != OK)
+            return ret;
+        ih264d_form_pred_weight_matrix(ps_dec);
+        ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
+    }
+    else
+    {
+        ps_dec->ps_cur_slice->u2_log2Y_crwd = 0;
+        ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
+    }
+
+    ps_dec->ps_parse_cur_slice->u2_log2Y_crwd =
+                    ps_dec->ps_cur_slice->u2_log2Y_crwd;
+
+    if(u1_mbaff && (u1_field_pic_flag == 0))
+    {
+        ih264d_convert_frm_mbaff_list(ps_dec);
+    }
+
+    /* G050 */
+    if(ps_cur_slice->u1_nal_ref_idc != 0)
+    {
+        /* MMCO commands are parsed once; later passes only skip over the  */
+        /* number of bits recorded the first time.                         */
+        if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read)
+            ps_dec->u4_bitoffset = ih264d_read_mmco_commands(ps_dec);
+        else
+            ps_bitstrm->u4_ofst += ps_dec->u4_bitoffset;
+
+    }
+    /* G050 */
+
+    if(ps_pps->u1_entropy_coding_mode == CABAC)
+    {
+        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+
+        if(u4_temp > MAX_CABAC_INIT_IDC)
+        {
+            return ERROR_INV_SLICE_HDR_T;
+        }
+        ps_cur_slice->u1_cabac_init_idc = u4_temp;
+        COPYTHECONTEXT("SH: cabac_init_idc",ps_cur_slice->u1_cabac_init_idc);
+    }
+
+    /* Read slice_qp_delta.                                                */
+    /* The delta comes straight from the bitstream, so adding it blindly   */
+    /* to pic_init_qp could overflow a signed 32-bit value. A delta above  */
+    /* 51 can never give a QP in 0..51 (assuming u1_pic_init_qp is an      */
+    /* unsigned field, as its prefix suggests), so the addition is skipped */
+    /* in that case and the range check below rejects the slice.           */
+    i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+    if(i_temp <= 51)
+    {
+        i_temp += ps_pps->u1_pic_init_qp;
+    }
+    if((i_temp < 0) || (i_temp > 51))
+    {
+        return ERROR_INV_RANGE_QP_T;
+    }
+    ps_cur_slice->u1_slice_qp = i_temp;
+    COPYTHECONTEXT("SH: slice_qp_delta",
+                    (WORD8)(ps_cur_slice->u1_slice_qp - ps_pps->u1_pic_init_qp));
+
+    if(ps_pps->u1_deblocking_filter_parameters_present_flag == 1)
+    {
+        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+        if(u4_temp > SLICE_BOUNDARY_DBLK_DISABLED)
+        {
+            return ERROR_INV_SLICE_HDR_T;
+        }
+
+        COPYTHECONTEXT("SH: disable_deblocking_filter_idc", u4_temp);
+        ps_cur_slice->u1_disable_dblk_filter_idc = u4_temp;
+        if(u4_temp != 1)
+        {
+            /* Offsets are coded divided by two; store the doubled value.  */
+            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf)
+                            << 1;
+            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
+            {
+                return ERROR_INV_SLICE_HDR_T;
+            }
+            ps_cur_slice->i1_slice_alpha_c0_offset = i_temp;
+            COPYTHECONTEXT("SH: slice_alpha_c0_offset_div2",
+                            ps_cur_slice->i1_slice_alpha_c0_offset >> 1);
+
+            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf)
+                            << 1;
+            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
+            {
+                return ERROR_INV_SLICE_HDR_T;
+            }
+            ps_cur_slice->i1_slice_beta_offset = i_temp;
+            COPYTHECONTEXT("SH: slice_beta_offset_div2",
+                            ps_cur_slice->i1_slice_beta_offset >> 1);
+        }
+        else
+        {
+            ps_cur_slice->i1_slice_alpha_c0_offset = 0;
+            ps_cur_slice->i1_slice_beta_offset = 0;
+        }
+    }
+    else
+    {
+        ps_cur_slice->u1_disable_dblk_filter_idc = 0;
+        ps_cur_slice->i1_slice_alpha_c0_offset = 0;
+        ps_cur_slice->i1_slice_beta_offset = 0;
+    }
+
+    /* Publish the completed header: DATA_SYNC() is issued before the flag */
+    /* is raised to 2.                                                     */
+    DATA_SYNC();
+    ps_dec->ps_parse_cur_slice->slice_header_done = 2;
+
+    if(ps_pps->u1_entropy_coding_mode)
+    {
+        SWITCHOFFTRACE; SWITCHONTRACECABAC;
+        ps_dec->pf_parse_inter_slice = ih264d_parse_inter_slice_data_cabac;
+        /* Slice already flagged in error: stop here and return 0. */
+        if(ps_dec->ps_parse_cur_slice->u2_error_flag == 1)
+            return 0;
+        ps_dec->pf_parse_inter_mb = ih264d_parse_pmb_cabac;
+        ih264d_init_cabac_contexts(P_SLICE, ps_dec);
+
+        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
+            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_mbaff;
+        else
+            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_nonmbaff;
+    }
+    else
+    {
+        SWITCHONTRACE; SWITCHOFFTRACECABAC;
+        ps_dec->pf_parse_inter_slice = ih264d_parse_inter_slice_data_cavlc;
+        ps_dec->pf_parse_inter_mb = ih264d_parse_pmb_cavlc;
+        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
+        {
+            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_mbaff;
+        }
+        else
+            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_nonmbaff;
+    }
+
+    ps_dec->u1_B = 0;
+    ps_dec->pf_mvpred_ref_tfr_nby2mb = ih264d_mv_pred_ref_tfr_nby2_pmb;
+    ret = ps_dec->pf_parse_inter_slice(ps_dec, ps_cur_slice, u2_first_mb_in_slice);
+    if(ret != OK)
+        return ret;
+    return OK;
+}
diff --git a/decoder/ih264d_parse_slice.c b/decoder/ih264d_parse_slice.c
new file mode 100755
index 0000000..323df43
--- /dev/null
+++ b/decoder/ih264d_parse_slice.c
@@ -0,0 +1,1887 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ 
* Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! + ************************************************************************** + * \file ih264d_parse_slice.c + * + * \brief + * Contains routines that decodes a slice NAL unit + * + * \date + * 19/12/2002 + * + * \author AI + ************************************************************************** + */ +#include <string.h> +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ithread.h" +#include "ih264d_structs.h" +#include "ih264d_debug.h" +#include "ih264d_bitstrm.h" +#include "ih264d_parse_mb_header.h" +#include "ih264d_process_bslice.h" +#include "ih264d_process_pslice.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_utils.h" +#include "ih264d_deblocking.h" +#include "ih264d_defs.h" +#include "ih264d_error_handler.h" +#include "ih264d_tables.h" +#include "ih264d_defs.h" +#include "ih264d_mem_request.h" +#include "ih264d_parse_islice.h" +#include "ih264d_parse_slice.h" +#include "ih264d_mvpred.h" +#include "ih264d_mb_utils.h" + +#include "ih264d_defs.h" +#include "ih264d_quant_scaling.h" + +#include "ih264d_inter_pred.h" + +#include "ih264d_sei.h" +#include "ih264d.h" +#include "ih264_error.h" +#include "ih264_disp_mgr.h" +#include "ih264_buf_mgr.h" + +#include 
"ih264d_thread_parse_decode.h" +#include "ih264d_thread_compute_bs.h" +#include "ih264d_dpb_manager.h" +#include <assert.h> +#include "ih264d_parse_islice.h" +#define RET_LAST_SKIP 0x80000000 + +/*! + ************************************************************************** + * \if Function name : ih264d_form_pred_weight_matrix \endif + * + * \brief + * Forms pred weight matrix. + * + * \return + * None + * + ************************************************************************** + */ + +void ih264d_form_pred_weight_matrix(dec_struct_t *ps_dec) +{ + dec_slice_params_t *ps_cur_slice; + UWORD8 uc_num_ref_idx_l0_active, uc_num_ref_idx_l1_active; + UWORD8 i, j; + UWORD32 *pu4_mat_iwt_ofst; + UWORD16 i2_idx; + UWORD32 *pui32_weight_offset_l0, *pui32_weight_offset_l1; + UWORD32 u4_temp; + + ps_cur_slice = ps_dec->ps_cur_slice; + uc_num_ref_idx_l0_active = ps_cur_slice->u1_num_ref_idx_lx_active[0]; + uc_num_ref_idx_l1_active = ps_cur_slice->u1_num_ref_idx_lx_active[1]; + + pu4_mat_iwt_ofst = ps_dec->pu4_wts_ofsts_mat; + + if(ps_cur_slice->u1_slice_type == B_SLICE) + { + for(i = 0; i < uc_num_ref_idx_l0_active; i++) + { + pui32_weight_offset_l0 = ps_cur_slice->u4_wt_ofst_lx[0][i]; + for(j = 0; j < uc_num_ref_idx_l1_active; j++) + { + pui32_weight_offset_l1 = ps_cur_slice->u4_wt_ofst_lx[1][j]; + i2_idx = i * uc_num_ref_idx_l0_active + j; + i2_idx = X3(i2_idx); + /* u4_temp = (pui32_weight_offset_l0[0] | (pui32_weight_offset_l1[0] << 16)); + pu4_mat_iwt_ofst[0] = u4_temp; + u4_temp = (pui32_weight_offset_l0[1] | (pui32_weight_offset_l1[1] << 16)); + pu4_mat_iwt_ofst[1] = u4_temp; + u4_temp = (pui32_weight_offset_l0[2] | (pui32_weight_offset_l1[2] << 16)); + pu4_mat_iwt_ofst[2] = u4_temp; + pu4_mat_iwt_ofst += 3;*/ + pu4_mat_iwt_ofst[0] = pui32_weight_offset_l0[0]; + pu4_mat_iwt_ofst[1] = pui32_weight_offset_l1[0]; + pu4_mat_iwt_ofst[2] = pui32_weight_offset_l0[1]; + pu4_mat_iwt_ofst[3] = pui32_weight_offset_l1[1]; + pu4_mat_iwt_ofst[4] = pui32_weight_offset_l0[2]; + 
pu4_mat_iwt_ofst[5] = pui32_weight_offset_l1[2]; + pu4_mat_iwt_ofst += 6; + } + } + } + else + { + for(i = 0; i < uc_num_ref_idx_l0_active; i++) + { + pui32_weight_offset_l0 = ps_cur_slice->u4_wt_ofst_lx[0][i]; + i2_idx = X3(i); + u4_temp = (UWORD32)pui32_weight_offset_l0[0]; + pu4_mat_iwt_ofst[0] = u4_temp; + u4_temp = (UWORD32)pui32_weight_offset_l0[1]; + pu4_mat_iwt_ofst[2] = u4_temp; + u4_temp = (UWORD32)pui32_weight_offset_l0[2]; + pu4_mat_iwt_ofst[4] = u4_temp; + pu4_mat_iwt_ofst += 6; + } + } +} + + +/*! + ************************************************************************** + * \if Function name : init_firstSliceParam \endif + * + * \brief + * Initialize the Parameter required for all the slices for a picture + * + * \return : Nothing + * + ************************************************************************** + */ + +WORD32 ih264d_start_of_pic(dec_struct_t *ps_dec, + WORD32 i4_poc, + pocstruct_t *ps_temp_poc, + UWORD16 u2_frame_num, + dec_pic_params_t *ps_pps) +{ + pocstruct_t *ps_prev_poc = &ps_dec->s_cur_pic_poc; + pocstruct_t *ps_cur_poc = ps_temp_poc; + + pic_buffer_t *pic_buf; + + ivd_video_decode_op_t * ps_dec_output = + (ivd_video_decode_op_t *)ps_dec->pv_dec_out; + dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice; + dec_seq_params_t *ps_seq = ps_pps->ps_sps; + UWORD8 u1_bottom_field_flag = ps_cur_slice->u1_bottom_field_flag; + UWORD8 u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag; + /* high profile related declarations */ + high_profile_tools_t s_high_profile; + WORD32 ret; + + H264_MUTEX_LOCK(&ps_dec->process_disp_mutex); + + ps_prev_poc->i4_pic_order_cnt_lsb = ps_cur_poc->i4_pic_order_cnt_lsb; + ps_prev_poc->i4_pic_order_cnt_msb = ps_cur_poc->i4_pic_order_cnt_msb; + ps_prev_poc->i4_delta_pic_order_cnt_bottom = + ps_cur_poc->i4_delta_pic_order_cnt_bottom; + ps_prev_poc->i4_delta_pic_order_cnt[0] = + ps_cur_poc->i4_delta_pic_order_cnt[0]; + ps_prev_poc->i4_delta_pic_order_cnt[1] = + ps_cur_poc->i4_delta_pic_order_cnt[1]; + 
ps_prev_poc->u1_bot_field = ps_dec->ps_cur_slice->u1_bottom_field_flag; + ps_prev_poc->i4_prev_frame_num_ofst = ps_cur_poc->i4_prev_frame_num_ofst; + ps_prev_poc->u2_frame_num = u2_frame_num; + ps_dec->i1_prev_mb_qp_delta = 0; + ps_dec->i1_next_ctxt_idx = 0; + + ps_dec->u4_mb_level_deblk = 0; + + /* Disable MB_LEVEL_DEBLK if deblock thread is enabled */ + if(ps_dec->u4_num_cores >= 3) + { + ps_dec->u4_mb_level_deblk = 0; + } + + + if(ps_seq->u1_mb_aff_flag == 1) + { + ps_dec->u4_mb_level_deblk = 0; + if(ps_dec->u4_num_cores > 2) + ps_dec->u4_num_cores = 2; + } + if(ps_dec->u4_mb_level_deblk == 1) + ps_dec->u4_use_intrapred_line_copy = 1; + else + ps_dec->u4_use_intrapred_line_copy = 0; + + if((ps_dec->u4_num_cores >= 3) && (ps_seq->u1_mb_aff_flag == 0)) + { + ps_dec->u4_use_intrapred_line_copy = 1; + } + + ps_dec->u4_app_disable_deblk_frm = 0; + /* If degrade is enabled, set the degrade flags appropriately */ + if(ps_dec->i4_degrade_type && ps_dec->i4_degrade_pics) + { + WORD32 degrade_pic; + ps_dec->i4_degrade_pic_cnt++; + degrade_pic = 0; + + /* If degrade is to be done in all frames, then do not check further */ + switch(ps_dec->i4_degrade_pics) + { + case 4: + { + degrade_pic = 1; + break; + } + case 3: + { + if(ps_cur_slice->u1_slice_type != I_SLICE) + degrade_pic = 1; + + break; + } + case 2: + { + + /* If pic count hits non-degrade interval or it is an islice, then do not degrade */ + if((ps_cur_slice->u1_slice_type != I_SLICE) + && (ps_dec->i4_degrade_pic_cnt + != ps_dec->i4_nondegrade_interval)) + degrade_pic = 1; + + break; + } + case 1: + { + /* Check if the current picture is non-ref */ + if(0 == ps_cur_slice->u1_nal_ref_idc) + { + degrade_pic = 1; + } + break; + } + + } + if(degrade_pic) + { + if(ps_dec->i4_degrade_type & 0x2) + ps_dec->u4_app_disable_deblk_frm = 1; + + /* MC degrading is done only for non-ref pictures */ + if(0 == ps_cur_slice->u1_nal_ref_idc) + { + if(ps_dec->i4_degrade_type & 0x4) + ps_dec->i4_mv_frac_mask = 0; + + 
if(ps_dec->i4_degrade_type & 0x8) + ps_dec->i4_mv_frac_mask = 0; + } + } + else + ps_dec->i4_degrade_pic_cnt = 0; + } + + { + dec_err_status_t * ps_err = ps_dec->ps_dec_err_status; + if(ps_dec->u1_sl_typ_5_9 + && ((ps_cur_slice->u1_slice_type == I_SLICE) + || (ps_cur_slice->u1_slice_type + == SI_SLICE))) + ps_err->u1_cur_pic_type = PIC_TYPE_I; + else + ps_err->u1_cur_pic_type = PIC_TYPE_UNKNOWN; + + if(ps_err->u1_pic_aud_i == PIC_TYPE_I) + { + ps_err->u1_cur_pic_type = PIC_TYPE_I; + ps_err->u1_pic_aud_i = PIC_TYPE_UNKNOWN; + } + + if(ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL) + { + if(ps_err->u1_err_flag) + ih264d_reset_ref_bufs(ps_dec->ps_dpb_mgr); + ps_err->u1_err_flag = ACCEPT_ALL_PICS; + } + } + + ps_dec->u1_first_nal_in_pic = 0; + if(ps_dec->u1_init_dec_flag && ps_dec->s_prev_seq_params.u1_eoseq_pending) + { + /* Reset the decoder picture buffers */ + WORD32 j; + for(j = 0; j < MAX_DISP_BUFS_NEW; j++) + { + + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, + j, + BUF_MGR_REF); + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr, + ps_dec->au1_pic_buf_id_mv_buf_id_map[j], + BUF_MGR_REF); + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, + j, + BUF_MGR_IO); + } + + /* reset the decoder structure parameters related to buffer handling */ + ps_dec->u1_second_field = 0; + ps_dec->i4_cur_display_seq = 0; + + /********************************************************************/ + /* indicate in the decoder output i4_status that some frames are being */ + /* dropped, so that it resets timestamp and wait for a new sequence */ + /********************************************************************/ + + ps_dec->s_prev_seq_params.u1_eoseq_pending = 0; + } + ret = ih264d_init_pic(ps_dec, u2_frame_num, i4_poc, ps_pps); + if(ret != OK) + return ret; + + ps_dec->pv_parse_tu_coeff_data = ps_dec->pv_pic_tu_coeff_data; + ps_dec->pv_proc_tu_coeff_data = ps_dec->pv_pic_tu_coeff_data; + ps_dec->ps_nmb_info = ps_dec->ps_frm_mb_info; + 
if(ps_dec->u1_separate_parse) + { + UWORD16 pic_wd = ps_dec->u4_width_at_init; + UWORD16 pic_ht = ps_dec->u4_height_at_init; + UWORD32 num_mbs; + + if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid))) + { + pic_wd = ps_dec->u2_pic_wd; + pic_ht = ps_dec->u2_pic_ht; + } + num_mbs = (pic_wd * pic_ht) >> 8; + + ps_dec->u4_start_frame_decode = 0; + if(ps_dec->pu1_dec_mb_map) + { + memset((void *)ps_dec->pu1_dec_mb_map, 0, num_mbs); + } + + if(ps_dec->pu1_recon_mb_map) + { + + memset((void *)ps_dec->pu1_recon_mb_map, 0, num_mbs); + } + + if(ps_dec->pu2_slice_num_map) + { + memset((void *)ps_dec->pu2_slice_num_map, 0, + (num_mbs * sizeof(UWORD16))); + } + + } + if(ps_dec->u4_first_slice_in_pic == 1) + { + ps_dec->ps_parse_cur_slice = &(ps_dec->ps_dec_slice_buf[0]); + ps_dec->ps_decode_cur_slice = &(ps_dec->ps_dec_slice_buf[0]); + ps_dec->ps_computebs_cur_slice = &(ps_dec->ps_dec_slice_buf[0]); + } + ps_dec->ps_parse_cur_slice->slice_header_done = 0; + ps_dec->ps_parse_cur_slice->last_slice_in_frame = 0; + ps_dec->ps_parse_cur_slice->u4_num_mbs_done_in_slice = 0; + + ps_dec->ps_parse_cur_slice->u2_error_flag = 0; + + /* Initialize all the HP toolsets to zero */ + ps_dec->s_high_profile.u1_scaling_present = 0; + ps_dec->s_high_profile.u1_transform8x8_present = 0; + + /* Get Next Free Picture */ + if(1 == ps_dec->u4_share_disp_buf) + { + UWORD32 i; + /* Free any buffer that is in the queue to be freed */ + for(i = 0; i < MAX_DISP_BUFS_NEW; i++) + { + if(0 == ps_dec->u4_disp_buf_to_be_freed[i]) + continue; + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, i, + BUF_MGR_IO); + ps_dec->u4_disp_buf_to_be_freed[i] = 0; + ps_dec->u4_disp_buf_mapping[i] = 0; + + } + } + if(!(u1_field_pic_flag && 0 != ps_dec->u1_top_bottom_decoded)) //ps_dec->u1_second_field)) + { + pic_buffer_t *ps_cur_pic; + WORD32 cur_pic_buf_id, cur_mv_buf_id; + col_mv_buf_t *ps_col_mv; + while(1) + { + ps_cur_pic = (pic_buffer_t *)ih264_buf_mgr_get_next_free( + (buf_mgr_t 
*)ps_dec->pv_pic_buf_mgr, + &cur_pic_buf_id); + if(ps_cur_pic == NULL) + { + ps_dec->i4_error_code = ERROR_UNAVAIL_PICBUF_T; + return ERROR_UNAVAIL_PICBUF_T; + } + if(0 == ps_dec->u4_disp_buf_mapping[cur_pic_buf_id]) + { + break; + } + + } + ps_col_mv = (col_mv_buf_t *)ih264_buf_mgr_get_next_free((buf_mgr_t *)ps_dec->pv_mv_buf_mgr, + &cur_mv_buf_id); + if(ps_col_mv == NULL) + { + ps_dec->i4_error_code = ERROR_UNAVAIL_MVBUF_T; + return ERROR_UNAVAIL_MVBUF_T; + } + + ps_dec->ps_cur_pic = ps_cur_pic; + ps_dec->u1_pic_buf_id = cur_pic_buf_id; + ps_cur_pic->u4_ts = ps_dec->u4_ts; + + + ps_cur_pic->u1_mv_buf_id = cur_mv_buf_id; + ps_dec->au1_pic_buf_id_mv_buf_id_map[cur_pic_buf_id] = cur_mv_buf_id; + + ps_cur_pic->pu1_col_zero_flag = (UWORD8 *)ps_col_mv->pv_col_zero_flag; + ps_cur_pic->ps_mv = (mv_pred_t *)ps_col_mv->pv_mv; + ps_dec->au1_pic_buf_ref_flag[cur_pic_buf_id] = 0; + + if(!ps_dec->ps_cur_pic) + { + H264_DEC_DEBUG_PRINT("------- Display Buffers Reset --------\n"); + WORD32 j; + for(j = 0; j < MAX_DISP_BUFS_NEW; j++) + { + + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, + j, + BUF_MGR_REF); + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr, + ps_dec->au1_pic_buf_id_mv_buf_id_map[j], + BUF_MGR_REF); + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, + j, + BUF_MGR_IO); + } + + ps_dec->i4_cur_display_seq = 0; + ps_dec->i4_prev_max_display_seq = 0; + ps_dec->i4_max_poc = 0; + + ps_cur_pic = (pic_buffer_t *)ih264_buf_mgr_get_next_free( + (buf_mgr_t *)ps_dec->pv_pic_buf_mgr, + &cur_pic_buf_id); + if(ps_cur_pic == NULL) + { + ps_dec->i4_error_code = ERROR_UNAVAIL_PICBUF_T; + return ERROR_UNAVAIL_PICBUF_T; + } + + ps_col_mv = (col_mv_buf_t *)ih264_buf_mgr_get_next_free((buf_mgr_t *)ps_dec->pv_mv_buf_mgr, + &cur_mv_buf_id); + if(ps_col_mv == NULL) + { + ps_dec->i4_error_code = ERROR_UNAVAIL_MVBUF_T; + return ERROR_UNAVAIL_MVBUF_T; + } + + ps_dec->ps_cur_pic = ps_cur_pic; + ps_dec->u1_pic_buf_id = cur_pic_buf_id; + ps_cur_pic->u4_ts = 
ps_dec->u4_ts; + ps_dec->apv_buf_id_pic_buf_map[cur_pic_buf_id] = (void *)ps_cur_pic; + + ps_cur_pic->u1_mv_buf_id = cur_mv_buf_id; + ps_dec->au1_pic_buf_id_mv_buf_id_map[cur_pic_buf_id] = cur_mv_buf_id; + + ps_cur_pic->pu1_col_zero_flag = (UWORD8 *)ps_col_mv->pv_col_zero_flag; + ps_cur_pic->ps_mv = (mv_pred_t *)ps_col_mv->pv_mv; + ps_dec->au1_pic_buf_ref_flag[cur_pic_buf_id] = 0; + + } + + ps_dec->ps_cur_pic->u1_picturetype = u1_field_pic_flag; + ps_dec->ps_cur_pic->u4_pack_slc_typ = SKIP_NONE; + H264_DEC_DEBUG_PRINT("got a buffer\n"); + } + else + { + H264_DEC_DEBUG_PRINT("did not get a buffer\n"); + } + + ps_dec->u4_pic_buf_got = 1; + + ps_dec->ps_cur_pic->i4_poc = i4_poc; + ps_dec->ps_cur_pic->i4_frame_num = u2_frame_num; + ps_dec->ps_cur_pic->i4_pic_num = u2_frame_num; + ps_dec->ps_cur_pic->i4_top_field_order_cnt = ps_pps->i4_top_field_order_cnt; + ps_dec->ps_cur_pic->i4_bottom_field_order_cnt = + ps_pps->i4_bottom_field_order_cnt; + ps_dec->ps_cur_pic->i4_avg_poc = ps_pps->i4_avg_poc; + ps_dec->ps_cur_pic->u4_time_stamp = ps_dec->u4_pts; + + ps_dec->s_cur_pic = *(ps_dec->ps_cur_pic); + if(u1_field_pic_flag && u1_bottom_field_flag) + { + WORD32 i4_temp_poc; + WORD32 i4_top_field_order_poc, i4_bot_field_order_poc; + /* Point to odd lines, since it's bottom field */ + ps_dec->s_cur_pic.pu1_buf1 += ps_dec->s_cur_pic.u2_frm_wd_y; + ps_dec->s_cur_pic.pu1_buf2 += ps_dec->s_cur_pic.u2_frm_wd_uv; + ps_dec->s_cur_pic.pu1_buf3 += ps_dec->s_cur_pic.u2_frm_wd_uv; + ps_dec->s_cur_pic.ps_mv += + ((ps_dec->u2_pic_ht * ps_dec->u2_pic_wd) >> 5); + ps_dec->s_cur_pic.pu1_col_zero_flag += ((ps_dec->u2_pic_ht + * ps_dec->u2_pic_wd) >> 5); + ps_dec->ps_cur_pic->u1_picturetype |= BOT_FLD; + i4_top_field_order_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt; + i4_bot_field_order_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt; + i4_temp_poc = MIN(i4_top_field_order_poc, + i4_bot_field_order_poc); + ps_dec->ps_cur_pic->i4_avg_poc = i4_temp_poc; + } + + 
ps_cur_slice->u1_mbaff_frame_flag = ps_seq->u1_mb_aff_flag + && (!u1_field_pic_flag); + + ps_dec->ps_cur_pic->u1_picturetype |= (ps_cur_slice->u1_mbaff_frame_flag + << 2); + if(ps_cur_slice->u1_mbaff_frame_flag) + { + ps_dec->u2_mb_group_cols_y = ((ps_dec->u1_recon_mb_grp >> 1) << 4) + 8; + ps_dec->u2_mb_group_cols_cr = ((ps_dec->u1_recon_mb_grp >> 1) << 3) + 8; + } + else + { + ps_dec->u2_mb_group_cols_y = (ps_dec->u1_recon_mb_grp << 4) + 8; + ps_dec->u2_mb_group_cols_cr = (ps_dec->u1_recon_mb_grp << 3) + 8; + } + + + + + + ps_dec->ps_cur_mb_row = ps_dec->ps_nbr_mb_row; //[0]; + ps_dec->ps_cur_mb_row++; //Increment by 1 ,so that left mb will always be valid + ps_dec->ps_top_mb_row = + ps_dec->ps_nbr_mb_row + + ((ps_dec->u2_frm_wd_in_mbs + 1) + << (1 + - ps_dec->ps_cur_sps->u1_frame_mbs_only_flag)); + ps_dec->ps_top_mb_row++; //Increment by 1 ,so that left mb will always be valid + + ps_dec->u2_mb_group_cols_y1 = ps_dec->u2_mb_group_cols_y; + ps_dec->u2_mb_group_cols_cr1 = ps_dec->u2_mb_group_cols_cr; + ps_dec->pu1_y = ps_dec->pu1_y_scratch[0]; + ps_dec->pu1_u = ps_dec->pu1_u_scratch[0]; + ps_dec->pu1_v = ps_dec->pu1_v_scratch[0]; + ps_dec->u1_yuv_scratch_idx = 0; + /* CHANGED CODE */ + ps_dec->ps_mv_cur = ps_dec->s_cur_pic.ps_mv; + ps_dec->ps_mv_top = ps_dec->ps_mv_top_p[0]; + /* CHANGED CODE */ + ps_dec->u1_mv_top_p = 0; + ps_dec->u1_mb_idx = 0; + /* CHANGED CODE */ + ps_dec->ps_mv_left = ps_dec->s_cur_pic.ps_mv; + ps_dec->pu1_yleft = 0; + ps_dec->pu1_uleft = 0; + ps_dec->pu1_vleft = 0; + ps_dec->u1_not_wait_rec = 2; + ps_dec->u2_total_mbs_coded = 0; + ps_dec->i4_submb_ofst = -(SUB_BLK_SIZE); + ps_dec->u4_pred_info_idx = 0; + ps_dec->u4_pred_info_pkd_idx = 0; + ps_dec->u4_dma_buf_idx = 0; + ps_dec->ps_mv = ps_dec->s_cur_pic.ps_mv; + ps_dec->ps_mv_bank_cur = ps_dec->s_cur_pic.ps_mv; + ps_dec->pu1_col_zero_flag = ps_dec->s_cur_pic.pu1_col_zero_flag; + ps_dec->ps_part = ps_dec->ps_parse_part_params; + ps_dec->i2_prev_slice_mbx = -1; + ps_dec->i2_prev_slice_mby = 0; 
+ ps_dec->u2_mv_2mb[0] = 0; + ps_dec->u2_mv_2mb[1] = 0; + ps_dec->u1_last_pic_not_decoded = 0; + + ps_dec->u2_cur_slice_num = 0; + ps_dec->u2_cur_slice_num_dec_thread = 0; + ps_dec->u2_cur_slice_num_bs = 0; + ps_dec->u4_intra_pred_line_ofst = 0; + ps_dec->pu1_cur_y_intra_pred_line = ps_dec->pu1_y_intra_pred_line; + ps_dec->pu1_cur_u_intra_pred_line = ps_dec->pu1_u_intra_pred_line; + ps_dec->pu1_cur_v_intra_pred_line = ps_dec->pu1_v_intra_pred_line; + + ps_dec->pu1_cur_y_intra_pred_line_base = ps_dec->pu1_y_intra_pred_line; + ps_dec->pu1_cur_u_intra_pred_line_base = ps_dec->pu1_u_intra_pred_line; + ps_dec->pu1_cur_v_intra_pred_line_base = ps_dec->pu1_v_intra_pred_line; + + + + + + ps_dec->pu1_prev_y_intra_pred_line = ps_dec->pu1_y_intra_pred_line + + (ps_dec->u2_frm_wd_in_mbs * MB_SIZE); + + ps_dec->pu1_prev_u_intra_pred_line = ps_dec->pu1_u_intra_pred_line + + ps_dec->u2_frm_wd_in_mbs * BLK8x8SIZE * YUV420SP_FACTOR; + ps_dec->pu1_prev_v_intra_pred_line = ps_dec->pu1_v_intra_pred_line + + ps_dec->u2_frm_wd_in_mbs * BLK8x8SIZE; + + ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_pic; + ps_dec->ps_deblk_mbn_curr = ps_dec->ps_deblk_mbn; + ps_dec->ps_deblk_mbn_prev = ps_dec->ps_deblk_mbn + ps_dec->u1_recon_mb_grp; + /* Initialize The Function Pointer Depending Upon the Entropy and MbAff Flag */ + { + if(ps_cur_slice->u1_mbaff_frame_flag) + { + ps_dec->pf_compute_bs = ih264d_compute_bs_mbaff; + ps_dec->pf_mvpred = ih264d_mvpred_mbaff; + } + else + { + ps_dec->pf_compute_bs = ih264d_compute_bs_non_mbaff; + ps_dec->u1_cur_mb_fld_dec_flag = ps_cur_slice->u1_field_pic_flag; + } + } + /* Set up the Parameter for DMA transfer */ + { + UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; + + UWORD8 u1_mbaff = ps_cur_slice->u1_mbaff_frame_flag; + + UWORD8 uc_lastmbs = (((ps_dec->u2_pic_wd) >> 4) + % (ps_dec->u1_recon_mb_grp >> u1_mbaff)); + UWORD16 ui16_lastmbs_widthY = + (uc_lastmbs ? 
(uc_lastmbs << 4) : ((ps_dec->u1_recon_mb_grp + >> u1_mbaff) << 4)); + UWORD16 ui16_lastmbs_widthUV = + uc_lastmbs ? (uc_lastmbs << 3) : ((ps_dec->u1_recon_mb_grp + >> u1_mbaff) << 3); + + ps_dec->s_tran_addrecon.pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1; + ps_dec->s_tran_addrecon.pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2; + ps_dec->s_tran_addrecon.pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3; + + ps_dec->s_tran_addrecon.u2_frm_wd_y = ps_dec->u2_frm_wd_y + << u1_field_pic_flag; + ps_dec->s_tran_addrecon.u2_frm_wd_uv = ps_dec->u2_frm_wd_uv + << u1_field_pic_flag; + + if(u1_field_pic_flag) + { + ui16_lastmbs_widthY += ps_dec->u2_frm_wd_y; + ui16_lastmbs_widthUV += ps_dec->u2_frm_wd_uv; + } + + /* Normal Increment of Pointer */ + ps_dec->s_tran_addrecon.u4_inc_y[0] = ((ps_dec->u1_recon_mb_grp << 4) + >> u1_mbaff); + ps_dec->s_tran_addrecon.u4_inc_uv[0] = ((ps_dec->u1_recon_mb_grp << 4) + >> u1_mbaff); + + /* End of Row Increment */ + ps_dec->s_tran_addrecon.u4_inc_y[1] = (ui16_lastmbs_widthY + + (PAD_LEN_Y_H << 1) + + ps_dec->s_tran_addrecon.u2_frm_wd_y + * ((15 << u1_mbaff) + u1_mbaff)); + ps_dec->s_tran_addrecon.u4_inc_uv[1] = (ui16_lastmbs_widthUV + + (PAD_LEN_UV_H << 2) + + ps_dec->s_tran_addrecon.u2_frm_wd_uv + * ((15 << u1_mbaff) + u1_mbaff)); + + /* Assign picture numbers to each frame/field */ + /* only once per picture. 
*/ + ih264d_assign_pic_num(ps_dec); + ps_dec->s_tran_addrecon.u2_mv_top_left_inc = (ps_dec->u1_recon_mb_grp + << 2) - 1 - (u1_mbaff << 2); + ps_dec->s_tran_addrecon.u2_mv_left_inc = ((ps_dec->u1_recon_mb_grp + >> u1_mbaff) - 1) << (4 + u1_mbaff); + } + /**********************************************************************/ + /* High profile related initialization at picture level */ + /**********************************************************************/ + if(ps_seq->u1_profile_idc == HIGH_PROFILE_IDC) + { + if((ps_seq->i4_seq_scaling_matrix_present_flag) + || (ps_pps->i4_pic_scaling_matrix_present_flag)) + { + ih264d_form_scaling_matrix_picture(ps_seq, ps_pps, ps_dec); + ps_dec->s_high_profile.u1_scaling_present = 1; + } + else + { + ih264d_form_default_scaling_matrix(ps_dec); + } + + if(ps_pps->i4_transform_8x8_mode_flag) + { + ps_dec->s_high_profile.u1_transform8x8_present = 1; + } + } + else + { + ih264d_form_default_scaling_matrix(ps_dec); + } + + /* required while reading the transform_size_8x8 u4_flag */ + ps_dec->s_high_profile.u1_direct_8x8_inference_flag = + ps_seq->u1_direct_8x8_inference_flag; + ps_dec->s_high_profile.s_cavlc_ctxt = ps_dec->s_cavlc_ctxt; + + if(ps_dec->u1_separate_parse) + { + memcpy(&ps_dec->s_tran_addrecon_parse, &ps_dec->s_tran_addrecon, + sizeof(tfr_ctxt_t)); + } + + H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex); + return OK; +} + +/*! + ************************************************************************** + * \if Function name : ih264d_end_of_pic_dispbuf_mgr \endif + * + * \brief : End-of-picture buffer bookkeeping: updates the DPB and buffer-manager + : status of the current picture and inserts it in the display list. 
+ * \return : OK, or the error code of a failing callee. NOTE(review): the early error returns skip H264_MUTEX_UNLOCK and leave process_disp_mutex locked + * + ************************************************************************** + */ +WORD32 ih264d_end_of_pic_dispbuf_mgr(dec_struct_t * ps_dec) +{ + dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice; + UWORD8 u1_num_of_users = 0; /* NOTE(review): never read in this function */ + WORD32 ret; + + H264_MUTEX_LOCK(&ps_dec->process_disp_mutex); + if(1) + { + + { + ih264d_delete_nonref_nondisplay_pics(ps_dec->ps_dpb_mgr); + if(ps_cur_slice->u1_mmco_equalto5 + || (ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL)) + { /* MMCO5 or IDR: POC of the current picture restarts at 0 and display buffers are released */ + ps_dec->ps_cur_pic->i4_poc = 0; + if(ps_dec->u2_total_mbs_coded + == (ps_dec->ps_cur_sps->u2_max_mb_addr + 1)) + ih264d_reset_ref_bufs(ps_dec->ps_dpb_mgr); + ih264d_release_display_bufs(ps_dec); + } + if(ps_dec->u4_num_reorder_frames_at_init != 0) + { + ret = ih264d_assign_display_seq(ps_dec); + if(ret != OK) + return ret; + } + } + + if(ps_cur_slice->u1_nal_ref_idc) + { + /* Mark pic buf as needed for reference */ + ih264_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, + ps_dec->u1_pic_buf_id, + BUF_MGR_REF); + /* Mark mv buf as needed for reference */ + ih264_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_mv_buf_mgr, + ps_dec->au1_pic_buf_id_mv_buf_id_map[ps_dec->u1_pic_buf_id], + BUF_MGR_REF); + ps_dec->au1_pic_buf_ref_flag[ps_dec->u1_pic_buf_id] = 1; + } + + /* 420 consumer */ + /* Increment the number of users by 1 for display based upon */ + /*the SEEK KEY FRAME control sent to decoder */ + if(((0 == ps_dec->u1_last_pic_not_decoded) + && (0 + == (ps_dec->ps_cur_pic->u4_pack_slc_typ + & ps_dec->u4_skip_frm_mask))) + || (ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL)) + { + /* Mark pic buf as needed for display */ + ih264_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, + ps_dec->u1_pic_buf_id, + BUF_MGR_IO); + + } + + if(!ps_cur_slice->u1_field_pic_flag + || ((TOP_FIELD_ONLY | BOT_FIELD_ONLY) + != ps_dec->u1_top_bottom_decoded)) + { + pic_buffer_t *ps_cur_pic = ps_dec->ps_cur_pic; + ps_cur_pic->u2_disp_width = ps_dec->u2_disp_width; + 
ps_cur_pic->u2_disp_height = ps_dec->u2_disp_height >> 1; + + ps_cur_pic->u2_crop_offset_y = ps_dec->u2_crop_offset_y; + ps_cur_pic->u2_crop_offset_uv = ps_dec->u2_crop_offset_uv; + ps_cur_pic->u1_pic_type = 0; + + ret = ih264d_insert_pic_in_display_list( + ps_dec->ps_dpb_mgr, + ps_dec->u1_pic_buf_id, + ps_dec->i4_prev_max_display_seq + + ps_dec->ps_cur_pic->i4_poc, + ps_dec->ps_cur_pic->i4_frame_num); + if(ret != OK) + return ret; + + { + ivd_video_decode_op_t * ps_dec_output = + (ivd_video_decode_op_t *)ps_dec->pv_dec_out; + + ps_dec_output->u4_frame_decoded_flag = 1; + } + if(ps_dec->au1_pic_buf_ref_flag[ps_dec->u1_pic_buf_id] == 0) + { /* Picture was not marked as reference above: drop the REF status of its mapped MV buffer */ + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr, + ps_dec->au1_pic_buf_id_mv_buf_id_map[ps_dec->u1_pic_buf_id], + BUF_MGR_REF); + ps_dec->au1_pic_buf_ref_flag[ps_dec->u1_pic_buf_id] = 0; /* NOTE(review): redundant - this branch is only entered when the flag is 0 */ + + } + } + else + { + H264_DEC_DEBUG_PRINT("pic not inserted display %d %d\n", + ps_cur_slice->u1_field_pic_flag, + ps_dec->u1_second_field); + } + { + + if(!ps_cur_slice->u1_end_of_frame_signal) + { + ps_cur_slice->u1_end_of_frame_signal = 1; + } + } + + if(!ps_cur_slice->u1_field_pic_flag + || ((TOP_FIELD_ONLY | BOT_FIELD_ONLY) + == ps_dec->u1_top_bottom_decoded)) + { + if(ps_dec->u4_num_reorder_frames_at_init == 0) + { + ret = ih264d_assign_display_seq(ps_dec); + if(ret != OK) + return ret; + } + } + } + + H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex); + + return OK; +} + +void ih264d_err_pic_dispbuf_mgr(dec_struct_t *ps_dec) +{ /* Releases the current picture buffer (REF and IO status) and its mapped MV buffer (REF status) */ + dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice; + ivd_video_decode_op_t * ps_dec_output = + (ivd_video_decode_op_t *)ps_dec->pv_dec_out; /* NOTE(review): ps_cur_slice and ps_dec_output are not used below */ + + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, + ps_dec->u1_pic_buf_id, + BUF_MGR_REF); + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr, + ps_dec->au1_pic_buf_id_mv_buf_id_map[ps_dec->u1_pic_buf_id], + BUF_MGR_REF); + ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, + ps_dec->u1_pic_buf_id, + BUF_MGR_IO); +} + +void 
ih264d_deblock_picture(void *ptr) +{ + dec_struct_t *ps_dec = (dec_struct_t *)ptr; + + { + /*Deblock picture only if all the mb's in the frame have been decoded*/ + if(ps_dec->u1_pic_decode_done == 1) + { + if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag + || ps_dec->ps_cur_slice->u1_field_pic_flag) + { /* MBAFF or field picture: dispatch through p_DeblockPicture[], indexed by the MBAFF flag */ + ps_dec->p_DeblockPicture[ps_dec->ps_cur_slice->u1_mbaff_frame_flag]( + ps_dec); + } + else + + { /* neither MBAFF nor field picture */ + + ih264d_deblock_picture_progressive(ps_dec); + } + + } + } + +} + +/*! + ************************************************************************** + * \if Function name : ih264d_deblock_display \endif + * + * \brief : The function calls the deblocking routine and then runs the + : end-of-picture display buffer management. + * \return : OK, or the error code returned by ih264d_end_of_pic_dispbuf_mgr + * + ************************************************************************** + */ +WORD32 ih264d_deblock_display(dec_struct_t *ps_dec) +{ + WORD32 ret; + /* Call deblocking */ + ih264d_deblock_picture(ps_dec); + + ret = ih264d_end_of_pic_dispbuf_mgr(ps_dec); + if(ret != OK) + return ret; + + return OK; +} + +/* + *! 
+ ************************************************************************** + * \if Function name : ih264d_end_of_pic \endif + * + * \brief + * End-of-picture processing: resets the per-picture parse state and saves the frame_num / POC state of the current picture as the previous picture's + * + * \return + * OK on Success and Error code otherwise + ************************************************************************** + */ + +WORD32 ih264d_end_of_pic(dec_struct_t *ps_dec, + UWORD8 u1_is_idr_slice, + UWORD16 u2_frame_num) +{ + dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice; + WORD32 ret; + + ps_dec->u1_first_nal_in_pic = 1; + ps_dec->u1_first_pb_nal_in_pic = 1; + ps_dec->u2_mbx = 0xffff; + ps_dec->u2_mby = 0; + { + dec_err_status_t * ps_err = ps_dec->ps_dec_err_status; + if(ps_err->u1_err_flag & REJECT_CUR_PIC) + { /* Current picture is flagged for rejection: clear the flag and release its buffers instead of finishing it */ + ps_err->u1_err_flag ^= REJECT_CUR_PIC; + ih264d_err_pic_dispbuf_mgr(ps_dec); + return OK; + } + } + + H264_MUTEX_LOCK(&ps_dec->process_disp_mutex); + ret = ih264d_end_of_pic_processing(ps_dec); + if(ret != OK) + return ret; /* NOTE(review): this and the later error returns leave process_disp_mutex locked */ + ps_dec->u2_total_mbs_coded = 0; + /*--------------------------------------------------------------------*/ + /* Save frame_num / POC state of this picture as the previous-picture */ + /* state (s_prev_pic_poc) and update u2_prev_ref_frame_num */ + /*--------------------------------------------------------------------*/ + { + pocstruct_t *ps_prev_poc = &ps_dec->s_prev_pic_poc; + pocstruct_t *ps_cur_poc = &ps_dec->s_cur_pic_poc; + if((0 == u1_is_idr_slice) && ps_cur_slice->u1_nal_ref_idc) + ps_dec->u2_prev_ref_frame_num = ps_cur_slice->u2_frame_num; + + if(u1_is_idr_slice || ps_cur_slice->u1_mmco_equalto5) + ps_dec->u2_prev_ref_frame_num = 0; + + if(ps_dec->ps_cur_sps->u1_gaps_in_frame_num_value_allowed_flag) + { + ret = ih264d_decode_gaps_in_frame_num(ps_dec, u2_frame_num); + if(ret != OK) + return ret; + } + + ps_prev_poc->i4_prev_frame_num_ofst = ps_cur_poc->i4_prev_frame_num_ofst; + ps_prev_poc->u2_frame_num = ps_cur_poc->u2_frame_num; + ps_prev_poc->u1_mmco_equalto5 = ps_cur_slice->u1_mmco_equalto5; + if(ps_cur_slice->u1_nal_ref_idc) + { + ps_prev_poc->i4_pic_order_cnt_lsb = 
ps_cur_poc->i4_pic_order_cnt_lsb; + ps_prev_poc->i4_pic_order_cnt_msb = ps_cur_poc->i4_pic_order_cnt_msb; + ps_prev_poc->i4_delta_pic_order_cnt_bottom = + ps_cur_poc->i4_delta_pic_order_cnt_bottom; + ps_prev_poc->i4_delta_pic_order_cnt[0] = + ps_cur_poc->i4_delta_pic_order_cnt[0]; + ps_prev_poc->i4_delta_pic_order_cnt[1] = + ps_cur_poc->i4_delta_pic_order_cnt[1]; + ps_prev_poc->u1_bot_field = ps_cur_poc->u1_bot_field; + } + } + if(!ps_cur_slice->u1_end_of_frame_signal) + { + return ERROR_END_OF_FRAME_EXPECTED_T; + } H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex); + + return OK; +} + +/*! + ************************************************************************** + * \if Function name : ih264d_parse_decode_slice \endif + * + * \brief + * Parses a slice header, runs end-of-picture and start-of-picture processing when a new picture starts, and calls the I, P or B slice parser + * + * \return + * OK (0) on Success; RET_LAST_SKIP or an Error code otherwise + ************************************************************************** + */ + +WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice, + UWORD8 u1_nal_ref_idc, + dec_struct_t *ps_dec /* Decoder parameters */ + ) +{ + dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm; + dec_pic_params_t *ps_pps; + dec_seq_params_t *ps_seq; + dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice; + pocstruct_t s_tmp_poc; + WORD32 i_delta_poc[2]; + WORD32 i4_poc = 0; + UWORD16 u2_first_mb_in_slice, u2_frame_num; + UWORD8 u1_field_pic_flag, u1_redundant_pic_cnt = 0, u1_slice_type; + UWORD32 u4_idr_pic_id = 0; + UWORD8 u1_bottom_field_flag, u1_pic_order_cnt_type; + + UWORD8 u1_nal_unit_type; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst; + WORD8 i1_is_end_of_poc; + + WORD32 ret; + UWORD32 u4_temp; + WORD32 i_temp; + UWORD32 u4_call_end_of_pic = 0; /* NOTE(review): never used in this function */ + + /*--------------------------------------------------------------------*/ + /* Decode Portion of the Slice header */ + /* This is done to detect end of picture */ + /*--------------------------------------------------------------------*/ + + if(ps_dec->u4_first_slice_in_pic == 0) + 
{ + volatile dec_slice_struct_t *ps_next_slice; + + ps_next_slice = ps_dec->ps_parse_cur_slice + 1; + + /*Reset the ready u4_flag and then increment*/ + ps_next_slice->slice_header_done = 0; + DATA_SYNC(); + ps_dec->ps_parse_cur_slice++; + } + + /* read FirstMbInSlice and slice type*/ + ps_dec->ps_dpb_cmds->u1_dpb_commands_read_slc = 0; + u2_first_mb_in_slice = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); /* NOTE(review): result is narrowed to UWORD16 before the range check below - confirm larger decoded values cannot wrap into range */ + if(u2_first_mb_in_slice + > (ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs)) + { + + return ERROR_CORRUPTED_SLICE; + } + + /*we currently do not support ASO: a slice starting at or before the current MB address is rejected*/ + if(((u2_first_mb_in_slice << ps_cur_slice->u1_mbaff_frame_flag) + <= ps_dec->u2_cur_mb_addr) && (ps_dec->u2_cur_mb_addr != 0) + && (ps_dec->u4_first_slice_in_pic != 0)) + { + return ERROR_CORRUPTED_SLICE; + } + + COPYTHECONTEXT("SH: first_mb_in_slice",u2_first_mb_in_slice); + + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + + if(u4_temp > 9) + return ERROR_INV_SLC_TYPE_T; + + u1_slice_type = u4_temp; + COPYTHECONTEXT("SH: slice_type",(u1_slice_type)); + ps_dec->u1_sl_typ_5_9 = 0; + /* Find Out the Slice Type is 5 to 9 or not then Set the Flag */ + /* u1_sl_typ_5_9 = 1 .Which tells that all the slices in the Pic*/ + /* will be of same type of current */ + if(u1_slice_type > 4) + { + u1_slice_type -= 5; + ps_dec->u1_sl_typ_5_9 = 1; + } + + { + UWORD32 skip; + + if((ps_dec->i4_app_skip_mode == IVD_SKIP_PB) + || (ps_dec->i4_dec_skip_mode == IVD_SKIP_PB)) + { + UWORD32 u4_bit_stream_offset = 0; /* NOTE(review): unused */ + + if(ps_dec->u1_nal_unit_type == IDR_SLICE_NAL) + { + skip = 0; + + ps_dec->i4_dec_skip_mode = IVD_SKIP_NONE; + } + else if((I_SLICE == u1_slice_type) + && (1 >= ps_dec->ps_sps->u1_num_ref_frames)) + { + skip = 0; + + ps_dec->i4_dec_skip_mode = IVD_SKIP_NONE; + } + else + { + skip = 1; + } + + /* If one frame worth of data is already skipped, do not skip the next one */ + if((0 == u2_first_mb_in_slice) && (1 == ps_dec->u4_prev_nal_skipped)) + { + skip = 0; + } + + if(skip) + { + 
ps_dec->u4_prev_nal_skipped = 1; + ps_dec->i4_dec_skip_mode = IVD_SKIP_PB; + return 0; + } + else + { + /* If the previous NAL was skipped, then + do not process that buffer in this call. + Return to app and process it in the next call. + This is necessary to handle cases where I/IDR is not complete in + the current buffer and application intends to fill the remaining part of the bitstream + later. This ensures we process only frame worth of data in every call */ + if(1 == ps_dec->u4_prev_nal_skipped) + { + ps_dec->u4_return_to_app = 1; + return 0; + } + } + } + + } + + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp & MASK_ERR_PIC_SET_ID) + return ERROR_INV_SPS_PPS_T; + /* discard slice if pic param is invalid */ + COPYTHECONTEXT("SH: pic_parameter_set_id", u4_temp); + ps_pps = &ps_dec->ps_pps[u4_temp]; + if(FALSE == ps_pps->u1_is_valid) + { + return ERROR_INV_SPS_PPS_T; + } + ps_seq = ps_pps->ps_sps; + if(!ps_seq) + return ERROR_INV_SPS_PPS_T; + if(FALSE == ps_seq->u1_is_valid) + return ERROR_INV_SPS_PPS_T; + + /* Get the frame num */ + u2_frame_num = ih264d_get_bits_h264(ps_bitstrm, + ps_seq->u1_bits_in_frm_num); +// H264_DEC_DEBUG_PRINT("FRAME %d First MB in slice: %d\n", u2_frame_num, u2_first_mb_in_slice); + + COPYTHECONTEXT("SH: frame_num", u2_frame_num); +// H264_DEC_DEBUG_PRINT("Second field: %d frame num: %d prv_frame_num: %d \n", ps_dec->u1_second_field, u2_frame_num, ps_dec->u2_prv_frame_num); + + /* Get the field related flags */ + if(!ps_seq->u1_frame_mbs_only_flag) + { + + u1_field_pic_flag = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("SH: field_pic_flag", u1_field_pic_flag); + u1_bottom_field_flag = 0; + + if(u1_field_pic_flag) + { + ps_dec->pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_fld; + u1_bottom_field_flag = ih264d_get_bit_h264(ps_bitstrm); + COPYTHECONTEXT("SH: bottom_field_flag", u1_bottom_field_flag); + + } + else + { + ps_dec->pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan; + } + } + else + { + 
u1_field_pic_flag = 0; + u1_bottom_field_flag = 0; + + ps_dec->pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan; + } + + u1_nal_unit_type = SLICE_NAL; + if(u1_is_idr_slice) + { + if(0 == u1_field_pic_flag) + { + ps_dec->u1_top_bottom_decoded = TOP_FIELD_ONLY | BOT_FIELD_ONLY; + } + u1_nal_unit_type = IDR_SLICE_NAL; + u4_idr_pic_id = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + if(u4_idr_pic_id > 65535) + return ERROR_INV_SPS_PPS_T; + COPYTHECONTEXT("SH: ", u4_idr_pic_id); /* NOTE(review): trace label is missing the field name (idr_pic_id) */ + } + + /* read delta pic order count information*/ + i_delta_poc[0] = i_delta_poc[1] = 0; + s_tmp_poc.i4_pic_order_cnt_lsb = 0; + s_tmp_poc.i4_delta_pic_order_cnt_bottom = 0; + u1_pic_order_cnt_type = ps_seq->u1_pic_order_cnt_type; + if(u1_pic_order_cnt_type == 0) + { + i_temp = ih264d_get_bits_h264( + ps_bitstrm, + ps_seq->u1_log2_max_pic_order_cnt_lsb_minus); + if(i_temp < 0 || i_temp >= ps_seq->i4_max_pic_order_cntLsb) + return ERROR_INV_SPS_PPS_T; + s_tmp_poc.i4_pic_order_cnt_lsb = i_temp; + COPYTHECONTEXT("SH: pic_order_cnt_lsb", s_tmp_poc.i4_pic_order_cnt_lsb); + + if((ps_pps->u1_pic_order_present_flag == 1) && (!u1_field_pic_flag)) + { + s_tmp_poc.i4_delta_pic_order_cnt_bottom = ih264d_sev( + pu4_bitstrm_ofst, pu4_bitstrm_buf); + //if(s_tmp_poc.i4_delta_pic_order_cnt_bottom > ps_seq->i4_max_pic_order_cntLsb) + COPYTHECONTEXT("SH: delta_pic_order_cnt_bottom", + s_tmp_poc.i4_delta_pic_order_cnt_bottom); + } + } + + s_tmp_poc.i4_delta_pic_order_cnt[0] = 0; + s_tmp_poc.i4_delta_pic_order_cnt[1] = 0; + if(u1_pic_order_cnt_type == 1 + && (!ps_seq->u1_delta_pic_order_always_zero_flag)) + { + s_tmp_poc.i4_delta_pic_order_cnt[0] = ih264d_sev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + COPYTHECONTEXT("SH: delta_pic_order_cnt[0]", + s_tmp_poc.i4_delta_pic_order_cnt[0]); + + if(ps_pps->u1_pic_order_present_flag && !u1_field_pic_flag) + { + s_tmp_poc.i4_delta_pic_order_cnt[1] = ih264d_sev( + pu4_bitstrm_ofst, pu4_bitstrm_buf); + COPYTHECONTEXT("SH: delta_pic_order_cnt[1]", + 
s_tmp_poc.i4_delta_pic_order_cnt[1]); + } + } + + if(ps_pps->u1_redundant_pic_cnt_present_flag) + { + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp > MAX_REDUNDANT_PIC_CNT) + return ERROR_INV_SPS_PPS_T; + u1_redundant_pic_cnt = u4_temp; + COPYTHECONTEXT("SH: redundant_pic_cnt", u1_redundant_pic_cnt); + } + /*--------------------------------------------------------------------*/ + /* Check if the slice is part of new picture if so do End of Pic */ + /* processing. */ + /*--------------------------------------------------------------------*/ + i1_is_end_of_poc = 0; + if(!ps_dec->u1_first_nal_in_pic) + { + UWORD8 uc_mbs_exceed = 0; + i1_is_end_of_poc = ih264d_is_end_of_pic(u2_frame_num, u1_nal_ref_idc, + &s_tmp_poc, &ps_dec->s_cur_pic_poc, + ps_cur_slice, u1_pic_order_cnt_type, + u1_nal_unit_type, u4_idr_pic_id, + u1_field_pic_flag, + u1_bottom_field_flag); + + /*since we support only Full frame decode, every new process should + * process a new pic + */ + if(ps_dec->u4_first_slice_in_pic == 1) + { + i1_is_end_of_poc = 1; + } + + if(ps_dec->u2_total_mbs_coded + == (ps_dec->ps_cur_sps->u2_max_mb_addr + 1)) + { + /*u2_total_mbs_coded is forced to u2_max_mb_addr+ 1 at the end of decode ,so + ,if it is first slice in pic dont consider u2_total_mbs_coded to detect new picture */ + if(ps_dec->u4_first_slice_in_pic == 0) + uc_mbs_exceed = 1; + } + + if(i1_is_end_of_poc || uc_mbs_exceed) + { + + if(1 == ps_dec->u1_last_pic_not_decoded) + { + ret = ih264d_end_of_pic_dispbuf_mgr(ps_dec); + + if(ret != OK) + return ret; + + ret = ih264d_end_of_pic(ps_dec, u1_is_idr_slice, u2_frame_num); + if(ret != OK) + return ret; +#if WIN32 + H264_DEC_DEBUG_PRINT(" ------ PIC SKIPPED ------\n"); +#endif + return RET_LAST_SKIP; + } + else + { + if((ps_dec->u2_total_mbs_coded + < (ps_dec->ps_cur_sps->u2_max_mb_addr + 1))) + { /* New picture detected before all MBs of the current one were coded: mark it complete and report the error */ + H264_DEC_DEBUG_PRINT("Hello\n"); /* NOTE(review): placeholder debug message */ + ps_dec->u2_total_mbs_coded = + ps_dec->ps_cur_sps->u2_max_mb_addr + 1; + ps_dec->u1_first_nal_in_pic = 1; + 
ps_dec->u1_first_pb_nal_in_pic = 1; + return ERROR_END_OF_FRAME_EXPECTED_T; + /*if (ps_cur_slice->u1_field_pic_flag && + ((TOP_FIELD_ONLY | BOT_FIELD_ONLY) == ps_dec->u1_top_bottom_decoded)) + { + ps_cur_slice->u1_end_of_frame_signal = 0; + }*/ + } + ret = ih264d_end_of_pic(ps_dec, u1_is_idr_slice, u2_frame_num); + if(ret != OK) + return ret; + } + + } + else + { + + if(ps_dec->u4_first_slice_in_pic == 1) + { + /*If the first slice in decode api is not from a new picture, + * we will return error code ,as we don't support partial + frame decode*/ + return ERROR_PIC_NUM_IS_REPEATED; + } + } + } + ps_cur_slice->u1_end_of_frame_signal = 0; + if(u1_field_pic_flag) + { + /* + * Check if the frame number has changed. + */ + H264_DEC_DEBUG_PRINT( + "u2_frame_num: %d ps_dec->u2_prv_frame_num: %d ps_dec->u1_top_bottom_decoded: %d\n", + u2_frame_num, ps_dec->u2_prv_frame_num, + ps_dec->u1_top_bottom_decoded); + if((u2_frame_num != ps_dec->u2_prv_frame_num) + && (0 != ps_dec->u1_top_bottom_decoded)) + { + if((TOP_FIELD_ONLY | BOT_FIELD_ONLY) + != ps_dec->u1_top_bottom_decoded) + { + H264_DEC_DEBUG_PRINT("Dangling Field, toggling second field\n"); + ps_dec->u1_second_field = 1 - ps_dec->u1_second_field; + ps_dec->u1_dangling_field = 1; + /* + * Updating the u1_bottom_field_flag since its used in the concealment function. + */ + ps_cur_slice->u1_bottom_field_flag = u1_bottom_field_flag; + ps_dec->u2_prv_frame_num = u2_frame_num; + + ret = ih264d_deblock_display(ps_dec); + if(ret != OK) + return ret; + + /* + * The bytes consumed will be handled by the + * video_decode function after the error is handled. 
+ */ + return ERROR_DANGLING_FIELD_IN_PIC; + + } + + } + + ps_dec->u2_prv_frame_num = u2_frame_num; + } + + if(ps_cur_slice->u1_mmco_equalto5) + { + WORD32 i4_temp_poc; + WORD32 i4_top_field_order_poc, i4_bot_field_order_poc; + + if(!ps_cur_slice->u1_field_pic_flag) // or a complementary field pair + { + i4_top_field_order_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt; + i4_bot_field_order_poc = + ps_dec->ps_cur_pic->i4_bottom_field_order_cnt; + i4_temp_poc = MIN(i4_top_field_order_poc, + i4_bot_field_order_poc); + } + else if(!ps_cur_slice->u1_bottom_field_flag) + i4_temp_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt; + else + i4_temp_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt; + + ps_dec->ps_cur_pic->i4_top_field_order_cnt = i4_temp_poc + - ps_dec->ps_cur_pic->i4_top_field_order_cnt; + ps_dec->ps_cur_pic->i4_bottom_field_order_cnt = i4_temp_poc + - ps_dec->ps_cur_pic->i4_bottom_field_order_cnt; + ps_dec->ps_cur_pic->i4_poc = i4_temp_poc; + ps_dec->ps_cur_pic->i4_avg_poc = i4_temp_poc; + } + if(ps_dec->u1_first_nal_in_pic) + { + ret = ih264d_decode_pic_order_cnt(u1_is_idr_slice, u2_frame_num, + &ps_dec->s_prev_pic_poc, + &s_tmp_poc, ps_cur_slice, ps_pps, + u1_nal_ref_idc, + u1_bottom_field_flag, + u1_field_pic_flag, &i4_poc); + if(ret != OK) + return ret; + /* Display seq no calculations */ + if(i4_poc >= ps_dec->i4_max_poc) + ps_dec->i4_max_poc = i4_poc; + /* IDR Picture or POC wrap around */ + if(i4_poc == 0) + { + ps_dec->i4_prev_max_display_seq = ps_dec->i4_prev_max_display_seq + + ps_dec->i4_max_poc + + ps_dec->u1_max_dec_frame_buffering + 1; + ps_dec->i4_max_poc = 0; + } + } + + /*--------------------------------------------------------------------*/ + /* Copy the values read from the bitstream to the slice header and then*/ + /* If the slice is first slice in picture, then do Start of Picture */ + /* processing. 
*/ + /*--------------------------------------------------------------------*/ + ps_cur_slice->i4_delta_pic_order_cnt[0] = i_delta_poc[0]; /* NOTE(review): i_delta_poc[] is zeroed above and never updated in this function; the parsed deltas are held in s_tmp_poc.i4_delta_pic_order_cnt[] - confirm this is intended */ + ps_cur_slice->i4_delta_pic_order_cnt[1] = i_delta_poc[1]; + ps_cur_slice->u4_idr_pic_id = u4_idr_pic_id; + ps_cur_slice->u2_first_mb_in_slice = u2_first_mb_in_slice; + ps_cur_slice->u1_field_pic_flag = u1_field_pic_flag; + ps_cur_slice->u1_bottom_field_flag = u1_bottom_field_flag; + ps_cur_slice->u1_slice_type = u1_slice_type; + ps_cur_slice->i4_pic_order_cnt_lsb = s_tmp_poc.i4_pic_order_cnt_lsb; + + ps_cur_slice->u1_nal_unit_type = u1_nal_unit_type; + ps_cur_slice->u1_redundant_pic_cnt = u1_redundant_pic_cnt; + ps_cur_slice->u1_nal_ref_idc = u1_nal_ref_idc; + ps_cur_slice->u1_pic_order_cnt_type = u1_pic_order_cnt_type; + + if(ps_seq->u1_frame_mbs_only_flag) + ps_cur_slice->u1_direct_8x8_inference_flag = + ps_seq->u1_direct_8x8_inference_flag; + else + ps_cur_slice->u1_direct_8x8_inference_flag = 1; + + if(u1_slice_type == B_SLICE) + { + ps_cur_slice->u1_direct_spatial_mv_pred_flag = ih264d_get_bit_h264( + ps_bitstrm); + COPYTHECONTEXT("SH: direct_spatial_mv_pred_flag", + ps_cur_slice->u1_direct_spatial_mv_pred_flag); + + if(ps_cur_slice->u1_direct_spatial_mv_pred_flag) + ps_cur_slice->pf_decodeDirect = ih264d_decode_spatial_direct; + else + ps_cur_slice->pf_decodeDirect = ih264d_decode_temporal_direct; + if(!((ps_pps->ps_sps->u1_mb_aff_flag) && (!u1_field_pic_flag))) + ps_dec->pf_mvpred = ih264d_mvpred_nonmbaffB; + } + else + { + if(!((ps_pps->ps_sps->u1_mb_aff_flag) && (!u1_field_pic_flag))) + ps_dec->pf_mvpred = ih264d_mvpred_nonmbaff; + } + + if(ps_dec->u1_first_nal_in_pic) + { + ret = ih264d_start_of_pic(ps_dec, i4_poc, &s_tmp_poc, u2_frame_num, ps_pps); + if(ret != OK) + return ret; + + ps_dec->u4_output_present = 0; + + if(1 == ps_dec->u4_fmt_conv_in_process) + { + ih264d_get_next_display_field(ps_dec, + ps_dec->ps_out_buffer, + &(ps_dec->s_disp_op)); + /* If error code is non-zero then there is no buffer available for 
display, + hence avoid format conversion */ + + if(0 != ps_dec->s_disp_op.u4_error_code) + { + ps_dec->u4_fmt_conv_cur_row = ps_dec->s_disp_frame_info.u4_y_ht; + ps_dec->as_fmt_conv_part[0].u4_flag = 0; + ps_dec->as_fmt_conv_part[1].u4_flag = 0; + } + else + ps_dec->u4_output_present = 1; + } + if(ps_dec->u1_separate_parse == 1) + { + if(ps_dec->u4_dec_thread_created == 0) + { + ithread_create(ps_dec->pv_dec_thread_handle, NULL, + (void *)ih264d_decode_picture_thread, + (void *)ps_dec); /* NOTE(review): return value of ithread_create is not checked, here and below */ + + ps_dec->u4_dec_thread_created = 1; + } + + if((ps_dec->u4_num_cores == 3) && (ps_dec->u4_app_disable_deblk_frm == 0) + && (ps_dec->u4_bs_deblk_thread_created == 0)) + { + ps_dec->u4_start_bs_deblk = 0; + ithread_create(ps_dec->pv_bs_deblk_thread_handle, NULL, + (void *)ih264d_computebs_deblk_thread, + (void *)ps_dec); + ps_dec->u4_bs_deblk_thread_created = 1; + } + } + + } + + /* INITIALIZATION of fn ptrs for MC and formMbPartInfo functions */ + { + UWORD8 uc_nofield_nombaff = 1; // = ((ps_dec->ps_sps->u1_profile_idc == 0x42) || (u1_slice_type == I_SLICE)); + + + + uc_nofield_nombaff = ((ps_dec->ps_cur_slice->u1_field_pic_flag == 0) + && (ps_dec->ps_cur_slice->u1_mbaff_frame_flag == 0) + && (u1_slice_type != B_SLICE) + && (ps_dec->ps_cur_pps->u1_wted_pred_flag == 0)); + + /* Use the _bp variants only for non-field, non-MBAFF, non-B slices without weighted prediction; otherwise the _mp variants */ + + if(uc_nofield_nombaff) + { + ps_dec->p_form_mb_part_info = ih264d_form_mb_part_info_bp; + ps_dec->p_motion_compensate = ih264d_motion_compensate_bp; + } + else + { + ps_dec->p_form_mb_part_info = ih264d_form_mb_part_info_mp; + ps_dec->p_motion_compensate = ih264d_motion_compensate_mp; + } + + + } + + /* + * Decide whether to decode the current picture or not + */ + { + dec_err_status_t * ps_err = ps_dec->ps_dec_err_status; + if(ps_err->u4_frm_sei_sync == u2_frame_num) + { + ps_err->u1_err_flag = ACCEPT_ALL_PICS; + ps_err->u4_frm_sei_sync = SYNC_FRM_DEFAULT; + } + ps_err->u4_cur_frm = u2_frame_num; + } + + /* Decision for decoding 
if the picture is to be skipped */ + { + WORD32 i4_skip_b_pic, i4_skip_p_pic; + + i4_skip_b_pic = (ps_dec->u4_skip_frm_mask & B_SLC_BIT) + && (B_SLICE == u1_slice_type) && (0 == u1_nal_ref_idc); + + i4_skip_p_pic = (ps_dec->u4_skip_frm_mask & P_SLC_BIT) + && (P_SLICE == u1_slice_type) && (0 == u1_nal_ref_idc); + + /**************************************************************/ + /* Skip the B picture if skip mask is set for B picture and */ + /* Current B picture is a non reference B picture or there is */ + /* no user for reference B picture */ + /**************************************************************/ + if(i4_skip_b_pic) + { + ps_dec->ps_cur_pic->u4_pack_slc_typ |= B_SLC_BIT; + /* Don't decode the picture in SKIP-B mode if that picture is B */ + /* and also it is not to be used as a reference picture */ + ps_dec->u1_last_pic_not_decoded = 1; + + return OK; + } + /**************************************************************/ + /* Skip the P picture if skip mask is set for P picture and */ + /* Current P picture is a non reference P picture or there is */ + /* no user for reference P picture */ + /**************************************************************/ + if(i4_skip_p_pic) + { + ps_dec->ps_cur_pic->u4_pack_slc_typ |= P_SLC_BIT; + /* Don't decode the picture in SKIP-P mode if that picture is P */ + /* and also it is not to be used as a reference picture */ + ps_dec->u1_last_pic_not_decoded = 1; + + return OK; + } + } + + { + UWORD16 u2_mb_x, u2_mb_y; + + ps_dec->i4_submb_ofst = ((u2_first_mb_in_slice + << ps_cur_slice->u1_mbaff_frame_flag) * SUB_BLK_SIZE) + - SUB_BLK_SIZE; + if(u2_first_mb_in_slice) + { + UWORD8 u1_mb_aff; + UWORD8 u1_field_pic; + UWORD16 u2_frm_wd_in_mbs; + u2_frm_wd_in_mbs = ps_seq->u2_frm_wd_in_mbs; + u1_mb_aff = ps_cur_slice->u1_mbaff_frame_flag; + u1_field_pic = ps_cur_slice->u1_field_pic_flag; + + { + UWORD32 x_offset; + UWORD32 y_offset; + UWORD32 u4_frame_stride; + tfr_ctxt_t *ps_trns_addr; // = 
&ps_dec->s_tran_addrecon_parse; + + if(ps_dec->u1_separate_parse) + { + ps_trns_addr = &ps_dec->s_tran_addrecon_parse; + } + else + { + ps_trns_addr = &ps_dec->s_tran_addrecon; + } + u2_mb_x = MOD(u2_first_mb_in_slice, u2_frm_wd_in_mbs); + u2_mb_y = DIV(u2_first_mb_in_slice, u2_frm_wd_in_mbs); + + u2_mb_y <<= u1_mb_aff; + + if((u2_mb_x > u2_frm_wd_in_mbs - 1) + || (u2_mb_y > ps_dec->u2_frm_ht_in_mbs - 1)) + { + return ERROR_CORRUPTED_SLICE; + } + + u4_frame_stride = ps_dec->u2_frm_wd_y << u1_field_pic; + x_offset = u2_mb_x << 4; + y_offset = (u2_mb_y * u4_frame_stride) << 4; + + ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + x_offset + + y_offset; + + u4_frame_stride = ps_dec->u2_frm_wd_uv << u1_field_pic; + x_offset >>= 1; + y_offset = (u2_mb_y * u4_frame_stride) << 3; + + x_offset *= YUV420SP_FACTOR; + + ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + x_offset + + y_offset; + ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + x_offset + + y_offset; + + ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y; + ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u; + ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v; + + if(ps_dec->u4_mb_level_deblk == 1) + { + /*If it is not the first mb in row,the previous MB which needs to be deblocked + * as there is delay of 1 MB*/ + if(u2_mb_x != 0) + { + ps_trns_addr->pu1_mb_y -= MB_SIZE; + ps_trns_addr->pu1_mb_u -= BLK8x8SIZE * YUV420SP_FACTOR; + ps_trns_addr->pu1_mb_v -= BLK8x8SIZE; + } + } + + // assign the deblock structure pointers to start of slice + if(ps_dec->u1_separate_parse == 1) + { + ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_pic + + (u2_first_mb_in_slice << u1_mb_aff); + } + else + { + if(ps_dec->u4_mb_level_deblk == 0) + ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_pic + + (u2_first_mb_in_slice << u1_mb_aff); + } + + ps_dec->u2_cur_mb_addr = (u2_first_mb_in_slice << u1_mb_aff); + + ps_dec->ps_mv_cur = ps_dec->s_cur_pic.ps_mv + + ((u2_first_mb_in_slice << u1_mb_aff) << 4); + } + } + else + { + tfr_ctxt_t 
*ps_trns_addr; + + if(ps_dec->u1_separate_parse) + { + ps_trns_addr = &ps_dec->s_tran_addrecon_parse; + } + else + { + ps_trns_addr = &ps_dec->s_tran_addrecon; + } + + u2_mb_x = 0xffff; + u2_mb_y = 0; + // assign the deblock structure pointers to start of slice + ps_dec->u2_cur_mb_addr = 0; + ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_pic; + if(ps_dec->u4_mb_level_deblk == 1) + { + ps_dec->ps_deblk_mbn_curr = ps_dec->ps_deblk_mbn; + ps_dec->ps_deblk_mbn_prev = ps_dec->ps_deblk_mbn + + ps_dec->u1_recon_mb_grp; + } + ps_dec->ps_mv_cur = ps_dec->s_cur_pic.ps_mv; + ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1; + ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2; + ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3; + + ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y; + ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u; + ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v; + + } + + ps_dec->ps_part = ps_dec->ps_parse_part_params; + + ps_dec->u2_mbx = + (MOD(u2_first_mb_in_slice - 1, ps_seq->u2_frm_wd_in_mbs)); + ps_dec->u2_mby = + (DIV(u2_first_mb_in_slice - 1, ps_seq->u2_frm_wd_in_mbs)); + ps_dec->u2_mby <<= ps_cur_slice->u1_mbaff_frame_flag; + ps_dec->i2_prev_slice_mbx = ps_dec->u2_mbx; + ps_dec->i2_prev_slice_mby = ps_dec->u2_mby; + } + + /* RBSP stop bit is used for CABAC decoding*/ + ps_bitstrm->u4_max_ofst += ps_dec->ps_cur_pps->u1_entropy_coding_mode; + + ps_dec->u1_B = (u1_slice_type == B_SLICE); + ps_dec->u4_next_mb_skip = 0; + + ps_dec->ps_parse_cur_slice->u4_num_mbs_done_in_slice = 0; + ps_dec->ps_parse_cur_slice->u4_first_mb_in_slice = + ps_dec->ps_cur_slice->u2_first_mb_in_slice; + ps_dec->ps_parse_cur_slice->slice_type = + ps_dec->ps_cur_slice->u1_slice_type; + ps_dec->ps_parse_cur_slice->end_of_slice = 0; + ps_dec->ps_parse_cur_slice->last_slice_in_frame = 0; + + + /*set to zero to indicate a valid slice has been decoded*/ + ps_dec->u4_first_slice_in_pic = 0; + + ps_dec->u4_start_frame_decode = 1; + + + ps_dec->u4_start_bs_deblk = 1; + + 
ps_dec->ps_parse_cur_slice->u2_error_flag = 0; + { /* Point this slice at its own slot of the ref-idx-to-POC map buffer; a slot holds 2 * (2 * num_refs + 1) + PAD_MAP_IDX_POC pointers */ + WORD32 num_entries; + WORD32 size; + UWORD8 *pu1_buf; + + num_entries = MIN(MAX_FRAMES, ps_dec->u4_num_ref_frames_at_init); + num_entries = 2 * ((2 * num_entries) + 1); + + size = num_entries * sizeof(void *); + size += PAD_MAP_IDX_POC * sizeof(void *); + + pu1_buf = (UWORD8 *)ps_dec->pv_map_ref_idx_to_poc_buf; + pu1_buf += size * ps_dec->u2_cur_slice_num; + ps_dec->ps_parse_cur_slice->ppv_map_ref_idx_to_poc = (volatile void **)pu1_buf; + } + + if(u1_slice_type == I_SLICE) + { + ps_dec->ps_cur_pic->u4_pack_slc_typ |= I_SLC_BIT; + + ret = ih264d_parse_islice(ps_dec, u2_first_mb_in_slice); + if(ret != OK) + return ret; + + if(ps_dec->i4_pic_type != B_SLICE && ps_dec->i4_pic_type != P_SLICE) + ps_dec->i4_pic_type = I_SLICE; + + } + else if(u1_slice_type == P_SLICE) + { + ps_dec->ps_cur_pic->u4_pack_slc_typ |= P_SLC_BIT; + ret = ih264d_parse_pslice(ps_dec, u2_first_mb_in_slice); + if(ret != OK) + return ret; + ps_dec->u1_pr_sl_type = u1_slice_type; + if(ps_dec->i4_pic_type != B_SLICE) + ps_dec->i4_pic_type = P_SLICE; + } + else if(u1_slice_type == B_SLICE) + { + ps_dec->ps_cur_pic->u4_pack_slc_typ |= B_SLC_BIT; + ret = ih264d_parse_bslice(ps_dec, u2_first_mb_in_slice); + if(ret != OK) + return ret; + ps_dec->u1_pr_sl_type = u1_slice_type; + ps_dec->i4_pic_type = B_SLICE; + } + else + return ERROR_INV_SLC_TYPE_T; + + ps_dec->ps_parse_cur_slice->end_of_slice = 1; + + ps_dec->u2_cur_slice_num++; + /* storing last Mb X and MbY of the slice */ + ps_dec->i2_prev_slice_mbx = ps_dec->u2_mbx; + ps_dec->i2_prev_slice_mby = ps_dec->u2_mby; + /* End of Picture detection */ + + if(ps_dec->u2_total_mbs_coded >= (ps_seq->u2_max_mb_addr + 1)) + { + ps_dec->u1_pic_decode_done = 1; + + } + + { + dec_err_status_t * ps_err = ps_dec->ps_dec_err_status; + if((ps_err->u1_err_flag & REJECT_PB_PICS) + && (ps_err->u1_cur_pic_type == PIC_TYPE_I)) + { + ps_err->u1_err_flag = ACCEPT_ALL_PICS; + } + } + + PRINT_BIN_BIT_RATIO(ps_dec) + + return 
OK; +} + diff --git a/decoder/ih264d_parse_slice.h b/decoder/ih264d_parse_slice.h new file mode 100755 index 0000000..cf5f9ce --- /dev/null +++ b/decoder/ih264d_parse_slice.h @@ -0,0 +1,47 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef _IH264D_PARSE_SLICE_H_ +#define _IH264D_PARSE_SLICE_H_ +/*! 
+ ************************************************************************** + * \file ih264d_parse_slice.h + * + * \brief + * Contains routines that decode a slice NAL unit + * + * \date + * 19/12/2002 + * + * \author AI + ************************************************************************** + */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_structs.h" +#include "ih264d_error_handler.h" +WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice, + UWORD8 u1_nal_ref_idc, + dec_struct_t * ps_dec ); + +WORD32 ih264d_ref_idx_reordering(dec_struct_t * ps_dec, UWORD8 u1_isB); +WORD32 ih264d_read_mmco_commands(dec_struct_t * ps_dec); +void ih264d_form_pred_weight_matrix(dec_struct_t *ps_dec); +#endif /* _IH264D_PARSE_SLICE_H_ */ diff --git a/decoder/ih264d_process_bslice.c b/decoder/ih264d_process_bslice.c new file mode 100755 index 0000000..69199cf --- /dev/null +++ b/decoder/ih264d_process_bslice.c @@ -0,0 +1,2345 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! 
+ ************************************************************************** + * \file ih264d_process_bslice.c + * + * \brief + * Contains routines that decode B slice type + * + * Detailed_description + * + * \date + * 21/12/2002 + * + * \author NS + ************************************************************************** + */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" + +#include <string.h> +#include "ih264d_structs.h" +#include "ih264d_bitstrm.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_mb_utils.h" +#include "ih264d_mvpred.h" +#include "ih264d_inter_pred.h" +#include "ih264d_process_pslice.h" +#include "ih264d_error_handler.h" +#include "ih264d_tables.h" +#include "ih264d_parse_slice.h" +#include "ih264d_process_pslice.h" +#include "ih264d_process_bslice.h" +#include "ih264d_tables.h" +#include "ih264d_parse_islice.h" +#include "ih264d_mvpred.h" + +void ih264d_init_cabac_contexts(UWORD8 u1_slice_type, dec_struct_t * ps_dec); +//UWORD32 g_hits = 0; +//UWORD32 g_miss = 0; +/*! + ************************************************************************** + * \if Function name : ih264d_decode_spatial_direct \endif + * + * \brief + * Decodes spatial direct mode. + * + * \return + * None. 
+ * Arunoday T + ************************************************************************** + */ +WORD32 ih264d_decode_spatial_direct(dec_struct_t * ps_dec, + UWORD8 u1_wd_x, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num) +{ + mv_pred_t s_mv_pred, *ps_mv; + UWORD8 u1_col_zero_flag, u1_sub_mb_num, u1_direct_zero_pred_flag = 0; + UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + mv_pred_t *ps_mv_ntop_start; + mv_pred_t *ps_mv_nmb_start = ps_dec->ps_mv_cur + (u1_mb_num << 4); + UWORD8 partition_size, sub_partition, u1_mb_partw, u1_mb_parth; + UWORD8 i; + WORD8 i1_pred, i1_ref_frame0, i1_ref_frame1; + struct pic_buffer_t *ps_ref_frame = NULL, *ps_col_pic, *ps_pic_buff0 = NULL, + *ps_pic_buff1 = NULL; + + UWORD8 u1_zero_pred_cond_f, u1_zero_pred_cond_b; + WORD16 i2_def_mv[2], i2_spat_pred_mv[4], *pi2_final_mv0, *pi2_final_mv1; + UWORD16 ui2_mask_fwd = 0, ui2_mask_bwd = 0, u2_mask = 0; + UWORD32 *pui32_weight_ofsts = NULL; + directmv_t s_mvdirect; + UWORD8 u1_colz; + UWORD8 u1_final_ref_idx = 0; + const UWORD8 *pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth; + const UWORD8 *pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw; + const UWORD16 sub_mask_table[] = + { 0x33, 0x3, 0x11, 0x1 }; + const UWORD16 mask_table[] = + { 0xffff, /*16x16 NA */ + 0xff, /* 16x8*/ + 0x3333, /* 8x16*/ + 0x33 };/* 8x8*/ + mv_pred_t s_temp_mv_pred; + WORD32 ret = 0; + + /* CHANGED CODE */ + ps_mv_ntop_start = ps_dec->ps_mv_cur + (u1_mb_num << 4) + - (ps_dec->u2_frm_wd_in_mbs << (4 + u1_mbaff)) + 12; + + /* assign default values for MotionVector as zero */ + i2_def_mv[0] = 0; + i2_def_mv[1] = 0; + + u1_direct_zero_pred_flag = ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb_start, + ps_mv_ntop_start, &s_mv_pred, 0, 4, + 0, 1, B_DIRECT_SPATIAL); + + i2_spat_pred_mv[0] = s_mv_pred.i2_mv[0]; + i2_spat_pred_mv[1] = s_mv_pred.i2_mv[1]; + i2_spat_pred_mv[2] = s_mv_pred.i2_mv[2]; + i2_spat_pred_mv[3] = s_mv_pred.i2_mv[3]; + + i1_ref_frame0 = s_mv_pred.i1_ref_frame[0]; + 
i1_ref_frame1 = s_mv_pred.i1_ref_frame[1]; + + i1_ref_frame0 = (i1_ref_frame0 < 0) ? -1 : i1_ref_frame0; + i1_ref_frame1 = (i1_ref_frame1 < 0) ? -1 : i1_ref_frame1; + + i1_pred = 0; + + { + WORD8 u1_ref_idx, u1_ref_idx1; + UWORD32 uc_Idx, uc_Idx1; + UWORD8 u1_scale_ref = (ps_dec->ps_cur_slice->u1_mbaff_frame_flag + && ps_cur_mb_info->u1_mb_field_decodingflag); + u1_final_ref_idx = i1_ref_frame0; + if(i1_ref_frame0 >= 0) + { + /* convert RefIdx if it is MbAff */ + u1_ref_idx = i1_ref_frame0; + u1_ref_idx1 = i1_ref_frame0; + if(u1_scale_ref) + { + u1_ref_idx1 = u1_ref_idx >> 1; + if((u1_ref_idx & 0x01) != (1 - ps_cur_mb_info->u1_topmb)) + u1_ref_idx1 += MAX_REF_BUFS; + } + /* If i1_ref_frame0 < 0 then refIdxCol is obtained from ps_pic_buff1 */ + ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][u1_ref_idx1]; + ps_ref_frame = ps_pic_buff0; + i1_pred = PRED_L0; + } + + if(i1_ref_frame1 >= 0) + { + /* convert RefIdx if it is MbAff */ + u1_ref_idx = i1_ref_frame1; + u1_ref_idx1 = i1_ref_frame1; + if(u1_scale_ref) + { + u1_ref_idx1 = u1_ref_idx >> 1; + if((u1_ref_idx & 0x01) != (1 - ps_cur_mb_info->u1_topmb)) + u1_ref_idx1 += MAX_REF_BUFS; + } + ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][u1_ref_idx1]; + i1_pred = i1_pred | PRED_L1; + } + if(i1_ref_frame0 < 0) + { + ps_ref_frame = ps_pic_buff1; + u1_final_ref_idx = i1_ref_frame1; + } + + u1_zero_pred_cond_f = (u1_direct_zero_pred_flag) || (i1_ref_frame0 < 0); + u1_zero_pred_cond_b = (u1_direct_zero_pred_flag) || (i1_ref_frame1 < 0); + + if(ps_dec->ps_cur_pps->u1_wted_bipred_idc) + { + uc_Idx = ((i1_ref_frame0 < 1) ? 0 : i1_ref_frame0) + * ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]; + if(u1_scale_ref) + uc_Idx >>= 1; + uc_Idx1 = (i1_ref_frame1 < 0) ? 0 : i1_ref_frame1; + uc_Idx += (u1_scale_ref) ? 
(uc_Idx1 >> 1) : uc_Idx1; + pui32_weight_ofsts = + (UWORD32*)&ps_dec->pu4_wt_ofsts[2 * X3(uc_Idx)]; + + if(i1_ref_frame0 < 0) + pui32_weight_ofsts += 1; + + if(u1_scale_ref && (ps_dec->ps_cur_pps->u1_wted_bipred_idc == 2)) + { + WORD16 i2_ref_idx; + i2_ref_idx = MAX(i1_ref_frame0, 0); + i2_ref_idx *= (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1] + << 1); + i2_ref_idx += MAX(i1_ref_frame1, 0); + if(!ps_cur_mb_info->u1_topmb) + i2_ref_idx += + (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0] + << 1) + * (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1] + << 1); + pui32_weight_ofsts = (UWORD32*)&ps_dec->pu4_mbaff_wt_mat[2 + * X3(i2_ref_idx)]; + } + } + } + + s_temp_mv_pred.i1_ref_frame[0] = i1_ref_frame0; + s_temp_mv_pred.i1_ref_frame[1] = i1_ref_frame1; + s_temp_mv_pred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id; + s_temp_mv_pred.u1_pic_type = ps_ref_frame->u1_pic_type; + + /**********************************************************************/ + /* Call the function which gets the number of partitions and */ + /* partition info of colocated Mb */ + /**********************************************************************/ + + ps_dec->pf_parse_mvdirect(ps_dec, ps_dec->ps_col_pic, &s_mvdirect, u1_wd_x, + ps_dec->i4_submb_ofst, ps_cur_mb_info); + ps_col_pic = ps_dec->ps_col_pic; + if((s_mvdirect.u1_col_zeroflag_change == 0) || u1_direct_zero_pred_flag) + { + WORD16 i2_mv_x, i2_mv_y, i2_mvX1, i2_mvY1; + /* Most probable case */ + u1_col_zero_flag = *(ps_col_pic->pu1_col_zero_flag + + s_mvdirect.i4_mv_indices[0]); + u1_col_zero_flag = u1_col_zero_flag & 0x01; + + if(u1_zero_pred_cond_f || ((i1_ref_frame0 == 0) && (u1_col_zero_flag == 1))) + { + i2_mv_x = 0; + i2_mv_y = 0; + } + else + { + i2_mv_x = i2_spat_pred_mv[0]; + i2_mv_y = i2_spat_pred_mv[1]; + + } + + if(u1_zero_pred_cond_b || ((i1_ref_frame1 == 0) && (u1_col_zero_flag == 1))) + { + i2_mvX1 = 0; + i2_mvY1 = 0; + } + else + { + i2_mvX1 = i2_spat_pred_mv[2]; + i2_mvY1 = i2_spat_pred_mv[3]; + } + + 
u1_sub_mb_num = ps_dec->u1_sub_mb_num; + u1_mb_partw = (u1_wd_x >> 2); + + + if(i1_ref_frame0 >= 0) + { + { + pred_info_pkd_t *ps_pred_pkd; + WORD16 i2_mv[2]; + WORD8 i1_ref_idx= 0; + + i2_mv[0] = i2_mv_x; + i2_mv[1] = i2_mv_y; + + ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx; + ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_partw,u1_sub_mb_num,i1_pred, + ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts, + ps_pic_buff0->u1_pic_type); + ps_dec->u4_pred_info_pkd_idx++; + ps_cur_mb_info->u1_num_pred_parts++; + + + } + + } + + if(i1_ref_frame1 >= 0) + { + { + pred_info_pkd_t *ps_pred_pkd; + WORD16 i2_mv[2]; + WORD8 i1_ref_idx= 0; + + i2_mv[0] = i2_mvX1; + i2_mv[1] = i2_mvY1; + + ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx; + ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_partw,u1_sub_mb_num,i1_pred, + ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts, + ps_pic_buff1->u1_pic_type); + ps_dec->u4_pred_info_pkd_idx++; + ps_cur_mb_info->u1_num_pred_parts++; + + + } + } + + + /* Replication optimisation */ + s_temp_mv_pred.i2_mv[0] = i2_mv_x; + s_temp_mv_pred.i2_mv[1] = i2_mv_y; + s_temp_mv_pred.i2_mv[2] = i2_mvX1; + s_temp_mv_pred.i2_mv[3] = i2_mvY1; + + /* Calculating colocated zero information */ + { + /*************************************/ + /* If(bit2 and bit3 set) */ + /* then */ + /* (bit0 and bit1) => submmbmode */ + /* (bit2 and bit3) => mbmode */ + /* else */ + /* (bit0 and bit1) => mbmode */ + /*************************************/ + /*UWORD8 u1_packed_mb_sub_mb_mode = sub_partition ? + (s_mvdirect.i1_partitionsize[0]) : ((s_mvdirect.i1_partitionsize[0]) << 2);*/ + UWORD8 u1_packed_mb_sub_mb_mode = (u1_mb_partw == 2) ? 
0x03 : 0; + + if(i1_ref_frame0 < 0) + { + i2_mv_x = i2_mvX1; + i2_mv_y = i2_mvY1; + } + + /* Change from left shift 4 to 6 - Varun */ + u1_colz = (ps_cur_mb_info->u1_mb_field_decodingflag << 1) + | ((u1_final_ref_idx == 0) && (ABS(i2_mv_x) <= 1) + && (ABS(i2_mv_y) <= 1)); + u1_colz |= (u1_packed_mb_sub_mb_mode << 6); + } + ps_mv = ps_mv_nmb_start + u1_sub_mb_num; + ih264d_rep_mv_colz(ps_dec, &s_temp_mv_pred, ps_mv, u1_sub_mb_num, u1_colz, + u1_mb_partw, u1_mb_partw); + if(u1_wd_x == MB_SIZE) + ps_dec->u1_currB_type = 0; + + + + return OK; + } + /***************************************************************************/ + /* If present MB is 16x16 and the partition of colocated Mb is >= PRED_8x8 */ + /* i.e 8x8 or less than 8x8 partitions then set up DMA for (0,0) and */ + /* spatially predicted motion vector and do the multiplexing after */ + /* motion compensation */ + /***************************************************************************/ + + + if((u1_wd_x == MB_SIZE) && (s_mvdirect.i1_num_partitions > 2)) + { + ps_cur_mb_info->u1_Mux = 1; + if(i1_ref_frame0 >= 0) + { + + { + pred_info_pkd_t *ps_pred_pkd; + WORD8 i1_ref_idx= 0; + + ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx; + ih264d_fill_pred_info(&(i2_spat_pred_mv[0]),4,4,0,i1_pred, + ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts, + ps_pic_buff0->u1_pic_type); + ps_dec->u4_pred_info_pkd_idx++; + ps_cur_mb_info->u1_num_pred_parts++; + + + } + + /****** (0,0) Motion vectors DMA *****/ + { + pred_info_pkd_t *ps_pred_pkd; + WORD16 i2_mv[2]; + WORD8 i1_ref_idx= 0; + + i2_mv[0] = 0; + i2_mv[1] = 0; + + ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx; + ih264d_fill_pred_info(i2_mv,4,4,0,i1_pred, + ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts, + ps_pic_buff0->u1_pic_type); + ps_dec->u4_pred_info_pkd_idx++; + ps_cur_mb_info->u1_num_pred_parts++; + + + } + } + if(i1_ref_frame1 >= 0) + { + { + pred_info_pkd_t *ps_pred_pkd; + 
WORD16 i2_mv[2]; + WORD8 i1_ref_idx= 0; + + ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx; + ih264d_fill_pred_info(&(i2_spat_pred_mv[2]),4,4,0,i1_pred, + ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts, + ps_pic_buff1->u1_pic_type); + ps_dec->u4_pred_info_pkd_idx++; + ps_cur_mb_info->u1_num_pred_parts++; + + + } + + /****** (0,0) Motion vectors DMA *****/ + + { + pred_info_pkd_t *ps_pred_pkd; + WORD16 i2_mv[2]; + WORD8 i1_ref_idx= 0; + + i2_mv[0] = 0; + i2_mv[1] = 0; + + ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx; + ih264d_fill_pred_info(i2_mv,4,4,0,i1_pred, + ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts, + ps_pic_buff1->u1_pic_type); + ps_dec->u4_pred_info_pkd_idx++; + ps_cur_mb_info->u1_num_pred_parts++; + + + } + } + } + + /*u1_col = *(ps_col_pic->pu1_col_zero_flag + s_mvdirect.i4_mv_indices[0]); + u1_col &= 1; + u1_init = 0;*/ + + for(i = 0; i < s_mvdirect.i1_num_partitions; i++) + { + partition_size = s_mvdirect.i1_partitionsize[i]; + u1_sub_mb_num = s_mvdirect.i1_submb_num[i]; + + sub_partition = partition_size >> 2; + partition_size &= 0x3; + u1_mb_partw = pu1_mb_partw[partition_size]; + u1_mb_parth = pu1_mb_parth[partition_size]; + u2_mask = mask_table[partition_size]; + if(sub_partition != 0) + { + u1_mb_partw >>= 1; + u1_mb_parth >>= 1; + u2_mask = sub_mask_table[partition_size]; + } + + u1_col_zero_flag = *(ps_col_pic->pu1_col_zero_flag + + s_mvdirect.i4_mv_indices[i]); + u1_col_zero_flag = u1_col_zero_flag & 0x01; + + /*if(u1_col != u1_col_zero_flag) + u1_init = 1;*/ + + if(u1_zero_pred_cond_f || ((i1_ref_frame0 == 0) && (u1_col_zero_flag == 1))) + { + pi2_final_mv0 = &i2_def_mv[0]; + ui2_mask_fwd |= (u2_mask << u1_sub_mb_num); + } + else + pi2_final_mv0 = &i2_spat_pred_mv[0]; + + if(u1_zero_pred_cond_b || ((i1_ref_frame1 == 0) && (u1_col_zero_flag == 1))) + { + pi2_final_mv1 = &i2_def_mv[0]; + ui2_mask_bwd |= (u2_mask << u1_sub_mb_num); + } + else + pi2_final_mv1 = 
&i2_spat_pred_mv[2]; + + if(ps_cur_mb_info->u1_Mux != 1) + { + /*u1_sub_mb_x = u1_sub_mb_num & 0x03; + uc_sub_mb_y = (u1_sub_mb_num >> 2);*/ + if(i1_ref_frame0 >= 0) + { + + { + pred_info_pkd_t *ps_pred_pkd; + WORD8 i1_ref_idx= 0; + + ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx; + ih264d_fill_pred_info(pi2_final_mv0,u1_mb_partw,u1_mb_parth,u1_sub_mb_num,i1_pred, + ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts, + ps_pic_buff0->u1_pic_type); + ps_dec->u4_pred_info_pkd_idx++; + ps_cur_mb_info->u1_num_pred_parts++; + + + } + + } + + if(i1_ref_frame1 >= 0) + { + { + pred_info_pkd_t *ps_pred_pkd; + WORD8 i1_ref_idx= 0; + + ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx; + ih264d_fill_pred_info(pi2_final_mv1,u1_mb_partw,u1_mb_parth,u1_sub_mb_num,i1_pred, + ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts, + ps_pic_buff1->u1_pic_type); + ps_dec->u4_pred_info_pkd_idx++; + ps_cur_mb_info->u1_num_pred_parts++; + + + } + } + } + + /* Replication optimisation */ + s_temp_mv_pred.i2_mv[0] = pi2_final_mv0[0]; + s_temp_mv_pred.i2_mv[1] = pi2_final_mv0[1]; + s_temp_mv_pred.i2_mv[2] = pi2_final_mv1[0]; + s_temp_mv_pred.i2_mv[3] = pi2_final_mv1[1]; + + /* Calculating colocated zero information */ + { + WORD16 i2_mv_x = 0, i2_mv_y = 0; + /*************************************/ + /* If(bit2 and bit3 set) */ + /* then */ + /* (bit0 and bit1) => submmbmode */ + /* (bit2 and bit3) => mbmode */ + /* else */ + /* (bit0 and bit1) => mbmode */ + /*************************************/ + UWORD8 u1_packed_mb_sub_mb_mode = + sub_partition ? 
(s_mvdirect.i1_partitionsize[i]) : ((s_mvdirect.i1_partitionsize[i]) + << 2); + + if(i1_ref_frame0 >= 0) + { + i2_mv_x = pi2_final_mv0[0]; + i2_mv_y = pi2_final_mv0[1]; + } + else + { + i2_mv_x = pi2_final_mv1[0]; + i2_mv_y = pi2_final_mv1[1]; + } + + u1_colz = (ps_cur_mb_info->u1_mb_field_decodingflag << 1) + | ((u1_final_ref_idx == 0) && (ABS(i2_mv_x) <= 1) + && (ABS(i2_mv_y) <= 1)); + u1_colz |= (u1_packed_mb_sub_mb_mode << 4); + } + ps_mv = ps_mv_nmb_start + u1_sub_mb_num; + ih264d_rep_mv_colz(ps_dec, &s_temp_mv_pred, ps_mv, u1_sub_mb_num, u1_colz, + u1_mb_parth, u1_mb_partw); + } + i = 0; + if(i1_ref_frame0 >= 0) + ps_cur_mb_info->u2_mask[i++] = ui2_mask_fwd; + if(i1_ref_frame1 >= 0) + ps_cur_mb_info->u2_mask[i] = ui2_mask_bwd; + + /*if(u1_init) + H264_DEC_DEBUG_PRINT("hit\n"); + else + H264_DEC_DEBUG_PRINT("miss\n");*/ + + return OK; +} + +/*! + ************************************************************************** + * \if Function name : ih264d_decode_temporal_direct \endif + * + * \brief + * Decodes temporal direct mode. + * + * \return + * None. 
+ * + ************************************************************************** + */ +WORD32 ih264d_decode_temporal_direct(dec_struct_t * ps_dec, + UWORD8 u1_wd_x, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num) +{ + struct pic_buffer_t *ps_pic_buff0, *ps_pic_buff1, *ps_col_pic; + mv_pred_t *ps_mv, s_temp_mv_pred; + UWORD8 u1_sub_mb_num; + UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1; + UWORD8 u1_mb_partw, u1_mb_parth; + UWORD8 i, partition_size, sub_partition; + UWORD32 *pui32_weight_ofsts = NULL; + directmv_t s_mvdirect; + const UWORD8 *pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth; + const UWORD8 *pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw; + WORD8 c_refFrm0, c_refFrm1; + UWORD8 u1_ref_idx0, u1_is_cur_mb_fld; + UWORD32 pic0_poc, pic1_poc, cur_poc; + WORD32 ret = 0; + + u1_is_cur_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag; + ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][0]; + + /**********************************************************************/ + /* Call the function which gets the number of partitions and */ + /* partition info of colocated Mb */ + /**********************************************************************/ + ps_dec->pf_parse_mvdirect(ps_dec, ps_dec->ps_col_pic, &s_mvdirect, u1_wd_x, + ps_dec->i4_submb_ofst, ps_cur_mb_info); + ps_col_pic = ps_dec->ps_col_pic; + + for(i = 0; i < s_mvdirect.i1_num_partitions; i++) + { + UWORD8 u1_colz; + partition_size = s_mvdirect.i1_partitionsize[i]; + u1_sub_mb_num = s_mvdirect.i1_submb_num[i]; + ps_mv = ps_col_pic->ps_mv + s_mvdirect.i4_mv_indices[i]; + + /* This should be removed to catch unitialized memory read */ + u1_ref_idx0 = 0; + + sub_partition = partition_size >> 2; + partition_size &= 0x3; + u1_mb_partw = pu1_mb_partw[partition_size]; + u1_mb_parth = pu1_mb_parth[partition_size]; + if(sub_partition != 0) + { + u1_mb_partw >>= 1; + u1_mb_parth >>= 1; + } + c_refFrm0 = ps_mv->i1_ref_frame[0]; + c_refFrm1 = 
ps_mv->i1_ref_frame[1]; + + if((c_refFrm0 == -1) && (c_refFrm1 == -1)) + { + u1_ref_idx0 = 0; + ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][0]; + if(u1_mbaff && u1_is_cur_mb_fld) + { + if(ps_cur_mb_info->u1_topmb) + { + pic0_poc = ps_pic_buff0->i4_top_field_order_cnt; + pic1_poc = ps_pic_buff1->i4_top_field_order_cnt; + cur_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt; + } + else + { + pic1_poc = ps_pic_buff1->i4_bottom_field_order_cnt; + cur_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt; + ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS]; + pic0_poc = ps_pic_buff0->i4_bottom_field_order_cnt; + ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][MAX_REF_BUFS]; + } + } + else + { + pic0_poc = ps_pic_buff0->i4_avg_poc; + pic1_poc = ps_pic_buff1->i4_avg_poc; + cur_poc = ps_dec->ps_cur_pic->i4_poc; + } + } + else + { + UWORD8 uc_i, u1_num_frw_ref_pics; + UWORD8 buf_id, u1_pic_type; + buf_id = ps_mv->u1_col_ref_pic_idx; + u1_pic_type = ps_mv->u1_pic_type; + if(ps_dec->ps_cur_slice->u1_field_pic_flag) + { + if(s_mvdirect.u1_vert_mv_scale == FRM_TO_FLD) + { + u1_pic_type = TOP_FLD; + if(ps_dec->ps_cur_slice->u1_bottom_field_flag) + u1_pic_type = BOT_FLD; + } + } + u1_num_frw_ref_pics = + ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]; + + for(uc_i = 0; uc_i < u1_num_frw_ref_pics; uc_i++) + { + if(ps_dec->ps_cur_slice->u1_field_pic_flag) + { + if(ps_dec->ps_ref_pic_buf_lx[0][uc_i]->u1_mv_buf_id == buf_id) + { + if(ps_dec->ps_ref_pic_buf_lx[0][uc_i]->u1_pic_type + == u1_pic_type) + { + u1_ref_idx0 = uc_i; + break; + } + } + } + else + { + if(ps_dec->ps_ref_pic_buf_lx[0][uc_i]->u1_mv_buf_id == buf_id) + { + u1_ref_idx0 = uc_i; + break; + } + } + } + + ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][u1_ref_idx0]; + ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][0]; + + if(u1_mbaff && u1_is_cur_mb_fld) + { + pic0_poc = ps_pic_buff0->i4_top_field_order_cnt; + u1_ref_idx0 <<= 1; + if(s_mvdirect.u1_vert_mv_scale == ONE_TO_ONE) + { + if(u1_pic_type == BOT_FLD) + { + 
pic0_poc = ps_pic_buff0->i4_bottom_field_order_cnt; + ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][(u1_ref_idx0 + >> 1) + MAX_REF_BUFS]; + if(ps_cur_mb_info->u1_topmb) + u1_ref_idx0++; + } + else + { + if(1 - ps_cur_mb_info->u1_topmb) + u1_ref_idx0++; + } + } + if(s_mvdirect.u1_vert_mv_scale == FRM_TO_FLD) + { + if(1 - ps_cur_mb_info->u1_topmb) + { + pic0_poc = ps_pic_buff0->i4_bottom_field_order_cnt; + ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][(u1_ref_idx0 + >> 1) + MAX_REF_BUFS]; + } + } + if(ps_cur_mb_info->u1_topmb) + { + pic1_poc = ps_pic_buff1->i4_top_field_order_cnt; + cur_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt; + } + else + { + pic1_poc = ps_pic_buff1->i4_bottom_field_order_cnt; + cur_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt; + ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS]; + } + } + else + { + pic0_poc = ps_pic_buff0->i4_avg_poc; + pic1_poc = ps_pic_buff1->i4_avg_poc; + cur_poc = ps_dec->ps_cur_pic->i4_poc; + } + } + { + WORD16 i16_td; + + if(c_refFrm0 >= 0) + { + i2_mv_x0 = ps_mv->i2_mv[0]; + i2_mv_y0 = ps_mv->i2_mv[1]; + } + else if(c_refFrm1 >= 0) + { + i2_mv_x0 = ps_mv->i2_mv[2]; + i2_mv_y0 = ps_mv->i2_mv[3]; + } + else + { + i2_mv_x0 = 0; + i2_mv_y0 = 0; + } + /* If FRM_TO_FLD or FLD_TO_FRM scale the "y" component of the colocated Mv*/ + if(s_mvdirect.u1_vert_mv_scale == FRM_TO_FLD) + { + i2_mv_y0 /= 2; + } + else if(s_mvdirect.u1_vert_mv_scale == FLD_TO_FRM) + { + i2_mv_y0 *= 2; + } + + i16_td = pic1_poc - pic0_poc; + if((ps_pic_buff0->u1_is_short == 0) || (i16_td == 0)) + { + i2_mv_x1 = 0; + i2_mv_y1 = 0; + } + else + { + WORD16 i16_tb, i16_tx, i2_dist_scale_factor, i16_temp; + + i16_td = CLIP3(-128, 127, i16_td); + i16_tb = cur_poc - pic0_poc; + i16_tb = CLIP3(-128, 127, i16_tb); + + i16_tx = (16384 + ABS(SIGN_POW2_DIV(i16_td, 1))) / i16_td; + i2_dist_scale_factor = CLIP3(-1024, 1023, + (((i16_tb * i16_tx) + 32) >> 6)); + i16_temp = (i2_mv_x0 * i2_dist_scale_factor + 128) >> 8; + i2_mv_x1 = i16_temp - i2_mv_x0; + 
i2_mv_x0 = i16_temp; + + i16_temp = (i2_mv_y0 * i2_dist_scale_factor + 128) >> 8; + i2_mv_y1 = i16_temp - i2_mv_y0; + i2_mv_y0 = i16_temp; + } + { + mv_pred_t *ps_mv; + + /*u1_sub_mb_x = u1_sub_mb_num & 0x03; + uc_sub_mb_y = u1_sub_mb_num >> 2;*/ + if(ps_dec->ps_cur_pps->u1_wted_bipred_idc) + { + UWORD8 u1_idx = + u1_ref_idx0 + * ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]; + UWORD8 u1_scale_ref = u1_mbaff && u1_is_cur_mb_fld; + if(u1_scale_ref) + u1_idx >>= 1; + pui32_weight_ofsts = (UWORD32*)&ps_dec->pu4_wt_ofsts[2 + * X3(u1_idx)]; + if(u1_scale_ref + && (ps_dec->ps_cur_pps->u1_wted_bipred_idc + == 2)) + { + WORD16 i2_ref_idx; + i2_ref_idx = u1_ref_idx0; + i2_ref_idx *= + (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1] + << 1); + if(!ps_cur_mb_info->u1_topmb) + i2_ref_idx += + (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0] + << 1) + * (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1] + << 1); + pui32_weight_ofsts = + (UWORD32*)&ps_dec->pu4_mbaff_wt_mat[2 + * X3(i2_ref_idx)]; + } + } + { + pred_info_pkd_t *ps_pred_pkd; + WORD16 i2_mv[2]; + WORD8 i1_ref_idx= 0; + + i2_mv[0] = i2_mv_x0; + i2_mv[1] = i2_mv_y0; + + ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx; + ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_parth,u1_sub_mb_num,PRED_L0 | PRED_L1, + ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts, + ps_pic_buff0->u1_pic_type); + ps_dec->u4_pred_info_pkd_idx++; + ps_cur_mb_info->u1_num_pred_parts++; + + + } + { + pred_info_pkd_t *ps_pred_pkd; + WORD16 i2_mv[2]; + WORD8 i1_ref_idx= 0; + + i2_mv[0] = i2_mv_x1; + i2_mv[1] = i2_mv_y1; + + ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx; + ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_parth,u1_sub_mb_num,PRED_L0 | PRED_L1, + ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts, + ps_pic_buff1->u1_pic_type); + ps_dec->u4_pred_info_pkd_idx++; + ps_cur_mb_info->u1_num_pred_parts++; + + + } + + /* Replication optimisation */ + 
s_temp_mv_pred.i2_mv[0] = i2_mv_x0; + s_temp_mv_pred.i2_mv[1] = i2_mv_y0; + s_temp_mv_pred.i2_mv[2] = i2_mv_x1; + s_temp_mv_pred.i2_mv[3] = i2_mv_y1; + s_temp_mv_pred.i1_ref_frame[0] = u1_ref_idx0; + s_temp_mv_pred.i1_ref_frame[1] = 0; + s_temp_mv_pred.u1_col_ref_pic_idx = ps_pic_buff0->u1_mv_buf_id; + s_temp_mv_pred.u1_pic_type = ps_pic_buff0->u1_pic_type; + ps_mv = ps_dec->ps_mv_cur + (u1_mb_num << 4) + u1_sub_mb_num; + + { + WORD16 i2_mv_x = 0, i2_mv_y = 0; + UWORD8 u1_packed_mb_sub_mb_mode = + sub_partition ? (s_mvdirect.i1_partitionsize[i]) : ((s_mvdirect.i1_partitionsize[i]) + << 2); + + if(c_refFrm0 >= 0) + { + i2_mv_x = i2_mv_x0; + i2_mv_y = i2_mv_y0; + } + else + { + i2_mv_x = i2_mv_x1; + i2_mv_y = i2_mv_y1; + } + + u1_colz = + (ps_cur_mb_info->u1_mb_field_decodingflag << 1) + | ((u1_ref_idx0 == 0) + && (ABS(i2_mv_x) + <= 1) + && (ABS(i2_mv_y) + <= 1)); + u1_colz |= (u1_packed_mb_sub_mb_mode << 4); + } + ih264d_rep_mv_colz(ps_dec, &s_temp_mv_pred, ps_mv, u1_sub_mb_num, + u1_colz, u1_mb_parth, u1_mb_partw); + } + } + } + /* return value set to UWORD8 to make it homogeneous */ + /* with decodespatialdirect */ + return OK; +} + +void ih264d_convert_frm_to_fld_list(struct pic_buffer_t *ps_ref_pic_buf_lx, + UWORD8 *pu1_L0, + dec_struct_t *ps_dec, + UWORD8 u1_num_short_term_bufs) +{ + UWORD8 uc_count = *pu1_L0, i, uc_l1, uc_lx, j; + struct pic_buffer_t *ps_ref_lx[2], *ps_ref_pic_lx; + UWORD8 u1_bottom_field_flag; + dec_slice_params_t *ps_cur_slice; + UWORD8 u1_ref[2], u1_fld[2], u1_same_fld, u1_op_fld; + UWORD32 ui_half_num_of_sub_mbs; + + uc_l1 = 0; + uc_lx = 0; + ps_cur_slice = ps_dec->ps_cur_slice; + ps_ref_pic_lx = ps_ref_pic_buf_lx - MAX_REF_BUFS; + ps_ref_lx[0] = ps_ref_pic_buf_lx; + ps_ref_lx[1] = ps_ref_pic_buf_lx; + u1_bottom_field_flag = ps_cur_slice->u1_bottom_field_flag; + ui_half_num_of_sub_mbs = ((ps_dec->u2_pic_ht * ps_dec->u2_pic_wd) >> 5); + if(u1_bottom_field_flag) + { + u1_ref[0] = BOT_REF; + u1_ref[1] = TOP_REF; + u1_fld[0] = BOT_FLD; + 
u1_fld[1] = TOP_FLD; + u1_same_fld = BOT_FLD; + u1_op_fld = TOP_FLD; + } + else + { + u1_ref[0] = TOP_REF; + u1_ref[1] = BOT_REF; + u1_fld[0] = TOP_FLD; + u1_fld[1] = BOT_FLD; + u1_same_fld = TOP_FLD; + u1_op_fld = BOT_FLD; + } + + /* Create the field list starting with all the short term */ + /* frames followed by all the long term frames. No long term */ + /* reference field should have a list idx less than a short */ + /* term reference field during initiailization. */ + + for(j = 0; j < 2; j++) + { + i = ((j == 0) ? 0 : u1_num_short_term_bufs); + uc_count = ((j == 0) ? u1_num_short_term_bufs : *pu1_L0); + for(; i < uc_count; i++, ps_ref_lx[0]++) + { + /* Search field of same parity in Frame list */ + if((ps_ref_lx[0]->u1_pic_type & u1_ref[0])) // || ((ps_ref_lx[0]->u1_picturetype & 0x3) == 0)) + { + /* Insert PIC of same parity in RefPicList */ + ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_lx, ps_ref_lx[0]); + ps_ref_pic_lx->i4_pic_num = (ps_ref_pic_lx->i4_pic_num * 2 + 1); + ps_ref_pic_lx->u1_long_term_pic_num = + (ps_ref_pic_lx->u1_long_term_frm_idx * 2 + 1); + ps_ref_pic_lx->u1_pic_type = u1_same_fld; + if(u1_fld[0] & BOT_FLD) + { + ps_ref_pic_lx->u1_pic_type = BOT_FLD; + ps_ref_pic_lx->pu1_buf1 += ps_ref_pic_lx->u2_frm_wd_y; + ps_ref_pic_lx->pu1_buf2 += ps_ref_pic_lx->u2_frm_wd_uv; + ps_ref_pic_lx->pu1_buf3 += ps_ref_pic_lx->u2_frm_wd_uv; + if(ps_ref_pic_lx->u1_picturetype & 0x3) + { + ps_ref_pic_lx->pu1_col_zero_flag += ui_half_num_of_sub_mbs; + ps_ref_pic_lx->ps_mv += ui_half_num_of_sub_mbs; + } + ps_ref_pic_lx->i4_poc = + ps_ref_pic_lx->i4_bottom_field_order_cnt; + ps_ref_pic_lx->i4_avg_poc = + ps_ref_pic_lx->i4_bottom_field_order_cnt; + } + else + { + ps_ref_pic_lx->u1_pic_type = TOP_FLD; + ps_ref_pic_lx->i4_poc = ps_ref_pic_lx->i4_top_field_order_cnt; + ps_ref_pic_lx->i4_avg_poc = + ps_ref_pic_lx->i4_top_field_order_cnt; + } + + ps_ref_pic_lx++; + uc_lx++; + /* Find field of opposite parity */ + if(uc_l1 < uc_count && ps_ref_lx[1]) + { + 
while(!(ps_ref_lx[1]->u1_pic_type & u1_ref[1])) + { + ps_ref_lx[1]++; + uc_l1++; + if(uc_l1 >= uc_count) + ps_ref_lx[1] = 0; + if(!ps_ref_lx[1]) + break; + } + + if(ps_ref_lx[1]) + { + uc_l1++; + ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_lx, + ps_ref_lx[1]); + ps_ref_pic_lx->u1_pic_type = u1_op_fld; + ps_ref_pic_lx->i4_pic_num = (ps_ref_pic_lx->i4_pic_num * 2); + ps_ref_pic_lx->u1_long_term_pic_num = + (ps_ref_pic_lx->u1_long_term_frm_idx * 2); + if(u1_fld[1] & BOT_FLD) + { + ps_ref_pic_lx->u1_pic_type = BOT_FLD; + ps_ref_pic_lx->pu1_buf1 += ps_ref_pic_lx->u2_frm_wd_y; + ps_ref_pic_lx->pu1_buf2 += ps_ref_pic_lx->u2_frm_wd_uv; + ps_ref_pic_lx->pu1_buf3 += ps_ref_pic_lx->u2_frm_wd_uv; + if(ps_ref_pic_lx->u1_picturetype & 0x3) + { + ps_ref_pic_lx->pu1_col_zero_flag += + ui_half_num_of_sub_mbs; + ps_ref_pic_lx->ps_mv += ui_half_num_of_sub_mbs; + } + ps_ref_pic_lx->i4_poc = + ps_ref_pic_lx->i4_bottom_field_order_cnt; + ps_ref_pic_lx->i4_avg_poc = + ps_ref_pic_lx->i4_bottom_field_order_cnt; + } + else + { + ps_ref_pic_lx->u1_pic_type = TOP_FLD; + ps_ref_pic_lx->i4_poc = + ps_ref_pic_lx->i4_top_field_order_cnt; + ps_ref_pic_lx->i4_avg_poc = + ps_ref_pic_lx->i4_top_field_order_cnt; + } + ps_ref_pic_lx++; + uc_lx++; + ps_ref_lx[1]++; + } + } + } + } + + /* Same parity fields are over, now insert left over opposite parity fields */ + /** Added if(ps_ref_lx[1]) for error checks */ + if(ps_ref_lx[1]) + { + for(; uc_l1 < uc_count; uc_l1++) + { + if(ps_ref_lx[1]->u1_pic_type & u1_ref[1]) + { + /* Insert PIC of opposite parity in RefPicList */ + ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_lx, + ps_ref_lx[1]); + ps_ref_pic_lx->u1_pic_type = u1_op_fld; + ps_ref_pic_lx->i4_pic_num = (ps_ref_pic_lx->i4_pic_num * 2); + ps_ref_pic_lx->u1_long_term_pic_num = + (ps_ref_pic_lx->u1_long_term_frm_idx * 2); + if(u1_op_fld == BOT_FLD) + { + ps_ref_pic_lx->u1_pic_type = BOT_FLD; + ps_ref_pic_lx->pu1_buf1 += ps_ref_pic_lx->u2_frm_wd_y; + ps_ref_pic_lx->pu1_buf2 += 
ps_ref_pic_lx->u2_frm_wd_uv; + ps_ref_pic_lx->pu1_buf3 += ps_ref_pic_lx->u2_frm_wd_uv; + if(ps_ref_pic_lx->u1_picturetype & 0x3) + { + ps_ref_pic_lx->pu1_col_zero_flag += + ui_half_num_of_sub_mbs; + ps_ref_pic_lx->ps_mv += ui_half_num_of_sub_mbs; + } + ps_ref_pic_lx->i4_poc = + ps_ref_pic_lx->i4_bottom_field_order_cnt; + ps_ref_pic_lx->i4_avg_poc = + ps_ref_pic_lx->i4_bottom_field_order_cnt; + } + else + { + ps_ref_pic_lx->i4_poc = + ps_ref_pic_lx->i4_top_field_order_cnt; + ps_ref_pic_lx->i4_avg_poc = + ps_ref_pic_lx->i4_top_field_order_cnt; + } + ps_ref_pic_lx++; + uc_lx++; + ps_ref_lx[1]++; + } + } + } + } + *pu1_L0 = uc_lx; +} + +void ih264d_convert_frm_mbaff_list(dec_struct_t *ps_dec) +{ + struct pic_buffer_t **ps_ref_pic_lx; + UWORD8 u1_max_ref_idx, idx; + UWORD16 u2_frm_wd_y, u2_frm_wd_uv; + struct pic_buffer_t **ps_ref_pic_buf_lx; + UWORD32 u4_half_num_of_sub_mbs = ((ps_dec->u2_pic_ht * ps_dec->u2_pic_wd) >> 5); + + ps_ref_pic_buf_lx = ps_dec->ps_ref_pic_buf_lx[0]; + ps_ref_pic_lx = ps_dec->ps_ref_pic_buf_lx[0]; + u1_max_ref_idx = ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]; + for(idx = 0; idx < u1_max_ref_idx; idx++) + { + ps_ref_pic_lx[idx]->u1_pic_type = TOP_FLD; + ps_ref_pic_lx[idx]->i4_poc = ps_ref_pic_lx[idx]->i4_top_field_order_cnt; + + } + u2_frm_wd_y = ps_dec->u2_frm_wd_y; + u2_frm_wd_uv = ps_dec->u2_frm_wd_uv; + + for(idx = 0; idx < u1_max_ref_idx; idx++) + { + *ps_ref_pic_lx[idx + MAX_REF_BUFS] = *ps_ref_pic_buf_lx[idx]; + ps_ref_pic_lx[idx + MAX_REF_BUFS]->pu1_buf1 = + ps_ref_pic_buf_lx[idx]->pu1_buf1 + u2_frm_wd_y; + ps_ref_pic_lx[idx + MAX_REF_BUFS]->pu1_buf2 = + ps_ref_pic_buf_lx[idx]->pu1_buf2 + u2_frm_wd_uv; + ps_ref_pic_lx[idx + MAX_REF_BUFS]->pu1_buf3 = + ps_ref_pic_buf_lx[idx]->pu1_buf3 + u2_frm_wd_uv; + + ps_ref_pic_lx[idx + MAX_REF_BUFS]->u1_pic_type = BOT_FLD; + ps_ref_pic_lx[idx + MAX_REF_BUFS]->i4_poc = + ps_ref_pic_buf_lx[idx]->i4_bottom_field_order_cnt; + if(ps_ref_pic_buf_lx[idx]->u1_picturetype & 0x3) + { + 
ps_ref_pic_lx[idx + MAX_REF_BUFS]->pu1_col_zero_flag = + ps_ref_pic_buf_lx[idx]->pu1_col_zero_flag + + u4_half_num_of_sub_mbs; + ps_ref_pic_lx[idx + MAX_REF_BUFS]->ps_mv = + ps_ref_pic_buf_lx[idx]->ps_mv + u4_half_num_of_sub_mbs; + } + } + + if(ps_dec->u1_B) + { + ps_ref_pic_buf_lx = ps_dec->ps_ref_pic_buf_lx[1]; + ps_ref_pic_lx = ps_dec->ps_ref_pic_buf_lx[1]; + u1_max_ref_idx = ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]; + for(idx = 0; idx < u1_max_ref_idx; idx++) + { + ps_ref_pic_lx[idx]->u1_pic_type = TOP_FLD; + ps_ref_pic_lx[idx]->i4_poc = ps_ref_pic_lx[idx]->i4_top_field_order_cnt; + + } + + for(idx = 0; idx < u1_max_ref_idx; idx++) + { + *ps_ref_pic_lx[idx + MAX_REF_BUFS] = *ps_ref_pic_buf_lx[idx]; + ps_ref_pic_lx[idx + MAX_REF_BUFS]->pu1_buf1 = + ps_ref_pic_buf_lx[idx]->pu1_buf1 + u2_frm_wd_y; + ps_ref_pic_lx[idx + MAX_REF_BUFS]->pu1_buf2 = + ps_ref_pic_buf_lx[idx]->pu1_buf2 + u2_frm_wd_uv; + ps_ref_pic_lx[idx + MAX_REF_BUFS]->pu1_buf3 = + ps_ref_pic_buf_lx[idx]->pu1_buf3 + u2_frm_wd_uv; + ps_ref_pic_lx[idx + MAX_REF_BUFS]->u1_pic_type = BOT_FLD; + ps_ref_pic_lx[idx + MAX_REF_BUFS]->i4_poc = + ps_ref_pic_buf_lx[idx]->i4_bottom_field_order_cnt; + + if(ps_ref_pic_buf_lx[idx]->u1_picturetype & 0x3) + { + ps_ref_pic_lx[idx + MAX_REF_BUFS]->pu1_col_zero_flag = + ps_ref_pic_buf_lx[idx]->pu1_col_zero_flag + + u4_half_num_of_sub_mbs; + ps_ref_pic_lx[idx + MAX_REF_BUFS]->ps_mv = + ps_ref_pic_buf_lx[idx]->ps_mv + + u4_half_num_of_sub_mbs; + } + } + } +} +/*! + ************************************************************************** + * \if Function name : ih264d_init_ref_idx_lx_b \endif + * + * \brief + * Initializes forward and backward refernce lists for B slice decoding. 
+ * + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +void ih264d_init_ref_idx_lx_b(dec_struct_t *ps_dec) +{ + struct pic_buffer_t *ps_ref_pic_buf_lx; + dpb_manager_t *ps_dpb_mgr; + struct dpb_info_t *ps_next_dpb; + WORD32 i_cur_poc, i_max_st_poc, i_min_st_poc, i_ref_poc, i_temp_poc; + WORD8 i; + UWORD8 u1_max_lt_index, u1_min_lt_index, u1_lt_index; + UWORD8 u1_field_pic_flag; + dec_slice_params_t *ps_cur_slice; + UWORD8 u1_L0, u1_L1; + UWORD8 u1_num_short_term_bufs; + UWORD8 u1_max_ref_idx_l0, u1_max_ref_idx_l1; + + ps_cur_slice = ps_dec->ps_cur_slice; + u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag; + u1_max_ref_idx_l0 = ps_cur_slice->u1_num_ref_idx_lx_active[0] + << u1_field_pic_flag; + u1_max_ref_idx_l1 = ps_cur_slice->u1_num_ref_idx_lx_active[1] + << u1_field_pic_flag; + + ps_dpb_mgr = ps_dec->ps_dpb_mgr; + /* Get the current POC */ + i_cur_poc = ps_dec->ps_cur_pic->i4_poc; + + /* Get MaxStPOC,MinStPOC,MaxLt,MinLt */ + i_max_st_poc = i_cur_poc; + i_min_st_poc = i_cur_poc; + u1_max_lt_index = MAX_REF_BUFS + 1; + u1_min_lt_index = MAX_REF_BUFS + 1; + /* Start from ST head */ + ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head; + for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++) + { + i_ref_poc = ps_next_dpb->ps_pic_buf->i4_poc; + if(i_ref_poc < i_cur_poc) + { + /* RefPic Buf POC is before Current POC in display order */ + i_min_st_poc = MIN(i_min_st_poc, i_ref_poc); + } + else + { + /* RefPic Buf POC is after Current POC in display order */ + i_max_st_poc = MAX(i_max_st_poc, i_ref_poc); + } + + /* Chase the next link */ + ps_next_dpb = ps_next_dpb->ps_prev_short; + } + + /* Start from LT head */ + ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head; + if(ps_next_dpb) + { + u1_max_lt_index = ps_next_dpb->u1_lt_idx; + u1_min_lt_index = ps_next_dpb->u1_lt_idx; + } + for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++) + { + u1_lt_index = ps_next_dpb->u1_lt_idx; + u1_max_lt_index = 
(UWORD8)(MAX(u1_max_lt_index, u1_lt_index)); + u1_min_lt_index = (UWORD8)(MIN(u1_min_lt_index, u1_lt_index)); + + /* Chase the next link */ + ps_next_dpb = ps_next_dpb->ps_prev_long; + } + + /* 1. Initialize refIdxL0 */ + u1_L0 = 0; + if(u1_field_pic_flag) + { + ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0]; + ps_ref_pic_buf_lx += MAX_REF_BUFS; + i_temp_poc = i_cur_poc; + } + else + { + ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0]; + i_temp_poc = i_cur_poc - 1; + } + /* Arrange all short term buffers in output order as given by POC */ + /* 1.1 Arrange POC's less than CurrPOC in the descending POC order starting + from (CurrPOC - 1)*/ + for(; i_temp_poc >= i_min_st_poc; i_temp_poc--) + { + /* Start from ST head */ + ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head; + for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++) + { + if((WORD32)ps_next_dpb->ps_pic_buf->i4_poc == i_temp_poc) + { + /* Copy info in pic buffer */ + ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx, + ps_next_dpb->ps_pic_buf); + ps_ref_pic_buf_lx++; + u1_L0++; + break; + } + ps_next_dpb = ps_next_dpb->ps_prev_short; + } + } + + { + /* 1.2. 
Arrange POC's more than CurrPOC in the ascending POC order starting + from (CurrPOC + 1)*/ + for(i_temp_poc = i_cur_poc + 1; i_temp_poc <= i_max_st_poc; i_temp_poc++) + { + /* Start from ST head */ + ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head; + for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++) + { + if((WORD32)ps_next_dpb->ps_pic_buf->i4_poc == i_temp_poc) + { + ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx, + ps_next_dpb->ps_pic_buf); + ps_ref_pic_buf_lx++; + u1_L0++; + break; + } + ps_next_dpb = ps_next_dpb->ps_prev_short; + } + } + } + + /* 1.3 Arrange all Long term buffers in ascending order, in LongtermIndex */ + /* Start from ST head */ + + u1_num_short_term_bufs = u1_L0; + for(u1_lt_index = u1_min_lt_index; u1_lt_index <= u1_max_lt_index; u1_lt_index++) + { + ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head; + for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++) + { + if(ps_next_dpb->u1_lt_idx == u1_lt_index) + { + ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx, + ps_next_dpb->ps_pic_buf); + ps_ref_pic_buf_lx->u1_long_term_pic_num = + ps_ref_pic_buf_lx->u1_long_term_frm_idx; + + ps_ref_pic_buf_lx++; + u1_L0++; + break; + } + ps_next_dpb = ps_next_dpb->ps_prev_long; + } + } + + if(u1_field_pic_flag) + { + /* Initialize the rest of the entries in the */ + /* reference list to handle of errors */ + { + UWORD8 u1_i; + pic_buffer_t *ps_ref_pic; + + ps_ref_pic = ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS; + + if(NULL == ps_ref_pic->pu1_buf1) + { + ps_ref_pic = ps_dec->ps_cur_pic; + } + for(u1_i = u1_L0; u1_i < u1_max_ref_idx_l0; u1_i++) + { + *ps_ref_pic_buf_lx = *ps_ref_pic; + ps_ref_pic_buf_lx++; + } + } + ih264d_convert_frm_to_fld_list( + ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS, &u1_L0, + ps_dec, u1_num_short_term_bufs); + + ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0] + u1_L0; + } + + ps_dec->ps_cur_slice->u1_initial_list_size[0] = u1_L0; + + /* Initialize the rest of the entries in the */ + /* reference list to handle of errors */ + { + 
UWORD8 u1_i; + pic_buffer_t *ps_ref_pic; + + ps_ref_pic = ps_dpb_mgr->ps_init_dpb[0][0]; + + if(NULL == ps_ref_pic->pu1_buf1) + { + ps_ref_pic = ps_dec->ps_cur_pic; + } + for(u1_i = u1_L0; u1_i < u1_max_ref_idx_l0; u1_i++) + { + *ps_ref_pic_buf_lx = *ps_ref_pic; + ps_ref_pic_buf_lx++; + } + } + { + /* 2. Initialize refIdxL1 */ + u1_L1 = 0; + if(u1_field_pic_flag) + { + ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[1][0] + MAX_REF_BUFS; + } + else + { + ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[1][0]; + } + + /* 2.1. Arrange POC's more than CurrPOC in the ascending POC order starting + from (CurrPOC + 1)*/ + for(i_temp_poc = i_cur_poc + 1; i_temp_poc <= i_max_st_poc; i_temp_poc++) + { + /* Start from ST head */ + ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head; + for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++) + { + if((WORD32)ps_next_dpb->ps_pic_buf->i4_poc == i_temp_poc) + { + ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx, + ps_next_dpb->ps_pic_buf); + ps_ref_pic_buf_lx++; + u1_L1++; + break; + } + ps_next_dpb = ps_next_dpb->ps_prev_short; + } + } + + if(u1_field_pic_flag) + { + i_temp_poc = i_cur_poc; + } + else + { + i_temp_poc = i_cur_poc - 1; + } + + /* Arrange all short term buffers in output order as given by POC */ + /* 2.2 Arrange POC's less than CurrPOC in the descending POC order starting + from (CurrPOC - 1)*/ + for(; i_temp_poc >= i_min_st_poc; i_temp_poc--) + { + /* Start from ST head */ + ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head; + for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++) + { + if((WORD32)ps_next_dpb->ps_pic_buf->i4_poc == i_temp_poc) + { + ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx, + ps_next_dpb->ps_pic_buf); + ps_ref_pic_buf_lx++; + u1_L1++; + break; + } + ps_next_dpb = ps_next_dpb->ps_prev_short; + } + } + + /* 2.3 Arrange all Long term buffers in ascending order, in LongtermIndex */ + /* Start from ST head */ + u1_num_short_term_bufs = u1_L1; + ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head; + for(u1_lt_index = 
u1_min_lt_index; u1_lt_index <= u1_max_lt_index; + u1_lt_index++) + { + for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++) + { + if(ps_next_dpb->u1_lt_idx == u1_lt_index) + { + ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx, + ps_next_dpb->ps_pic_buf); + ps_ref_pic_buf_lx->u1_long_term_pic_num = + ps_ref_pic_buf_lx->u1_long_term_frm_idx; + ps_ref_pic_buf_lx++; + u1_L1++; + break; + } + ps_next_dpb = ps_next_dpb->ps_prev_long; + } + } + + if(u1_field_pic_flag) + { + /* Initialize the rest of the entries in the */ + /* reference list to handle of errors */ + { + UWORD8 u1_i; + pic_buffer_t *ps_ref_pic; + + ps_ref_pic = ps_dpb_mgr->ps_init_dpb[1][0] + MAX_REF_BUFS; + + if(NULL == ps_ref_pic->pu1_buf1) + { + ps_ref_pic = ps_dec->ps_cur_pic; + } + for(u1_i = u1_L1; u1_i < u1_max_ref_idx_l1; u1_i++) + { + *ps_ref_pic_buf_lx = *ps_ref_pic; + ps_ref_pic_buf_lx++; + } + } + + ih264d_convert_frm_to_fld_list( + ps_dpb_mgr->ps_init_dpb[1][0] + MAX_REF_BUFS, + &u1_L1, ps_dec, u1_num_short_term_bufs); + ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[1][0] + u1_L1; + } + + ps_dec->ps_cur_slice->u1_initial_list_size[1] = u1_L1; + + /* Initialize the rest of the entries in the */ + /* reference list to handle of errors */ + { + UWORD8 u1_i; + pic_buffer_t *ps_ref_pic; + + ps_ref_pic = ps_dpb_mgr->ps_init_dpb[1][0]; + + if(NULL == ps_ref_pic->pu1_buf1) + { + ps_ref_pic = ps_dec->ps_cur_pic; + } + for(u1_i = u1_L1; u1_i < u1_max_ref_idx_l1; u1_i++) + { + *ps_ref_pic_buf_lx = *ps_ref_pic; + ps_ref_pic_buf_lx++; + } + } + + /* If list0 and list 1 ebtries are same then swap the 0th and 1st entry */ + /* of list 1 */ + { + struct pic_buffer_t *ps_ref_pic1_buf_l0, *ps_ref_pic1_buf_l1; + struct pic_buffer_t s_ref_pic1_buf_temp; + + ps_ref_pic1_buf_l0 = ps_dpb_mgr->ps_init_dpb[0][0]; + ps_ref_pic1_buf_l1 = ps_dpb_mgr->ps_init_dpb[1][0]; + + if((u1_L0 == u1_L1) && (u1_L0 > 1)) + { + WORD32 i_index, i_swap; + + i_swap = 1; + + for(i_index = 0; i_index < u1_L0; i_index++) + { + 
if((ps_ref_pic1_buf_l0[i_index]).pu1_buf1 + != (ps_ref_pic1_buf_l1[i_index]).pu1_buf1) + { + i_swap = 0; + break; + } + } + if(1 == i_swap) + { + memcpy(&s_ref_pic1_buf_temp, &ps_ref_pic1_buf_l1[1], + sizeof(struct pic_buffer_t)); + memcpy(&ps_ref_pic1_buf_l1[1], &ps_ref_pic1_buf_l1[0], + sizeof(struct pic_buffer_t)); + memcpy(&ps_ref_pic1_buf_l1[0], &s_ref_pic1_buf_temp, + sizeof(struct pic_buffer_t)); + } + } + } + } +} + + + +void ih264d_get_implicit_weights(dec_struct_t *ps_dec); + +/*! + ************************************************************************** + * \if Function name : ih264d_one_to_one \endif + * + * \brief + * Initializes forward and backward refernce lists for B slice decoding. + * + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +void ih264d_one_to_one(dec_struct_t *ps_dec, + struct pic_buffer_t *ps_col_pic, + directmv_t *ps_direct, + UWORD8 u1_wd_x, + WORD32 u2_sub_mb_ofst, + dec_mb_info_t * ps_cur_mb_info) +{ + UWORD8 *pu1_col_zero_flag_start, u1_col_mb_pred_mode, u1_num_blks, u1_sub_mb_num; + UWORD8 u1_init_colzero_flag; + UNUSED(ps_cur_mb_info); + pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + u2_sub_mb_ofst; + u1_col_mb_pred_mode = pu1_col_zero_flag_start[ps_dec->u1_sub_mb_num]; + u1_init_colzero_flag = u1_col_mb_pred_mode & 1; + u1_col_mb_pred_mode >>= 6; + ps_direct->u1_vert_mv_scale = ONE_TO_ONE; + ps_direct->u1_col_zeroflag_change = 0; + + if(u1_wd_x == MB_SIZE) + { + ps_dec->u1_currB_type = (!!u1_col_mb_pred_mode); + if(u1_col_mb_pred_mode == PRED_16x16) + { + ps_direct->i1_num_partitions = 1; + ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst; + ps_direct->i1_submb_num[0] = 0; + ps_direct->i1_partitionsize[0] = PRED_16x16; + + return; + } + else if(u1_col_mb_pred_mode < PRED_8x8) + { + ps_direct->i1_num_partitions = 2; + ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst; + ps_direct->i1_submb_num[0] = 0; + ps_direct->i1_partitionsize[0] = 
u1_col_mb_pred_mode; + u1_sub_mb_num = (u1_col_mb_pred_mode == PRED_16x8) ? 8 : 2; + ps_direct->i1_submb_num[1] = u1_sub_mb_num; + ps_direct->i4_mv_indices[1] = u2_sub_mb_ofst + + ps_direct->i1_submb_num[1]; + ps_direct->i1_partitionsize[1] = u1_col_mb_pred_mode; + if((pu1_col_zero_flag_start[u1_sub_mb_num] & 1) != u1_init_colzero_flag) + ps_direct->u1_col_zeroflag_change = 1; + return; + } + else + { + u1_num_blks = 4; + } + } + else + { + u1_num_blks = 1; + } + + { + const UWORD8 *pu1_top_lt_mb_part_idx; + UWORD8 u1_col_sub_mb_pred_mode, uc_blk, u1_sub_blk, u1_submb_col = 0; + UWORD8 u1_num_sub_blks, uc_direct8x8inf, *pu1_col_zero_flag, u1_sub_mb_num; + const UWORD8 *pu1_num_sub_mb_part = + (const UWORD8 *)gau1_ih264d_num_submb_part; + UWORD8 i1_num_partitions = 0, partition_size; + WORD32 mv_index; + const UWORD8 *pu1_top_lt_sub_mb_idx = gau1_ih264d_submb_indx_mod_sp_drct; + + u1_sub_mb_num = ps_dec->u1_sub_mb_num; + uc_direct8x8inf = ps_dec->ps_cur_slice->u1_direct_8x8_inference_flag; + pu1_top_lt_mb_part_idx = gau1_ih264d_top_left_mb_part_indx_mod + + (PRED_8x8 << 1) + 1; + + for(uc_blk = 0; uc_blk < u1_num_blks; uc_blk++) + { + partition_size = PRED_8x8; + pu1_top_lt_sub_mb_idx = gau1_ih264d_submb_indx_mod_sp_drct; + if(uc_direct8x8inf == 1) + { + u1_submb_col = u1_sub_mb_num | (u1_sub_mb_num >> 1); + mv_index = u2_sub_mb_ofst + u1_submb_col; + u1_num_sub_blks = 1; + } + else + { + /* colMbPart is either 8x8, 8x4, 4x8, 4x4 */ + pu1_col_zero_flag = pu1_col_zero_flag_start + u1_sub_mb_num; + u1_col_sub_mb_pred_mode = *pu1_col_zero_flag; + u1_col_sub_mb_pred_mode = (u1_col_sub_mb_pred_mode & 0x30) >> 4; + partition_size = (UWORD8)((u1_col_sub_mb_pred_mode) + | (PRED_8x8 << 2)); + mv_index = u2_sub_mb_ofst + u1_sub_mb_num; + pu1_top_lt_sub_mb_idx += (u1_col_sub_mb_pred_mode << 1); + u1_num_sub_blks = pu1_num_sub_mb_part[u1_col_sub_mb_pred_mode]; + + } + + for(u1_sub_blk = 0; u1_sub_blk < u1_num_sub_blks; + u1_sub_blk++, pu1_top_lt_sub_mb_idx++) + { + 
u1_sub_mb_num += *pu1_top_lt_sub_mb_idx; + mv_index += *pu1_top_lt_sub_mb_idx; + ps_direct->i4_mv_indices[i1_num_partitions] = mv_index; + ps_direct->i1_submb_num[i1_num_partitions] = u1_sub_mb_num; + ps_direct->i1_partitionsize[i1_num_partitions] = partition_size; + i1_num_partitions++; + if(!uc_direct8x8inf) + u1_submb_col = u1_sub_mb_num; + if((pu1_col_zero_flag_start[u1_submb_col] & 1) + != u1_init_colzero_flag) + ps_direct->u1_col_zeroflag_change = 1; + } + u1_sub_mb_num = *pu1_top_lt_mb_part_idx++; + } + ps_direct->i1_num_partitions = i1_num_partitions; + } +} +/*! + ************************************************************************** + * \if Function name : ih264d_mbaff_cross_pmbair \endif + * + * \brief + * Initializes forward and backward refernce lists for B slice decoding. + * + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +void ih264d_mbaff_cross_pmbair(dec_struct_t *ps_dec, + struct pic_buffer_t *ps_col_pic, + directmv_t *ps_direct, + UWORD8 u1_wd_x, + WORD32 u2_sub_mb_ofst, + dec_mb_info_t * ps_cur_mb_info) +{ + UWORD8 *pu1_col_zero_flag_start, *pu1_col_zero_flag, u1_sub_mb_num, + uc_sub_mb_num_col; + UWORD8 *pu1_col_zero_flag_right_half; + WORD32 i4_force_8X8; + UWORD8 u1_num_blks, u1_col_mb_pred_mode, uc_blk, u1_col_sub_mb_pred_mode, + u1_col_sub_mb_pred_mode_rt; + UWORD8 i1_num_partitions = 0, partition_size; + + WORD32 mv_index; + + UWORD8 u1_num_sub_blks; + UWORD8 u1_is_cur_mb_fld, i; + UWORD8 u1_init_colzero_flag; + + u1_is_cur_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag; + u1_sub_mb_num = ps_dec->u1_sub_mb_num; + ps_direct->u1_col_zeroflag_change = 0; + /*pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + u2_sub_mb_ofst; + u1_col_mb_pred_mode = pu1_col_zero_flag_start[u1_sub_mb_num]; + u1_init_colzero_flag = u1_col_mb_pred_mode & 1; + u1_col_mb_pred_mode >>= 6; */ + if(0 == u1_is_cur_mb_fld) + { + ps_direct->u1_vert_mv_scale = 
FLD_TO_FRM; + if(u1_wd_x == MB_SIZE) + { + pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + + u2_sub_mb_ofst; + u1_col_mb_pred_mode = pu1_col_zero_flag_start[0]; + u1_init_colzero_flag = u1_col_mb_pred_mode & 1; + u1_col_mb_pred_mode >>= 6; + + + if(u1_col_mb_pred_mode & 0x2) + { + ps_dec->u1_currB_type = 1; + if(u1_col_mb_pred_mode == PRED_8x16) + { + ps_direct->i1_num_partitions = 2; + ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst; + ps_direct->i1_submb_num[0] = 0; + ps_direct->i1_partitionsize[0] = PRED_8x16; + ps_direct->i4_mv_indices[1] = u2_sub_mb_ofst + 2; + ps_direct->i1_submb_num[1] = 2; + ps_direct->i1_partitionsize[1] = PRED_8x16; + if((pu1_col_zero_flag_start[2] & 1) != u1_init_colzero_flag) + ps_direct->u1_col_zeroflag_change = 1; + } + else + { + pu1_col_zero_flag = pu1_col_zero_flag_start + u1_sub_mb_num; + u1_col_sub_mb_pred_mode = (*pu1_col_zero_flag & 0x10);/* 8x4 or 4x4 mode */ + + pu1_col_zero_flag_right_half = pu1_col_zero_flag_start + + u1_sub_mb_num + 2; + u1_col_sub_mb_pred_mode_rt = + (*pu1_col_zero_flag_right_half & 0x10);/* 8x4 or 4x4 mode */ + + i4_force_8X8 = (u1_col_sub_mb_pred_mode) + || (u1_col_sub_mb_pred_mode_rt); + if(i4_force_8X8) + { + u1_num_sub_blks = 2; + partition_size = PRED_8x8; + } + else + { + partition_size = PRED_8x16; + u1_num_sub_blks = 1; + } + + for(i = 0; i < 2; i++) + { + for(uc_blk = 0; uc_blk < u1_num_sub_blks; uc_blk++) + { + uc_sub_mb_num_col = u1_sub_mb_num | (u1_sub_mb_num >> 1); + uc_sub_mb_num_col &= 0x7; + mv_index = u2_sub_mb_ofst + uc_sub_mb_num_col; + + ps_direct->i4_mv_indices[i1_num_partitions] = + mv_index; + ps_direct->i1_submb_num[i1_num_partitions] = + u1_sub_mb_num; + ps_direct->i1_partitionsize[i1_num_partitions] = + partition_size; + i1_num_partitions++; + if((pu1_col_zero_flag_start[uc_sub_mb_num_col] & 1) + != u1_init_colzero_flag) + ps_direct->u1_col_zeroflag_change = 1; + u1_sub_mb_num += 8; + } + u1_sub_mb_num = 2; /* move to second half of Cur MB */ + } + 
ps_direct->i1_num_partitions = i1_num_partitions; + return; + } + } + else + { + ps_direct->i1_num_partitions = 1; + ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst; + ps_direct->i1_submb_num[0] = 0; + ps_direct->i1_partitionsize[0] = PRED_16x16; + ps_dec->u1_currB_type = 0; + return; + } + } + else + { + uc_sub_mb_num_col = u1_sub_mb_num | (u1_sub_mb_num >> 1); + uc_sub_mb_num_col &= 0x7; + + ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst + uc_sub_mb_num_col; + ps_direct->i1_submb_num[0] = u1_sub_mb_num; + ps_direct->i1_partitionsize[0] = PRED_8x8; + ps_direct->i1_num_partitions = 1; + } + } + else + { + ps_direct->u1_vert_mv_scale = FRM_TO_FLD; + pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + u2_sub_mb_ofst; + u1_init_colzero_flag = pu1_col_zero_flag_start[0] & 1; + + if(u1_wd_x == MB_SIZE) + { + UWORD8 u1_submb_col; + UWORD8 *puc_colZeroFlagStart_bot_mb, uc_colMbPredMode_bot_mb; + + pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + + u2_sub_mb_ofst; + u1_col_mb_pred_mode = pu1_col_zero_flag_start[u1_sub_mb_num] >> 6; + + puc_colZeroFlagStart_bot_mb = ps_col_pic->pu1_col_zero_flag + + u2_sub_mb_ofst + 16; + uc_colMbPredMode_bot_mb = puc_colZeroFlagStart_bot_mb[8] >> 6; + + i4_force_8X8 = (u1_col_mb_pred_mode & 0x2) + || (uc_colMbPredMode_bot_mb & 0x2); + if(i4_force_8X8) + { + u1_num_blks = 2; + partition_size = PRED_8x8; + } + else + { + u1_num_blks = 1; + partition_size = PRED_16x8; + } + + ps_dec->u1_currB_type = 1; + /*As this mb is derived from 2 Mbs min no of partitions = 2*/ + for(i = 0; i < 2; i++) + { + + pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + + u2_sub_mb_ofst; + u1_col_mb_pred_mode = pu1_col_zero_flag_start[u1_sub_mb_num] >> 6; + + for(uc_blk = 0; uc_blk < u1_num_blks; uc_blk++) + { + u1_submb_col = (u1_sub_mb_num & 0x7) ? 
1 : 0; + u1_submb_col += u1_sub_mb_num; + mv_index = u2_sub_mb_ofst + u1_submb_col; + + + ps_direct->i4_mv_indices[i1_num_partitions] = mv_index; + ps_direct->i1_submb_num[i1_num_partitions] = u1_sub_mb_num; + ps_direct->i1_partitionsize[i1_num_partitions] = + partition_size; + i1_num_partitions++; + if((pu1_col_zero_flag_start[u1_submb_col] & 1) + != u1_init_colzero_flag) + ps_direct->u1_col_zeroflag_change = 1; + u1_sub_mb_num += 2; + } + u1_sub_mb_num = 8; /* move to second half of Cur MB */ + u2_sub_mb_ofst += 16;/* move to next Colocated MB */ + } + ps_direct->i1_num_partitions = i1_num_partitions; + return; + } + else + { + uc_sub_mb_num_col = u1_sub_mb_num | (u1_sub_mb_num >> 1); + uc_sub_mb_num_col &= 0xb; + u2_sub_mb_ofst += (u1_sub_mb_num >> 3) ? 16 : 0; + + ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst + uc_sub_mb_num_col; + ps_direct->i1_submb_num[0] = u1_sub_mb_num; + ps_direct->i1_partitionsize[0] = PRED_8x8; + ps_direct->i1_num_partitions = 1; + return; + } + } +} +/*! + ************************************************************************** + * \if Function name : ih264d_cal_col_pic \endif + * + * \brief + * Finds the colocated picture. 
+ * + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +WORD32 ih264d_cal_col_pic(dec_struct_t *ps_dec) +{ + struct pic_buffer_t* ps_col_pic = ps_dec->ps_col_pic; + UWORD8 uc_curpictype, uc_colpictype; + ps_col_pic = ps_dec->ps_ref_pic_buf_lx[1][0]; + uc_curpictype = (ps_dec->ps_cur_pic->u1_picturetype & 0x7); + uc_colpictype = (ps_col_pic->u1_picturetype & 0x7); + if(uc_curpictype == FRM_PIC) + { + if(uc_colpictype == FRM_PIC) + ps_dec->pf_parse_mvdirect = ih264d_one_to_one; + else if(uc_colpictype == COMP_FLD_PAIR) + { + ps_dec->pf_parse_mvdirect = ih264d_fld_to_frm; + if(ps_col_pic->i4_top_field_order_cnt + >= ps_col_pic->i4_bottom_field_order_cnt) + { + struct pic_buffer_t* ps_tempPic = ps_col_pic; + UWORD32 ui_half_num_of_sub_mbs = ((ps_dec->u2_pic_ht + * ps_dec->u2_pic_wd) >> 5); + ps_col_pic = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS]; + /* memcpy ps_tempPic to ps_col_pic */ + *ps_col_pic = *ps_tempPic; + ps_col_pic->pu1_buf1 = ps_tempPic->pu1_buf1 + + ps_tempPic->u2_frm_wd_y; + ps_col_pic->pu1_buf2 = ps_tempPic->pu1_buf2 + + ps_tempPic->u2_frm_wd_uv; + ps_col_pic->pu1_buf3 = ps_tempPic->pu1_buf3 + + ps_tempPic->u2_frm_wd_uv; + ps_col_pic->pu1_col_zero_flag = ps_tempPic->pu1_col_zero_flag + + ui_half_num_of_sub_mbs; + ps_col_pic->ps_mv = ps_tempPic->ps_mv + ui_half_num_of_sub_mbs; + + + ps_col_pic->u1_pic_type = 0;/*complementary reference field pair-refering as frame */ + + + + } + } + else + { + UWORD32 i4_error_code; + i4_error_code = ERROR_DBP_MANAGER_T; +// i4_error_code |= 1<<IVD_CORRUPTEDDATA; + return i4_error_code; + } + } + else if(uc_curpictype == AFRM_PIC) + { + ps_dec->pf_parse_mvdirect = ih264d_fld_to_mbaff; + } + else /* must be a field*/ + { + if(uc_colpictype == FRM_PIC) + ps_dec->pf_parse_mvdirect = ih264d_frm_to_fld; + else if(uc_colpictype == AFRM_PIC) + ps_dec->pf_parse_mvdirect = ih264d_mbaff_to_fld; + else + ps_dec->pf_parse_mvdirect = 
ih264d_one_to_one; + } + ps_dec->ps_col_pic = ps_col_pic; + return OK; +} + +/*! + ************************************************************************** + * \if Function name : ih264d_frm_to_fld \endif + * + * \brief + * Initializes forward and backward refernce lists for B slice decoding. + * + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +void ih264d_frm_to_fld(dec_struct_t *ps_dec, + struct pic_buffer_t *ps_col_pic, + directmv_t *ps_direct, + UWORD8 u1_wd_x, + WORD32 u2_sub_mb_ofst, + dec_mb_info_t * ps_cur_mb_info) +{ + UWORD8 *pu1_col_zero_flag_start, u1_sub_mb_num; + UWORD8 u1_num_blks, u1_col_mb_pred_mode, uc_blk; + UWORD8 i1_num_partitions = 0, partition_size, i; + WORD32 mv_index; + UWORD32 increment; + WORD32 i4_force_8X8; + UNUSED(ps_cur_mb_info); + ps_direct->u1_col_zeroflag_change = 1; + ps_direct->u1_vert_mv_scale = FRM_TO_FLD; + u1_sub_mb_num = ps_dec->u1_sub_mb_num; + + /* new calculation specific to this function */ + if((ps_col_pic->u1_picturetype & 0x7) == FRM_PIC) + { + UWORD16 u2_frm_wd_in_mbs = ps_dec->u2_frm_wd_in_mbs; + increment = (u2_frm_wd_in_mbs << 4); + /*mbAddrCol = mbAddrCol1 */ + u2_sub_mb_ofst = (ps_dec->u2_mbx + + (2 * ps_dec->u2_mby * u2_frm_wd_in_mbs)) << 4; + } + else + increment = 16; + + if(u1_wd_x == MB_SIZE) + { + ps_dec->u1_currB_type = 1; + + { + UWORD8 *puc_colZeroFlagStart_bot_mb, uc_colMbPredMode_bot_mb; + + pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + + u2_sub_mb_ofst; + u1_col_mb_pred_mode = (*pu1_col_zero_flag_start >> 6); + + puc_colZeroFlagStart_bot_mb = ps_col_pic->pu1_col_zero_flag + + u2_sub_mb_ofst + increment; + uc_colMbPredMode_bot_mb = (*puc_colZeroFlagStart_bot_mb >> 6); + + i4_force_8X8 = (u1_col_mb_pred_mode & 0x2) + || (uc_colMbPredMode_bot_mb & 0x2); + + if(i4_force_8X8) + { + u1_num_blks = 2; + partition_size = PRED_8x8; + } + else + { + partition_size = PRED_16x8; + u1_num_blks = 1; + } + } + 
+ /*As this mb is derived from 2 Mbs, min no of partitions = 2*/ + for(i = 0; i < 2; i++) + { + for(uc_blk = 0; uc_blk < u1_num_blks; uc_blk++) + { + mv_index = u2_sub_mb_ofst + u1_sub_mb_num; + mv_index += (u1_sub_mb_num & 0x7) ? 1 : 0; + + ps_direct->i4_mv_indices[i1_num_partitions] = mv_index; + ps_direct->i1_submb_num[i1_num_partitions] = u1_sub_mb_num; + ps_direct->i1_partitionsize[i1_num_partitions] = partition_size; + i1_num_partitions++; + + u1_sub_mb_num += 2; + } + u1_sub_mb_num = 8; /* move to second half of Cur MB */ + u2_sub_mb_ofst += increment;/* move to next Colocated MB */ + } + ps_direct->i1_num_partitions = i1_num_partitions; + return; + } + else + { + UWORD8 u1_sub_mb_num_col; + u1_sub_mb_num_col = u1_sub_mb_num | (u1_sub_mb_num >> 1); + u1_sub_mb_num_col &= 0xb; + u2_sub_mb_ofst += (u1_sub_mb_num >> 3) ? increment : 0; + + ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst + u1_sub_mb_num_col; + ps_direct->i1_submb_num[0] = u1_sub_mb_num; + ps_direct->i1_partitionsize[0] = PRED_8x8; + ps_direct->i1_num_partitions = 1; + return; + } +} +/*! + ************************************************************************** + * \if Function name : ih264d_fld_to_frm \endif + * + * \brief + * Initializes forward and backward refernce lists for B slice decoding. 
+ * + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +void ih264d_fld_to_frm(dec_struct_t *ps_dec, + struct pic_buffer_t *ps_col_pic, + directmv_t *ps_direct, + UWORD8 u1_wd_x, + WORD32 u2_sub_mb_ofst, + dec_mb_info_t * ps_cur_mb_info) +{ + UWORD8 *pu1_col_zero_flag_start, *pu1_col_zero_flag, + *pu1_col_zero_flag_right_half, u1_sub_mb_num, uc_sub_mb_num_col; + UWORD8 u1_col_mb_pred_mode, uc_blk; + WORD32 i4_force_8X8; + + UNUSED(ps_cur_mb_info); + ps_direct->u1_vert_mv_scale = FLD_TO_FRM; + ps_direct->u1_col_zeroflag_change = 1; + /* new calculation specific to this function for u2_sub_mb_ofst*/ + u2_sub_mb_ofst = (ps_dec->u2_mbx + + ((ps_dec->u2_mby >> 1) * ps_dec->u2_frm_wd_in_mbs)) << 4; + u2_sub_mb_ofst += ((ps_dec->u2_mby & 1) << 3); + + if(u1_wd_x == MB_SIZE) + { + pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + u2_sub_mb_ofst; + u1_col_mb_pred_mode = (*pu1_col_zero_flag_start >> 6); + ps_dec->u1_currB_type = (!!u1_col_mb_pred_mode); + + if(u1_col_mb_pred_mode & 0x2) + { + if(u1_col_mb_pred_mode == PRED_8x16) + { + ps_direct->i1_num_partitions = 2; + ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst; + ps_direct->i1_submb_num[0] = 0; + ps_direct->i1_partitionsize[0] = PRED_8x16; + ps_direct->i4_mv_indices[1] = u2_sub_mb_ofst + 2; + ps_direct->i1_submb_num[1] = 2; + ps_direct->i1_partitionsize[1] = PRED_8x16; + } + else + { + UWORD8 i1_num_partitions = 0, partition_size; + UWORD32 mv_index; + UWORD8 u1_num_sub_blks, i, u1_col_sub_mb_pred_mode, + u1_col_sub_mb_pred_mode_rt; + + u1_sub_mb_num = ps_dec->u1_sub_mb_num; + + pu1_col_zero_flag = pu1_col_zero_flag_start + u1_sub_mb_num; + u1_col_sub_mb_pred_mode = (*pu1_col_zero_flag & 0x10);/* 8x4 or 4x4 mode */ + + pu1_col_zero_flag_right_half = pu1_col_zero_flag_start + u1_sub_mb_num + + 2; + u1_col_sub_mb_pred_mode_rt = (*pu1_col_zero_flag_right_half + & 0x10);/* 8x4 or 4x4 mode */ + + i4_force_8X8 = 
(u1_col_sub_mb_pred_mode) + || (u1_col_sub_mb_pred_mode_rt); + if(i4_force_8X8) + { + u1_num_sub_blks = 2; + partition_size = PRED_8x8; + } + else + { + partition_size = PRED_8x16; + u1_num_sub_blks = 1; + } + + for(i = 0; i < 2; i++) + { + for(uc_blk = 0; uc_blk < u1_num_sub_blks; uc_blk++) + { + uc_sub_mb_num_col = u1_sub_mb_num | (u1_sub_mb_num >> 1); + uc_sub_mb_num_col &= 0x7; + mv_index = u2_sub_mb_ofst + uc_sub_mb_num_col; + + ps_direct->i4_mv_indices[i1_num_partitions] = mv_index; + ps_direct->i1_submb_num[i1_num_partitions] = + u1_sub_mb_num; + ps_direct->i1_partitionsize[i1_num_partitions] = + partition_size; + i1_num_partitions++; + u1_sub_mb_num += 8; + } + + u1_sub_mb_num = 2; /* move to second half of Cur MB */ + + } + ps_direct->i1_num_partitions = i1_num_partitions; + return; + } + } + else + { + ps_direct->i1_num_partitions = 1; + ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst; + ps_direct->i1_submb_num[0] = 0; + ps_direct->i1_partitionsize[0] = PRED_16x16; + return; + } + } + else + { + u1_sub_mb_num = ps_dec->u1_sub_mb_num; + uc_sub_mb_num_col = u1_sub_mb_num | (u1_sub_mb_num >> 1); + uc_sub_mb_num_col &= 0x7; + + ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst + uc_sub_mb_num_col; + ps_direct->i1_submb_num[0] = u1_sub_mb_num; + ps_direct->i1_partitionsize[0] = PRED_8x8; + ps_direct->i1_num_partitions = 1; + } +} +/*! + ************************************************************************** + * \if Function name : ih264d_one_to_one \endif + * + * \brief + * Initializes forward and backward refernce lists for B slice decoding. 
+ * + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +void ih264d_mbaff_to_fld(dec_struct_t *ps_dec, + struct pic_buffer_t *ps_col_pic, + directmv_t *ps_direct, + UWORD8 u1_wd_x, + WORD32 u2_sub_mb_ofst, + dec_mb_info_t * ps_cur_mb_info) +{ + UWORD8* pu1_col_zero_flag, u1_iscol_mb_fld; + u2_sub_mb_ofst <<= 1; + pu1_col_zero_flag = ps_col_pic->pu1_col_zero_flag + u2_sub_mb_ofst; + u1_iscol_mb_fld = (*pu1_col_zero_flag & 0x2) >> 1; + if(u1_iscol_mb_fld) + { + u2_sub_mb_ofst += (ps_dec->ps_cur_slice->u1_bottom_field_flag << 4); + ih264d_one_to_one(ps_dec, ps_col_pic, ps_direct, u1_wd_x, + u2_sub_mb_ofst, ps_cur_mb_info); + } + else + ih264d_frm_to_fld(ps_dec, ps_col_pic, ps_direct, u1_wd_x, + u2_sub_mb_ofst, ps_cur_mb_info); +} +/*! + ************************************************************************** + * \if Function name : ih264d_one_to_one \endif + * + * \brief + * Initializes forward and backward refernce lists for B slice decoding. 
+ * + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +void ih264d_fld_to_mbaff(dec_struct_t *ps_dec, + struct pic_buffer_t *ps_col_pic, + directmv_t *ps_direct, + UWORD8 u1_wd_x, + WORD32 u2_sub_mb_ofst, + dec_mb_info_t * ps_cur_mb_info) +{ + if((ps_col_pic->u1_picturetype & 0x7) == COMP_FLD_PAIR) + { + /* first calculate the colocated picture which varies with Mb */ + UWORD8 u1_is_cur_mb_fld; + u1_is_cur_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag; + u2_sub_mb_ofst = (u2_sub_mb_ofst & 0xffe0); /* mbaddrCol5 = curmbaddr/2;*/ + u2_sub_mb_ofst >>= 1; + + ps_col_pic = ps_dec->ps_ref_pic_buf_lx[1][0]; + if(u1_is_cur_mb_fld) + { + if(1 - ps_cur_mb_info->u1_topmb) + ps_col_pic = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS]; + + ih264d_one_to_one(ps_dec, ps_col_pic, ps_direct, u1_wd_x, + u2_sub_mb_ofst, ps_cur_mb_info); + } + else + { + + if(ABS(ps_col_pic->i4_top_field_order_cnt + - ps_dec->ps_cur_pic->i4_poc) >= + ABS(ps_dec->ps_cur_pic->i4_poc - ps_col_pic->i4_bottom_field_order_cnt)) + { + ps_col_pic = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS]; + } + + if(ps_cur_mb_info->u1_topmb == 0) + u2_sub_mb_ofst += 8; + ih264d_mbaff_cross_pmbair(ps_dec, ps_col_pic, ps_direct, u1_wd_x, + u2_sub_mb_ofst, ps_cur_mb_info); + } + ps_dec->ps_col_pic = ps_col_pic; + } + else + { + UWORD8* pu1_col_zero_flag = ps_col_pic->pu1_col_zero_flag + + u2_sub_mb_ofst; + UWORD8 temp, u1_is_cur_mb_fld, u1_iscol_mb_fld; + + u1_iscol_mb_fld = (*pu1_col_zero_flag & 0x2) >> 1; + u1_is_cur_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag; + temp = (u1_iscol_mb_fld ^ u1_is_cur_mb_fld); + + if(temp == 0) + ih264d_one_to_one(ps_dec, ps_col_pic, ps_direct, u1_wd_x, + u2_sub_mb_ofst, ps_cur_mb_info); + else + { + u2_sub_mb_ofst &= 0xffef; + if(u1_is_cur_mb_fld == 0) + { + if(ABS(ps_col_pic->i4_top_field_order_cnt + - ps_dec->ps_cur_pic->i4_poc) >= + ABS(ps_dec->ps_cur_pic->i4_poc - 
ps_col_pic->i4_bottom_field_order_cnt)) + { + u2_sub_mb_ofst += 0x10; + } + if(ps_cur_mb_info->u1_topmb == 0) + u2_sub_mb_ofst += 8; + } + ih264d_mbaff_cross_pmbair(ps_dec, ps_col_pic, ps_direct, u1_wd_x, + u2_sub_mb_ofst, ps_cur_mb_info); + } + } +} + diff --git a/decoder/ih264d_process_bslice.h b/decoder/ih264d_process_bslice.h new file mode 100755 index 0000000..5aa76e3 --- /dev/null +++ b/decoder/ih264d_process_bslice.h @@ -0,0 +1,108 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef _IH264D_PARSE_BSLICE_H_ +#define _IH264D_PARSE_BSLICE_H_ +/*! 
+************************************************************************** +* \file ih264d_process_bslice.h +* +* \brief +* Contains declarations of routines that decode a B slice type +* +* Detailed_description +* +* \date +* 21/12/2002 +* +* \author NS +************************************************************************** +*/ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_structs.h" +WORD32 ih264d_parse_bslice(dec_struct_t * ps_dec, + UWORD16 u2_first_mb_in_slice); +WORD32 ih264d_decode_spatial_direct(dec_struct_t * ps_dec, + UWORD8 u1_wd_x, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num); +WORD32 ih264d_decode_temporal_direct(dec_struct_t * ps_dec, + UWORD8 u1_wd_x, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num); +WORD32 parseBSliceData(dec_struct_t * ps_dec, + dec_slice_params_t * ps_slice, + UWORD16 u2_first_mb_in_slice); +WORD32 parseBSliceData(dec_struct_t * ps_dec, + dec_slice_params_t * ps_slice, + UWORD16 u2_first_mb_in_slice); + +void ih264d_init_ref_idx_lx_b(dec_struct_t *ps_dec); + +void ih264d_convert_frm_to_fld_list(struct pic_buffer_t *ps_ref_pic_buf_lx, + UWORD8 *pu1_L0, + dec_struct_t *ps_dec, + UWORD8 u1_num_short_term_bufs); + +void ih264d_convert_frm_mbaff_list(dec_struct_t *ps_dec); +void ih264d_one_to_one(dec_struct_t *ps_dec, + struct pic_buffer_t *ps_col_pic, + directmv_t *ps_direct, + UWORD8 u1_wd_x, + WORD32 u2_sub_mb_ofst, + dec_mb_info_t * ps_cur_mb_info); +void ih264d_mbaff_cross_pmbair(dec_struct_t *ps_dec, + struct pic_buffer_t *ps_col_pic, + directmv_t *ps_direct, + UWORD8 u1_wd_x, + WORD32 u2_sub_mb_ofst, + dec_mb_info_t * ps_cur_mb_info); +void ih264d_frm_to_fld(dec_struct_t *ps_dec, + struct pic_buffer_t *ps_col_pic, + directmv_t *ps_direct, + UWORD8 u1_wd_x, + WORD32 u2_sub_mb_ofst, + dec_mb_info_t * ps_cur_mb_info); +void ih264d_fld_to_frm(dec_struct_t *ps_dec, + struct pic_buffer_t *ps_col_pic, + directmv_t *ps_direct, + UWORD8 u1_wd_x, + WORD32 
u2_sub_mb_ofst, + dec_mb_info_t * ps_cur_mb_info); +void ih264d_mbaff_to_fld(dec_struct_t *ps_dec, + struct pic_buffer_t *ps_col_pic, + directmv_t *ps_direct, + UWORD8 u1_wd_x, + WORD32 u2_sub_mb_ofst, + dec_mb_info_t * ps_cur_mb_info); +void ih264d_fld_to_mbaff(dec_struct_t *ps_dec, + struct pic_buffer_t *ps_col_pic, + directmv_t *ps_direct, + UWORD8 u1_wd_x, + WORD32 u2_sub_mb_ofst, + dec_mb_info_t * ps_cur_mb_info); +WORD32 ih264d_cal_col_pic(dec_struct_t *ps_dec); + +WORD32 ih264d_mv_pred_ref_tfr_nby2_bmb(dec_struct_t * ps_dec, + UWORD8 u1_num_mbs, + UWORD8 u1_num_mbsNby2); + +#endif /* _IH264D_PARSE_BSLICE_H_ */ diff --git a/decoder/ih264d_process_intra_mb.c b/decoder/ih264d_process_intra_mb.c new file mode 100755 index 0000000..96006ce --- /dev/null +++ b/decoder/ih264d_process_intra_mb.c @@ -0,0 +1,2006 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! 
+ ************************************************************************** + * \file ih264d_process_intra_mb.c + * + * \brief + * Contains routines that decode a I slice type + * + * Detailed_description + * + * \date + * 07/07/2003 + * + * \author NS + ************************************************************************** + */ + +#include <string.h> +#include "ih264d_bitstrm.h" +#include "ih264d_defs.h" +#include "ih264d_debug.h" +#include "ih264d_tables.h" +#include "ih264d_structs.h" +#include "ih264d_defs.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_mb_utils.h" +#include "ih264d_parse_slice.h" +#include "ih264d_process_intra_mb.h" +#include "ih264d_error_handler.h" +#include "ih264d_quant_scaling.h" +#include "ih264d_tables.h" + +/*! + ************************************************************************** + * \if Function name : ih264d_itrans_recon_luma_dc \endif + * + * \brief + * This function does InvTransform, scaling and reconstruction of Luma DC. + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +void ih264d_itrans_recon_luma_dc(dec_struct_t *ps_dec, + WORD16* pi2_src, + WORD16* pi2_coeff_block, + const UWORD16 *pu2_weigh_mat) +{ + WORD32 i; + WORD16 pi2_out[16]; + WORD32 pi4_tmp[16]; + WORD16 *pi2_out_ptr = &pi2_out[0]; + PROFILE_DISABLE_IQ_IT_RECON_RETURN() + ps_dec->pf_ihadamard_scaling_4x4(pi2_src, pi2_out, + ps_dec->pu2_quant_scale_y, pu2_weigh_mat, + ps_dec->u1_qp_y_div6, pi4_tmp); + for(i = 0; i < 4; i++) + { + pi2_coeff_block[0] = pi2_out_ptr[0]; + pi2_coeff_block[4 * 16] = pi2_out_ptr[4]; + pi2_coeff_block[8 * 16] = pi2_out_ptr[8]; + pi2_coeff_block[12 * 16] = pi2_out_ptr[12]; + + pi2_out_ptr++; /* Point to next column */ + pi2_coeff_block += 16; + } +} +/*! 
+ ************************************************************************** + * \if Function name : ih264d_read_intra_pred_modes \endif + * + * \brief + * Reads the intra pred mode related values of I4x4 MB from bitstream. + * + * This function will read the prev intra pred mode flags and + * stores it in pu1_prev_intra4x4_pred_mode_flag. If the u4_flag + * indicates that most probable mode is not intra pred mode, then + * the rem_intra4x4_pred_mode is read and stored in + * pu1_rem_intra4x4_pred_mode array. + * + * + * \return + * 0 on success and Error code otherwise + * + ************************************************************************** + */ +WORD32 ih264d_read_intra_pred_modes(dec_struct_t * ps_dec, + UWORD8 * pu1_prev_intra4x4_pred_mode_flag, + UWORD8 * pu1_rem_intra4x4_pred_mode, + UWORD32 u4_trans_form8x8) +{ + WORD32 i4x4_luma_blk_idx = 0, i8x8_luma_blk_idx = 0; + + dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm; + + if(!u4_trans_form8x8) + { + for(i4x4_luma_blk_idx = 0; i4x4_luma_blk_idx < 16; ++i4x4_luma_blk_idx) + { + UWORD32 u4_temp; + SWITCHOFFTRACE; + + GETBIT(u4_temp, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer); + *pu1_prev_intra4x4_pred_mode_flag = (UWORD8)u4_temp; + if(!(*pu1_prev_intra4x4_pred_mode_flag)) + { + GETBITS(u4_temp, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer, 3); + + *(pu1_rem_intra4x4_pred_mode) = (UWORD8)u4_temp; + } + + pu1_prev_intra4x4_pred_mode_flag++; + pu1_rem_intra4x4_pred_mode++; + } + } + else + { + /**********************************************************************/ + /* prev_intra4x4_pred_modes to be interpreted as */ + /* prev_intra8x8_pred_modes in case of transform 8x8 */ + /**********************************************************************/ + for(i8x8_luma_blk_idx = 0; i8x8_luma_blk_idx < 4; i8x8_luma_blk_idx++) + { + UWORD32 u4_temp; + GETBIT(u4_temp, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer); + *pu1_prev_intra4x4_pred_mode_flag = (UWORD8)u4_temp; + if(!(*pu1_prev_intra4x4_pred_mode_flag)) + 
{ + GETBITS(u4_temp, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer, 3); + + (*pu1_rem_intra4x4_pred_mode) = (UWORD8)u4_temp; + } + pu1_prev_intra4x4_pred_mode_flag++; + pu1_rem_intra4x4_pred_mode++; + } + } + return (0); +} +WORD32 ih264d_unpack_coeff4x4_4x4blk(dec_struct_t * ps_dec, + WORD16 *pi2_out_coeff_data, + UWORD8 *pu1_inv_scan) +{ + tu_sblk4x4_coeff_data_t *ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_proc_tu_coeff_data; + UWORD16 u2_sig_coeff_map = ps_tu_4x4->u2_sig_coeff_map; + WORD32 idx = 0; + WORD16 *pi2_coeff_data = &ps_tu_4x4->ai2_level[0]; + WORD32 dc_only_flag = 0; + WORD32 num_coeff = 0; + + PROFILE_DISABLE_UNPACK_LUMA() + while(u2_sig_coeff_map) + { + idx = CLZ(u2_sig_coeff_map); + + idx = 31 - idx; + RESET_BIT(u2_sig_coeff_map,idx); + + idx = pu1_inv_scan[idx]; + pi2_out_coeff_data[idx] = *pi2_coeff_data++; + num_coeff++; + } + + if((num_coeff == 1) && (idx == 0)) + { + dc_only_flag = 1; + } + + { + WORD32 offset; + offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4; + offset = ALIGN4(offset); + ps_dec->pv_proc_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_proc_tu_coeff_data + offset); + } + + return dc_only_flag; +} + +UWORD32 ih264d_unpack_coeff4x4_8x8blk(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD16 ui2_luma_csbp, + WORD16 *pi2_out_coeff_data) +{ + UWORD8 *pu1_inv_scan; + UWORD8 u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag; + UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld; + UWORD32 u4_luma_dc_only_csbp = 0; + WORD32 dc_only_flag = 0; + + PROFILE_DISABLE_UNPACK_LUMA() + if(u1_field_coding_flag || u1_mb_field_decoding_flag) + { + pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_fld; + } + else + { + pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan; + } + + // sub 0 + if(ui2_luma_csbp & 0x1) + { + memset(pi2_out_coeff_data,0,16*sizeof(WORD16)); + dc_only_flag = ih264d_unpack_coeff4x4_4x4blk(ps_dec, + pi2_out_coeff_data, + pu1_inv_scan); + + INSERT_BIT(u4_luma_dc_only_csbp, 0, 
dc_only_flag); + } + + pi2_out_coeff_data += 16; + // sub 1 + if(ui2_luma_csbp & 0x2) + { + memset(pi2_out_coeff_data,0,16*sizeof(WORD16)); + dc_only_flag = ih264d_unpack_coeff4x4_4x4blk(ps_dec, + pi2_out_coeff_data, + pu1_inv_scan); + INSERT_BIT(u4_luma_dc_only_csbp, 1, dc_only_flag); + } + + pi2_out_coeff_data += 16 + 32; + // sub 2 + if(ui2_luma_csbp & 0x10) + { + memset(pi2_out_coeff_data,0,16*sizeof(WORD16)); + dc_only_flag = ih264d_unpack_coeff4x4_4x4blk(ps_dec, + pi2_out_coeff_data, + pu1_inv_scan); + INSERT_BIT(u4_luma_dc_only_csbp, 4, dc_only_flag); + } + + pi2_out_coeff_data += 16; + // sub 3 + if(ui2_luma_csbp & 0x20) + { + memset(pi2_out_coeff_data,0,16*sizeof(WORD16)); + dc_only_flag = ih264d_unpack_coeff4x4_4x4blk(ps_dec, + pi2_out_coeff_data, + pu1_inv_scan); + INSERT_BIT(u4_luma_dc_only_csbp, 5, dc_only_flag); + } + return u4_luma_dc_only_csbp; +} +WORD32 ih264d_unpack_coeff8x8_8x8blk_cavlc(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD16 ui2_luma_csbp, + WORD16 *pi2_out_coeff_data) +{ + UWORD8 *pu1_inv_scan; + UWORD8 u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag; + UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld; + WORD32 dc_only_flag = 0; + + PROFILE_DISABLE_UNPACK_LUMA() + if(ui2_luma_csbp & 0x33) + { + memset(pi2_out_coeff_data,0,64*sizeof(WORD16)); + } + + if(!u1_mb_field_decoding_flag) + { + pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0]; + } + else + { + pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0]; + } + // sub 0 + if(ui2_luma_csbp & 0x1) + { + dc_only_flag = ih264d_unpack_coeff4x4_4x4blk(ps_dec, + pi2_out_coeff_data, + pu1_inv_scan); + } + + if(!u1_mb_field_decoding_flag) + { + pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1]; + } + else + { + pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1]; + } + // sub 1 + if(ui2_luma_csbp & 0x2) + { + dc_only_flag = 0; + ih264d_unpack_coeff4x4_4x4blk(ps_dec, + pi2_out_coeff_data, + 
pu1_inv_scan); + } + + if(!u1_mb_field_decoding_flag) + { + pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2]; + } + else + { + pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2]; + } + // sub 2 + if(ui2_luma_csbp & 0x10) + { + dc_only_flag = 0; + ih264d_unpack_coeff4x4_4x4blk(ps_dec, + pi2_out_coeff_data, + pu1_inv_scan); + } + + if(!u1_mb_field_decoding_flag) + { + pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3]; + } + else + { + pu1_inv_scan = + (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3]; + } + // sub 3 + if(ui2_luma_csbp & 0x20) + { + dc_only_flag = 0; + ih264d_unpack_coeff4x4_4x4blk(ps_dec, + pi2_out_coeff_data, + pu1_inv_scan); + } + return dc_only_flag; +} +void ih264d_unpack_coeff4x4_8x8blk_chroma(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD16 ui2_chroma_csbp, + WORD16 *pi2_out_coeff_data) +{ + UWORD8 *pu1_inv_scan; + UWORD8 u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag; + UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld; + + PROFILE_DISABLE_UNPACK_CHROMA() + if(u1_field_coding_flag || u1_mb_field_decoding_flag) + { + pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_fld; + } + else + { + pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan; + } + + if(ui2_chroma_csbp & 0x1) + { + memset(pi2_out_coeff_data,0,16*sizeof(WORD16)); + ih264d_unpack_coeff4x4_4x4blk(ps_dec, + pi2_out_coeff_data, + pu1_inv_scan); + } + pi2_out_coeff_data += 16; + if(ui2_chroma_csbp & 0x2) + { + memset(pi2_out_coeff_data,0,16*sizeof(WORD16)); + ih264d_unpack_coeff4x4_4x4blk(ps_dec, + pi2_out_coeff_data, + pu1_inv_scan); + } + + pi2_out_coeff_data += 16; + if(ui2_chroma_csbp & 0x4) + { + memset(pi2_out_coeff_data,0,16*sizeof(WORD16)); + ih264d_unpack_coeff4x4_4x4blk(ps_dec, + pi2_out_coeff_data, + pu1_inv_scan); + } + + pi2_out_coeff_data += 16; + if(ui2_chroma_csbp & 0x8) + { + memset(pi2_out_coeff_data,0,16*sizeof(WORD16)); + ih264d_unpack_coeff4x4_4x4blk(ps_dec, + pi2_out_coeff_data, + 
pu1_inv_scan); + } +} +UWORD32 ih264d_unpack_luma_coeff4x4_mb(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 intra_flag) +{ + UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type; + UWORD16 ui2_luma_csbp = ps_cur_mb_info->u2_luma_csbp; + UWORD8 *pu1_inv_scan = ps_dec->pu1_inv_scan; + WORD16 *pi2_coeff_data = ps_dec->pi2_coeff_data; + + PROFILE_DISABLE_UNPACK_LUMA() + if(!ps_cur_mb_info->u1_tran_form8x8) + { + UWORD32 u4_luma_dc_only_csbp = 0; + UWORD32 u4_temp = 0; + WORD16* pi2_dc_val = NULL; + /* + * Reserve the pointer to dc vals. The dc vals will be copied + * after unpacking of ac vals since memset to 0 inside. + */ + if(intra_flag && (u1_mb_type != I_4x4_MB)) + { + if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag,0)) + { + pi2_dc_val = (WORD16 *)ps_dec->pv_proc_tu_coeff_data; + + ps_dec->pv_proc_tu_coeff_data = (void *)(pi2_dc_val + 16); + } + } + + if(ui2_luma_csbp) + { + pi2_coeff_data = ps_dec->pi2_coeff_data; + u4_temp = ih264d_unpack_coeff4x4_8x8blk(ps_dec, + ps_cur_mb_info, + ui2_luma_csbp, + pi2_coeff_data); + u4_luma_dc_only_csbp = u4_temp; + + pi2_coeff_data += 32; + + ui2_luma_csbp = ui2_luma_csbp >> 2; + u4_temp = ih264d_unpack_coeff4x4_8x8blk(ps_dec, + ps_cur_mb_info, + ui2_luma_csbp, + pi2_coeff_data); + + u4_luma_dc_only_csbp |= (u4_temp << 2); + + pi2_coeff_data += 32 + 64; + + ui2_luma_csbp = ui2_luma_csbp >> 6; + u4_temp = ih264d_unpack_coeff4x4_8x8blk(ps_dec, + ps_cur_mb_info, + ui2_luma_csbp, + pi2_coeff_data); + + u4_luma_dc_only_csbp |= (u4_temp << 8); + + pi2_coeff_data += 32; + + ui2_luma_csbp = ui2_luma_csbp >> 2; + u4_temp = ih264d_unpack_coeff4x4_8x8blk(ps_dec, + ps_cur_mb_info, + ui2_luma_csbp, + pi2_coeff_data); + u4_luma_dc_only_csbp |= (u4_temp << 10); + } + + if(pi2_dc_val != NULL) + { + WORD32 i; + pi2_coeff_data = ps_dec->pi2_coeff_data; + for(i = 0; i < 4; i++) + { + pi2_coeff_data[0] = pi2_dc_val[0]; + pi2_coeff_data[4 * 16] = pi2_dc_val[4]; + pi2_coeff_data[8 * 16] = pi2_dc_val[8]; + pi2_coeff_data[12 * 16] = 
pi2_dc_val[12]; + + pi2_dc_val++; /* Point to next column */ + pi2_coeff_data += 16; + } + u4_luma_dc_only_csbp = ps_cur_mb_info->u2_luma_csbp ^ 0xFFFF; + } + return u4_luma_dc_only_csbp; + } + else + { + UWORD32 u4_luma_dc_only_cbp = 0; + WORD32 dc_only_flag; + if(ui2_luma_csbp) + { + pi2_coeff_data = ps_dec->pi2_coeff_data; + dc_only_flag = ih264d_unpack_coeff8x8_8x8blk_cavlc(ps_dec, + ps_cur_mb_info, + ui2_luma_csbp, + pi2_coeff_data); + INSERT_BIT(u4_luma_dc_only_cbp, 0, dc_only_flag); + + pi2_coeff_data += 64; + + ui2_luma_csbp = ui2_luma_csbp >> 2; + dc_only_flag = ih264d_unpack_coeff8x8_8x8blk_cavlc(ps_dec, + ps_cur_mb_info, + ui2_luma_csbp, + pi2_coeff_data); + + INSERT_BIT(u4_luma_dc_only_cbp, 1, dc_only_flag); + + pi2_coeff_data += 64; + + ui2_luma_csbp = ui2_luma_csbp >> 6; + dc_only_flag = ih264d_unpack_coeff8x8_8x8blk_cavlc(ps_dec, + ps_cur_mb_info, + ui2_luma_csbp, + pi2_coeff_data); + + INSERT_BIT(u4_luma_dc_only_cbp, 2, dc_only_flag); + + pi2_coeff_data += 64; + ui2_luma_csbp = ui2_luma_csbp >> 2; + dc_only_flag = ih264d_unpack_coeff8x8_8x8blk_cavlc(ps_dec, + ps_cur_mb_info, + ui2_luma_csbp, + pi2_coeff_data); + INSERT_BIT(u4_luma_dc_only_cbp, 3, dc_only_flag); + } + return u4_luma_dc_only_cbp; + } + +} + +void ih264d_unpack_chroma_coeff4x4_mb(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info) +{ + UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type; + UWORD16 ui2_chroma_csbp = ps_cur_mb_info->u2_chroma_csbp; + UWORD8 *pu1_inv_scan = ps_dec->pu1_inv_scan; + WORD16 *pi2_coeff_data = ps_dec->pi2_coeff_data; + WORD32 i; + WORD16 *pi2_dc_val_u = NULL; + WORD16 *pi2_dc_val_v = NULL; + + PROFILE_DISABLE_UNPACK_CHROMA() + if((ps_cur_mb_info->u1_cbp >> 4) == CBPC_ALLZERO) + return; + + /* + * Reserve the pointers to dc vals. The dc vals will be copied + * after unpacking of ac vals since memset to 0 inside. 
+ */ + if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag,1)) + { + pi2_dc_val_u = (WORD16 *)ps_dec->pv_proc_tu_coeff_data; + + ps_dec->pv_proc_tu_coeff_data = (void *)(pi2_dc_val_u + 4); + } + if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag,2)) + { + pi2_dc_val_v = (WORD16 *)ps_dec->pv_proc_tu_coeff_data; + + ps_dec->pv_proc_tu_coeff_data = (void *)(pi2_dc_val_v + 4); + } + + if((ps_cur_mb_info->u1_cbp >> 4) == CBPC_NONZERO) + { + pi2_coeff_data = ps_dec->pi2_coeff_data; + ih264d_unpack_coeff4x4_8x8blk_chroma(ps_dec, + ps_cur_mb_info, + ui2_chroma_csbp, + pi2_coeff_data); + + pi2_coeff_data += 64; + ui2_chroma_csbp = ui2_chroma_csbp >> 4; + ih264d_unpack_coeff4x4_8x8blk_chroma(ps_dec, + ps_cur_mb_info, + ui2_chroma_csbp, + pi2_coeff_data); + + } + + pi2_coeff_data = ps_dec->pi2_coeff_data; + if(pi2_dc_val_u != NULL) + { + pi2_coeff_data[0] = *pi2_dc_val_u++; + pi2_coeff_data[1 * 16] = *pi2_dc_val_u++; + pi2_coeff_data[2 * 16] = *pi2_dc_val_u++; + pi2_coeff_data[3 * 16] = *pi2_dc_val_u++; + } + else + { + pi2_coeff_data[0] = 0; + pi2_coeff_data[1 * 16] = 0; + pi2_coeff_data[2 * 16] = 0; + pi2_coeff_data[3 * 16] = 0; + } + pi2_coeff_data += 64; + if(pi2_dc_val_v != NULL) + { + pi2_coeff_data[0] = *pi2_dc_val_v++; + pi2_coeff_data[1 * 16] = *pi2_dc_val_v++; + pi2_coeff_data[2 * 16] = *pi2_dc_val_v++; + pi2_coeff_data[3 * 16] = *pi2_dc_val_v++; + } + else + { + pi2_coeff_data[0] = 0; + pi2_coeff_data[1 * 16] = 0; + pi2_coeff_data[2 * 16] = 0; + pi2_coeff_data[3 * 16] = 0; + } +} +UWORD32 ih264d_unpack_luma_coeff8x8_mb(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info) +{ + WORD32 blk_8x8_cnt; + WORD16 *pi2_out_coeff_data = ps_dec->pi2_coeff_data; + UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld; + UWORD8 *pu1_inv_scan; + UWORD32 u4_luma_dc_only_cbp = 0; + + PROFILE_DISABLE_UNPACK_LUMA() + if(!u1_field_coding_flag) + { + /*******************************************************************/ + /* initializing inverse scan matrices */ + 
/*******************************************************************/ + pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_prog8x8_cabac; + } + else + { + /*******************************************************************/ + /* initializing inverse scan matrices */ + /*******************************************************************/ + pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_int8x8_cabac; + } + + for(blk_8x8_cnt = 0; blk_8x8_cnt < 4; blk_8x8_cnt++) + { + if(CHECKBIT(ps_cur_mb_info->u1_cbp, blk_8x8_cnt)) + { + tu_blk8x8_coeff_data_t *ps_tu_8x8 = (tu_blk8x8_coeff_data_t *)ps_dec->pv_proc_tu_coeff_data; + UWORD32 u4_sig_coeff_map; + WORD32 idx = 0; + WORD16 *pi2_coeff_data = &ps_tu_8x8->ai2_level[0]; + WORD32 num_coeff = 0; + + /* memset 64 coefficient to zero */ + memset(pi2_out_coeff_data,0,64*sizeof(WORD16)); + + u4_sig_coeff_map = ps_tu_8x8->au4_sig_coeff_map[1]; + + while(u4_sig_coeff_map) + { + idx = CLZ(u4_sig_coeff_map); + + idx = 31 - idx; + RESET_BIT(u4_sig_coeff_map,idx); + + idx = pu1_inv_scan[idx + 32]; + pi2_out_coeff_data[idx] = *pi2_coeff_data++; + num_coeff++; + } + + u4_sig_coeff_map = ps_tu_8x8->au4_sig_coeff_map[0]; + while(u4_sig_coeff_map) + { + idx = CLZ(u4_sig_coeff_map); + + idx = 31 - idx; + RESET_BIT(u4_sig_coeff_map,idx); + + idx = pu1_inv_scan[idx]; + pi2_out_coeff_data[idx] = *pi2_coeff_data++; + num_coeff++; + } + + if((num_coeff == 1) && (idx == 0)) + { + SET_BIT(u4_luma_dc_only_cbp,blk_8x8_cnt); + } + + + { + WORD32 offset; + offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_8x8; + offset = ALIGN4(offset); + ps_dec->pv_proc_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_proc_tu_coeff_data + offset); + } + } + pi2_out_coeff_data += 64; + } + + return u4_luma_dc_only_cbp; +} +/*! + ************************************************************************** + * \if Function name : ih264d_process_intra_mb \endif + * + * \brief + * This function decodes an I MB. Intraprediction is carried out followed + * by InvTramsform. 
Both IntraPrediction and Reconstrucion are carried out + * row buffer itself. + * + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +WORD32 ih264d_process_intra_mb(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num) +{ + UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type; + UWORD8 uc_temp = ps_cur_mb_info->u1_mb_ngbr_availablity; + UWORD8 u1_top_available = BOOLEAN(uc_temp & TOP_MB_AVAILABLE_MASK); + UWORD8 u1_left_available = BOOLEAN(uc_temp & LEFT_MB_AVAILABLE_MASK); + UWORD8 u1_use_top_right_mb = BOOLEAN(uc_temp & TOP_RIGHT_MB_AVAILABLE_MASK); + UWORD8 u1_use_top_left_mb = BOOLEAN(uc_temp & TOP_LEFT_MB_AVAILABLE_MASK); + UWORD8 uc_useTopMB = u1_top_available; + UWORD16 u2_use_left_mb = u1_left_available; + UWORD16 u2_use_left_mb_pack; + UWORD8 *pu1_luma_pred_buffer; + /* CHANGED CODE */ + UWORD8 *pu1_luma_rei1_buffer; + UWORD8 *puc_top; + + mb_neigbour_params_t *ps_left_mb; + mb_neigbour_params_t *ps_top_mb; + mb_neigbour_params_t *ps_top_right_mb; + mb_neigbour_params_t *ps_curmb; + + UWORD16 u2_mbx = ps_cur_mb_info->u2_mbx; + UWORD32 ui_pred_width, ui_rec_width; + WORD16 *pi2_y_coeff; + UWORD8 u1_mbaff, u1_topmb, u1_mb_field_decoding_flag; + UWORD32 u4_num_pmbair; + UWORD16 ui2_luma_csbp = ps_cur_mb_info->u2_luma_csbp; + UWORD8 *pu1_yleft, *pu1_ytop_left; + /* Chroma variables*/ + UWORD8 *pu1_top_u; + UWORD8 *pu1_uleft; + UWORD8 *pu1_u_top_left; + /* CHANGED CODE */ + UWORD8 *pu1_mb_cb_rei1_buffer, *pu1_mb_cr_rei1_buffer; + UWORD32 u4_recwidth_cr; + /* CHANGED CODE */ + tfr_ctxt_t *ps_frame_buf = &ps_dec->s_tran_addrecon; + UWORD32 u4_luma_dc_only_csbp = 0; + UWORD32 u4_luma_dc_only_cbp = 0; + + UWORD8 *pu1_prev_intra4x4_pred_mode_data = (UWORD8 *)ps_dec->pv_proc_tu_coeff_data; //Pointer to keep track of intra4x4_pred_mode data in pv_proc_tu_coeff_data buffer + u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + u1_topmb = 
ps_cur_mb_info->u1_topmb; + u4_num_pmbair = (u1_mb_num >> u1_mbaff); + + + /*--------------------------------------------------------------------*/ + /* Find the current MB's mb params */ + /*--------------------------------------------------------------------*/ + u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag; + + ps_curmb = ps_cur_mb_info->ps_curmb; + ps_top_mb = ps_cur_mb_info->ps_top_mb; + ps_left_mb = ps_cur_mb_info->ps_left_mb; + ps_top_right_mb = ps_cur_mb_info->ps_top_right_mb; + + /*--------------------------------------------------------------------*/ + /* Check whether neighbouring MB is Inter MB and */ + /* constrained intra pred is 1. */ + /*--------------------------------------------------------------------*/ + u2_use_left_mb_pack = (u2_use_left_mb << 8) + u2_use_left_mb; + + if(ps_dec->ps_cur_pps->u1_constrained_intra_pred_flag) + { + UWORD8 u1_left = (UWORD8)u2_use_left_mb; + + uc_useTopMB = uc_useTopMB + && ((ps_top_mb->u1_mb_type != P_MB) + && (ps_top_mb->u1_mb_type != B_MB)); + u2_use_left_mb = u2_use_left_mb + && ((ps_left_mb->u1_mb_type != P_MB) + && (ps_left_mb->u1_mb_type != B_MB)); + + u2_use_left_mb_pack = (u2_use_left_mb << 8) + u2_use_left_mb; + if(u1_mbaff) + { + if(u1_mb_field_decoding_flag ^ ps_left_mb->u1_mb_fld) + { + u1_left = u1_left + && (((ps_left_mb + 1)->u1_mb_type != P_MB) + && ((ps_left_mb + 1)->u1_mb_type + != B_MB)); + u2_use_left_mb = u2_use_left_mb && u1_left; + if(u1_mb_field_decoding_flag) + u2_use_left_mb_pack = (u1_left << 8) + + (u2_use_left_mb_pack & 0xff); + else + u2_use_left_mb_pack = (u2_use_left_mb << 8) + + (u2_use_left_mb); + } + } + u1_use_top_right_mb = + u1_use_top_right_mb + && ((ps_top_right_mb->u1_mb_type != P_MB) + && (ps_top_right_mb->u1_mb_type + != B_MB)); + + u1_use_top_left_mb = + u1_use_top_left_mb + && ((ps_cur_mb_info->u1_topleft_mbtype != P_MB) + && (ps_cur_mb_info->u1_topleft_mbtype + != B_MB)); + } + + /*********************Common pointer calculations 
*************************/ + /* CHANGED CODE */ + pu1_luma_pred_buffer = ps_dec->pu1_y; + pu1_luma_rei1_buffer = ps_frame_buf->pu1_dest_y + (u4_num_pmbair << 4); + pu1_mb_cb_rei1_buffer = ps_frame_buf->pu1_dest_u + + (u4_num_pmbair << 3) * YUV420SP_FACTOR; + pu1_mb_cr_rei1_buffer = ps_frame_buf->pu1_dest_v + (u4_num_pmbair << 3); + ui_pred_width = MB_SIZE; + ui_rec_width = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag; + u4_recwidth_cr = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag; + /************* Current and top luma pointer *****************/ + + if(u1_mbaff) + { + if(u1_topmb == 0) + { + pu1_luma_rei1_buffer += ( + u1_mb_field_decoding_flag ? + (ui_rec_width >> 1) : + (ui_rec_width << 4)); + pu1_mb_cb_rei1_buffer += ( + u1_mb_field_decoding_flag ? + (u4_recwidth_cr >> 1) : + (u4_recwidth_cr << 3)); + pu1_mb_cr_rei1_buffer += ( + u1_mb_field_decoding_flag ? + (u4_recwidth_cr >> 1) : + (u4_recwidth_cr << 3)); + } + } + + /* CHANGED CODE */ + if(ps_dec->u4_use_intrapred_line_copy == 1) + { + puc_top = ps_dec->pu1_prev_y_intra_pred_line + (ps_cur_mb_info->u2_mbx << 4); + pu1_top_u = ps_dec->pu1_prev_u_intra_pred_line + + (ps_cur_mb_info->u2_mbx << 3) * YUV420SP_FACTOR; + } + else + { + puc_top = pu1_luma_rei1_buffer - ui_rec_width; + pu1_top_u = pu1_mb_cb_rei1_buffer - u4_recwidth_cr; + } + /* CHANGED CODE */ + + /************* Left pointer *****************/ + pu1_yleft = pu1_luma_rei1_buffer - 1; + pu1_uleft = pu1_mb_cb_rei1_buffer - 1 * YUV420SP_FACTOR; + + /**************Top Left pointer calculation**********/ + pu1_ytop_left = puc_top - 1; + pu1_u_top_left = pu1_top_u - 1 * YUV420SP_FACTOR; + + /* CHANGED CODE */ + PROFILE_DISABLE_INTRA_PRED() + { + pu1_prev_intra4x4_pred_mode_data = (UWORD8 *)ps_dec->pv_proc_tu_coeff_data; + if(u1_mb_type == I_4x4_MB && ps_cur_mb_info->u1_tran_form8x8 == 0) + { + ps_dec->pv_proc_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_proc_tu_coeff_data + 32); + + } + else if (u1_mb_type == I_4x4_MB && 
ps_cur_mb_info->u1_tran_form8x8 == 1) + { + ps_dec->pv_proc_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_proc_tu_coeff_data + 8); + } + } + if(!ps_cur_mb_info->u1_tran_form8x8) + { + u4_luma_dc_only_csbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec, + ps_cur_mb_info, + 1); + } + else + { + if(!ps_dec->ps_cur_pps->u1_entropy_coding_mode) + { + u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec, + ps_cur_mb_info, + 1); + } + else + { + u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff8x8_mb(ps_dec, + ps_cur_mb_info); + } + } + + pi2_y_coeff = ps_dec->pi2_coeff_data; + + if(u1_mb_type != I_4x4_MB) + { + UWORD8 u1_intrapred_mode = MB_TYPE_TO_INTRA_16x16_MODE(u1_mb_type); + /*--------------------------------------------------------------------*/ + /* 16x16 IntraPrediction */ + /*--------------------------------------------------------------------*/ + { + UWORD8 u1_packed_modes = (u1_top_available << 1) + + u1_left_available; + UWORD8 u1_err_code = + (u1_intrapred_mode & 1) ? + u1_intrapred_mode : + (u1_intrapred_mode ^ 2); + + if((u1_err_code & u1_packed_modes) ^ u1_err_code) + { + return ERROR_INTRAPRED; + } + } + { + UWORD8 au1_ngbr_pels[33]; + /* Get neighbour pixels */ + /* left pels */ + if(u2_use_left_mb) + { + WORD32 i; + for(i = 0; i < 16; i++) + au1_ngbr_pels[16 - 1 - i] = pu1_yleft[i * ui_rec_width]; + } + else + { + memset(au1_ngbr_pels, 0, 16); + } + + /* top left pels */ + au1_ngbr_pels[16] = *pu1_ytop_left; + + /* top pels */ + if(uc_useTopMB) + { + memcpy(au1_ngbr_pels + 16 + 1, puc_top, 16); + } + else + { + memset(au1_ngbr_pels + 16 + 1, 0, 16); + } + PROFILE_DISABLE_INTRA_PRED() + ps_dec->apf_intra_pred_luma_16x16[u1_intrapred_mode]( + au1_ngbr_pels, pu1_luma_rei1_buffer, 1, ui_rec_width, + ((uc_useTopMB << 2) | u2_use_left_mb)); + } + { + UWORD32 i; + WORD16 ai2_tmp[16]; + for(i = 0; i < 16; i++) + { + WORD16 *pi2_level = pi2_y_coeff + (i << 4); + UWORD8 *pu1_pred_sblk = pu1_luma_rei1_buffer + + ((i & 0x3) * BLK_SIZE) + + (i >> 2) * (ui_rec_width << 
2); + PROFILE_DISABLE_IQ_IT_RECON() + { + if(CHECKBIT(ps_cur_mb_info->u2_luma_csbp, i)) + { + ps_dec->pf_iquant_itrans_recon_luma_4x4( + pi2_level, + pu1_pred_sblk, + pu1_pred_sblk, + ui_rec_width, + ui_rec_width, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[0], + ps_cur_mb_info->u1_qp_div6, ai2_tmp, 1, + pi2_level); + } + else if((CHECKBIT(u4_luma_dc_only_csbp, i)) && pi2_level[0] != 0) + { + ps_dec->pf_iquant_itrans_recon_luma_4x4_dc( + pi2_level, + pu1_pred_sblk, + pu1_pred_sblk, + ui_rec_width, + ui_rec_width, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[0], + ps_cur_mb_info->u1_qp_div6, ai2_tmp, 1, + pi2_level); + } + } + } + } + } + else if(!ps_cur_mb_info->u1_tran_form8x8) + { + UWORD8 u1_is_left_sub_block, u1_is_top_sub_block = uc_useTopMB; + UWORD8 u1_sub_blk_x, u1_sub_blk_y, u1_sub_mb_num; + WORD8 i1_top_pred_mode; + WORD8 i1_left_pred_mode; + UWORD8 *pu1_top, *pu1_left, *pu1_top_left, *pu1_top_right; + WORD8 *pi1_cur_pred_mode, *pi1_left_pred_mode, *pc_topPredMode; + UWORD16 ui2_left_pred_buf_width = 0xffff; + WORD8 i1_intra_pred; + UWORD8 *pu1_prev_intra4x4_pred_mode_flag = pu1_prev_intra4x4_pred_mode_data; + UWORD8 *pu1_rem_intra4x4_pred_mode = pu1_prev_intra4x4_pred_mode_data + 16; + WORD16 *pi2_y_coeff1; + UWORD8 u1_cur_sub_block; + UWORD16 ui2_top_rt_mask; + + /*--------------------------------------------------------------------*/ + /* 4x4 IntraPrediction */ + /*--------------------------------------------------------------------*/ + /* Calculation of Top Right subblock mask */ + /* */ + /* (a) Set it to default mask */ + /* [It has 0 for sublocks which will never have top-right sub block] */ + /* */ + /* (b) If top MB is not available */ + /* Clear the bits of the first row sub blocks */ + /* */ + /* (c) Set/Clear bit for top-right sublock of MB */ + /* [5 sub-block in decoding order] based on TOP RIGHT MB 
availablity */ + /*--------------------------------------------------------------------*/ + + pu1_top = puc_top; + + ui2_top_rt_mask = (u1_use_top_right_mb << 3) | (0x5750); + if(uc_useTopMB) + ui2_top_rt_mask |= 0x7; + + /*Top Related initialisations*/ + + + pi1_cur_pred_mode = ps_cur_mb_info->ps_curmb->pi1_intrapredmodes; + pc_topPredMode = ps_cur_mb_info->ps_top_mb->pi1_intrapredmodes; + /*-------------------------------------- + if(u1_mbaff) + { + + pi1_cur_pred_mode += (u2_mbx << 2); + pc_topPredMode = pi1_cur_pred_mode + ps_cur_mb_info->i1_offset; + pi1_cur_pred_mode += (u1_topmb) ? 0: 4; + }*/ + + if(u1_top_available) + { + if(ps_top_mb->u1_mb_type == I_4x4_MB) + *(WORD32*)pi1_cur_pred_mode = *(WORD32*)pc_topPredMode; + else + *(WORD32*)pi1_cur_pred_mode = + (uc_useTopMB) ? DC_DC_DC_DC : NOT_VALID; + } + else + *(WORD32*)pi1_cur_pred_mode = NOT_VALID; + /* CHANGED CODE */ + + /* CHANGED CODE */ + + /*Left Related initialisations*/ + pi1_left_pred_mode = ps_dec->pi1_left_pred_mode; + if(!u1_mbaff) + { + + if(u1_left_available) + { + + if(ps_left_mb->u1_mb_type != I_4x4_MB) + *(WORD32*)pi1_left_pred_mode = + (u2_use_left_mb_pack) ? 
+ DC_DC_DC_DC : + NOT_VALID; + + } + else + { + + *(WORD32*)pi1_left_pred_mode = NOT_VALID; + } + + } + else + { + UWORD8 u1_curMbfld = ps_cur_mb_info->u1_mb_field_decodingflag; + UWORD8 u1_leftMbfld = ps_left_mb->u1_mb_fld; + + if(u1_curMbfld ^ u1_leftMbfld) + { + + if(u1_topmb + | ((u1_topmb == 0) + && ((ps_curmb - 1)->u1_mb_type + != I_4x4_MB))) + { + if(u1_left_available) + { + if(ps_left_mb->u1_mb_type != I_4x4_MB) + { + if(CHECKBIT(u2_use_left_mb_pack,0) == 0) + *(WORD32*)pi1_left_pred_mode = NOT_VALID; + else + *(WORD32*)pi1_left_pred_mode = DC_DC_DC_DC; + } + } + else + *(WORD32*)pi1_left_pred_mode = NOT_VALID; + + if(u1_curMbfld) + { + if(u1_left_available) + { + if((ps_left_mb + 1)->u1_mb_type != I_4x4_MB) + { + if(u2_use_left_mb_pack >> 8) + *(WORD32*)(pi1_left_pred_mode + 4) = + DC_DC_DC_DC; + else + *(WORD32*)(pi1_left_pred_mode + 4) = + NOT_VALID; + } + } + else + *(WORD32*)(pi1_left_pred_mode + 4) = NOT_VALID; + pi1_left_pred_mode[1] = pi1_left_pred_mode[2]; + pi1_left_pred_mode[2] = pi1_left_pred_mode[4]; + pi1_left_pred_mode[3] = pi1_left_pred_mode[6]; + *(WORD32*)(pi1_left_pred_mode + 4) = + *(WORD32*)pi1_left_pred_mode; + } + else + { + + pi1_left_pred_mode[7] = pi1_left_pred_mode[3]; + pi1_left_pred_mode[6] = pi1_left_pred_mode[3]; + pi1_left_pred_mode[5] = pi1_left_pred_mode[2]; + pi1_left_pred_mode[4] = pi1_left_pred_mode[2]; + pi1_left_pred_mode[3] = pi1_left_pred_mode[1]; + pi1_left_pred_mode[2] = pi1_left_pred_mode[1]; + pi1_left_pred_mode[1] = pi1_left_pred_mode[0]; + } + } + pi1_left_pred_mode += (u1_topmb) ? 0 : 4; + } + else + { + + pi1_left_pred_mode += (u1_topmb) ? 0 : 4; + if(u1_left_available) + { + + if(ps_left_mb->u1_mb_type != I_4x4_MB) + *(WORD32*)pi1_left_pred_mode = + (u2_use_left_mb_pack) ? 
+ DC_DC_DC_DC : + NOT_VALID; + } + else + *(WORD32*)pi1_left_pred_mode = NOT_VALID; + } + } + /* One time pointer initialisations*/ + pi2_y_coeff1 = pi2_y_coeff; + pu1_top_left = pu1_ytop_left; + + /* Scan the sub-blocks in Raster Scan Order */ + for(u1_sub_mb_num = 0; u1_sub_mb_num < 16; u1_sub_mb_num++) + { + UWORD8 au1_ngbr_pels[13]; + + u1_sub_blk_x = u1_sub_mb_num & 0x3; + u1_sub_blk_y = u1_sub_mb_num >> 2; + i1_top_pred_mode = pi1_cur_pred_mode[u1_sub_blk_x]; + i1_left_pred_mode = pi1_left_pred_mode[u1_sub_blk_y]; + u1_use_top_right_mb = (!!CHECKBIT(ui2_top_rt_mask, u1_sub_mb_num)); + + /*********** left subblock availability**********/ + if(u1_sub_blk_x) + u1_is_left_sub_block = 1; + else + u1_is_left_sub_block = + (u1_sub_blk_y < 2) ? + (CHECKBIT(u2_use_left_mb_pack, + 0)) : + (u2_use_left_mb_pack >> 8); + + /* CHANGED CODE */ + if(u1_sub_blk_y) + u1_is_top_sub_block = 1; + + /* CHANGED CODE */ + /***************** Top *********************/ + if(ps_dec->u4_use_intrapred_line_copy == 1) + { + + if(u1_sub_blk_y) + pu1_top = pu1_luma_rei1_buffer - ui_rec_width; + else + pu1_top = puc_top + (u1_sub_blk_x << 2); + } + else + { + pu1_top = pu1_luma_rei1_buffer - ui_rec_width; + } + /***************** Top Right *********************/ + pu1_top_right = pu1_top + 4; + /***************** Top Left *********************/ + pu1_top_left = pu1_top - 1; + /***************** Left *********************/ + pu1_left = pu1_luma_rei1_buffer - 1; + /* CHANGED CODE */ + + /*---------------------------------------------------------------*/ + /* Calculation of Intra prediction mode */ + /*---------------------------------------------------------------*/ + i1_intra_pred = ((i1_left_pred_mode < 0) | (i1_top_pred_mode < 0)) ? 
+ DC : MIN(i1_left_pred_mode, i1_top_pred_mode); + { + UWORD8 u1_packed_modes = (u1_is_top_sub_block << 1) + + u1_is_left_sub_block; + UWORD8 *pu1_intra_err_codes = + (UWORD8 *)gau1_ih264d_intra_pred_err_code; + UWORD8 uc_b2b0 = ((u1_sub_mb_num & 4) >> 1) | (u1_sub_mb_num & 1); + UWORD8 uc_b3b1 = ((u1_sub_mb_num & 8) >> 2) + | ((u1_sub_mb_num & 2) >> 1); + + u1_cur_sub_block = (uc_b3b1 << 2) + uc_b2b0; + PROFILE_DISABLE_INTRA_PRED() + if(!pu1_prev_intra4x4_pred_mode_flag[u1_cur_sub_block]) + { + i1_intra_pred = + pu1_rem_intra4x4_pred_mode[u1_cur_sub_block] + + (pu1_rem_intra4x4_pred_mode[u1_cur_sub_block] + >= i1_intra_pred); + } + { + UWORD8 u1_err_code = pu1_intra_err_codes[i1_intra_pred]; + + /*if((u1_err_code & u1_packed_modes) ^ u1_err_code) + { + }*/ + + } + } + { + /* Get neighbour pixels */ + /* left pels */ + if(u1_is_left_sub_block) + { + WORD32 i; + for(i = 0; i < 4; i++) + au1_ngbr_pels[4 - 1 - i] = pu1_left[i * ui_rec_width]; + } + else + { + memset(au1_ngbr_pels, 0, 4); + } + + /* top left pels */ + au1_ngbr_pels[4] = *pu1_top_left; + + /* top pels */ + if(u1_is_top_sub_block) + { + memcpy(au1_ngbr_pels + 4 + 1, pu1_top, 4); + } + else + { + memset(au1_ngbr_pels + 4 + 1, 0, 4); + } + + /* top right pels */ + if(u1_use_top_right_mb) + { + memcpy(au1_ngbr_pels + 4 * 2 + 1, pu1_top_right, 4); + } + else if(u1_is_top_sub_block) + { + memset(au1_ngbr_pels + 4 * 2 + 1, au1_ngbr_pels[4 * 2], 4); + } + } + PROFILE_DISABLE_INTRA_PRED() + ps_dec->apf_intra_pred_luma_4x4[i1_intra_pred]( + au1_ngbr_pels, pu1_luma_rei1_buffer, 1, + ui_rec_width, + ((u1_is_top_sub_block << 2) | u1_is_left_sub_block)); + + /* CHANGED CODE */ + if(CHECKBIT(ui2_luma_csbp, u1_sub_mb_num)) + { + WORD16 ai2_tmp[16]; + PROFILE_DISABLE_IQ_IT_RECON() + { + if(CHECKBIT(u4_luma_dc_only_csbp, u1_sub_mb_num)) + { + ps_dec->pf_iquant_itrans_recon_luma_4x4_dc( + pi2_y_coeff1, + pu1_luma_rei1_buffer, + pu1_luma_rei1_buffer, + ui_rec_width, + ui_rec_width, + 
gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[0], + ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0, + NULL); + } + else + { + ps_dec->pf_iquant_itrans_recon_luma_4x4( + pi2_y_coeff1, + pu1_luma_rei1_buffer, + pu1_luma_rei1_buffer, + ui_rec_width, + ui_rec_width, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[0], + ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0, + NULL); + } + } + + } + + /*---------------------------------------------------------------*/ + /* Update sub block number */ + /*---------------------------------------------------------------*/ + pi2_y_coeff1 += 16; + pu1_luma_rei1_buffer += + (u1_sub_blk_x == 3) ? (ui_rec_width << 2) - 12 : 4; + pu1_luma_pred_buffer += + (u1_sub_blk_x == 3) ? (ui_pred_width << 2) - 12 : 4; + /* CHANGED CODE */ + pi1_cur_pred_mode[u1_sub_blk_x] = i1_intra_pred; + pi1_left_pred_mode[u1_sub_blk_y] = i1_intra_pred; + } + } + else if((u1_mb_type == I_4x4_MB) && (ps_cur_mb_info->u1_tran_form8x8 == 1)) + { + UWORD8 u1_is_left_sub_block, u1_is_top_sub_block = uc_useTopMB; + UWORD8 u1_sub_blk_x, u1_sub_blk_y, u1_sub_mb_num; + WORD8 i1_top_pred_mode; + WORD8 i1_left_pred_mode; + UWORD8 *pu1_top, *pu1_left, *pu1_top_left; + WORD8 *pi1_cur_pred_mode, *pi1_left_pred_mode, *pc_topPredMode; + UWORD16 ui2_left_pred_buf_width = 0xffff; + WORD8 i1_intra_pred; + UWORD8 *pu1_prev_intra4x4_pred_mode_flag = pu1_prev_intra4x4_pred_mode_data; + UWORD8 *pu1_rem_intra4x4_pred_mode = pu1_prev_intra4x4_pred_mode_data + 4; + WORD16 *pi2_y_coeff1; + UWORD16 ui2_top_rt_mask; + UWORD32 u4_4x4_left_offset = 0; + + /*--------------------------------------------------------------------*/ + /* 8x8 IntraPrediction */ + /*--------------------------------------------------------------------*/ + /* Calculation of Top Right subblock mask */ + /* */ + /* (a) Set it to default mask */ + /* [It has 0 for sublocks which will never have top-right sub 
block] */ + /* */ + /* (b) If top MB is not available */ + /* Clear the bits of the first row sub blocks */ + /* */ + /* (c) Set/Clear bit for top-right sublock of MB */ + /* [5 sub-block in decoding order] based on TOP RIGHT MB availablity */ + /* */ + /* ui2_top_rt_mask: marks availibility of top right(neighbour) */ + /* in the 8x8 Block ordering */ + /* */ + /* tr0 tr1 */ + /* 0 1 tr3 */ + /* 2 3 */ + /* */ + /* Top rights for 0 is in top MB */ + /* top right of 1 will be in top right MB */ + /* top right of 3 in right MB and hence not available */ + /* This corresponds to ui2_top_rt_mask having default value 0x4 */ + /*--------------------------------------------------------------------*/ + + ui2_top_rt_mask = (u1_use_top_right_mb << 1) | (0x4); + + if(uc_useTopMB) + { + ui2_top_rt_mask |= 0x1; + } + + /* Top Related initialisations */ + pi1_cur_pred_mode = ps_cur_mb_info->ps_curmb->pi1_intrapredmodes; + pc_topPredMode = ps_cur_mb_info->ps_top_mb->pi1_intrapredmodes; + /* + if(u1_mbaff) + { + pi1_cur_pred_mode += (u2_mbx << 2); + pc_topPredMode = pi1_cur_pred_mode + ps_cur_mb_info->i1_offset; + pi1_cur_pred_mode += (u1_topmb) ? 0: 4; + } + */ + if(u1_top_available) + { + if(ps_top_mb->u1_mb_type == I_4x4_MB) + { + *(WORD32*)pi1_cur_pred_mode = *(WORD32*)pc_topPredMode; + } + else + { + *(WORD32*)pi1_cur_pred_mode = + (uc_useTopMB) ? DC_DC_DC_DC : NOT_VALID; + } + } + else + { + *(WORD32*)pi1_cur_pred_mode = NOT_VALID; + } + + pu1_top = puc_top - 8; + + /*Left Related initialisations*/ + pi1_left_pred_mode = ps_dec->pi1_left_pred_mode; + + if(!u1_mbaff) + { + if(u1_left_available) + { + if(ps_left_mb->u1_mb_type != I_4x4_MB) + { + *(WORD32*)pi1_left_pred_mode = + (u2_use_left_mb_pack) ? 
+ DC_DC_DC_DC : + NOT_VALID; + } + } + else + { + *(WORD32*)pi1_left_pred_mode = NOT_VALID; + } + } + else + { + UWORD8 u1_curMbfld = ps_cur_mb_info->u1_mb_field_decodingflag; + + UWORD8 u1_leftMbfld = ps_left_mb->u1_mb_fld; + + if((!u1_curMbfld) && (u1_leftMbfld)) + { + u4_4x4_left_offset = 1; + } + + if(u1_curMbfld ^ u1_leftMbfld) + { + + if(u1_topmb + | ((u1_topmb == 0) + && ((ps_curmb - 1)->u1_mb_type + != I_4x4_MB))) + + { + if(u1_left_available) + { + if(ps_left_mb->u1_mb_type != I_4x4_MB) + { + if(CHECKBIT(u2_use_left_mb_pack,0) == 0) + { + *(WORD32*)pi1_left_pred_mode = NOT_VALID; + } + else + { + *(WORD32*)pi1_left_pred_mode = DC_DC_DC_DC; + } + } + } + else + { + *(WORD32*)pi1_left_pred_mode = NOT_VALID; + } + + if(u1_curMbfld) + { + if(u1_left_available) + { + if((ps_left_mb + 1)->u1_mb_type != I_4x4_MB) + { + if(u2_use_left_mb_pack >> 8) + { + *(WORD32*)(pi1_left_pred_mode + 4) = + DC_DC_DC_DC; + } + else + { + *(WORD32*)(pi1_left_pred_mode + 4) = + NOT_VALID; + } + } + } + else + { + *(WORD32*)(pi1_left_pred_mode + 4) = NOT_VALID; + } + + pi1_left_pred_mode[1] = pi1_left_pred_mode[2]; + pi1_left_pred_mode[2] = pi1_left_pred_mode[4]; + pi1_left_pred_mode[3] = pi1_left_pred_mode[6]; + *(WORD32*)(pi1_left_pred_mode + 4) = + *(WORD32*)pi1_left_pred_mode; + } + else + { + pi1_left_pred_mode[7] = pi1_left_pred_mode[3]; + pi1_left_pred_mode[6] = pi1_left_pred_mode[3]; + pi1_left_pred_mode[5] = pi1_left_pred_mode[2]; + pi1_left_pred_mode[4] = pi1_left_pred_mode[2]; + pi1_left_pred_mode[3] = pi1_left_pred_mode[1]; + pi1_left_pred_mode[2] = pi1_left_pred_mode[1]; + pi1_left_pred_mode[1] = pi1_left_pred_mode[0]; + } + } + pi1_left_pred_mode += (u1_topmb) ? 0 : 4; + } + else + { + pi1_left_pred_mode += (u1_topmb) ? 0 : 4; + + if(u1_left_available) + { + if(ps_left_mb->u1_mb_type != I_4x4_MB) + { + *(WORD32*)pi1_left_pred_mode = + (u2_use_left_mb_pack) ? 
+ DC_DC_DC_DC : + NOT_VALID; + } + } + else + { + *(WORD32*)pi1_left_pred_mode = NOT_VALID; + } + } + } + + /* One time pointer initialisations*/ + pi2_y_coeff1 = pi2_y_coeff; + + if(u1_use_top_left_mb) + { + pu1_top_left = pu1_ytop_left; + } + else + { + pu1_top_left = NULL; + } + + /* Scan the sub-blocks in Raster Scan Order */ + for(u1_sub_mb_num = 0; u1_sub_mb_num < 4; u1_sub_mb_num++) + { + u1_sub_blk_x = (u1_sub_mb_num & 0x1); + u1_sub_blk_y = (u1_sub_mb_num >> 1); + i1_top_pred_mode = pi1_cur_pred_mode[u1_sub_blk_x << 1]; + i1_left_pred_mode = pi1_left_pred_mode[u1_sub_blk_y << 1]; + + if(2 == u1_sub_mb_num) + { + i1_left_pred_mode = pi1_left_pred_mode[(u1_sub_blk_y << 1) + + u4_4x4_left_offset]; + } + + u1_use_top_right_mb = (!!CHECKBIT(ui2_top_rt_mask, u1_sub_mb_num)); + + /*********** left subblock availability**********/ + if(u1_sub_blk_x) + { + u1_is_left_sub_block = 1; + } + else + { + u1_is_left_sub_block = + (u1_sub_blk_y < 1) ? + (CHECKBIT(u2_use_left_mb_pack, + 0)) : + (u2_use_left_mb_pack >> 8); + } + + /***************** Top *********************/ + if(u1_sub_blk_y) + { + u1_is_top_sub_block = 1; + // sushant + pu1_top = /*pu1_luma_pred_buffer*/pu1_luma_rei1_buffer - ui_rec_width; + } + else + { + pu1_top += 8; + } + + /***************** Left *********************/ + if((u1_sub_blk_x) | (u4_num_pmbair != 0)) + { + // sushant + pu1_left = /*pu1_luma_pred_buffer*/pu1_luma_rei1_buffer - 1; + ui2_left_pred_buf_width = ui_rec_width; + } + else + { + pu1_left = pu1_yleft; + pu1_yleft += (ui_rec_width << 3); + ui2_left_pred_buf_width = ui_rec_width; + } + + /***************** Top Left *********************/ + if(u1_sub_mb_num) + { + pu1_top_left = (u1_sub_blk_x) ? 
+ pu1_top - 1 : pu1_left - ui_rec_width; + + if((u1_sub_blk_x && (!u1_is_top_sub_block)) + || ((!u1_sub_blk_x) && (!u1_is_left_sub_block))) + { + pu1_top_left = NULL; + } + } + + /*---------------------------------------------------------------*/ + /* Calculation of Intra prediction mode */ + /*---------------------------------------------------------------*/ + i1_intra_pred = ((i1_left_pred_mode < 0) | (i1_top_pred_mode < 0)) ? + DC : MIN(i1_left_pred_mode, i1_top_pred_mode); + { + UWORD8 u1_packed_modes = (u1_is_top_sub_block << 1) + + u1_is_left_sub_block; + UWORD8 *pu1_intra_err_codes = + (UWORD8 *)gau1_ih264d_intra_pred_err_code; + + /********************************************************************/ + /* Same intra4x4_pred_mode array is filled with intra4x4_pred_mode */ + /* for a MB with 8x8 intrapredicition */ + /********************************************************************/ + PROFILE_DISABLE_INTRA_PRED() + if(!pu1_prev_intra4x4_pred_mode_flag[u1_sub_mb_num]) + { + i1_intra_pred = pu1_rem_intra4x4_pred_mode[u1_sub_mb_num] + + (pu1_rem_intra4x4_pred_mode[u1_sub_mb_num] + >= i1_intra_pred); + } + { + UWORD8 u1_err_code = pu1_intra_err_codes[i1_intra_pred]; + + if((u1_err_code & u1_packed_modes) ^ u1_err_code) + { + return ERROR_INTRAPRED; + } + } + } + + { + UWORD8 au1_ngbr_pels[25]; + WORD32 ngbr_avail; + ngbr_avail = u1_is_left_sub_block << 0; + ngbr_avail |= u1_is_top_sub_block << 2; + + if(pu1_top_left) + ngbr_avail |= 1 << 1; + + ngbr_avail |= u1_use_top_right_mb << 3; + PROFILE_DISABLE_INTRA_PRED() + { + ps_dec->pf_intra_pred_ref_filtering(pu1_left, pu1_top_left, + pu1_top, au1_ngbr_pels, + ui2_left_pred_buf_width, + ngbr_avail); + + ps_dec->apf_intra_pred_luma_8x8[i1_intra_pred]( + au1_ngbr_pels, pu1_luma_rei1_buffer, 1, + ui_rec_width, + ((u1_is_top_sub_block << 2) | u1_is_left_sub_block)); + } + } + + /* Inverse Transform and Reconstruction */ + if(CHECKBIT(ps_cur_mb_info->u1_cbp, u1_sub_mb_num)) + { + WORD16 *pi2_scale_matrix_ptr; + WORD16 
ai2_tmp[64]; + + pi2_scale_matrix_ptr = + ps_dec->s_high_profile.i2_scalinglist8x8[0]; + PROFILE_DISABLE_IQ_IT_RECON() + { + if(CHECKBIT(u4_luma_dc_only_cbp, u1_sub_mb_num)) + { + ps_dec->pf_iquant_itrans_recon_luma_8x8_dc( + pi2_y_coeff1, + pu1_luma_rei1_buffer, + pu1_luma_rei1_buffer, + ui_rec_width, + ui_rec_width, + gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6], + (UWORD16 *)pi2_scale_matrix_ptr, + ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0, + NULL); + } + else + { + ps_dec->pf_iquant_itrans_recon_luma_8x8( + pi2_y_coeff1, + pu1_luma_rei1_buffer, + pu1_luma_rei1_buffer, + ui_rec_width, + ui_rec_width, + gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6], + (UWORD16 *)pi2_scale_matrix_ptr, + ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0, + NULL); + } + } + + } + + /*---------------------------------------------------------------*/ + /* Update sub block number */ + /*---------------------------------------------------------------*/ + pi2_y_coeff1 += 64; + + pu1_luma_rei1_buffer += + (u1_sub_blk_x == 1) ? + (ui_rec_width << 3) - (8 * 1) : 8; + + /*---------------------------------------------------------------*/ + /* Pred mode filled in terms of 4x4 block so replicated in 2 */ + /* locations. 
*/ + /*---------------------------------------------------------------*/ + pi1_cur_pred_mode[u1_sub_blk_x << 1] = i1_intra_pred; + pi1_cur_pred_mode[(u1_sub_blk_x << 1) + 1] = i1_intra_pred; + pi1_left_pred_mode[u1_sub_blk_y << 1] = i1_intra_pred; + pi1_left_pred_mode[(u1_sub_blk_y << 1) + 1] = i1_intra_pred; + } + } + /* Decode Chroma Block */ + ih264d_unpack_chroma_coeff4x4_mb(ps_dec, + ps_cur_mb_info); + /*--------------------------------------------------------------------*/ + /* Chroma Blocks decoding */ + /*--------------------------------------------------------------------*/ + { + UWORD8 u1_intra_chrom_pred_mode; + UWORD8 u1_chroma_cbp = (UWORD8)(ps_cur_mb_info->u1_cbp >> 4); + + /*--------------------------------------------------------------------*/ + /* Perform Chroma intra prediction */ + /*--------------------------------------------------------------------*/ + + u1_intra_chrom_pred_mode = CHROMA_TO_LUMA_INTRA_MODE( + ps_cur_mb_info->u1_chroma_pred_mode); + + { + UWORD8 u1_packed_modes = (u1_top_available << 1) + + u1_left_available; + UWORD8 u1_err_code = + (u1_intra_chrom_pred_mode & 1) ? + u1_intra_chrom_pred_mode : + (u1_intra_chrom_pred_mode ^ 2); + if((u1_err_code & u1_packed_modes) ^ u1_err_code) + return ERROR_INTRAPRED; + } + + /* CHANGED CODE */ + if(u1_chroma_cbp != CBPC_ALLZERO) + { + UWORD16 u2_chroma_csbp = + (u1_chroma_cbp == CBPC_ACZERO) ? 
+ 0 : ps_cur_mb_info->u2_chroma_csbp; + UWORD32 u4_scale_u; + UWORD32 u4_scale_v; + + { + UWORD16 au2_ngbr_pels[33]; + UWORD8 *pu1_ngbr_pels = (UWORD8 *)au2_ngbr_pels; + UWORD16 *pu2_left_uv; + UWORD16 *pu2_topleft_uv; + WORD32 use_left1 = (u2_use_left_mb_pack & 0x0ff); + WORD32 use_left2 = (u2_use_left_mb_pack & 0xff00) >> 8; + + pu2_left_uv = (UWORD16 *)pu1_uleft; + pu2_topleft_uv = (UWORD16 *)pu1_u_top_left; + /* Get neighbour pixels */ + /* left pels */ + if(u2_use_left_mb_pack) + { + WORD32 i; + if(use_left1) + { + for(i = 0; i < 4; i++) + au2_ngbr_pels[8 - 1 - i] = pu2_left_uv[i + * u4_recwidth_cr / YUV420SP_FACTOR]; + } + else + { + memset(au2_ngbr_pels + 4, 0, 4 * sizeof(UWORD16)); + } + + if(use_left2) + { + for(i = 4; i < 8; i++) + au2_ngbr_pels[8 - 1 - i] = pu2_left_uv[i + * u4_recwidth_cr / YUV420SP_FACTOR]; + } + else + { + memset(au2_ngbr_pels, 0, 4 * sizeof(UWORD16)); + } + } + else + { + memset(au2_ngbr_pels, 0, 8 * sizeof(UWORD16)); + } + + /* top left pels */ + au2_ngbr_pels[8] = *pu2_topleft_uv; + + /* top pels */ + if(uc_useTopMB) + { + memcpy(au2_ngbr_pels + 8 + 1, pu1_top_u, + 8 * sizeof(UWORD16)); + } + else + { + memset(au2_ngbr_pels + 8 + 1, 0, 8 * sizeof(UWORD16)); + } + + PROFILE_DISABLE_INTRA_PRED() + ps_dec->apf_intra_pred_chroma[u1_intra_chrom_pred_mode]( + pu1_ngbr_pels, + pu1_mb_cb_rei1_buffer, + 1, + u4_recwidth_cr, + ((uc_useTopMB << 2) | (use_left2 << 4) + | use_left1)); + } + u4_scale_u = ps_cur_mb_info->u1_qpc_div6; + u4_scale_v = ps_cur_mb_info->u1_qpcr_div6; + pi2_y_coeff = ps_dec->pi2_coeff_data; + + { + UWORD32 i; + WORD16 ai2_tmp[16]; + for(i = 0; i < 4; i++) + { + WORD16 *pi2_level = pi2_y_coeff + (i << 4); + UWORD8 *pu1_pred_sblk = pu1_mb_cb_rei1_buffer + + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) + + (i >> 1) * (u4_recwidth_cr << 2); + PROFILE_DISABLE_IQ_IT_RECON() + { + if(CHECKBIT(u2_chroma_csbp, i)) + { + ps_dec->pf_iquant_itrans_recon_chroma_4x4( + pi2_level, + pu1_pred_sblk, + pu1_pred_sblk, + u4_recwidth_cr, + 
u4_recwidth_cr, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[1], + u4_scale_u, ai2_tmp, pi2_level); + } + else if(pi2_level[0] != 0) + { + ps_dec->pf_iquant_itrans_recon_chroma_4x4_dc( + pi2_level, + pu1_pred_sblk, + pu1_pred_sblk, + u4_recwidth_cr, + u4_recwidth_cr, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[1], + u4_scale_u, ai2_tmp, pi2_level); + } + } + + } + } + + pi2_y_coeff += MB_CHROM_SIZE; + u2_chroma_csbp = u2_chroma_csbp >> 4; + { + UWORD32 i; + WORD16 ai2_tmp[16]; + for(i = 0; i < 4; i++) + { + WORD16 *pi2_level = pi2_y_coeff + (i << 4); + UWORD8 *pu1_pred_sblk = pu1_mb_cb_rei1_buffer + 1 + + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) + + (i >> 1) * (u4_recwidth_cr << 2); + PROFILE_DISABLE_IQ_IT_RECON() + { + if(CHECKBIT(u2_chroma_csbp, i)) + { + ps_dec->pf_iquant_itrans_recon_chroma_4x4( + pi2_level, + pu1_pred_sblk, + pu1_pred_sblk, + u4_recwidth_cr, + u4_recwidth_cr, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[2], + u4_scale_v, ai2_tmp, pi2_level); + } + else if(pi2_level[0] != 0) + { + ps_dec->pf_iquant_itrans_recon_chroma_4x4_dc( + pi2_level, + pu1_pred_sblk, + pu1_pred_sblk, + u4_recwidth_cr, + u4_recwidth_cr, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[2], + u4_scale_v, ai2_tmp, pi2_level); + } + } + } + } + + } + else + { + /* If no inverse transform is needed, pass recon buffer pointer */ + /* to Intraprediction module instead of pred buffer pointer */ + { + UWORD16 au2_ngbr_pels[33]; + UWORD8 *pu1_ngbr_pels = (UWORD8 *)au2_ngbr_pels; + UWORD16 *pu2_left_uv; + UWORD16 *pu2_topleft_uv; + pu2_topleft_uv = (UWORD16 *)pu1_u_top_left; + pu2_left_uv = (UWORD16 *)pu1_uleft; + WORD32 use_left1 = (u2_use_left_mb_pack & 0x0ff); + WORD32 use_left2 = (u2_use_left_mb_pack & 0xff00) 
>> 8; + + /* Get neighbour pixels */ + /* left pels */ + if(u2_use_left_mb_pack) + { + WORD32 i; + if(use_left1) + { + for(i = 0; i < 4; i++) + au2_ngbr_pels[8 - 1 - i] = pu2_left_uv[i + * u4_recwidth_cr / YUV420SP_FACTOR]; + } + else + { + memset(au2_ngbr_pels + 4, 0, 4 * sizeof(UWORD16)); + } + + if(use_left2) + { + for(i = 4; i < 8; i++) + au2_ngbr_pels[8 - 1 - i] = pu2_left_uv[i + * u4_recwidth_cr / YUV420SP_FACTOR]; + } + else + { + memset(au2_ngbr_pels, 0, 4 * sizeof(UWORD16)); + } + + } + else + { + memset(au2_ngbr_pels, 0, 8 * sizeof(UWORD16)); + } + + /* top left pels */ + au2_ngbr_pels[8] = *pu2_topleft_uv; + + /* top pels */ + if(uc_useTopMB) + { + memcpy(au2_ngbr_pels + 8 + 1, pu1_top_u, + 8 * sizeof(UWORD16)); + } + else + { + memset(au2_ngbr_pels + 8 + 1, 0, 8 * sizeof(UWORD16)); + } + + PROFILE_DISABLE_INTRA_PRED() + ps_dec->apf_intra_pred_chroma[u1_intra_chrom_pred_mode]( + pu1_ngbr_pels, + pu1_mb_cb_rei1_buffer, + 1, + u4_recwidth_cr, + ((uc_useTopMB << 2) | (use_left2 << 4) + | use_left1)); + } + + } + + } + return OK; +} diff --git a/decoder/ih264d_process_intra_mb.h b/decoder/ih264d_process_intra_mb.h new file mode 100755 index 0000000..30d7819 --- /dev/null +++ b/decoder/ih264d_process_intra_mb.h @@ -0,0 +1,65 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/*!
 **************************************************************************
 * \brief
 *    Remaps a 2-bit intra prediction mode value by toggling bit 1 whenever
 *    bit 0 is clear: 0 -> 2, 1 -> 1, 2 -> 0, 3 -> 3.
 *
 *    ih264d_process_intra_mb() applies it to
 *    ps_cur_mb_info->u1_chroma_pred_mode and uses the result both for the
 *    neighbour-availability check and as the index into
 *    ps_dec->apf_intra_pred_chroma[].
 *
 * \note
 *    The argument is fully parenthesised so that expressions containing
 *    low-precedence operators (|, &, <<, ?:) expand correctly. It is still
 *    evaluated twice, so do not pass an expression with side effects.
 **************************************************************************
 */
#define CHROMA_TO_LUMA_INTRA_MODE(x)   ((x) ^ ((!((x) & 0x01)) << 1))

/*!
 **************************************************************************
 * \brief
 *    Returns the two least-significant bits of (x - 1).
 *
 *    ih264d_process_intra_mb() applies it to u1_mb_type for macroblocks that
 *    are not I_4x4_MB and uses the result as the index into
 *    ps_dec->apf_intra_pred_luma_16x16[].
 *
 * \note
 *    The argument is parenthesised so that the subtraction always applies to
 *    the whole argument expression (e.g. MB_TYPE_TO_INTRA_16x16_MODE(1 << 2)
 *    is ((1 << 2) - 1) & 3 == 3, not (1 << (2 - 1)) & 3 == 2).
 **************************************************************************
 */
#define MB_TYPE_TO_INTRA_16x16_MODE(x) (((x) - 1) & 0x03)
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! + ************************************************************************** + * \file ih264d_process_pslice.c + * + * \brief + * Contains routines that decode a I slice type + * + * Detailed_description + * + * \date + * 21/12/2002 + * + * \author NS + ************************************************************************** + */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" + +#include <string.h> +#include "ih264d_bitstrm.h" +#include "ih264d_defs.h" +#include "ih264d_debug.h" +#include "ih264d_structs.h" +#include "ih264d_defs.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_mb_utils.h" +#include "ih264d_deblocking.h" +#include "ih264d_dpb_manager.h" +#include "ih264d_mvpred.h" +#include "ih264d_inter_pred.h" +#include "ih264d_process_pslice.h" +#include "ih264d_error_handler.h" +#include "ih264d_cabac.h" +#include "ih264d_debug.h" +#include "ih264d_tables.h" +#include "ih264d_parse_slice.h" +#include "ih264d_utils.h" +#include "ih264d_parse_islice.h" +#include "ih264d_process_bslice.h" +#include "ih264d_process_intra_mb.h" + +void ih264d_init_cabac_contexts(UWORD8 u1_slice_type, dec_struct_t * ps_dec); + +void ih264d_insert_pic_in_ref_pic_listx(struct pic_buffer_t *ps_ref_pic_buf_lx, + struct pic_buffer_t *ps_pic) +{ + *ps_ref_pic_buf_lx = *ps_pic; +} + +WORD32 ih264d_mv_pred_ref_tfr_nby2_pmb(dec_struct_t * ps_dec, + UWORD8 
u1_mb_idx, + UWORD8 u1_num_mbs) +{ + parse_pmbarams_t * ps_mb_part_info; + parse_part_params_t * ps_part; + mv_pred_t *ps_mv_nmb, *ps_mv_nmb_start, *ps_mv_ntop, *ps_mv_ntop_start; + UWORD32 i, j; + const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + dec_mb_info_t * ps_cur_mb_info; + WORD32 i2_mv_x, i2_mv_y; + WORD32 ret; + + ps_dec->i4_submb_ofst -= (u1_num_mbs - u1_mb_idx) << 4; + ps_mb_part_info = ps_dec->ps_parse_mb_data; // + u1_mb_idx; + ps_part = ps_dec->ps_parse_part_params; // + u1_mb_idx; + + /* N/2 Mb MvPred and Transfer Setup Loop */ + for(i = u1_mb_idx; i < u1_num_mbs; i++, ps_mb_part_info++) + { + UWORD32 u1_colz; + UWORD32 u1_field; + mv_pred_t s_mvPred; + mv_pred_t *ps_mv_pred = &s_mvPred; + + + + *ps_mv_pred = ps_dec->s_default_mv_pred; + + ps_dec->i4_submb_ofst += SUB_BLK_SIZE; + ps_dec->u2_wait_id = i; + + /* Restore the slice scratch MbX and MbY context */ + ps_cur_mb_info = ps_dec->ps_nmb_info + i; + u1_field = ps_cur_mb_info->u1_mb_field_decodingflag; + + + + ps_mv_nmb_start = ps_dec->ps_mv_cur + (i << 4); + ps_dec->u2_mbx = ps_cur_mb_info->u2_mbx; + ps_dec->u2_mby = ps_cur_mb_info->u2_mby; + ps_dec->u2_mv_2mb[i & 0x1] = 0; + + /* Look for MV Prediction and Reference Transfer in Non-I Mbs */ + if(!ps_mb_part_info->u1_isI_mb) + { + UWORD32 u1_blk_no; + WORD32 i1_ref_idx, i1_ref_idx1; + UWORD32 u1_sub_mb_x, u1_sub_mb_y, u1_sub_mb_num; + UWORD32 u1_num_part, u1_num_ref, u1_wd, u1_ht; + UWORD32 *pu4_wt_offst, **ppu4_wt_ofst; + UWORD32 u1_scale_ref, u4_bot_mb; + WORD8 *pi1_ref_idx = ps_mb_part_info->i1_ref_idx[0]; + pic_buffer_t *ps_ref_frame, **pps_ref_frame; + deblk_mb_t * ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + i; + + /* MB Level initialisations */ + ps_dec->u4_num_pmbair = i >> u1_mbaff; + ps_dec->u1_mb_idx_mv = i; + ppu4_wt_ofst = ps_mb_part_info->pu4_wt_offst; + pps_ref_frame = ps_dec->ps_ref_pic_buf_lx[0]; + /* CHANGED CODE */ + ps_mv_ntop_start = ps_mv_nmb_start + - (ps_dec->u2_frm_wd_in_mbs << (4 + u1_mbaff)) + 12; + + 
u1_num_part = ps_mb_part_info->u1_num_part; + ps_cur_deblk_mb->u1_mb_type |= (u1_num_part > 1) << 1; + ps_cur_mb_info->u4_pred_info_pkd_idx = ps_dec->u4_pred_info_pkd_idx; + ps_cur_mb_info->u1_num_pred_parts = 0; + + + /****************************************************/ + /* weighted u4_ofst pointer calculations, this loop */ + /* runs maximum 4 times, even in direct cases */ + /****************************************************/ + u1_scale_ref = u1_mbaff & u1_field; + + u4_bot_mb = 1 - ps_cur_mb_info->u1_topmb; + if(ps_dec->ps_cur_pps->u1_wted_pred_flag) + { + u1_num_ref = MIN(u1_num_part, 4); + for(u1_blk_no = 0; u1_blk_no < u1_num_ref; u1_blk_no++) + { + i1_ref_idx = pi1_ref_idx[u1_blk_no]; + if(u1_scale_ref) + i1_ref_idx >>= 1; + pu4_wt_offst = (UWORD32*)&ps_dec->pu4_wt_ofsts[2 + * X3(i1_ref_idx)]; + ppu4_wt_ofst[u1_blk_no] = pu4_wt_offst; + } + } + else + { + ppu4_wt_ofst[0] = NULL; + ppu4_wt_ofst[1] = NULL; + ppu4_wt_ofst[2] = NULL; + ppu4_wt_ofst[3] = NULL; + } + + /**************************************************/ + /* Loop on Partitions */ + /**************************************************/ + ps_dec->u4_dma_buf_idx = 0; + + for(j = 0; j < u1_num_part; j++, ps_part++) + { + + u1_sub_mb_num = ps_part->u1_sub_mb_num; + ps_dec->u1_sub_mb_num = u1_sub_mb_num; + + if(PART_NOT_DIRECT != ps_part->u1_is_direct) + { + /* Mb Skip Mode */ + /* Setting the default and other members of MvPred Structure */ + s_mvPred.i2_mv[2] = -1; + s_mvPred.i2_mv[3] = -1; + s_mvPred.i1_ref_frame[0] = 0; + i1_ref_idx = (u1_scale_ref && u4_bot_mb) ? 
MAX_REF_BUFS : 0; + ps_ref_frame = pps_ref_frame[i1_ref_idx]; + s_mvPred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id; + s_mvPred.u1_pic_type = ps_ref_frame->u1_pic_type; + pu4_wt_offst = (UWORD32*)&ps_dec->pu4_wt_ofsts[0]; + + ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb_start, + ps_mv_ntop_start, &s_mvPred, 0, 4, 0, 1, + MB_SKIP); + + + + + + + { + pred_info_pkd_t *ps_pred_pkd; + ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx; + ih264d_fill_pred_info (s_mvPred.i2_mv,4,4,0,PRED_L0,ps_pred_pkd,ps_ref_frame->u1_pic_buf_id, + (i1_ref_idx >> u1_scale_ref),pu4_wt_offst, + ps_ref_frame->u1_pic_type); + + + ps_dec->u4_pred_info_pkd_idx++; + ps_cur_mb_info->u1_num_pred_parts++; + } + + + + /* Storing colocated zero information */ + u1_colz = ((ABS(s_mvPred.i2_mv[0]) <= 1) + && (ABS(s_mvPred.i2_mv[1]) <= 1)) + + (u1_field << 1); + + ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0, + u1_colz, 4, 4); + } + else + { + u1_sub_mb_x = u1_sub_mb_num & 0x03; + u1_sub_mb_y = u1_sub_mb_num >> 2; + u1_blk_no = + (u1_num_part < 4) ? 
+ j : + (((u1_sub_mb_y >> 1) << 1) + + (u1_sub_mb_x + >> 1)); + + ps_mv_ntop = ps_mv_ntop_start + u1_sub_mb_x; + ps_mv_nmb = ps_mv_nmb_start + u1_sub_mb_num; + + u1_wd = ps_part->u1_partwidth; + u1_ht = ps_part->u1_partheight; + + /* Populate the colpic info and reference frames */ + i1_ref_idx = pi1_ref_idx[u1_blk_no]; + s_mvPred.i1_ref_frame[0] = i1_ref_idx; + + /********************************************************/ + /* Predict Mv */ + /* Add Mv Residuals and store back */ + /********************************************************/ + ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb, ps_mv_ntop, + &s_mvPred, u1_sub_mb_num, u1_wd, 0, 1, + ps_cur_mb_info->u1_mb_mc_mode); + i2_mv_x = ps_mv_nmb->i2_mv[0]; + i2_mv_y = ps_mv_nmb->i2_mv[1]; + i2_mv_x += s_mvPred.i2_mv[0]; + i2_mv_y += s_mvPred.i2_mv[1]; + s_mvPred.i2_mv[0] = i2_mv_x; + s_mvPred.i2_mv[1] = i2_mv_y; + + /********************************************************/ + /* Transfer setup call */ + /* convert RefIdx if it is MbAff */ + /* Pass Weight Offset and refFrame */ + /********************************************************/ + i1_ref_idx1 = i1_ref_idx >> u1_scale_ref; + if(u1_scale_ref && ((i1_ref_idx & 0x01) != u4_bot_mb)) + i1_ref_idx1 += MAX_REF_BUFS; + ps_ref_frame = pps_ref_frame[i1_ref_idx1]; + pu4_wt_offst = ppu4_wt_ofst[u1_blk_no]; + + + + + + + { + pred_info_pkd_t *ps_pred_pkd; + ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx; + ih264d_fill_pred_info (s_mvPred.i2_mv,u1_wd,u1_ht,u1_sub_mb_num,PRED_L0,ps_pred_pkd, + ps_ref_frame->u1_pic_buf_id,(i1_ref_idx >> u1_scale_ref),pu4_wt_offst, + ps_ref_frame->u1_pic_type); + + ps_dec->u4_pred_info_pkd_idx++; + ps_cur_mb_info->u1_num_pred_parts++; + } + + + + /* Fill colocated info in MvPred structure */ + s_mvPred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id; + s_mvPred.u1_pic_type = ps_ref_frame->u1_pic_type; + + /* Calculating colocated zero information */ + u1_colz = + (u1_field << 1) + | ((i1_ref_idx == 0) + && 
(ABS(i2_mv_x) + <= 1) + && (ABS(i2_mv_y) + <= 1)); + u1_colz |= ps_mb_part_info->u1_col_info[u1_blk_no]; + + /* Replicate the motion vectors and colzero u4_flag */ + /* for all sub-partitions */ + + ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb, + u1_sub_mb_num, u1_colz, u1_ht, + u1_wd); + } + } + + } + else + { + /* Storing colocated zero information */ + ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0, + (UWORD8)(u1_field << 1), 4, 4); + + } + /*if num _cores is set to 3,compute bs will be done in another thread*/ + if(ps_dec->u4_num_cores < 3) + { + + if(ps_dec->u4_app_disable_deblk_frm == 0) + ps_dec->pf_compute_bs(ps_dec, ps_cur_mb_info, + (UWORD16)(i >> u1_mbaff)); + } + } + + + + return OK; +} + +#if THREAD_PARSE + +#else +WORD32 ih264d_decode_recon_tfr_nmb(dec_struct_t * ps_dec, + UWORD8 u1_mb_idx, + UWORD8 u1_num_mbs, + UWORD8 u1_num_mbs_next, + UWORD8 u1_tfr_n_mb, + UWORD8 u1_end_of_row) +{ + WORD32 i,j; + UWORD32 u1_end_of_row_next; + dec_mb_info_t * ps_cur_mb_info; + UWORD32 u4_update_mbaff = 0; + WORD32 ret; + const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + const UWORD32 u1_slice_type = ps_dec->ps_cur_slice->u1_slice_type; + const WORD32 u1_skip_th = ( + (u1_slice_type != I_SLICE) ? + (ps_dec->u1_B ? B_8x8 : PRED_8x8R0) : -1); + const UWORD32 u1_ipcm_th = ( + (u1_slice_type != I_SLICE) ? (ps_dec->u1_B ? 
23 : 5) : 0); + + + + + + /* N Mb MC Loop */ + for(i = u1_mb_idx; i < u1_num_mbs; i++) + { + ps_cur_mb_info = ps_dec->ps_nmb_info + i; + ps_dec->u4_dma_buf_idx = 0; + ps_dec->u4_pred_info_idx = 0; + + if(ps_cur_mb_info->u1_mb_type <= u1_skip_th) + { + { + WORD32 pred_cnt = 0; + pred_info_pkd_t *ps_pred_pkd; + UWORD32 u4_pred_info_pkd_idx; + WORD8 i1_pred; + + u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx; + + while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts) + { + + ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx; + + ps_dec->p_form_mb_part_info(ps_pred_pkd,ps_dec, + ps_cur_mb_info->u2_mbx,ps_cur_mb_info->u2_mby,(i >> u1_mbaff), + ps_cur_mb_info); + u4_pred_info_pkd_idx++; + pred_cnt++; + } + } + + ps_dec->p_motion_compensate(ps_dec, ps_cur_mb_info); + + } + else if(ps_cur_mb_info->u1_mb_type == MB_SKIP) + { + { + WORD32 pred_cnt = 0; + pred_info_pkd_t *ps_pred_pkd; + UWORD32 u4_pred_info_pkd_idx; + WORD8 i1_pred; + + u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx; + + while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts) + { + + ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx; + + ps_dec->p_form_mb_part_info(ps_pred_pkd,ps_dec, + ps_cur_mb_info->u2_mbx,ps_cur_mb_info->u2_mby,(i >> u1_mbaff), + ps_cur_mb_info); + + u4_pred_info_pkd_idx++; + pred_cnt++; + } + } + /* Decode MB skip */ + ps_dec->p_motion_compensate(ps_dec, ps_cur_mb_info); + + } + + } + + + /* N Mb IQ IT RECON Loop */ + for(j = u1_mb_idx; j < i; j++) + { + ps_cur_mb_info = ps_dec->ps_nmb_info + j; + + if(ps_cur_mb_info->u1_mb_type <= u1_skip_th) + { + ih264d_process_inter_mb(ps_dec, ps_cur_mb_info, j); + + } + else if(ps_cur_mb_info->u1_mb_type != MB_SKIP) + { + if((u1_ipcm_th + 25) != ps_cur_mb_info->u1_mb_type) + { + ps_cur_mb_info->u1_mb_type -= (u1_skip_th + 1); + ret = ih264d_process_intra_mb(ps_dec, ps_cur_mb_info, j); + if(ret != OK) + return ret; + } + } + + if(ps_dec->u4_mb_level_deblk == 1) + { + ih264d_deblock_mb_level(ps_dec, ps_cur_mb_info, j); 
+ + } + + if(u1_mbaff) + { + if(u4_update_mbaff) + { + UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx + + ps_dec->u2_frm_wd_in_mbs + * (ps_cur_mb_info->u2_mby >> 1); + UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num); + u4_update_mbaff = 0; + } + else + { + u4_update_mbaff = 1; + } + } + else + { + UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx + + ps_dec->u2_frm_wd_in_mbs * ps_cur_mb_info->u2_mby; + UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num); + } + } + + + if(u1_tfr_n_mb) + { + /****************************************************************/ + /* Check for End Of Row in Next iteration */ + /****************************************************************/ + u1_end_of_row_next = + u1_num_mbs_next + && (u1_num_mbs_next + <= (ps_dec->u1_recon_mb_grp + >> u1_mbaff)); + + /****************************************************************/ + /* Transfer the Following things */ + /* N-Mb DeblkParams Data ( To Ext DeblkParams Buffer ) */ + /* N-Mb Recon Data ( To Ext Frame Buffer ) */ + /* N-Mb Intrapredline Data ( Updated Internally) */ + /* N-Mb MV Data ( To Ext MV Buffer ) */ + /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers) */ + /****************************************************************/ + ih264d_transfer_mb_group_data(ps_dec, u1_num_mbs, u1_end_of_row, + u1_end_of_row_next); + ps_dec->u4_num_mbs_prev_nmb = u1_num_mbs; + + if(u1_end_of_row) + { + /* Reset the N-Mb Recon Buf Index to default Values */ + ps_dec->u2_mb_group_cols_y1 = ps_dec->u2_mb_group_cols_y; + ps_dec->u2_mb_group_cols_cr1 = ps_dec->u2_mb_group_cols_cr; + } + /* If next N-Mb Group is the EndOfRow, set the N-Mb Recon Buf Index */ + else if(u1_end_of_row_next) + { + ps_dec->u2_mb_group_cols_y1 = (u1_num_mbs_next << 4) + 8; + ps_dec->u2_mb_group_cols_cr1 = (u1_num_mbs_next << 3) + 8; + } + ps_dec->u4_pred_info_idx = 0; + ps_dec->u4_dma_buf_idx = 0; + + + } + return OK; +} +#endif +/*! 
+ ************************************************************************** + * \if Function name : ih264d_process_inter_mb \endif + * + * \brief + * This function decodes an Inter MB. + * + * + * \return + * 0 on Success and Error code otherwise + ************************************************************************** + */ +WORD32 ih264d_process_inter_mb(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num) +{ + /* CHANGED CODE */ + UWORD8 *pu1_rec_y, *pu1_rec_u, *pu1_rec_v; + + /*CHANGED CODE */ + UWORD32 ui_rec_width, u4_recwidth_cr; + WORD16 *pi2_y_coeff; + UWORD32 u1_mb_field_decoding_flag; + const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + UWORD32 uc_botMb; + UWORD32 u4_num_pmbair; + /* CHANGED CODE */ + tfr_ctxt_t *ps_frame_buf = &ps_dec->s_tran_addrecon; + UWORD32 u4_luma_dc_only_csbp = 0; + UWORD32 u4_luma_dc_only_cbp = 0; + /* CHANGED CODE */ + + uc_botMb = 1 - ps_cur_mb_info->u1_topmb; + u4_num_pmbair = (u1_mb_num >> u1_mbaff); + u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag; + + + /* CHANGED CODE */ + pu1_rec_y = ps_frame_buf->pu1_dest_y + (u4_num_pmbair << 4); + pu1_rec_u = + ps_frame_buf->pu1_dest_u + + (u4_num_pmbair << 3) * YUV420SP_FACTOR; + pu1_rec_v = ps_frame_buf->pu1_dest_v + (u4_num_pmbair << 3); + ui_rec_width = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag; + u4_recwidth_cr = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag; + + /* CHANGED CODE */ + + if(u1_mbaff) + { + if(uc_botMb) + { + pu1_rec_y += (u1_mb_field_decoding_flag ? + (ui_rec_width >> 1) : (ui_rec_width << 4)); + pu1_rec_u += (u1_mb_field_decoding_flag ? + (u4_recwidth_cr >> 1) : (u4_recwidth_cr << 3)); + pu1_rec_v += (u1_mb_field_decoding_flag ? 
+ (u4_recwidth_cr >> 1) : (u4_recwidth_cr << 3)); + } + } + + if(!ps_cur_mb_info->u1_tran_form8x8) + { + u4_luma_dc_only_csbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec, + ps_cur_mb_info, + 0); + } + else + { + if(!ps_dec->ps_cur_pps->u1_entropy_coding_mode) + { + u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec, + ps_cur_mb_info, + 0); + } + else + { + u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff8x8_mb(ps_dec, + ps_cur_mb_info); + } + } + + pi2_y_coeff = ps_dec->pi2_coeff_data; + /* Inverse Transform and Reconstruction */ + if(ps_cur_mb_info->u1_cbp & 0x0f) + { + /* CHANGED CODE */ + if(!ps_cur_mb_info->u1_tran_form8x8) + { + UWORD32 i; + WORD16 ai2_tmp[16]; + for(i = 0; i < 16; i++) + { + if(CHECKBIT(ps_cur_mb_info->u2_luma_csbp, i)) + { + WORD16 *pi2_level = pi2_y_coeff + (i << 4); + UWORD8 *pu1_pred_sblk = pu1_rec_y + ((i & 0x3) * BLK_SIZE) + + (i >> 2) * (ui_rec_width << 2); + PROFILE_DISABLE_IQ_IT_RECON() + { + if(CHECKBIT(u4_luma_dc_only_csbp, i)) + { + ps_dec->pf_iquant_itrans_recon_luma_4x4_dc( + pi2_level, + pu1_pred_sblk, + pu1_pred_sblk, + ui_rec_width, + ui_rec_width, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[3], + ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0, + NULL); + } + else + { + ps_dec->pf_iquant_itrans_recon_luma_4x4( + pi2_level, + pu1_pred_sblk, + pu1_pred_sblk, + ui_rec_width, + ui_rec_width, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[3], + ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0, + NULL); + } + } + } + } + } + else + { + WORD16 *pi2_scale_matrix_ptr; + WORD32 i; + + pi2_scale_matrix_ptr = + ps_dec->s_high_profile.i2_scalinglist8x8[1]; + + for(i = 0; i < 4; i++) + { + WORD16 ai2_tmp[64]; + WORD16 *pi16_levelBlock = pi2_y_coeff + (i << 6); /* move to the next 8x8 adding 64 */ + + UWORD8 *pu1_pred_sblk = pu1_rec_y + ((i & 0x1) * BLK8x8SIZE) + + (i >> 1) * (ui_rec_width << 3); + 
if(CHECKBIT(ps_cur_mb_info->u1_cbp, i)) + { + PROFILE_DISABLE_IQ_IT_RECON() + { + if(CHECKBIT(u4_luma_dc_only_cbp, i)) + { + ps_dec->pf_iquant_itrans_recon_luma_8x8_dc( + pi16_levelBlock, + pu1_pred_sblk, + pu1_pred_sblk, + ui_rec_width, + ui_rec_width, + gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6], + (UWORD16 *)pi2_scale_matrix_ptr, + ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0, + NULL); + } + else + { + ps_dec->pf_iquant_itrans_recon_luma_8x8( + pi16_levelBlock, + pu1_pred_sblk, + pu1_pred_sblk, + ui_rec_width, + ui_rec_width, + gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6], + (UWORD16 *)pi2_scale_matrix_ptr, + ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0, + NULL); + } + } + } + } + + } + } + + /* Decode Chroma Block */ + ih264d_unpack_chroma_coeff4x4_mb(ps_dec, + ps_cur_mb_info); + /*--------------------------------------------------------------------*/ + /* Chroma Blocks decoding */ + /*--------------------------------------------------------------------*/ + { + UWORD8 u1_chroma_cbp = (UWORD8)(ps_cur_mb_info->u1_cbp >> 4); + + if(u1_chroma_cbp != CBPC_ALLZERO) + { + UWORD32 u4_scale_u = ps_cur_mb_info->u1_qpc_div6; + UWORD32 u4_scale_v = ps_cur_mb_info->u1_qpcr_div6; + UWORD16 u2_chroma_csbp = ps_cur_mb_info->u2_chroma_csbp; + + pi2_y_coeff = ps_dec->pi2_coeff_data; + + { + UWORD32 i; + WORD16 ai2_tmp[16]; + for(i = 0; i < 4; i++) + { + WORD16 *pi2_level = pi2_y_coeff + (i << 4); + UWORD8 *pu1_pred_sblk = pu1_rec_u + + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) + + (i >> 1) * (u4_recwidth_cr << 2); + PROFILE_DISABLE_IQ_IT_RECON() + { + if(CHECKBIT(u2_chroma_csbp, i)) + { + ps_dec->pf_iquant_itrans_recon_chroma_4x4( + pi2_level, + pu1_pred_sblk, + pu1_pred_sblk, + u4_recwidth_cr, + u4_recwidth_cr, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[4], + u4_scale_u, ai2_tmp, pi2_level); + } + else if(pi2_level[0] != 0) + { + ps_dec->pf_iquant_itrans_recon_chroma_4x4_dc( + pi2_level, + 
pu1_pred_sblk, + pu1_pred_sblk, + u4_recwidth_cr, + u4_recwidth_cr, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[4], + u4_scale_u, ai2_tmp, pi2_level); + } + } + } + } + + pi2_y_coeff += MB_CHROM_SIZE; + u2_chroma_csbp >>= 4; + + { + UWORD32 i; + WORD16 ai2_tmp[16]; + for(i = 0; i < 4; i++) + { + WORD16 *pi2_level = pi2_y_coeff + (i << 4); + UWORD8 *pu1_pred_sblk = pu1_rec_u + 1 + + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR) + + (i >> 1) * (u4_recwidth_cr << 2); + PROFILE_DISABLE_IQ_IT_RECON() + { + if(CHECKBIT(u2_chroma_csbp, i)) + { + ps_dec->pf_iquant_itrans_recon_chroma_4x4( + pi2_level, + pu1_pred_sblk, + pu1_pred_sblk, + u4_recwidth_cr, + u4_recwidth_cr, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[5], + u4_scale_v, ai2_tmp, pi2_level); + } + else if(pi2_level[0] != 0) + { + ps_dec->pf_iquant_itrans_recon_chroma_4x4_dc( + pi2_level, + pu1_pred_sblk, + pu1_pred_sblk, + u4_recwidth_cr, + u4_recwidth_cr, + gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6], + (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[5], + u4_scale_v, ai2_tmp, pi2_level); + } + } + } + } + } + } + return (0); +} + +/*! + ************************************************************************** + * \if Function name : ih264d_parse_pred_weight_table \endif + * + * \brief + * Implements pred_weight_table() of 7.3.3.2. 
+ * + * \return + * None + * + ************************************************************************** + */ +WORD32 ih264d_parse_pred_weight_table(dec_slice_params_t * ps_cur_slice, + dec_bit_stream_t * ps_bitstrm) +{ + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst; + WORD8 i, cont, lx; + UWORD8 uc_weight_flag; + UWORD32 *pui32_weight_offset_lx; + WORD16 c_weight, c_offset; + UWORD32 ui32_y_def_weight_ofst, ui32_cr_def_weight_ofst; + UWORD32 ui32_temp; + UWORD8 uc_luma_log2_weight_denom; + UWORD8 uc_chroma_log2_weight_denom; + + /* Variables for error resilience checks */ + UWORD32 u4_temp; + WORD32 i_temp; + + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp & MASK_LOG2_WEIGHT_DENOM) + { + return ERROR_PRED_WEIGHT_TABLE_T; + } + uc_luma_log2_weight_denom = u4_temp; + COPYTHECONTEXT("SH: luma_log2_weight_denom",uc_luma_log2_weight_denom); + ui32_y_def_weight_ofst = (1 << uc_luma_log2_weight_denom); + + u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + if(u4_temp & MASK_LOG2_WEIGHT_DENOM) + { + return ERROR_PRED_WEIGHT_TABLE_T; + } + uc_chroma_log2_weight_denom = u4_temp; + COPYTHECONTEXT("SH: chroma_log2_weight_denom",uc_chroma_log2_weight_denom); + ui32_cr_def_weight_ofst = (1 << uc_chroma_log2_weight_denom); + + ps_cur_slice->u2_log2Y_crwd = uc_luma_log2_weight_denom + | (uc_chroma_log2_weight_denom << 8); + + cont = (ps_cur_slice->u1_slice_type == B_SLICE); + lx = 0; + do + { + for(i = 0; i < ps_cur_slice->u1_num_ref_idx_lx_active[lx]; i++) + { + pui32_weight_offset_lx = ps_cur_slice->u4_wt_ofst_lx[lx][i]; + + uc_weight_flag = ih264d_get_bit_h264(ps_bitstrm); + pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + COPYTHECONTEXT("SH: luma_weight_l0_flag",uc_weight_flag); + if(uc_weight_flag) + { + i_temp = ih264d_sev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + if((i_temp + 128) & MASK_PRED_WEIGHT_OFFSET) + return ERROR_PRED_WEIGHT_TABLE_T; + c_weight = i_temp; + COPYTHECONTEXT("SH: 
luma_weight_l0",c_weight); + + i_temp = ih264d_sev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + if((i_temp + 128) & MASK_PRED_WEIGHT_OFFSET) + return ERROR_PRED_WEIGHT_TABLE_T; + c_offset = i_temp; + COPYTHECONTEXT("SH: luma_offset_l0",c_offset); + + ui32_temp = (c_offset << 16) | (c_weight & 0xFFFF); + pui32_weight_offset_lx[0] = ui32_temp; + } + else + { + + pui32_weight_offset_lx[0] = ui32_y_def_weight_ofst; + } + + { + WORD8 c_weightCb, c_weightCr, c_offsetCb, c_offsetCr; + uc_weight_flag = ih264d_get_bit_h264(ps_bitstrm); + pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + COPYTHECONTEXT("SH: chroma_weight_l0_flag",uc_weight_flag); + if(uc_weight_flag) + { + i_temp = ih264d_sev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + if((i_temp + 128) & MASK_PRED_WEIGHT_OFFSET) + return ERROR_PRED_WEIGHT_TABLE_T; + c_weightCb = i_temp; + COPYTHECONTEXT("SH: chroma_weight_l0",c_weightCb); + + i_temp = ih264d_sev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + if((i_temp + 128) & MASK_PRED_WEIGHT_OFFSET) + return ERROR_PRED_WEIGHT_TABLE_T; + c_offsetCb = i_temp; + COPYTHECONTEXT("SH: chroma_weight_l0",c_offsetCb); + + ui32_temp = (c_offsetCb << 16) | (c_weightCb & 0xFFFF); + pui32_weight_offset_lx[1] = ui32_temp; + + i_temp = ih264d_sev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + if((i_temp + 128) & MASK_PRED_WEIGHT_OFFSET) + return ERROR_PRED_WEIGHT_TABLE_T; + c_weightCr = i_temp; + COPYTHECONTEXT("SH: chroma_weight_l0",c_weightCr); + + i_temp = ih264d_sev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + if((i_temp + 128) & MASK_PRED_WEIGHT_OFFSET) + return ERROR_PRED_WEIGHT_TABLE_T; + c_offsetCr = i_temp; + COPYTHECONTEXT("SH: chroma_weight_l0",c_offsetCr); + + ui32_temp = (c_offsetCr << 16) | (c_weightCr & 0xFFFF); + pui32_weight_offset_lx[2] = ui32_temp; + } + else + { + pui32_weight_offset_lx[1] = ui32_cr_def_weight_ofst; + pui32_weight_offset_lx[2] = ui32_cr_def_weight_ofst; + } + } + } + lx++; + } + while(cont--); + + return OK; +} + + 
+/*****************************************************************************/ +/* */ +/* Function Name : ih264d_init_ref_idx_lx_p */ +/* */ +/* Description : This function initializes the reference picture L0 list */ +/* for P slices as per section 8.2.4.2.1 and 8.2.4.2.2. */ +/* */ +/* Inputs : pointer to ps_dec struture */ +/* Globals : NO */ +/* Processing : arranges all the short term pictures according to */ +/* pic_num in descending order starting from curr pic_num. */ +/* and inserts it in L0 list followed by all Long term */ +/* pictures in ascending order. */ +/* */ +/* Returns : void */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 13 07 2002 Jay Draft */ +/* */ +/*****************************************************************************/ +void ih264d_init_ref_idx_lx_p(dec_struct_t *ps_dec) +{ + struct pic_buffer_t *ps_ref_pic_buf_lx; + dpb_manager_t *ps_dpb_mgr; + struct dpb_info_t *ps_next_dpb; + WORD8 i; + UWORD8 u1_max_lt_index, u1_min_lt_index, u1_lt_index; + UWORD8 u1_field_pic_flag; + dec_slice_params_t *ps_cur_slice; + UWORD8 u1_L0; + WORD32 i4_cur_pic_num, i4_min_st_pic_num; + WORD32 i4_temp_pic_num, i4_ref_pic_num; + UWORD8 u1_num_short_term_bufs; + UWORD8 u1_max_ref_idx_l0; + + ps_cur_slice = ps_dec->ps_cur_slice; + u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag; + u1_max_ref_idx_l0 = ps_cur_slice->u1_num_ref_idx_lx_active[0] + << u1_field_pic_flag; + + ps_dpb_mgr = ps_dec->ps_dpb_mgr; + /* Get the current frame number */ + i4_cur_pic_num = ps_dec->ps_cur_pic->i4_pic_num; + + /* Get Min pic_num,MinLt */ + i4_min_st_pic_num = i4_cur_pic_num; + u1_max_lt_index = MAX_REF_BUFS + 1; + u1_min_lt_index = MAX_REF_BUFS + 1; + + /* Start from ST head */ + ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head; + for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++) + { + i4_ref_pic_num = ps_next_dpb->ps_pic_buf->i4_pic_num; + 
if(i4_ref_pic_num < i4_cur_pic_num) + { + /* RefPic Buf pic_num is before Current pic_num in decode order */ + i4_min_st_pic_num = MIN(i4_min_st_pic_num, i4_ref_pic_num); + } + + /* Chase the next link */ + ps_next_dpb = ps_next_dpb->ps_prev_short; + } + + /* Start from LT head */ + ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head; + if(ps_next_dpb) + { + u1_max_lt_index = ps_next_dpb->u1_lt_idx; + u1_min_lt_index = ps_next_dpb->u1_lt_idx; + + for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++) + { + u1_lt_index = ps_next_dpb->u1_lt_idx; + u1_max_lt_index = (UWORD8)(MAX(u1_max_lt_index, u1_lt_index)); + u1_min_lt_index = (UWORD8)(MIN(u1_min_lt_index, u1_lt_index)); + + /* Chase the next link */ + ps_next_dpb = ps_next_dpb->ps_prev_long; + } + } + /* 1. Initialize refIdxL0 */ + u1_L0 = 0; + if(u1_field_pic_flag) + { + ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0]; + ps_ref_pic_buf_lx += MAX_REF_BUFS; + i4_temp_pic_num = i4_cur_pic_num; + } + else + { + ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0]; + i4_temp_pic_num = i4_cur_pic_num; + } + + /* Arrange all short term buffers in output order as given by pic_num */ + /* Arrange pic_num's less than Curr pic_num in the descending pic_num */ + /* order starting from (Curr pic_num - 1) */ + for(; i4_temp_pic_num >= i4_min_st_pic_num; i4_temp_pic_num--) + { + /* Start from ST head */ + ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head; + for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++) + { + if((WORD32)ps_next_dpb->ps_pic_buf->i4_pic_num == i4_temp_pic_num) + { + /* Copy info in pic buffer */ + ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx, + ps_next_dpb->ps_pic_buf); + ps_ref_pic_buf_lx++; + u1_L0++; + break; + } + ps_next_dpb = ps_next_dpb->ps_prev_short; + } + } + + /* Arrange all Long term buffers in ascending order, in LongtermIndex */ + /* Start from LT head */ + u1_num_short_term_bufs = u1_L0; + for(u1_lt_index = u1_min_lt_index; u1_lt_index <= u1_max_lt_index; + u1_lt_index++) + { + ps_next_dpb = 
ps_dpb_mgr->ps_dpb_ht_head; + for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++) + { + if(ps_next_dpb->u1_lt_idx == u1_lt_index) + { + ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx, + ps_next_dpb->ps_pic_buf); + + ps_ref_pic_buf_lx->u1_long_term_pic_num = + ps_ref_pic_buf_lx->u1_long_term_frm_idx; + ps_ref_pic_buf_lx++; + u1_L0++; + break; + } + ps_next_dpb = ps_next_dpb->ps_prev_long; + } + } + + if(u1_field_pic_flag) + { + /* Initialize the rest of the entries in the */ + /* reference list to handle of errors */ + { + UWORD8 u1_i; + pic_buffer_t *ps_ref_pic; + + ps_ref_pic = ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS; + + if(NULL == ps_ref_pic->pu1_buf1) + { + ps_ref_pic = ps_dec->ps_cur_pic; + } + for(u1_i = u1_L0; u1_i < u1_max_ref_idx_l0; u1_i++) + { + *ps_ref_pic_buf_lx = *ps_ref_pic; + ps_ref_pic_buf_lx++; + } + } + + ih264d_convert_frm_to_fld_list( + ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS, &u1_L0, + ps_dec, u1_num_short_term_bufs); + + ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0] + u1_L0; + } + + /* Initialize the rest of the entries in the */ + /* reference list to handle of errors */ + { + UWORD8 u1_i; + pic_buffer_t *ps_ref_pic; + + ps_ref_pic = ps_dpb_mgr->ps_init_dpb[0][0]; + + if(NULL == ps_ref_pic->pu1_buf1) + { + ps_ref_pic = ps_dec->ps_cur_pic; + } + for(u1_i = u1_L0; u1_i < u1_max_ref_idx_l0; u1_i++) + { + *ps_ref_pic_buf_lx = *ps_ref_pic; + ps_ref_pic_buf_lx++; + } + } + ps_dec->ps_cur_slice->u1_initial_list_size[0] = u1_L0; +} + diff --git a/decoder/ih264d_process_pslice.h b/decoder/ih264d_process_pslice.h new file mode 100755 index 0000000..8740eb4 --- /dev/null +++ b/decoder/ih264d_process_pslice.h @@ -0,0 +1,69 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef _IH264D_PROCESS_PSLICE_H_ +#define _IH264D_PROCESS_PSLICE_H_ +/*! +************************************************************************** +* \file ih264d_process_pslice.h +* +* \brief +* Contains declarations of routines that decode a P slice type +* +* Detailed_description +* +* \date +* 21/12/2002 +* +* \author NS +************************************************************************** +*/ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_structs.h" +WORD32 ih264d_parse_pslice(dec_struct_t *ps_dec, + UWORD16 u2_first_mb_in_slice); +WORD32 ih264d_parse_pred_weight_table(dec_slice_params_t * ps_cur_slice, + dec_bit_stream_t * ps_bitstrm); + +WORD32 parsePSliceData(dec_struct_t * ps_dec, + dec_slice_params_t * ps_slice, + UWORD16 u2_first_mb_in_slice); + +WORD32 ih264d_process_inter_mb(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num); + +void ih264d_init_ref_idx_lx_p(dec_struct_t *ps_dec); + +WORD32 ih264d_mv_pred_ref_tfr_nby2_pmb(dec_struct_t * ps_dec, + UWORD8 u1_num_mbs, + UWORD8 u1_num_mbsNby2); + +WORD32 ih264d_decode_recon_tfr_nmb(dec_struct_t * ps_dec, + UWORD8 u1_mb_idx, + UWORD8 u1_num_mbs, + UWORD8 u1_num_mbs_next, + UWORD8 u1_tfr_n_mb, + UWORD8 u1_end_of_row); + +void ih264d_insert_pic_in_ref_pic_listx(struct pic_buffer_t *ps_ref_pic_buf_lx, + struct pic_buffer_t 
*ps_pic); +#endif /* _IH264D_PROCESS_PSLICE_H_ */ diff --git a/decoder/ih264d_quant_scaling.c b/decoder/ih264d_quant_scaling.c new file mode 100755 index 0000000..fa9aeb5 --- /dev/null +++ b/decoder/ih264d_quant_scaling.c @@ -0,0 +1,274 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_bitstrm.h" +#include "ih264d_structs.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_defs.h" +#include "ih264d_defs.h" +#include "ih264d_defs.h" + +#include "ih264d_parse_slice.h" +#include "ih264d_tables.h" +#include "ih264d_utils.h" +#include "ih264d_nal.h" +#include "ih264d_deblocking.h" + +#include "ih264d_mem_request.h" +#include "ih264d_debug.h" + +#include "ih264d_error_handler.h" +#include "ih264d_mb_utils.h" +#include "ih264d_sei.h" +#include "ih264d_vui.h" +#include "ih264d_tables.h" + +#define IDCT_BLOCK_WIDTH8X8 8 + +void ih264d_scaling_list(WORD16 *pi2_scaling_list, + WORD32 i4_size_of_scalinglist, + UWORD8 *pu1_use_default_scaling_matrix_flag, + dec_bit_stream_t *ps_bitstrm) +{ + WORD32 i4_j, i4_delta_scale, i4_lastScale = 8, i4_nextScale = 8; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst; + + *pu1_use_default_scaling_matrix_flag = 0; + + for(i4_j = 0; i4_j < i4_size_of_scalinglist; i4_j++) + { + if(i4_nextScale != 0) + { + i4_delta_scale = ih264d_sev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + + i4_nextScale = ((i4_lastScale + i4_delta_scale + 256) & 0xff); + + *pu1_use_default_scaling_matrix_flag = ((i4_j == 0) + && (i4_nextScale == 0)); + + } + pi2_scaling_list[i4_j] = + (i4_nextScale == 0) ? 
(i4_lastScale) : (i4_nextScale); + i4_lastScale = pi2_scaling_list[i4_j]; + } +} + +void ih264d_form_default_scaling_matrix(dec_struct_t *ps_dec) +{ + + /*************************************************************************/ + /* perform the inverse scanning for the frame and field scaling matrices */ + /*************************************************************************/ + { + UWORD8 *pu1_inv_scan; + WORD32 i4_i, i4_j; + + pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan; + + /* for all 4x4 matrices */ + for(i4_i = 0; i4_i < 6; i4_i++) + { + for(i4_j = 0; i4_j < 16; i4_j++) + { + ps_dec->s_high_profile.i2_scalinglist4x4[i4_i][pu1_inv_scan[i4_j]] = + 16; + + } + } + + /* for all 8x8 matrices */ + for(i4_i = 0; i4_i < 2; i4_i++) + { + for(i4_j = 0; i4_j < 64; i4_j++) + { + ps_dec->s_high_profile.i2_scalinglist8x8[i4_i][gau1_ih264d_inv_scan_prog8x8_cabac[i4_j]] = + 16; + + } + } + } +} + +void ih264d_form_scaling_matrix_picture(dec_seq_params_t *ps_seq, + dec_pic_params_t *ps_pic, + dec_struct_t *ps_dec) +{ + /* default scaling matrices */ + WORD32 i4_i; + + /* check the SPS first */ + if(ps_seq->i4_seq_scaling_matrix_present_flag) + { + for(i4_i = 0; i4_i < 8; i4_i++) + { + if(i4_i < 6) + { + /* fall-back rule A */ + if(!ps_seq->u1_seq_scaling_list_present_flag[i4_i]) + { + if((i4_i == 0) || (i4_i == 3)) + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + (i4_i == 0) ? (WORD16 *)(gai2_ih264d_default_intra4x4) : (WORD16 *)(gai2_ih264d_default_inter4x4); + } + else + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + ps_dec->s_high_profile.pi2_scale_mat[i4_i + - 1]; + } + } + else + { + if(ps_seq->u1_use_default_scaling_matrix_flag[i4_i]) + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + (i4_i < 3) ? 
(WORD16 *)(gai2_ih264d_default_intra4x4) : (WORD16 *)(gai2_ih264d_default_inter4x4); + } + else + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + ps_seq->i2_scalinglist4x4[i4_i]; + } + } + + } + else + { + /* fall-back rule A */ + if((!ps_seq->u1_seq_scaling_list_present_flag[i4_i]) + || (ps_seq->u1_use_default_scaling_matrix_flag[i4_i])) + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + (i4_i == 6) ? ((WORD16*)gai2_ih264d_default_intra8x8) : ((WORD16*)gai2_ih264d_default_inter8x8); + } + else + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + ps_seq->i2_scalinglist8x8[i4_i - 6]; + } + } + } + } + + /* checking for the PPS */ + + if(ps_pic->i4_pic_scaling_matrix_present_flag) + { + for(i4_i = 0; i4_i < 8; i4_i++) + { + if(i4_i < 6) + { + /* fall back rule B */ + if(!ps_pic->u1_pic_scaling_list_present_flag[i4_i]) + { + if((i4_i == 0) || (i4_i == 3)) + { + if(!ps_seq->i4_seq_scaling_matrix_present_flag) + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + (i4_i == 0) ? (WORD16 *)(gai2_ih264d_default_intra4x4) : (WORD16 *)(gai2_ih264d_default_inter4x4); + } + } + else + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + ps_dec->s_high_profile.pi2_scale_mat[i4_i + - 1]; + } + } + else + { + if(ps_pic->u1_pic_use_default_scaling_matrix_flag[i4_i]) + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + (i4_i < 3) ? (WORD16 *)(gai2_ih264d_default_intra4x4) : (WORD16 *)(gai2_ih264d_default_inter4x4); + } + else + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + ps_pic->i2_pic_scalinglist4x4[i4_i]; + } + } + } + else + { + if(!ps_pic->u1_pic_scaling_list_present_flag[i4_i]) + { + if(!ps_seq->u1_seq_scaling_list_present_flag[i4_i]) + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + (i4_i == 6) ? ((WORD16*)gai2_ih264d_default_intra8x8) : ((WORD16*)gai2_ih264d_default_inter8x8); + } + } + else + { + if(ps_pic->u1_pic_use_default_scaling_matrix_flag[i4_i]) + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + (i4_i == 6) ? 
(WORD16 *)(gai2_ih264d_default_intra8x8) : (WORD16 *)(gai2_ih264d_default_inter8x8); + } + else + { + ps_dec->s_high_profile.pi2_scale_mat[i4_i] = + ps_pic->i2_pic_scalinglist8x8[i4_i - 6]; + } + } + } + } + } + + /*************************************************************************/ + /* perform the inverse scanning for the frame and field scaling matrices */ + /*************************************************************************/ + { + UWORD8 *pu1_inv_scan_4x4; + WORD32 i4_i, i4_j; + + pu1_inv_scan_4x4 = (UWORD8 *)gau1_ih264d_inv_scan; + + /* for all 4x4 matrices */ + for(i4_i = 0; i4_i < 6; i4_i++) + { + for(i4_j = 0; i4_j < 16; i4_j++) + { + ps_dec->s_high_profile.i2_scalinglist4x4[i4_i][pu1_inv_scan_4x4[i4_j]] = + ps_dec->s_high_profile.pi2_scale_mat[i4_i][i4_j]; + + } + } + + /* for all 8x8 matrices */ + for(i4_i = 0; i4_i < 2; i4_i++) + { + for(i4_j = 0; i4_j < 64; i4_j++) + { + ps_dec->s_high_profile.i2_scalinglist8x8[i4_i][gau1_ih264d_inv_scan_prog8x8_cabac[i4_j]] = + ps_dec->s_high_profile.pi2_scale_mat[i4_i + 6][i4_j]; + + } + } + } +} + diff --git a/decoder/ih264d_quant_scaling.h b/decoder/ih264d_quant_scaling.h new file mode 100755 index 0000000..d9bd377 --- /dev/null +++ b/decoder/ih264d_quant_scaling.h @@ -0,0 +1,37 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef _IH264D_QUANT_SCALING_H_ +#define _IH264D_QUANT_SCALING_H_ +void ih264d_scaling_list(WORD16 *pi2_scaling_list, + WORD32 i4_size_of_scalinglist, + UWORD8 *pu1_use_default_scaling_matrix_flag, + dec_bit_stream_t *ps_bitstrm); + + +void ih264d_form_scaling_matrix_picture(dec_seq_params_t *ps_seq, + dec_pic_params_t *ps_pic, + dec_struct_t *ps_dec); + +void ih264d_form_default_scaling_matrix(dec_struct_t *ps_dec); + + + + +#endif diff --git a/decoder/ih264d_sei.c b/decoder/ih264d_sei.c new file mode 100755 index 0000000..14ffcd6 --- /dev/null +++ b/decoder/ih264d_sei.c @@ -0,0 +1,386 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* */ +/* File Name : ih264d_sei.c */ +/* */ +/* Description : This file contains routines to parse SEI NAL's */ +/* */ +/* List of Functions : <List the functions defined in this file> */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 25 05 2005 NS Draft */ +/* */ +/*****************************************************************************/ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_sei.h" +#include "ih264d_bitstrm.h" +#include "ih264d_structs.h" +#include "ih264d_error_handler.h" +#include "ih264d_vui.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_defs.h" + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_buffering_period */ +/* */ +/* Description : This function parses SEI message buffering_period */ +/* Inputs : ps_buf_prd pointer to struct buf_period_t */ +/* ps_bitstrm Bitstream */ +/* Globals : None */ +/* Processing : Parses SEI payload buffering period. 
*/
+/*  Outputs       : Selects the referenced SPS as ps_dec->ps_cur_sps        */
+/*  Returns       : OK on success                                            */
+/*                  ERROR_INVALID_SEQ_PARAM if the SPS id is out of range    */
+/*                  -1 if the referenced SPS is not marked valid             */
+/*                                                                           */
+/*  Issues        : Not implemented fully                                    */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2002   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+WORD32 ih264d_parse_buffering_period(buf_period_t *ps_buf_prd,
+                                     dec_bit_stream_t *ps_bitstrm,
+                                     dec_struct_t *ps_dec)
+{
+    UWORD32 u4_seq_parameter_set_id;
+    UWORD8 u1_seq_parameter_set_id;
+    dec_seq_params_t *ps_seq;
+    UWORD8 u1_nal_hrd_present, u1_vcl_hrd_present;
+    UWORD32 i;
+    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UNUSED(ps_buf_prd);
+
+    /* Range-check the decoded id before narrowing it to 8 bits, so that an  */
+    /* oversized value cannot wrap around into the valid range. The id is    */
+    /* used as an index into ps_dec->ps_sps, so MAX_NUM_SEQ_PARAMS itself    */
+    /* must be rejected as well (assumes ps_sps holds MAX_NUM_SEQ_PARAMS     */
+    /* entries, indices 0 .. MAX_NUM_SEQ_PARAMS - 1).                        */
+    u4_seq_parameter_set_id = ih264d_uev(pu4_bitstrm_ofst,
+                                         pu4_bitstrm_buf);
+    if(u4_seq_parameter_set_id >= MAX_NUM_SEQ_PARAMS)
+        return ERROR_INVALID_SEQ_PARAM;
+    u1_seq_parameter_set_id = (UWORD8)u4_seq_parameter_set_id;
+
+    /* -1 is a distinct status: ih264d_parse_sei_message treats it as       */
+    /* "stop parsing this SEI NAL" rather than as a decode error.           */
+    ps_seq = &ps_dec->ps_sps[u1_seq_parameter_set_id];
+    if(TRUE != ps_seq->u1_is_valid)
+        return (-1);
+
+    ps_dec->ps_sei->u1_seq_param_set_id = u1_seq_parameter_set_id;
+    ps_dec->ps_cur_sps = ps_seq;
+
+    if(1 == ps_seq->u1_vui_parameters_present_flag)
+    {
+        /* For every CPB two fields of u1_initial_cpb_removal_delay bits    */
+        /* each are read; the values are consumed but not stored.           */
+        u1_nal_hrd_present = ps_seq->s_vui.u1_nal_hrd_params_present;
+        if(u1_nal_hrd_present)
+        {
+            for(i = 0; i < ps_seq->s_vui.s_nal_hrd.u4_cpb_cnt; i++)
+            {
+                ih264d_get_bits_h264(
+                                ps_bitstrm,
+                                ps_seq->s_vui.s_nal_hrd.u1_initial_cpb_removal_delay);
+                ih264d_get_bits_h264(
+                                ps_bitstrm,
+                                ps_seq->s_vui.s_nal_hrd.u1_initial_cpb_removal_delay);
+            }
+        }
+
+        u1_vcl_hrd_present = ps_seq->s_vui.u1_vcl_hrd_params_present;
+        if(u1_vcl_hrd_present)
+        {
+            for(i = 0; i < ps_seq->s_vui.s_vcl_hrd.u4_cpb_cnt; i++)
+            {
+                ih264d_get_bits_h264(
+                                ps_bitstrm,
+                                ps_seq->s_vui.s_vcl_hrd.u1_initial_cpb_removal_delay);
+                ih264d_get_bits_h264(
+                                ps_bitstrm,
+                                ps_seq->s_vui.s_vcl_hrd.u1_initial_cpb_removal_delay);
+            }
+        }
+    }
+    return OK;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*
Function Name : ih264d_parse_pic_timing */ +/* */ +/* Description : This function parses SEI message pic_timing */ +/* Inputs : ps_bitstrm Bitstream */ +/* ps_dec Poniter decoder context */ +/* ui4_payload_size pay load i4_size */ +/* Globals : None */ +/* Processing : Parses SEI payload picture timing */ +/* Outputs : None */ +/* Returns : None */ +/* */ +/* Issues : Not implemented fully */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 06 05 2002 NS Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_parse_pic_timing(dec_bit_stream_t *ps_bitstrm, + dec_struct_t *ps_dec, + UWORD32 ui4_payload_size) +{ + sei *ps_sei; + vui_t *ps_vu4; + UWORD8 u1_cpb_dpb_present; + UWORD8 u1_pic_struct_present_flag; + UWORD32 u4_start_offset, u4_bits_consumed; + UWORD8 u1_cpb_removal_delay_length, u1_dpb_output_delay_length; + + ps_sei = (sei *)ps_dec->ps_sei; + ps_vu4 = &ps_dec->ps_cur_sps->s_vui; + + u1_cpb_dpb_present = ps_vu4->u1_vcl_hrd_params_present + + ps_vu4->u1_nal_hrd_params_present; + + if(ps_vu4->u1_vcl_hrd_params_present) + { + u1_cpb_removal_delay_length = + ps_vu4->s_vcl_hrd.u1_cpb_removal_delay_length; + u1_dpb_output_delay_length = + ps_vu4->s_vcl_hrd.u1_dpb_output_delay_length; + } + else if(ps_vu4->u1_nal_hrd_params_present) + { + u1_cpb_removal_delay_length = + ps_vu4->s_nal_hrd.u1_cpb_removal_delay_length; + u1_dpb_output_delay_length = + ps_vu4->s_nal_hrd.u1_dpb_output_delay_length; + } + else + { + u1_cpb_removal_delay_length = 24; + u1_dpb_output_delay_length = 24; + + } + + u4_start_offset = ps_bitstrm->u4_ofst; + if(u1_cpb_dpb_present) + { + ih264d_get_bits_h264(ps_bitstrm, u1_cpb_removal_delay_length); + ih264d_get_bits_h264(ps_bitstrm, u1_dpb_output_delay_length); + } + + u1_pic_struct_present_flag = ps_vu4->u1_pic_struct_present_flag; + if(u1_pic_struct_present_flag) + { + ps_sei->u1_pic_struct = ih264d_get_bits_h264(ps_bitstrm, 4); + 
ps_dec->u1_pic_struct_copy = ps_sei->u1_pic_struct; + ps_sei->u1_is_valid = 1; + } + u4_bits_consumed = ps_bitstrm->u4_ofst - u4_start_offset; + ih264d_flush_bits_h264(ps_bitstrm, + (ui4_payload_size << 3) - u4_bits_consumed); + + return (0); +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_recovery_point */ +/* */ +/* Description : This function parses SEI message recovery point */ +/* Inputs : ps_bitstrm Bitstream */ +/* ps_dec Poniter decoder context */ +/* ui4_payload_size pay load i4_size */ +/* Globals : None */ +/* Processing : Parses SEI payload picture timing */ +/* Outputs : None */ +/* Returns : None */ +/* */ +/* Issues : Not implemented fully */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 06 05 2002 NS Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_parse_recovery_point(dec_bit_stream_t *ps_bitstrm, + dec_struct_t *ps_dec, + UWORD32 ui4_payload_size) +{ + sei *ps_sei = ps_dec->ps_sei; + dec_err_status_t *ps_err = ps_dec->ps_dec_err_status; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UNUSED(ui4_payload_size); + ps_sei->u2_recovery_frame_cnt = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_err->u4_frm_sei_sync = ps_err->u4_cur_frm + + ps_sei->u2_recovery_frame_cnt; + ps_sei->u1_exact_match_flag = ih264d_get_bit_h264(ps_bitstrm); + ps_sei->u1_broken_link_flag = ih264d_get_bit_h264(ps_bitstrm); + ps_sei->u1_changing_slice_grp_idc = ih264d_get_bits_h264(ps_bitstrm, 2); + + return (0); +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_sei_payload */ +/* */ +/* Description : This function parses SEI pay loads. Currently it's */ +/* implemented partially. 
*/
+/*  Inputs        : ps_bitstrm        Bitstream                              */
+/*                  ui4_payload_type  SEI payload type                       */
+/*                  ui4_payload_size  SEI payload size in bytes              */
+/*                  ps_dec            Pointer to decoder context             */
+/*  Globals       : None                                                     */
+/*  Processing    : Dispatches buffering period, picture timing and          */
+/*                  recovery point payloads; skips every other type          */
+/*  Outputs       : None                                                     */
+/*  Returns       : Status of ih264d_parse_buffering_period for that         */
+/*                  payload type, 0 for all other payload types              */
+/*                                                                           */
+/*  Issues        : Not implemented fully                                    */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2002   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+WORD32 ih264d_parse_sei_payload(dec_bit_stream_t *ps_bitstrm,
+                                UWORD32 ui4_payload_type,
+                                UWORD32 ui4_payload_size,
+                                dec_struct_t *ps_dec)
+{
+    /* Only the buffering period parser reports a status to the caller */
+    WORD32 i4_ret = 0;
+
+    if(SEI_BUF_PERIOD == ui4_payload_type)
+    {
+        sei *ps_sei_ctxt = (sei *)ps_dec->ps_sei;
+
+        i4_ret = ih264d_parse_buffering_period(&ps_sei_ctxt->s_buf_period,
+                                               ps_bitstrm, ps_dec);
+    }
+    else if(SEI_PIC_TIMING == ui4_payload_type)
+    {
+        if(NULL != ps_dec->ps_cur_sps)
+        {
+            ih264d_parse_pic_timing(ps_bitstrm, ps_dec, ui4_payload_size);
+        }
+        else
+        {
+            /* Picture timing needs the VUI of the current SPS; without */
+            /* one the payload is skipped                               */
+            ih264d_flush_bits_h264(ps_bitstrm, (ui4_payload_size << 3));
+        }
+    }
+    else if(SEI_RECOVERY_PT == ui4_payload_type)
+    {
+        ih264d_parse_recovery_point(ps_bitstrm, ps_dec, ui4_payload_size);
+    }
+    else
+    {
+        /* Payload types that are not handled are skipped */
+        ih264d_flush_bits_h264(ps_bitstrm, (ui4_payload_size << 3));
+    }
+
+    return i4_ret;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_sei_message                                 */
+/*                                                                           */
+/*  Description   : This function is parses and decode SEI. Currently it's   */
+/*                  not implemented fully.
*/ +/* Inputs : ps_dec Decoder parameters */ +/* ps_bitstrm Bitstream */ +/* Globals : None */ +/* Processing : Parses SEI NAL units and stores the info */ +/* Outputs : None */ +/* Returns : None */ +/* */ +/* Issues : Not implemented fully */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 06 05 2002 NS Draft */ +/* */ +/*****************************************************************************/ + +WORD32 ih264d_parse_sei_message(dec_struct_t *ps_dec, + dec_bit_stream_t *ps_bitstrm) +{ + UWORD32 ui4_payload_type, ui4_payload_size; + UWORD32 u4_bits; + WORD32 i4_status = 0; + + do + { + ui4_payload_type = 0; + + u4_bits = ih264d_get_bits_h264(ps_bitstrm, 8); + while(0xff == u4_bits) + { + u4_bits = ih264d_get_bits_h264(ps_bitstrm, 8); + ui4_payload_type += 255; + } + ui4_payload_type += u4_bits; + + ui4_payload_size = 0; + u4_bits = ih264d_get_bits_h264(ps_bitstrm, 8); + while(0xff == u4_bits) + { + u4_bits = ih264d_get_bits_h264(ps_bitstrm, 8); + ui4_payload_size += 255; + } + ui4_payload_size += u4_bits; + + i4_status = ih264d_parse_sei_payload(ps_bitstrm, ui4_payload_type, + ui4_payload_size, ps_dec); + if(i4_status == -1) + { + i4_status = 0; + break; + } + + if(i4_status != OK) + return i4_status; + + if(ih264d_check_byte_aligned(ps_bitstrm) == 0) + { + u4_bits = ih264d_get_bit_h264(ps_bitstrm); + if(0 == u4_bits) + { + H264_DEC_DEBUG_PRINT("\nError in parsing SEI message"); + } + while(0 == ih264d_check_byte_aligned(ps_bitstrm)) + { + u4_bits = ih264d_get_bit_h264(ps_bitstrm); + if(u4_bits) + { + H264_DEC_DEBUG_PRINT("\nError in parsing SEI message"); + } + } + } + } + while(ps_bitstrm->u4_ofst < ps_bitstrm->u4_max_ofst); + return (i4_status); +} + diff --git a/decoder/ih264d_sei.h b/decoder/ih264d_sei.h new file mode 100755 index 0000000..5033740 --- /dev/null +++ b/decoder/ih264d_sei.h @@ -0,0 +1,91 @@ +/****************************************************************************** + * + * 
Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* */ +/* File Name : ih264d_sei.h */ +/* */ +/* Description : This file contains routines to parse SEI NAL's */ +/* */ +/* List of Functions : <List the functions defined in this file> */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 25 05 2005 NS Draft */ +/* */ +/*****************************************************************************/ + +#ifndef _IH264D_SEI_H_ +#define _IH264D_SEI_H_ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_bitstrm.h" +#include "ih264d_structs.h" + +#define SEI_BUF_PERIOD 0 +#define SEI_PIC_TIMING 1 +#define SEI_PAN_SCAN_RECT 2 +#define SEI_FILLER 3 +#define SEI_UD_REG_T35 4 +#define SEI_UD_UN_REG 5 +#define SEI_RECOVERY_PT 6 +#define SEI_DEC_REF_MARK 7 +#define SEI_SPARE_PIC 8 +#define SEI_SCENE_INFO 9 +#define SEI_SUB_SEQN_INFO 10 +#define SEI_SUB_SEQN_LAY_CHAR 11 +#define SEI_SUB_SEQN_CHAR 12 +#define SEI_FULL_FRAME_FREEZE 13 +#define SEI_FULL_FRAME_FREEZE_REL 14 +#define SEI_FULL_FRAME_SNAP_SHOT 15 +#define 
SEI_PROG_REF_SEGMENT_START 16 +#define SEI_PROG_REF_SEGMENT_END 17 +#define SEI_MOT_CON_SLICE_GRP_SET 18 +/* Declaration of dec_struct_t to avoid CCS compilation Error */ +struct _DecStruct; +WORD32 ih264d_parse_sei_message(struct _DecStruct *ps_dec, + dec_bit_stream_t *ps_bitstrm); +typedef struct +{ + UWORD8 u1_seq_parameter_set_id; + UWORD32 u4_initial_cpb_removal_delay; + UWORD32 u4_nitial_cpb_removal_delay_offset; + +} buf_period_t; + +struct _sei +{ + UWORD8 u1_seq_param_set_id; + buf_period_t s_buf_period; + UWORD8 u1_pic_struct; + UWORD16 u2_recovery_frame_cnt; + UWORD8 u1_exact_match_flag; + UWORD8 u1_broken_link_flag; + UWORD8 u1_changing_slice_grp_idc; + UWORD8 u1_is_valid; +}; +typedef struct _sei sei; +#endif /* _IH264D_SEI_H_ */ + diff --git a/decoder/ih264d_structs.h b/decoder/ih264d_structs.h new file mode 100755 index 0000000..110f71d --- /dev/null +++ b/decoder/ih264d_structs.h @@ -0,0 +1,1582 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +#ifndef _IH264D_STRUCTS_H_ +#define _IH264D_STRUCTS_H_ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "iv.h" +#include "ivd.h" + +#include "ih264d_transfer_address.h" +#include "ih264d_defs.h" +#include "ih264d_defs.h" +#include "ih264d_bitstrm.h" +#include "ih264d_debug.h" +#include "ih264d_dpb_manager.h" +/* includes for CABAC */ +#include "ih264d_cabac.h" +#include "ih264d_dpb_manager.h" + +#include "ih264d_vui.h" +#include "ih264d_sei.h" +#include "iv.h" +#include "ivd.h" + +#include "ih264_weighted_pred.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" + + +/** Number of Mb's whoose syntax will be read */ +/************************************************************/ +/* MB_GROUP should be a multiple of 2 */ +/************************************************************/ +#define PARSE_MB_GROUP_4 4 + +/* MV_SCRATCH_BUFS assumed to be pow(2) */ +#define MV_SCRATCH_BUFS 4 + +#define LEFT_MB_PIXELS 4 +#define LEFT_MB_PIXELS_Y_FRM_BOT 64 /* 4 * 16 */ +#define LEFT_MB_PIXELS_CR_FRM_BOT 32 /* 4 * 8 */ + +#define TOP_FIELD_ONLY 0x02 +#define BOT_FIELD_ONLY 0x01 + +#define MAX_REF_BUF_SIZE (3776*2*2) + +struct _DecStruct; +struct _DecMbInfo; + + +#define NUM_INT_G_TABLE ((UWORD32) (sigcoeff_ctxtinc_field8x8 + 1)) +#define NUM_EXT_G_TABLE ((UWORD32) (ITTIAM_LOGO_V_BUF_T + 1)) + +typedef enum +{ + MB_TYPE_SI_SLICE = 0, + MB_TYPE_I_SLICE = 3, + MB_SKIP_FLAG_P_SLICE = 11, + MB_TYPE_P_SLICE = 14, + SUB_MB_TYPE_P_SLICE = 21, + MB_SKIP_FLAG_B_SLICE = 24, + MB_TYPE_B_SLICE = 27, + SUB_MB_TYPE_B_SLICE = 36, + MVD_X = 40, + MVD_Y = 47, + REF_IDX = 54, + MB_QP_DELTA = 60, + INTRA_CHROMA_PRED_MODE = 64, + PREV_INTRA4X4_PRED_MODE_FLAG = 68, + REM_INTRA4X4_PRED_MODE = 69, + MB_FIELD_DECODING_FLAG = 70, + CBP_LUMA = 73, + CBP_CHROMA = 
77, + CBF = 85, + SIGNIFICANT_COEFF_FLAG_FRAME = 105, + SIGNIFICANT_COEFF_FLAG_FLD = 277, + LAST_SIGNIFICANT_COEFF_FLAG_FRAME = 166, + LAST_SIGNIFICANT_COEFF_FLAG_FLD = 338, + COEFF_ABS_LEVEL_MINUS1 = 227, + + /* High profile related Syntax element CABAC offsets */ + TRANSFORM_SIZE_8X8_FLAG = 399, + SIGNIFICANT_COEFF_FLAG_8X8_FRAME = 402, + LAST_SIGNIFICANT_COEFF_FLAG_8X8_FRAME = 417, + COEFF_ABS_LEVEL_MINUS1_8X8 = 426, + SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 436, + LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 451 + +} cabac_table_num_t; + +typedef enum +{ + SIG_COEFF_CTXT_CAT_0_OFFSET = 0, + SIG_COEFF_CTXT_CAT_1_OFFSET = 15, + SIG_COEFF_CTXT_CAT_2_OFFSET = 29, + SIG_COEFF_CTXT_CAT_3_OFFSET = 44, + SIG_COEFF_CTXT_CAT_4_OFFSET = 47, + SIG_COEFF_CTXT_CAT_5_OFFSET = 0, + COEFF_ABS_LEVEL_CAT_0_OFFSET = 0, + COEFF_ABS_LEVEL_CAT_1_OFFSET = 10, + COEFF_ABS_LEVEL_CAT_2_OFFSET = 20, + COEFF_ABS_LEVEL_CAT_3_OFFSET = 30, + COEFF_ABS_LEVEL_CAT_4_OFFSET = 39, + COEFF_ABS_LEVEL_CAT_5_OFFSET = 0 +} cabac_blk_cat_offset_t; + +typedef enum +{ + CABAC_IPBMB_LD_ADRS_T, + CABAC_IPBMB_LD_SZ_T, + CAVLC_IPBMB_LD_ADRS_T, + CAVLC_IPBMB_LD_SZ_T, + PARSE_IPBMB_RUN_ADRS_T, + + MVP_MBAFF_LD_ADRS_T, + MVP_MBAFF_LD_SZ_T, + MVP_NON_MBAFF_LD_ADRS_T, + MVP_NON_MBAFF_LD_SZ_T, + MVPRED_RUN_ADRS_T, + + B_REF_DMA_LD_ADRS_T, + B_REF_DMA_LD_SZ_T, + P_REF_DMA_LD_ADRS_T, + P_REF_DMA_LD_SZ_T, + REF_DMA_RUN_ADRS_T, + + SP_DRCT_LD_ADRS_T, + SP_DRCT_LD_SZ_T, + TMP_DRCT_LD_ADRS_T, + TMP_DRCT_LD_SZ_T, + B_SKIP_RUN_ADRS_T, + + DEC_DEBLK_RUN_ADRS_T, + H264_DBLK_LD_ADRS_T, + H264_DBLK_LD_SZ_T, + H264_DEC_LD_ADRS_T, + + /* + * (H264_DEC_LD_SZ_T + 1) will be considered as the end of this table + * new members to be added before this + */ + H264_DEC_LD_SZ_T +} code_overlay_tab_t; + +/** Structure for the MV bank */ +typedef struct _mv_pred_t +{ + WORD16 i2_mv[4]; /** 0- mvFwdX, 1- mvFwdY, 2- mvBwdX, 3- mvBwdY */ + WORD8 i1_ref_frame[2]; + + UWORD8 u1_col_ref_pic_idx; /** Idx into the pic buff array */ + UWORD8 u1_pic_type; 
/** Idx into the pic buff array */ + +} mv_pred_t; + +typedef struct +{ + WORD32 i4_mv_indices[16]; + WORD8 i1_submb_num[16]; + WORD8 i1_partitionsize[16]; + WORD8 i1_num_partitions; + WORD8 u1_vert_mv_scale; + UWORD8 u1_col_zeroflag_change; +} directmv_t; + +typedef struct pic_buffer_t +{ + /**Different components of the picture */ + UWORD8 *pu1_buf1; + UWORD8 *pu1_buf2; + UWORD8 *pu1_buf3; + UWORD16 u2_disp_width; /** Width of the display luma frame in pixels */ + UWORD16 u2_disp_height; /** Height of the display luma frame in pixels */ + UWORD32 u4_time_stamp; /** Time at which frame has to be displayed */ + UWORD16 u2_frm_wd_y; /** Width of the luma frame in pixels */ + UWORD16 u2_frm_wd_uv; /** Width of the chroma frame */ + UWORD16 u2_frm_ht_y; /** Height of the luma frame in pixels */ + UWORD16 u2_frm_ht_uv; /** Height of the chroma frame */ + /* Upto this is resembling the structure IH264DEC_DispUnit */ + + /* If any member is to be added, add below this */ + + /* u4_ofst from start of picture buffer to display position for Y buffer */ + UWORD16 u2_crop_offset_y; + + /* u4_ofst from start of picture buffer to display position for UV buffer */ + UWORD16 u2_crop_offset_uv; + + UWORD8 u1_is_short; /** (1: short 0: long) term ref pic */ + UWORD8 u1_pic_type; /** frame / field / complementary field pair */ + UWORD8 u1_pic_buf_id; /** Idx into the picBufAPI array */ + UWORD8 u1_mv_buf_id; + WORD32 i4_seq; + UWORD8 *pu1_col_zero_flag; + mv_pred_t *ps_mv; /** Pointer to the MV bank array */ + WORD32 i4_poc; /** POC */ + WORD32 i4_pic_num; + WORD32 i4_frame_num; + WORD32 i4_top_field_order_cnt; /** TopPOC */ + WORD32 i4_bottom_field_order_cnt; /** BottomPOC */ + WORD32 i4_avg_poc; /** minPOC */ + UWORD8 u1_picturetype; /*Same as u1_pic_type..u1_pic_type gets overwritten whereas + this doesnot get overwritten ...stores the pictype of + frame/complementary field pair/ mbaff */ + UWORD8 u1_long_term_frm_idx; + UWORD8 u1_long_term_pic_num; + UWORD32 u4_pack_slc_typ; /* 
It will contain information about types of slices */ + + /* ! */ + UWORD32 u4_ts; + UWORD8 u1_pic_struct;/* Refer to SEI table D-1 */ + +} pic_buffer_t; + +typedef struct +{ + void *u4_add[4]; +} neighbouradd_t; + +typedef struct +{ + const UWORD8 *pu1_inv_scan; + void *pv_table[6]; +} cavlc_cntxt_t; + +/** + ************************************************************************ + * \file ih264d_structs.h + * + * \brief + * Structures used in the H.264 decoder + * + * \date + * 18/11/2002 + * + * \author Sriram Sethuraman + * + ************************************************************************ + */ + +/** + * Structure to represent a MV Bank buffer and col flag + */ +typedef struct +{ + /** + * Pointer to buffer that holds col flag. + */ + void *pv_col_zero_flag; + + /** + * Pointer to buffer that holds mv_pred + */ + void *pv_mv; + + }col_mv_buf_t; + + +/* Note the i4_size of this structure is hardcoded in arm_default_weighted_Pred.s as 0x3C. + * ADD r0,r0,#0x3C and so on.. + * If there is a change in i4_size update above file accordingly. 
+ */ +typedef struct +{ + UWORD8 u1_mc_addr_ofst; /** Offset in bytes relative to pu1_dma_dest_addr */ + UWORD8 u1_dydx; /** 4*dy + dx for Y comp / 8*dy + dx for UV comp */ + UWORD8 u1_is_bi_direct; /** 1: is bi-direct 0: forward / backward only */ + UWORD8 u1_wght_pred_type; /** 0-default 1-singleWeighted 2-BiWeighted */ + WORD8 i1_mb_partwidth; /** Width of MB partition */ + WORD8 i1_mb_partheight; /** Height of MB partition */ + WORD8 i1_mc_wd; /** Number of bytes in a DMA stride */ + WORD8 i1_dma_ht; /** Number of strides */ + + WORD8 i1_pod_ht; /** Flag specifying height of pad on demand */ + /** 0 (No pod) -ve(Top pod) +ve(Bottom pod) */ + UWORD16 u2_dst_stride; /** Stride value of the destination */ + UWORD16 u2_u1_ref_buf_wd; /** Width of the ref buffer */ + UWORD16 u2_frm_wd; + UWORD16 u2_dummy; + + UWORD8 *u1_pi1_wt_ofst_rec_v; /** Pointer to packed weight and u4_ofst */ + UWORD8 *pu1_rec_y_u; /** MB partition address in row buffer */ + UWORD8 *pu1_dma_dest_addr; /** Destination address for DMA transfer */ + UWORD8 *pu1_y_ref; + UWORD8 *pu1_u_ref; + UWORD8 *pu1_v_ref; + + UWORD8 *pu1_pred; + UWORD8 *pu1_pred_u; + UWORD8 *pu1_pred_v; + UWORD8 u1_dma_wd_y; + UWORD8 u1_dma_ht_y; + UWORD8 u1_dma_wd_uv; + UWORD8 u1_dma_ht_uv; +} pred_info_t; + +typedef struct +{ + UWORD32 *pu4_wt_offst; + WORD16 i2_mv[2]; + + /***************************************************/ + /*packing information i1_size_pos_info */ + /* bit 1:0 -> X position in terms of (4x4) units */ + /* bit 3:2 -> Y position in terms of (4x4) units */ + /* bit 5:4 -> PU width 0:4,1:8,2:16 */ + /* bit 7:6 -> PU height 0:4,1:8,2:16 */ + /***************************************************/ + WORD8 i1_size_pos_info; + + /***************************************************/ + /*packing information ref idx info */ + /* bit 5:0 ->ref_idx */ + /* bit 6:7 -> 0:l0,1:l1,2:bipred */ + /***************************************************/ + WORD8 i1_ref_idx_info; + + WORD8 i1_buf_id; + + + UWORD8 u1_pic_type; 
/** frame /top field/bottom field/mbaff / complementary field pair */ + +}pred_info_pkd_t; +/*! Sequence level parameters */ + +typedef struct +{ + UWORD8 u1_seq_parameter_set_id; /** id for the seq par set 0-31 */ + UWORD8 u1_is_valid; /** is Seq Param set valid */ + + UWORD16 u2_frm_wd_in_mbs; /** Frame width expressed in MB units */ + UWORD16 u2_frm_ht_in_mbs; /** Frame height expressed in MB units */ + + /* Following are derived from the above two */ + UWORD16 u2_fld_ht_in_mbs; /** Field height expressed in MB units */ + UWORD16 u2_max_mb_addr; /** Total number of macroblocks in a coded picture. NOTE(review): same text as u2_total_num_of_mbs below; presumably this one is the highest MB address instead - confirm */ + UWORD16 u2_total_num_of_mbs; /** Total number of macroblocks in a coded picture */ + UWORD32 u4_fld_ht; /** field height */ + UWORD32 u4_cwidth; /** chroma width */ + UWORD32 u4_chr_frm_ht; /** chroma height */ + UWORD32 u4_chr_fld_ht; /** chroma field height */ + UWORD8 u1_mb_aff_flag; /** 0 - no mb_aff; 1 - uses mb_aff */ + + UWORD8 u1_profile_idc; /** profile value */ + UWORD8 u1_level_idc; /** level value */ + + /* high profile related syntax elements */ + WORD32 i4_chroma_format_idc; + WORD32 i4_bit_depth_luma_minus8; + WORD32 i4_bit_depth_chroma_minus8; + WORD32 i4_qpprime_y_zero_transform_bypass_flag; + WORD32 i4_seq_scaling_matrix_present_flag; + UWORD8 u1_seq_scaling_list_present_flag[8]; + UWORD8 u1_use_default_scaling_matrix_flag[8]; + WORD16 i2_scalinglist4x4[6][16]; /* 6 lists x 16 entries */ + WORD16 i2_scalinglist8x8[2][64]; /* 2 lists x 64 entries */ + UWORD8 u1_more_than_one_slice_group_allowed_flag; + UWORD8 u1_arbitrary_slice_order_allowed_flag; + UWORD8 u1_redundant_slices_allowed_flag; + UWORD8 u1_bits_in_frm_num; /** Number of bits in frame num */ + UWORD16 u2_u4_max_pic_num_minus1; /** Maximum frame num minus 1 */ + UWORD8 u1_pic_order_cnt_type; /** 0 - 2 indicates the method to code picture order count */ + UWORD8 u1_log2_max_pic_order_cnt_lsb_minus; /* NOTE(review): name looks truncated; the H.264 syntax element is log2_max_pic_order_cnt_lsb_minus4 - confirm what value is stored */ + WORD32 i4_max_pic_order_cntLsb; + UWORD8 u1_num_ref_frames_in_pic_order_cnt_cycle; + UWORD8 u1_delta_pic_order_always_zero_flag; + WORD32 
i4_ofst_for_non_ref_pic; + WORD32 i4_ofst_for_top_to_bottom_field; + WORD32 i4_ofst_for_ref_frame[MAX_NUM_REF_FRAMES_OFFSET]; + UWORD8 u1_num_ref_frames; + UWORD8 u1_gaps_in_frame_num_value_allowed_flag; + UWORD8 u1_frame_mbs_only_flag; /** 1 - frame only; 0 - field/frame pic */ + UWORD8 u1_direct_8x8_inference_flag; + UWORD8 u1_vui_parameters_present_flag; + vui_t s_vui; +} dec_seq_params_t; + /* Small set of sequence-level values; most member names match fields of dec_seq_params_t. NOTE(review): per the type name, presumably a snapshot of the previous sequence's parameters - confirm. */ +typedef struct +{ + UWORD16 u2_frm_wd_in_mbs; /** Frame width expressed in MB units */ + UWORD16 u2_frm_ht_in_mbs; /** Frame height expressed in MB units */ + UWORD8 u1_frame_mbs_only_flag; /** 1 - frame only; 0 - field/frame pic */ + UWORD8 u1_profile_idc; /** profile value */ + UWORD8 u1_level_idc; /** level value */ + UWORD8 u1_direct_8x8_inference_flag; + UWORD8 u1_eoseq_pending; +} prev_seq_params_t; + +/** Picture level parameters */ +typedef struct +{ + dec_seq_params_t *ps_sps; /** applicable seq. parameter set */ + + /* High profile related syntax elements */ + WORD32 i4_transform_8x8_mode_flag; + WORD32 i4_pic_scaling_matrix_present_flag; + UWORD8 u1_pic_scaling_list_present_flag[8]; + UWORD8 u1_pic_use_default_scaling_matrix_flag[8]; + WORD16 i2_pic_scalinglist4x4[6][16]; /* 6 lists x 16 entries */ + WORD16 i2_pic_scalinglist8x8[2][64]; /* 2 lists x 64 entries */ + WORD8 i1_second_chroma_qp_index_offset; + + UWORD32 u4_slice_group_change_rate; + UWORD8 *pu1_slice_groupmb_map; /** MB map with slice membership labels */ + UWORD8 u1_pic_parameter_set_id; /** id for the picture par set 0-255*/ + UWORD8 u1_entropy_coding_mode; /** Entropy coding : 0-VLC; 1 - CABAC */ + UWORD8 u1_num_slice_groups; /** Number of slice groups */ + UWORD8 u1_pic_init_qp; /** Initial QPY for the picture {-26,25}. NOTE(review): the field is unsigned, so the stored value presumably already includes the +26 bias - confirm */ + WORD8 i1_chroma_qp_index_offset; /** Chroma QP offset w.r.t QPY {-12,12} */ + UWORD8 u1_dblk_filter_parms_flag; /** Slice layer has deblocking filter parameters */ + UWORD8 u1_constrained_intra_pred_flag; /** Constrained intra prediction flag */ + UWORD8 u1_redundant_pic_cnt_present_flag; /** Redundant_pic_cnt is in slices 
using this PPS */ + UWORD8 u1_pic_order_present_flag; /** Pic order present flag */ + UWORD8 u1_num_ref_idx_lx_active[2]; /** Maximum reference picture index in the reference list 0 : range [1 - 15]. NOTE(review): the array has two entries, presumably one per reference list (L0, L1) - confirm */ + UWORD8 u1_wted_pred_flag; + UWORD8 u1_wted_bipred_idc; + UWORD8 u1_pic_init_qs; + UWORD8 u1_deblocking_filter_parameters_present_flag; + UWORD8 u1_vui_pic_parameters_flag; + UWORD8 u1_mb_slice_group_map_type; + UWORD8 u1_slice_group_change_direction_flag; + UWORD8 u1_frame_cropping_flag; + UWORD8 u1_frame_cropping_rect_left_ofst; + UWORD8 u1_frame_cropping_rect_right_ofst; + UWORD8 u1_frame_cropping_rect_top_ofst; + UWORD8 u1_frame_cropping_rect_bottom_ofst; + void * pv_codec_handle; /* For Error Handling */ + WORD32 i4_top_field_order_cnt; + WORD32 i4_bottom_field_order_cnt; + WORD32 i4_avg_poc; + UWORD8 u1_is_valid; /** is Pic Param set valid */ +} dec_pic_params_t; + +/** Picture Order Count Parameters */ +typedef struct +{ + WORD32 i4_pic_order_cnt_lsb; + WORD32 i4_pic_order_cnt_msb; + WORD32 i4_delta_pic_order_cnt_bottom; + WORD32 i4_delta_pic_order_cnt[2]; + WORD32 i4_prev_frame_num_ofst; + UWORD8 u1_mmco_equalto5; /* NOTE(review): presumably set when an MMCO command equal to 5 was seen - confirm */ + UWORD8 u1_bot_field; + UWORD16 u2_frame_num; + WORD32 i4_top_field_order_count; + WORD32 i4_bottom_field_order_count; +} pocstruct_t; + +/*****************************************************************************/ +/* parse_mb_pers_info contains necessary mb info data required persistently */ +/* in the form of top and left neighbours. 
*/ +/*****************************************************************************/ +typedef struct +{ + void *u4_pic_addrress[4]; /* picture address for BS calc */ + WORD8 pi1_intrapredmodes[4]; /* calc Intra pred modes */ + UWORD8 pu1_nnz_y[4]; + UWORD8 pu1_nnz_uv[4]; + UWORD8 u1_mb_fld; + UWORD8 u1_mb_type; + UWORD16 u2_luma_csbp; /* Luma csbp used for BS calc */ + UWORD8 u1_tran_form8x8; +} mb_neigbour_params_t; + +/* This info is required for decoding purposes except Deblocking */ +typedef struct _DecMbInfo +{ + UWORD8 u1_mb_type; /** macroblock type: I/P/B/SI/SP */ + UWORD8 u1_chroma_pred_mode; + UWORD8 u1_cbp; + UWORD8 u1_mb_mc_mode; /** 16x16, 2 16x8, 2 8x16, 4 8x8 */ + UWORD8 u1_topmb; /** top Mb flag */ + UWORD8 u1_mb_ngbr_availablity; + UWORD8 u1_end_of_slice; + UWORD8 u1_mb_field_decodingflag; + UWORD8 u1_topleft_mb_fld; + UWORD8 u1_topleft_mbtype; + WORD8 i1_offset; + UWORD8 u1_Mux; + UWORD8 u1_qp_div6; + UWORD8 u1_qp_rem6; + UWORD8 u1_qpc_div6; + UWORD8 u1_qpcr_div6; + UWORD8 u1_qpc_rem6; + UWORD8 u1_qpcr_rem6; /* NOTE(review): the *_div6 / *_rem6 names suggest QP / 6 and QP % 6 for three QP values (qp, qpc, qpcr) - confirm */ + UWORD8 u1_tran_form8x8; + UWORD8 u1_num_pred_parts; + UWORD8 u1_yuv_dc_block_flag; + UWORD16 u2_top_right_avail_mask; + UWORD16 u2_top_left_avail_mask; + UWORD16 u2_luma_csbp; /** Coded 4x4 Sub Block Pattern */ + UWORD16 u2_chroma_csbp; /** Coded 4x4 Sub Block Pattern */ + UWORD16 u2_mbx; + UWORD16 u2_mby; + UWORD16 u2_mask[2]; + + UWORD32 u4_pred_info_pkd_idx; + + mb_neigbour_params_t *ps_left_mb; + mb_neigbour_params_t *ps_top_mb; + mb_neigbour_params_t *ps_top_right_mb; + mb_neigbour_params_t *ps_curmb; /* the four pointers above reference mb_neigbour_params_t records; per the member names: left, top, top-right and current MB */ +} dec_mb_info_t; + + +/** Slice level parameters */ +typedef struct +{ + dec_pic_params_t *ps_pps; /** PPS used */ + WORD32 i4_delta_pic_order_cnt[2]; + WORD32 i4_poc; /** Pic order cnt of picture to which slice belongs*/ + UWORD32 u4_idr_pic_id; /** IDR pic ID */ + UWORD16 u2_first_mb_in_slice; /** Address of first MB in slice*/ + UWORD16 u2_frame_num; /** Frame number from prev IDR pic */ + + UWORD8 u1_mbaff_frame_flag; /** Mb adaptive 
frame field u4_flag */ + UWORD8 u1_field_pic_flag; /** Field picture or not */ + UWORD8 u1_bottom_field_flag; /** If slice belongs to bot field pic */ + UWORD8 u1_slice_type; /** I/P/B/SI/SP */ + WORD32 i4_pic_order_cnt_lsb; /** Picture Order Count */ + UWORD8 u1_slice_qp; /** Add slice_qp_delta to pic_init_QP */ + UWORD8 u1_disable_dblk_filter_idc; /** 0-dblk all edges; 1 - suppress; 2 - suppress only edges */ + WORD8 i1_slice_alpha_c0_offset; /** dblk: alpha and C0 table offset {-12,12}*/ + WORD8 i1_slice_beta_offset; /** dblk: beta table offset {-12, 12}*/ + UWORD8 u1_sp_for_switch_flag; + UWORD8 u1_no_output_of_prior_pics_flag; + UWORD8 u1_long_term_reference_flag; + UWORD8 u1_num_ref_idx_lx_active[2]; + UWORD8 u1_cabac_init_idc; /** cabac_init_idc */ + UWORD8 u1_num_ref_idx_active_override_flag; + UWORD8 u1_direct_spatial_mv_pred_flag; + WORD32 (*pf_decodeDirect)(struct _DecStruct *ps_dec, + UWORD8 u1_wd_x, + dec_mb_info_t *ps_cur_mb_info, + UWORD8 u1_mb_num); /* callback: takes the decoder context, a width argument, the current MB info and an MB number; returns WORD32 */ + UWORD8 u1_redundant_pic_cnt; + WORD8 i1_slice_qs_delta; + UWORD8 u1_nal_ref_idc; /** NAL ref idc of the Slice NAL unit */ + UWORD8 u1_nal_unit_type; /** NAL unit type of the Slice NAL */ + UWORD8 u1_direct_8x8_inference_flag; + UWORD8 u1_mmco_equalto5; /** any of the MMCO command equal to 5 */ + UWORD8 u1_pic_order_cnt_type; + pocstruct_t s_POC; + /* DataStructures required for weighted prediction */ + UWORD16 u2_log2Y_crwd; /** Packed luma and chroma log2_weight_denom */ + /* [list0/list1]:[ref pics index]:[0-Y 1-Cb 2-Cr] [weight/offset], + weights and offsets are signed numbers, since they are packed, it is defined + unsigned. LSB byte : weight and MSB byte: offset */ + UWORD32 u4_wt_ofst_lx[2][MAX_REF_BUFS][3]; + void * pv_codec_handle; /* For Error Handling */ + UWORD8 u1_end_of_frame_signal; + + /* This is used when reordering is done in Forward or */ + /* backward lists. 
This is because reordering can point */ + /* to any valid entry in initial list irrespective of */ + /* num_ref_idx_active which could be overwritten using */ + /* ref_idx_reorder_flag */ + UWORD8 u1_initial_list_size[2]; + UWORD32 u4_mbs_in_slice; +} dec_slice_params_t; + + +typedef struct +{ + UWORD8 u1_mb_type; /* Bit representations, X- reserved */ + /** |Field/Frame|X|X|X|X|Bslice flag|PRED_NON_16x16 flag |Intra Mbflag| */ + UWORD8 u1_mb_qp; + UWORD8 u1_deblocking_mode; /** dblk: Mode [ NO / NO TOP / NO LEFT] filter */ + WORD8 i1_slice_alpha_c0_offset; /** dblk: alpha and C0 table offset {-12,12}*/ + WORD8 i1_slice_beta_offset; /** dblk: beta table offset {-12, 12}*/ + UWORD8 u1_single_call; + UWORD8 u1_topmb_qp; + UWORD8 u1_left_mb_qp; + UWORD32 u4_bs_table[10]; /* Boundary strength */ + +} deblk_mb_t; + /* MB type and QP pair. NOTE(review): per the type name, presumably kept for a neighbouring MB during deblocking - confirm. */ +typedef struct +{ + UWORD8 u1_mb_type; + UWORD8 u1_mb_qp; +} deblkmb_neighbour_t; + /* Per-MB limits and PART_* codes. MAX_REFIDX_INFO_PER_MB sizes the arrays in parse_pmbarams_t below. NOTE(review): the PART_* values are presumably what u1_is_direct holds - confirm. */ +#define MAX_MV_RESIDUAL_INFO_PER_MB 32 +#define MAX_REFIDX_INFO_PER_MB 4 +#define PART_NOT_DIRECT 0 +#define PART_DIRECT_8x8 1 +#define PART_DIRECT_16x16 2 +typedef struct +{ + UWORD8 u1_is_direct; + UWORD8 u1_pred_mode; + UWORD8 u1_sub_mb_num; + UWORD8 u1_partheight; + UWORD8 u1_partwidth; +} parse_part_params_t; + /* Per-MB parse data: an intra flag, a partition count and MAX_REFIDX_INFO_PER_MB-sized arrays of weight/offset pointers, reference indices and col info. */ +typedef struct +{ + UWORD8 u1_isI_mb; + UWORD8 u1_num_part; + UWORD32 *pu4_wt_offst[MAX_REFIDX_INFO_PER_MB]; + WORD8 i1_ref_idx[2][MAX_REFIDX_INFO_PER_MB]; /* NOTE(review): the first dimension presumably selects list 0 / list 1 - confirm */ + UWORD8 u1_col_info[MAX_REFIDX_INFO_PER_MB]; +} parse_pmbarams_t; + +typedef struct +{ + UWORD8 *pu1_mb_y; /* pointer to N-Mb pad buffer Y (Horz) */ + UWORD8 *pu1_mb_u; /* pointer to N-Mb pad buffer U (Horz) */ + UWORD8 *pu1_mb_v; /* pointer to N-Mb pad buffer V (Horz) */ + UWORD8 *pu1_row_y; /* pointer to row pad buffer Y (Vert) */ + UWORD8 *pu1_row_u; /* pointer to row pad buffer U (Vert) */ + UWORD8 *pu1_row_v; /* pointer to row pad buffer V (Vert) */ + UWORD8 u1_vert_pad_top; /* flip-flop flag remembering pad area (Vert) */ + UWORD8 u1_vert_pad_bot; /* flip-flop u4_flag remembering pad area 
(Vert) */ + UWORD8 u1_horz_pad; /* flip-flop flag remembering pad area. NOTE(review): the original comment said (Vert); given the member name this is presumably (Horz) - confirm */ + UWORD8 u1_pad_len_y_v; /* vertical pad amount for luma */ + UWORD8 u1_pad_len_cr_v; /* vertical pad amount for chroma */ +} pad_mgr_t; + /* Four byte-sized fields: slice type, entropy coding type, MBAFF frame flag and B-direct flag (per the member names). */ +typedef struct code_overlay_ctxt +{ + UWORD8 u1_pb_slice_type; + UWORD8 u1_entropy_coding_type; + UWORD8 u1_mbaff_frame_flag; + UWORD8 u1_b_direct_flag; +} code_overlay_ctxt_t; + +#define ACCEPT_ALL_PICS (0x00) +#define REJECT_CUR_PIC (0x01) +#define REJECT_PB_PICS (0x02) + +#define PIC_TYPE_UNKNOWN (0xFF) +#define PIC_TYPE_I (0x00) +#define SYNC_FRM_DEFAULT (0xFFFFFFFF) +#define INIT_FRAME (0xFFFFFF) /* NOTE(review): six hex digits, unlike the eight of SYNC_FRM_DEFAULT - confirm this is intended */ + /* Error-status record. NOTE(review): the ACCEPT_ / REJECT_ and PIC_TYPE_ macros above are presumably the values stored in u1_err_flag and u1_cur_pic_type - confirm. */ +typedef struct dec_err_status_t +{ + UWORD8 u1_cur_pic_type; + UWORD8 u1_pic_aud_i; + UWORD8 u1_err_flag; + UWORD32 u4_frm_sei_sync; + UWORD32 u4_cur_frm; +} dec_err_status_t; + +/**************************************************************************/ +/* Structure holds information about all high profile toolsets */ +/**************************************************************************/ +typedef struct +{ + /*****************************************/ + /* variables required for scaling */ + /*****************************************/ + UWORD8 u1_scaling_present; + WORD16 *pi2_scale_mat[8]; /* eight pointers to WORD16 data; NOTE(review): presumably one per scaling list (6 of 4x4 + 2 of 8x8, matching the arrays below) - confirm */ + + /*************************************************/ + /* scaling matrices for frame macroblocks after */ + /* inverse scanning */ + /*************************************************/ + WORD16 i2_scalinglist4x4[6][16]; + WORD16 i2_scalinglist8x8[2][64]; + + + /*****************************************/ + /* variables required for transform8x8 */ + /*****************************************/ + UWORD8 u1_transform8x8_present; + UWORD8 u1_direct_8x8_inference_flag; + /* temporary variable to get noSubMbPartSizeLessThan8x8Flag from ih264d_parse_bmb_non_direct_cavlc */ + UWORD8 u1_no_submb_part_size_lt8x8_flag; + + /* needed for inverse scanning */ + cavlc_cntxt_t s_cavlc_ctxt; + + /* contexts for the CABAC related parsing */ + bin_ctxt_model_t 
*ps_transform8x8_flag; + bin_ctxt_model_t *ps_sigcoeff_8x8_frame; + bin_ctxt_model_t *ps_last_sigcoeff_8x8_frame; + bin_ctxt_model_t *ps_coeff_abs_levelminus1; + bin_ctxt_model_t *ps_sigcoeff_8x8_field; + bin_ctxt_model_t *ps_last_sigcoeff_8x8_field; + +/* variables required for intra8x8 */ + +/* variables required for handling different Qp for Cb and Cr */ + +} high_profile_tools_t; + /* Describes one display frame as up to three component buffers. */ +typedef struct +{ + UWORD32 u4_num_bufs; /* Number of buffers in each display frame. 2 for 420SP and 3 for 420P and so on */ + void *buf[3]; /* Pointers to each of the components */ + UWORD32 u4_bufsize[3]; /* one entry per component, same shape as buf[]; NOTE(review): presumably the buffer size in bytes - confirm */ + UWORD32 u4_ofst[3]; /* one entry per component, same shape as buf[]; NOTE(review): presumably an offset into the buffer - confirm */ +} disp_buf_t; +typedef struct _dec_slice_struct +{ /* Per-slice state; every member is volatile-qualified. NOTE(review): presumably shared between threads - volatile alone gives no atomicity or ordering guarantees, confirm how access is synchronized. */ + volatile UWORD32 u4_first_mb_in_slice; + volatile UWORD32 u4_num_mbs_done_in_slice; + volatile UWORD32 slice_type; + volatile UWORD32 end_of_slice; + volatile UWORD32 slice_header_done; + volatile UWORD32 last_slice_in_frame; + volatile UWORD16 u2_log2Y_crwd; + volatile UWORD16 u2_error_flag; + volatile void **ppv_map_ref_idx_to_poc; /* note: here volatile qualifies the innermost pointed-to void, not this pointer member itself */ +} dec_slice_struct_t; + /* Three 32-bit values: a flag, a start row and a row count (per the member names). NOTE(review): presumably describes one part of a format-conversion job - confirm. */ +typedef struct +{ + UWORD32 u4_flag; + UWORD32 u4_start_y; + UWORD32 u4_num_rows_y; +} fmt_conv_part_t; + +/** + * Structure to hold coefficient info for a 4x4 transform + */ +typedef struct +{ + /** + * significant coefficient map (16 bits wide; presumably one bit per entry of ai2_level - confirm) + */ + UWORD16 u2_sig_coeff_map; + + /** + * holds coefficients + */ + WORD16 ai2_level[16]; +}tu_sblk4x4_coeff_data_t; + +/** + * Structure to hold coefficient info for a 8x8 transform + */ +typedef struct +{ + + /** + * significant coefficient map (2 x 32 bits; presumably one bit per entry of ai2_level - confirm) + */ + UWORD32 au4_sig_coeff_map[2]; + + /** + * holds coefficients + */ + WORD16 ai2_level[64]; +}tu_blk8x8_coeff_data_t; + +/** Aggregating structure that is globally available */ +typedef struct _DecStruct +{ + + /* Add below all other static memory allocations and pointers to items + that are dynamically allocated once per session */ + dec_bit_stream_t *ps_bitstrm; + dec_seq_params_t *ps_cur_sps; + dec_pic_params_t *ps_cur_pps; + dec_slice_params_t *ps_cur_slice; + + 
dec_pic_params_t *ps_pps; + dec_seq_params_t *ps_sps; + const UWORD16 *pu2_quant_scale_y; + const UWORD16 *pu2_quant_scale_u; + const UWORD16 *pu2_quant_scale_v; + UWORD16 u2_mbx; + UWORD16 u2_mby; + + UWORD16 u2_frm_wd_y; /** Width for luma buff */ + UWORD16 u2_frm_ht_y; /** Height for luma buff */ + UWORD16 u2_frm_wd_uv; /** Width for chroma buff */ + UWORD16 u2_frm_ht_uv; /** Height for chroma buff */ + UWORD16 u2_frm_wd_in_mbs; /** Frame width expressed in MB units */ + UWORD16 u2_frm_ht_in_mbs; /** Frame height expressed in MB units */ + WORD32 i4_submb_ofst; /** Offset in subMbs from the top left edge */ + /* Pointer to colocated Zero frame Image, will be used in B_DIRECT mode */ + /* colZeroFlag | // 0th bit + field_flag | // 1st bit + XX | // 2:3 bit don't cares + subMbMode | // 4:5 bit + MbMode | // 6:7 bit */ + + UWORD8 *pu1_col_zero_flag; + + UWORD16 u2_pic_wd; /** Width of the picture being decoded */ + UWORD16 u2_pic_ht; /** Height of the picture being decoded */ + + UWORD8 u1_first_nal_in_pic; + UWORD8 u1_mb_ngbr_availablity; + UWORD8 u1_ref_idxl0_active_minus1; + UWORD8 u1_qp; + UWORD8 u1_qp_y_div6; + UWORD8 u1_qp_u_div6; + UWORD8 u1_qp_y_rem6; + UWORD8 u1_qp_u_rem6; + + /*********************************/ + /* configurable mb-group numbers */ + /* very critical to the decoder */ + /*********************************/ + /************************************************************/ + /* MB_GROUP should be a multiple of 2 */ + /************************************************************/ + UWORD8 u1_recon_mb_grp; + UWORD8 u1_recon_mb_grp_pair; + /* Variables to handle Cabac */ + decoding_envirnoment_t s_cab_dec_env; /* < Structure for decoding_envirnoment_t */ + /* These things need to be updated at each MbLevel */ + WORD8 i1_next_ctxt_idx; /* < next Ctxt Index */ + UWORD8 u1_currB_type; + WORD8 i1_prev_mb_qp_delta; /* Prev MbQpDelta */ + UWORD8 u1_nal_unit_type; + + ctxt_inc_mb_info_t *p_ctxt_inc_mb_map; /* Pointer to ctxt_inc_mb_info_t map */ + 
ctxt_inc_mb_info_t *p_left_ctxt_mb_info; /* Pointer to left ctxt_inc_mb_info_t */ + ctxt_inc_mb_info_t *p_top_ctxt_mb_info; /* Pointer to top ctxt_inc_mb_info_t */ + ctxt_inc_mb_info_t *ps_curr_ctxt_mb_info; /* Pointer to current ctxt_inc_mb_info_t */ + ctxt_inc_mb_info_t *ps_def_ctxt_mb_info; /* Pointer to default ctxt_inc_mb_info_t */ + + /* mv contexts for mv decoding using cabac */ + //UWORD8 u1_top_mv_ctxt_inc[4][4]; + /* Dimensions for u1_left_mv_ctxt_inc_arr is [2][4][4] for Mbaff case */ + UWORD8 u1_left_mv_ctxt_inc_arr[2][4][4]; + UWORD8 (*pu1_left_mv_ctxt_inc)[4]; + + UWORD8 u1_sub_mb_num; + UWORD8 u1_B; /** if B slice u1_B = 1 else 0 */ + WORD16 i2_only_backwarddma_info_idx; + mv_pred_t *ps_mv; /** Pointer to the MV bank array */ + mv_pred_t *ps_mv_bank_cur; /** Pointer to the MV bank array */ + mv_pred_t s_default_mv_pred; /** Structure containing the default values + for MV predictor */ + + pred_info_t *ps_pred; /** Stores info to cfg MC */ + pred_info_t *ps_pred_start; + + UWORD32 u4_pred_info_idx; + pred_info_pkd_t *ps_pred_pkd; + pred_info_pkd_t *ps_pred_pkd_start; + UWORD32 u4_pred_info_pkd_idx; + UWORD8 *pu1_ref_buff; /** Destination buffer for DMAs */ + UWORD32 u4_dma_buf_idx; + + UWORD8 *pu1_y; + UWORD8 *pu1_u; + UWORD8 *pu1_v; + + WORD16 *pi2_y_coeff; + UWORD8 *pu1_inv_scan; + + /** + * Pointer frame level TU subblock coeff data + */ + void *pv_pic_tu_coeff_data; + + /** + * Pointer to TU subblock coeff data and number of subblocks and scan idx + * Incremented each time a coded subblock is processed + * + */ + void *pv_parse_tu_coeff_data; + + void *pv_proc_tu_coeff_data; + + WORD16 *pi2_coeff_data; + + cavlc_cntxt_t s_cavlc_ctxt; + + UWORD32 u4_n_leftY[2]; + UWORD32 u4_n_left_cr[2]; + UWORD32 u4_n_left_temp_y; + + UWORD8 pu1_left_nnz_y[4]; + UWORD8 pu1_left_nnz_uv[4]; + UWORD32 u4_n_left_temp_uv; + /***************************************************************************/ + /* Base pointer to all the cabac contexts */ + 
/***************************************************************************/ + bin_ctxt_model_t *p_cabac_ctxt_table_t; + + /***************************************************************************/ + /* cabac context pointers for every SE mapped into in p_cabac_ctxt_table_t */ + /***************************************************************************/ + bin_ctxt_model_t *p_mb_type_t; + bin_ctxt_model_t *p_mb_skip_flag_t; + bin_ctxt_model_t *p_sub_mb_type_t; + bin_ctxt_model_t *p_mvd_x_t; + bin_ctxt_model_t *p_mvd_y_t; + bin_ctxt_model_t *p_ref_idx_t; + bin_ctxt_model_t *p_mb_qp_delta_t; + bin_ctxt_model_t *p_intra_chroma_pred_mode_t; + bin_ctxt_model_t *p_prev_intra4x4_pred_mode_flag_t; + bin_ctxt_model_t *p_rem_intra4x4_pred_mode_t; + bin_ctxt_model_t *p_mb_field_dec_flag_t; + bin_ctxt_model_t *p_cbp_luma_t; + bin_ctxt_model_t *p_cbp_chroma_t; + bin_ctxt_model_t *p_cbf_t[NUM_CTX_CAT]; + bin_ctxt_model_t *p_significant_coeff_flag_t[NUM_CTX_CAT]; + bin_ctxt_model_t *p_coeff_abs_level_minus1_t[NUM_CTX_CAT]; + + UWORD32 u4_num_pmbair; /** MB pair number */ + mv_pred_t *ps_mv_left; /** Pointer to left motion vector bank */ + mv_pred_t *ps_mv_top_left; /** Pointer to top left motion vector bank */ + mv_pred_t *ps_mv_top_right; /** Pointer to top right motion vector bank */ + + UWORD8 *pu1_left_yuv_dc_csbp; + + /* c64x_map.inc takes care of only this part + If you change/add any members above this, + modify c64x_map.inc accordingly */ + + void **pp_ext_g_table_ptr; + + deblkmb_neighbour_t deblk_left_mb[2]; + deblkmb_neighbour_t *ps_deblk_top_mb; + neighbouradd_t (*ps_left_mvpred_addr)[2]; /* Left MvPred Address Ping Pong*/ +// neighbouradd_t *ps_topMvPredAdd; + + /***************************************************************************/ + /* Ref_idx contexts are stored in the following way */ + /* Array Idx 0,1 for reference indices in Forward direction */ + /* Array Idx 2,3 for reference indices in backward direction */ + 
/***************************************************************************/ + + /* Dimensions for u1_left_ref_ctxt_inc_arr is [2][4] for Mbaff:Top and Bot */ + WORD8 i1_left_ref_idx_ctx_inc_arr[2][4]; + WORD8 *pi1_left_ref_idx_ctxt_inc; + + /*************************************************************************/ + /* Arrangnment of DC CSBP */ + /* bits: b7 b6 b5 b4 b3 b2 b1 b0 */ + /* CSBP: x x x x x Vdc Udc Ydc */ + /*************************************************************************/ + /*************************************************************************/ + /* Points either to u1_yuv_dc_csbp_topmb or u1_yuv_dc_csbp_bot_mb */ + /*************************************************************************/ + UWORD8 u1_yuv_dc_csbp_topmb; + UWORD8 u1_yuv_dc_csbp_bot_mb; + + /* DMA SETUP */ + tfr_ctxt_t s_tran_addrecon_parse; + tfr_ctxt_t s_tran_addrecon; + + /* slice Header Simplification */ + UWORD8 u1_pr_sl_type; + UWORD8 u1_sl_typ_5_9; + WORD32 i4_frametype; + UWORD32 u4_app_disp_width; + WORD32 i4_error_code; + UWORD8 u1_first_pb_nal_in_pic; + UWORD32 u4_bitoffset; + + /* Variables added to handle field pics */ + + UWORD8 u1_second_field; + WORD32 i4_pic_type; + WORD32 i4_content_type; + WORD32 i4_decode_header; + WORD32 i4_header_decoded; + UWORD32 u4_total_frames_decoded; + + ctxt_inc_mb_info_t *ps_left_mb_ctxt_info; /* structure containing the left MB's + context info, incase of Mbaff */ + pocstruct_t s_prev_pic_poc; + pocstruct_t s_cur_pic_poc; + WORD32 i4_cur_display_seq; + WORD32 i4_prev_max_display_seq; + WORD32 i4_max_poc; + deblk_mb_t *ps_cur_deblk_mb; + + /* Pointers to local scratch buffers */ + deblk_mb_t *ps_deblk_pic; + + /* Pointers to Picture Buffers (Given by BufAPI Lib) */ + struct pic_buffer_t *ps_cur_pic; /** Pointer to Current picture buffer */ + + /* Scratch Picture Buffers (Given by BufAPI Lib) */ + struct pic_buffer_t s_cur_pic; + + /* Current Slice related information */ + volatile UWORD16 u2_cur_slice_num; + volatile UWORD16 
u2_cur_slice_num_dec_thread; + + /* Variables needed for Buffer API handling */ + UWORD8 u1_nal_buf_id; + UWORD8 u1_pic_buf_id; + UWORD8 u1_pic_bufs; + + WORD16 *pi2_pred1; //[441]; /** Temp predictor buffer for MC */ + /* Pointer to refernce Pic buffers list, 0:fwd, 1:bwd */ + pic_buffer_t **ps_ref_pic_buf_lx[2]; + /* refIdx to POC mapping */ + void **ppv_map_ref_idx_to_poc; + UWORD32 *pu4_defI_wts_ofsts; + UWORD32 *pu4_wts_ofsts_mat; + UWORD32 *pu4_wt_ofsts; + UWORD32 *pu4_mbaff_wt_mat; + /* Function pointers to read Params common to CAVLC and CABAC */ + WORD32 (*pf_parse_inter_mb)(struct _DecStruct * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD8 u1_mb_num, + UWORD8 u1_num_mbsNby2); + WORD32 (*pf_mvpred_ref_tfr_nby2mb)(struct _DecStruct * ps_dec, + UWORD8 u1_num_mbs, + UWORD8 u1_num_mbsNby2); + + WORD32 (*pf_parse_inter_slice)(struct _DecStruct * ps_dec, + dec_slice_params_t * ps_slice, + UWORD16 u2_first_mb_in_slice); + + UWORD32 (*pf_get_mb_info)(struct _DecStruct * ps_dec, + const UWORD16 u2_cur_mb_address, + dec_mb_info_t * ps_cur_mb_info, + UWORD32 u4_mbskip_run); + + /* Variables for Decode Buffer Management */ + dpb_manager_t *ps_dpb_mgr; + dpb_commands_t *ps_dpb_cmds; + + /* Variables Required for N MB design */ + dec_mb_info_t *ps_nmb_info; + + UWORD8 *pu1_y_intra_pred_line; + UWORD8 *pu1_u_intra_pred_line; + UWORD8 *pu1_v_intra_pred_line; + + UWORD8 *pu1_cur_y_intra_pred_line; + UWORD8 *pu1_cur_u_intra_pred_line; + UWORD8 *pu1_cur_v_intra_pred_line; + + UWORD8 *pu1_cur_y_intra_pred_line_base; + UWORD8 *pu1_cur_u_intra_pred_line_base; + UWORD8 *pu1_cur_v_intra_pred_line_base; + + UWORD8 *pu1_prev_y_intra_pred_line; + UWORD8 *pu1_prev_u_intra_pred_line; + UWORD8 *pu1_prev_v_intra_pred_line; + + UWORD32 u4_intra_pred_line_ofst; + + /* Scratch ping reconstruction pointers for Y U V */ + UWORD8 *pu1_y_scratch[2]; + UWORD8 *pu1_u_scratch[2]; + UWORD8 *pu1_v_scratch[2]; + UWORD8 u1_yuv_scratch_idx; + UWORD8 u1_not_wait_rec; + UWORD8 u1_res_changed; + + 
UWORD8 *pu1_yleft; /** Left Y pointer, used for intra-pred */ + UWORD8 *pu1_uleft; /** Left U pointer, used for intra-pred */ + UWORD8 *pu1_vleft; /** Left V pointer, used for intra-pred */ + UWORD8 u1_y_topleft[2]; /** Left Y pointer, used for intra-pred */ + UWORD8 u1_u_topleft[2]; /** Left U pointer, used for intra-pred */ + UWORD8 u1_v_topleft[2]; /** Left V pointer, used for intra-pred */ + UWORD16 u2_mb_group_cols_y; /** Number of Y pixels in the N MB group */ + UWORD16 u2_mb_group_cols_cr; /** Number of U/V pixels in the N MB group */ + UWORD16 u2_mb_group_cols_y1; /** Number of Y pixels in the N MB group */ + UWORD16 u2_mb_group_cols_cr1; /** Number of U/V pixels in the N MB group */ + + mv_pred_t *ps_mv_cur; /** pointer to current motion vector bank */ + mv_pred_t *ps_mv_top; /** pointer to top motion vector bank */ + mv_pred_t *ps_mv_top_right2;/** Pointer to top right motion vector bank */ + mv_pred_t *ps_mv_p[2]; /** Scratch ping motion vector bank */ + mv_pred_t *ps_mv_top_p[MV_SCRATCH_BUFS]; /** Scratch top pong motion vector bank */ + UWORD8 u1_mv_top_p; + + deblk_mb_t *ps_deblk_mbn; + deblk_mb_t *ps_deblk_mbn_dec_thrd;/*pointer used by parsing when spearaet_parse is 1*/ + deblk_mb_t *ps_deblk_mbn_curr; + deblk_mb_t *ps_deblk_mbn_prev; + + UWORD8 *pu1_temp_mc_buffer; + + struct _sei *ps_sei; + UWORD8 u1_pic_struct_copy; + /* Variables required for cropping */ + UWORD16 u2_disp_width; + UWORD16 u2_disp_height; + UWORD16 u2_crop_offset_y; + UWORD16 u2_crop_offset_uv; + + /* Variable required to get presentation time stamp through application */ + UWORD32 u4_pts; + + /* Variables used for gaps in frame number */ + UWORD16 u2_prev_ref_frame_num; + UWORD8 u1_vert_up_scale_flag; + iv_mem_rec_t *ps_mem_tab; + + UWORD16 u2_wait_id; + + void *pi4_ctxt_save_register; + void *pi4_ctxt_save_register_dec; + + UWORD8 u1_mb_idx; + struct pic_buffer_t *ps_col_pic; + void (*pf_parse_mvdirect)(struct _DecStruct*, + struct pic_buffer_t*, + directmv_t*, + UWORD8, + 
WORD32, + dec_mb_info_t *); + void *pv_dec_out; + void *pv_dec_in; + void *pv_scratch_sps_pps; /*used temeporarily store sps/ spps while parsing*/ + + /* state pointers to mb and partition information */ + parse_pmbarams_t *ps_parse_mb_data; + parse_part_params_t *ps_parse_part_params; + + /* scratch pointers to mb and partition information */ + parse_part_params_t *ps_part; + + UWORD8 u1_max_dec_frame_buffering; + pad_mgr_t s_pad_mgr; + UWORD8 (*pf_mvpred)(struct _DecStruct *ps_dec, + struct _DecMbInfo *ps_cur_mb_info, + mv_pred_t *ps_mv_pred, + mv_pred_t *ps_mv_nmb, + mv_pred_t *ps_mv_ntop, + UWORD8 u1_sub_mb_num, + UWORD8 uc_mb_part_width, + UWORD8 uc_lxstart, + UWORD8 uc_lxend, + UWORD8 u1_mb_mc_mode); + void (*pf_compute_bs)(struct _DecStruct * ps_dec, + struct _DecMbInfo * ps_cur_mb_info, + const UWORD16 u2_mbxn_mb); + UWORD8 u1_init_dec_flag; + prev_seq_params_t s_prev_seq_params; + UWORD8 u1_cur_mb_fld_dec_flag; /* current Mb fld or Frm */ + + code_overlay_ctxt_t s_code_overlay_ctxt; + UWORD8 u1_code_overlay; + +// WORD8 *pi1_cur_predmodes; + WORD8 pi1_left_pred_mode[8]; + UWORD8 u1_topleft_mb_fld; + UWORD8 u1_topleft_mbtype; + UWORD8 u1_topleft_mb_fld_bot; + UWORD8 u1_topleft_mbtype_bot; + UWORD8 u1_deblk_mb_grp; + WORD16 i2_prev_slice_mbx; + WORD16 i2_prev_slice_mby; + UWORD16 u2_top_left_mask; + UWORD16 u2_top_right_mask; + dec_err_status_t * ps_dec_err_status; + + UWORD32 *pu4_sos_signal; + UWORD8 u1_mb_idx_mv; + UWORD16 u2_mv_2mb[2]; + UWORD32 u4_ref_buf_size; + UWORD32 u4_packet_cnt; + /* to remember the i4_status & input parameters from the sample app */ + void *pv_dec_status; // itt_dec_status_t void pointer */ + void *pv_dec_params; // itt_dec_prms_t void pointer + void *pv_app_ctxt; + UWORD32 u4_skip_frm_mask; + void *pv_fmt_con_ctxt; + /* for the parallel format conversion */ + UWORD8 *pu1_frmt_conv_y[3]; + UWORD8 *pu1_frmt_conv_u[3]; + UWORD8 *pu1_frmt_conv_v[3]; + UWORD8 *pu1_deblk_scr; + UWORD32 u4_deblk_scr_sz; + + /* variable for finding the 
no.of mbs decoded in the current picture */ + UWORD16 u2_total_mbs_coded; + /* member added for supporting fragmented annex - B */ +// frg_annex_read_t s_frag_annex_read; + /* added for vui_t, sei support*/ + WORD32 i4_vui_frame_rate; + /* To Store the value of ref_idx_active for previous slice */ + /* useful in error handling */ + UWORD8 u1_num_ref_idx_lx_active_prev; + /* Flag added to come out of process call in annex-b if&if frame is decoded */ + /* presence of access unit delimters and pps and sps */ + UWORD8 u1_frame_decoded_flag; + + /* To keep track of whether the last picture was decoded or not */ + /* in case of skip mode set by the application */ + UWORD8 u1_last_pic_not_decoded; + UWORD32 *pu4_return_remaining_bufs; + + /* Used for disabling deblocking of non-reference pictures */ + WORD32 i4_set_low_complexity_mode; + WORD32 i4_disable_deblock; + + WORD32 e_dec_status; + UWORD32 u4_num_fld_in_frm; + + /* Function pointer for 4x4 residual cavlc parsing based on total coeff */ + WORD32 (*pf_cavlc_4x4res_block[3])(UWORD32 u4_isdc, + UWORD32 u4_total_coeff_trail_one, /**TotalCoefficients<<16+trailingones*/ + dec_bit_stream_t *ps_bitstrm); + + /* Function pointer array for interpolate functions in called from motion compensattion module */ + void (*p_mc_interpolate_x_y[16][3])(UWORD8*, + UWORD8*, + UWORD8*, + UWORD8, + UWORD16, + UWORD16, + UWORD8); + + /**************************************************************************/ + /* Function pointer for 4x4 totalcoeff, trlone and residual cavlc parsing */ + /* based on u4_n (neigbourinng nnz average) */ + /* These point to two functions depending on (u4_n > 7) and (u4_n <= 7) */ + /**************************************************************************/ + WORD32 (*pf_cavlc_parse4x4coeff[2])(WORD16 *pi2_coeff_block, + UWORD32 u4_isdc, /* is it a DC block */ + WORD32 u4_n, + struct _DecStruct *ps_dec, /** Decoder Parameters */ + UWORD32 *pu4_total_coeff); + + 
/**************************************************************************/ + /* Function pointer for luma 8x8block cavlc parsing based on top and left */ + /* neigbour availability. */ + /**************************************************************************/ + WORD32 (*pf_cavlc_parse_8x8block[4])(WORD16 *pi2_coeff_block, + UWORD32 u4_sub_block_strd, + UWORD32 u4_isdc, + struct _DecStruct *ps_dec, + UWORD8 *pu1_top_nnz, + UWORD8 *pu1_left_nnz, + UWORD8 u1_tran_form8x8, + UWORD8 u1_mb_field_decodingflag, + UWORD32 *pu4_csbp); + + /**************************************************************************/ + /* Ping pong top and current rows of mb neigbour_params */ + /**************************************************************************/ + mb_neigbour_params_t *ps_nbr_mb_row; + mb_neigbour_params_t *ps_cur_mb_row; + mb_neigbour_params_t *ps_top_mb_row; + + /**************************************************************************/ + /* Function pointer for 16x16 and non16x16 Bs1 calculations depending on */ + /* P and B slice. 
*/ + /***************************************************************************/ + void (*pf_fill_bs1[2][2])(mv_pred_t *ps_cur_mv_pred, + mv_pred_t *ps_top_mv_pred, + void **ppv_map_ref_idx_to_poc, + UWORD32 *pu4_bs_table, /* pointer to the BsTable array */ + mv_pred_t *ps_leftmost_mv_pred, + neighbouradd_t *ps_left_addr, + void **u4_pic_addrress, + WORD32 i4_ver_mvlimit); + + void (*pf_fill_bs_xtra_left_edge[2])(UWORD32 *pu4_bs, /* Base pointer of BS table */ + WORD32 u4_left_mb_t_csbp, /* left mbpair's top csbp */ + WORD32 u4_left_mb_b_csbp, /* left mbpair's bottom csbp*/ + WORD32 u4_cur_mb_csbp, /* csbp of current mb */ + UWORD32 u4_cur_mb_bot /* is top or bottom mb */ + + ); + /* Function pointer array for BP and MP functions for MC*/ + void (*p_motion_compensate)(struct _DecStruct * ps_dec, + dec_mb_info_t *ps_cur_mb_info); + + + void (*p_mc_dec_thread)(struct _DecStruct * ps_dec, dec_mb_info_t *ps_cur_mb_info); + + /* Function pointer array for BP and MP functions for formMbPartInfo*/ + + WORD32 (*p_form_mb_part_info)(pred_info_pkd_t *ps_pred_pkd, + struct _DecStruct * ps_dec, + UWORD16 u2_mb_x, + UWORD16 u2_mb_y, + WORD32 mb_index, + dec_mb_info_t *ps_cur_mb_info); + + WORD32 (*p_form_mb_part_info_thread)(pred_info_pkd_t *ps_pred_pkd, + struct _DecStruct * ps_dec, + UWORD16 u2_mb_x, + UWORD16 u2_mb_y, + WORD32 mb_index, + dec_mb_info_t *ps_cur_mb_info); + + + /* Required for cabac mbaff bottom mb */ + UWORD32 u4_next_mb_skip; + + void (*p_DeblockPicture[2])(struct _DecStruct *); + + /* ! 
*/ + UWORD32 u4_ts; + UWORD8 u1_flushfrm; + + /* Output format sent by the application */ + UWORD8 u1_chroma_format; + UWORD8 u1_pic_decode_done; + UWORD32 u4_level_at_init; + UWORD32 u4_width_at_init; + UWORD32 u4_height_at_init; + WORD32 init_done; + WORD32 process_called; + + /******************************************/ + /* For the high profile related variables */ + /******************************************/ + high_profile_tools_t s_high_profile; + /* CBCR */ + UWORD8 u1_qp_v_div6; + UWORD8 u1_qp_v_rem6; + /* + * TO help solve the dangling field case. + * Check for the previous frame number and the current frame number. + */ + UWORD16 u2_prv_frame_num; + UWORD8 u1_top_bottom_decoded; + UWORD8 u1_dangling_field; + + /* + * For Low Memory case + */ + UWORD32 u4_num_ref_frames_at_init; + UWORD32 u4_num_reorder_frames_at_init; + UWORD32 u4_num_extra_disp_bufs_at_init; + UWORD32 u4_num_disp_bufs_requested; + WORD32 i4_display_delay; + UWORD32 u4_slice_start_code_found; + + UWORD32 u4_mb_level_deblk; + UWORD32 u4_use_intrapred_line_copy; + UWORD32 u4_num_mbs_prev_nmb; + UWORD32 u4_app_deblk_disable_level; + UWORD32 u4_app_disable_deblk_frm; + WORD32 i4_app_skip_mode; + WORD32 i4_mv_frac_mask; + + disp_buf_t disp_bufs[MAX_DISP_BUFS_NEW]; + UWORD32 u4_disp_buf_mapping[MAX_DISP_BUFS_NEW]; + UWORD32 u4_disp_buf_to_be_freed[MAX_DISP_BUFS_NEW]; + UWORD32 u4_share_disp_buf; + UWORD32 u4_num_disp_bufs; + UWORD32 u4_prev_nal_skipped; + UWORD32 u4_return_to_app; + WORD32 i4_dec_skip_mode; + + UWORD32 u4_bs_deblk_thread_created; + volatile UWORD32 u4_start_bs_deblk; + void *pv_bs_deblk_thread_handle; + + UWORD32 u4_cur_bs_mb_num; + UWORD32 u4_bs_cur_slice_num_mbs; + UWORD32 u4_cur_slice_bs_done; + UWORD32 u4_cur_deblk_mb_num; + volatile UWORD16 u2_cur_slice_num_bs; + + UWORD32 u4_deblk_mb_x; + UWORD32 u4_deblk_mb_y; + deblk_mb_t *ps_cur_deblk_thrd_mb; + + + iv_yuv_buf_t s_disp_frame_info; + UWORD32 u4_fmt_conv_num_rows; + UWORD32 u4_fmt_conv_cur_row; + ivd_out_bufdesc_t 
*ps_out_buffer; + ivd_get_display_frame_op_t s_disp_op; + UWORD32 u4_stop_threads; + UWORD32 u4_output_present; + + volatile UWORD16 cur_dec_mb_num; + volatile UWORD16 u2_cur_mb_addr; + WORD16 i2_dec_thread_mb_y; + + UWORD8 u1_separate_parse; +// 0: slice parse not started, 1: slice decode can start, 2: slice in error + volatile UWORD32 u4_start_frame_decode; + UWORD32 u4_dec_thread_created; + void *pv_dec_thread_handle; + volatile UWORD8 *pu1_dec_mb_map; + volatile UWORD8 *pu1_recon_mb_map; + volatile UWORD16 *pu2_slice_num_map; + dec_slice_struct_t *ps_dec_slice_buf; + void *pv_map_ref_idx_to_poc_buf; + dec_mb_info_t *ps_frm_mb_info; + volatile dec_slice_struct_t * volatile ps_parse_cur_slice; + volatile dec_slice_struct_t * volatile ps_decode_cur_slice; + volatile dec_slice_struct_t * volatile ps_computebs_cur_slice; + UWORD32 u4_cur_slice_decode_done; + UWORD32 u4_extra_mem_used; + + UWORD32 u4_first_slice_in_pic; + UWORD32 u4_num_cores; + IVD_ARCH_T e_processor_arch; + IVD_SOC_T e_processor_soc; + + /** + * Pictures that are are degraded + * 0 : No degrade + * 1 : Only on non-reference frames + * 2 : Use interval specified by u4_nondegrade_interval + * 3 : All non-key frames + * 4 : All frames + */ + WORD32 i4_degrade_pics; + + /** + * Interval for pictures which are completely decoded without any degradation + */ + WORD32 i4_nondegrade_interval; + + /** + * bit position (lsb is zero): Type of degradation + * 1 : Disable deblocking + * 2 : Faster inter prediction filters + * 3 : Fastest inter prediction filters + */ + WORD32 i4_degrade_type; + + /** Degrade pic count, Used to maintain the interval between non-degraded pics + * + */ + WORD32 i4_degrade_pic_cnt; + + fmt_conv_part_t as_fmt_conv_part[2]; + UWORD32 u4_fmt_conv_in_process; + UWORD32 u4_pic_buf_got; + UWORD16 u2_mb_skip_error; + volatile UWORD16 u2_skip_deblock; + + /** + * Col flag and mv pred buffer manager + */ + void *pv_mv_buf_mgr; + + /** + * Picture buffer manager + */ + void *pv_pic_buf_mgr; 
+ + /** + * Display buffer manager + */ + void *pv_disp_buf_mgr; + + void *apv_buf_id_pic_buf_map[MAX_DISP_BUFS_NEW]; + + UWORD8 au1_pic_buf_id_mv_buf_id_map[MAX_DISP_BUFS_NEW]; + + UWORD8 au1_pic_buf_ref_flag[MAX_DISP_BUFS_NEW]; + + ih264_default_weighted_pred_ft *pf_default_weighted_pred_luma; + + ih264_default_weighted_pred_ft *pf_default_weighted_pred_chroma; + + ih264_weighted_pred_ft *pf_weighted_pred_luma; + + ih264_weighted_pred_ft *pf_weighted_pred_chroma; + + ih264_weighted_bi_pred_ft *pf_weighted_bi_pred_luma; + + ih264_weighted_bi_pred_ft *pf_weighted_bi_pred_chroma; + + ih264_pad *pf_pad_top; + ih264_pad *pf_pad_bottom; + ih264_pad *pf_pad_left_luma; + ih264_pad *pf_pad_left_chroma; + ih264_pad *pf_pad_right_luma; + ih264_pad *pf_pad_right_chroma; + + ih264_inter_pred_chroma_ft *pf_inter_pred_chroma; + + ih264_inter_pred_luma_ft *apf_inter_pred_luma[16]; + + ih264_intra_pred_luma_ft *apf_intra_pred_luma_16x16[4]; + + ih264_intra_pred_luma_ft *apf_intra_pred_luma_8x8[9]; + + ih264_intra_pred_luma_ft *apf_intra_pred_luma_4x4[9]; + + ih264_intra_pred_ref_filtering_ft *pf_intra_pred_ref_filtering; + + ih264_intra_pred_chroma_ft *apf_intra_pred_chroma[4]; + + ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_4x4; + + ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_4x4_dc; + + ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_8x8; + + ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_8x8_dc; + + ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4; + + ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4_dc; + + ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_4x4; + + /** + * deblock vertical luma edge with blocking strength 4 + */ + ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4; + + /** + * deblock vertical luma edge with blocking strength less than 4 + */ + ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4; + + /** + * deblock vertical luma edge with blocking strength 4 for mbaff + 
*/ + ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4_mbaff; + + /** + * deblock vertical luma edge with blocking strength less than 4 for mbaff + */ + ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4_mbaff; + + /** + * deblock vertical chroma edge with blocking strength 4 + */ + ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4; + + /** + * deblock vertical chroma edge with blocking strength less than 4 + */ + ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4; + + /** + * deblock vertical chroma edge with blocking strength 4 for mbaff + */ + ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4_mbaff; + + /** + * deblock vertical chroma edge with blocking strength less than 4 for mbaff + */ + ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4_mbaff; + + /** + * deblock horizontal luma edge with blocking strength 4 + */ + ih264_deblk_edge_bs4_ft *pf_deblk_luma_horz_bs4; + + /** + * deblock horizontal luma edge with blocking strength less than 4 + */ + ih264_deblk_edge_bslt4_ft *pf_deblk_luma_horz_bslt4; + + /** + * deblock horizontal chroma edge with blocking strength 4 + */ + ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_horz_bs4; + + /** + * deblock horizontal chroma edge with blocking strength less than 4 + */ + ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_horz_bslt4; + + +} dec_struct_t; + +#endif /* _H264_DEC_STRUCTS_H */ diff --git a/decoder/ih264d_tables.c b/decoder/ih264d_tables.c new file mode 100755 index 0000000..ddca2fb --- /dev/null +++ b/decoder/ih264d_tables.c @@ -0,0 +1,872 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** + ************************************************************************** + * \file ih264d_tables.c + * + * \brief + * Definition of all tables used by h264 decoder + * + * \date + * 17/09/2004 + * + * \author MA + ************************************************************************** + */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_defs.h" + +const UWORD8 gau1_ih264d_qp_scale_cr[] = + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 29, 30, 31, 32, 32, 33, 34, 34, 35, 35, 36, 36, 37, 37, 37, 38, 38, 38, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39 }; +const UWORD8 gau1_ih264d_alpha_table[] = + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 4, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, 25, 28, 32, 36, + 40, 45, 50, 56, 63, 71, 80, 90, 101, 113, 127, 144, 162, 182, 203, 226, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }; +const UWORD8 gau1_ih264d_beta_table[] = + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, + 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 
18, 18 }; + +const UWORD8 gau1_ih264d_clip_table[][4] = + { + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 1 }, + { 0, 0, 0, 1 }, + { 0, 0, 0, 1 }, + { 0, 0, 0, 1 }, + { 0, 0, 1, 1 }, + { 0, 0, 1, 1 }, + { 0, 1, 1, 1 }, + { 0, 1, 1, 1 }, + { 0, 1, 1, 1 }, + { 0, 1, 1, 1 }, + { 0, 1, 1, 2 }, + { 0, 1, 1, 2 }, + { 0, 1, 1, 2 }, + { 0, 1, 1, 2 }, + { 0, 1, 2, 3 }, + { 0, 1, 2, 3 }, + { 0, 2, 2, 3 }, + { 0, 2, 2, 4 }, + { 0, 2, 3, 4 }, + { 0, 2, 3, 4 }, + { 0, 3, 3, 5 }, + { 0, 3, 4, 6 }, + { 0, 3, 4, 6 }, + { 0, 4, 5, 7 }, + { 0, 4, 5, 8 }, + { 0, 4, 6, 9 }, + { 0, 5, 7, 10 }, + { 0, 6, 8, 11 }, + { 0, 6, 8, 13 }, + { 0, 7, 10, 14 }, + { 0, 8, 11, 16 }, + { 0, 9, 12, 18 }, + { 0, 10, 13, 20 }, + { 0, 11, 15, 23 }, + { 0, 13, 17, 25 }, + + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 }, + { 0, 13, 17, 25 } + + }; +const UWORD8 gau1_ih264d_clip_table_deblock[] = + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 
43, 44, 45, 46, 47, + 48, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51 }; + +/****************DEBLOCKING TABLES ENDS*******************/ + +/*************************************************************/ +/* BS CALCULATION TABLES */ +/*************************************************************/ +UWORD32 const gau4_ih264d_packed_bs2[32] = + { + /*************************************************************/ + /* BS TABLES FOR NORMAL EDGES */ + /*************************************************************/ + 0x00000000, + 0x02000000, 0x00020000, 0x02020000, 0x00000200, 0x02000200, 0x00020200, + 0x02020200, 0x00000002, 0x02000002, 0x00020002, 0x02020002, 0x00000202, + 0x02000202, 0x00020202, 0x02020202, + + /*************************************************************/ + /* BS TABLES FOR XTRA LEFT MB EDGES IN MBAFF CASE */ + /*************************************************************/ + 0x01010101, + 0x02010101, 0x01020101, 0x02020101, 0x01010201, 0x02010201, 0x01020201, + 0x02020201, 0x01010102, 0x02010102, 0x01020102, 0x02020102, 0x01010202, + 0x02010202, 0x01020202, 0x02020202, }; + +UWORD16 const gau2_ih264d_4x4_v2h_reorder[16] = + { 0x0000, 0x0001, 0x0010, 0x0011, 0x0100, 0x0101, 0x0110, 0x0111, 0x1000, + 0x1001, 0x1010, 0x1011, 0x1100, 0x1101, 0x1110, 0x1111 }; + +/****************SCALING TABLES STARTS *****************/ +const WORD16 gai2_ih264d_default_intra4x4[16] = + { 6, 13, 13, 20, 20, 20, 28, 28, 28, 28, 32, 32, 32, 37, 37, 42 }; + +const WORD16 gai2_ih264d_default_inter4x4[16] = + { 10, 14, 14, 20, 20, 20, 24, 24, 24, 24, 27, 27, 27, 30, 30, 34 }; + +const WORD16 gai2_ih264d_default_intra8x8[64] = + { 6, 10, 10, 13, 11, 13, 16, 16, 16, 16, 18, 18, 18, 18, 18, 23, 23, 23, 23, + 23, 23, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, 27, 27, 27, 27, 29, + 29, 29, 29, 29, 29, 29, 31, 31, 31, 31, 31, 31, 33, 33, 33, 33, 33, 36, + 36, 36, 36, 38, 38, 38, 40, 40, 42 }; + +const WORD16 gai2_ih264d_default_inter8x8[64] = + { 9, 13, 13, 15, 13, 
15, 17, 17, 17, 17, 19, 19, 19, 19, 19, 21, 21, 21, 21, + 21, 21, 22, 22, 22, 22, 22, 22, 22, 24, 24, 24, 24, 24, 24, 24, 24, 25, + 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 30, + 30, 30, 30, 32, 32, 32, 33, 33, 35 }; + +const WORD16 gai2_ih264d_flat_4x4[16] = + { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 }; + +const WORD16 gai2_ih264d_flat_8x8[64] = + { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 }; + +/****************SCALING TABLES ENDS *****************/ + +/*Inverse scan tables for individual 4x4 blocks of 8x8 transform coeffs of CAVLC */ + +/* progressive */ + +const UWORD8 gau1_ih264d_inv_scan_prog8x8_cavlc[4][16] = + { + { 0, 9, 17, 18, 12, 40, 27, 7, 35, 57, 29, 30, 58, 38, 53, 47 }, /* for First subblock */ + { 1, 2, 24, 11, 19, 48, 20, 14, 42, 50, 22, 37, 59, 31, 60, 55 }, /* for second subblock */ + { 8, 3, 32, 4, 26, 41, 13, 21, 49, 43, 15, 44, 52, 39, 61, 62 }, /* for third subblock */ + { 16, 10, 25, 5, 33, 34, 6, 28, 56, 36, 23, 51, 45, 46, 54, 63 } /* for fourth subblock */ + }; + +const UWORD8 gau1_ih264d_inv_scan_int8x8_cavlc[4][16] = + { + { 0, 9, 2, 56, 18, 26, 34, 27, 35, 28, 36, 29, 45, 7, 54, 39 }, /* for First subblock */ + { 8, 24, 25, 33, 41, 11, 42, 12, 43, 13, 44, 14, 53, 15, 62, 47 }, /* for second subblock */ + { 16, 32, 40, 10, 49, 4, 50, 5, 51, 6, 52, 22, 61, 38, 23, 55 }, /* for third subblock */ + { 1, 17, 48, 3, 57, 19, 58, 20, 59, 21, 60, 37, 30, 46, 31, 63 } /* for fourth subblock */ + }; + +/*Inverse scan tables for individual 8x8 blocks of 8x8 transform coeffs of CABAC */ +/* progressive */ + +const UWORD8 gau1_ih264d_inv_scan_prog8x8_cabac[64] = + { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, + 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 
28, 35, 42, 49, 56, 57, 50, 43, + 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, + 60, 61, 54, 47, 55, 62, 63 }; + +/* interlace */ + +const UWORD8 gau1_ih264d_inv_scan_int8x8_cabac[64] = + { 0, 8, 16, 1, 9, 24, 32, 17, 2, 25, 40, 48, 56, 33, 10, 3, 18, 41, 49, 57, + 26, 11, 4, 19, 34, 42, 50, 58, 27, 12, 5, 20, 35, 43, 51, 59, 28, 13, 6, + 21, 36, 44, 52, 60, 29, 14, 22, 37, 45, 53, 61, 30, 7, 15, 38, 46, 54, 62, + 23, 31, 39, 47, 55, 63 }; + +/****************PARSING TABLES *******************/ +UWORD8 const gau1_ih264d_subblk_offset[16] = + { 8, 9, 12, 13, 10, 11, 14, 15, 16, 17, 20, 21, 18, 19, 22, 23 }; + +const UWORD8 gau1_ih264d_cbp_tab[6] = + { 0, 16, 32, 15, 31, 47 }; + +/** gives CBP value from codeword number, both for intra and inter */ + +const UWORD8 gau1_ih264d_cbp_table[48][2] = + { + { 47, 0 }, + { 31, 16 }, + { 15, 1 }, + { 0, 2 }, + { 23, 4 }, + { 27, 8 }, + { 29, 32 }, + { 30, 3 }, + { 7, 5 }, + { 11, 10 }, + { 13, 12 }, + { 14, 15 }, + { 39, 47 }, + { 43, 7 }, + { 45, 11 }, + { 46, 13 }, + { 16, 14 }, + { 3, 6 }, + { 5, 9 }, + { 10, 31 }, + { 12, 35 }, + { 19, 37 }, + { 21, 42 }, + { 26, 44 }, + { 28, 33 }, + { 35, 34 }, + { 37, 36 }, + { 42, 40 }, + { 44, 39 }, + { 1, 43 }, + { 2, 45 }, + { 4, 46 }, + { 8, 17 }, + { 17, 18 }, + { 18, 20 }, + { 20, 24 }, + { 24, 19 }, + { 6, 21 }, + { 9, 26 }, + { 22, 28 }, + { 25, 23 }, + { 32, 27 }, + { 33, 29 }, + { 34, 30 }, + { 36, 22 }, + { 40, 25 }, + { 38, 38 }, + { 41, 41 }, }; +/****************PARSING TABLES ENDS *******************/ + +/****************DECODE SLICE TABLES STARTS *******************/ +/*Definition of Tables needed by functions of this file */ +const UWORD8 gau1_ih264d_inv_scan[16] = + { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; + +const UWORD8 gau1_ih264d_inv_scan_fld[16] = + { 0, 4, 1, 8, 12, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; + +const UWORD8 gau1_ih264d_dequant_matrix[6][16] = +{ + { 10, 13, 10, 13, 13, 16, 13, 16, 10, 13, 10 ,13, 13, 16, 
13, 16}, + { 11, 14, 11, 14, 14, 18, 14, 18, 11, 14, 11 ,14, 14, 18, 14, 18}, + { 13, 16, 13, 16, 16, 20, 16, 20, 13, 16, 13 ,16, 16, 20, 16, 20}, + { 14, 18, 14, 18, 18, 23, 18, 23, 14, 18, 14, 18, 18, 23, 18, 23}, + { 16, 20, 16, 20, 20, 25, 20, 25, 16, 20, 16, 20, 20, 25, 20, 25}, + { 18, 23, 18, 23, 23, 29, 23, 29, 18, 23, 18, 23, 23, 29, 23, 29} +}; + +const UWORD16 gau2_ih264_iquant_scale_4x4[6][16] = + { + { 10, 13, 10, 13, 13, 16, 13, 16, 10, 13, 10, 13, 13, 16, 13, 16 }, + { 11, 14, 11, 14, 14, 18, 14, 18, 11, 14, 11, 14, 14, 18, 14, 18 }, + { 13, 16, 13, 16, 16, 20, 16, 20, 13, 16, 13, 16, 16, 20, 16, 20 }, + { 14, 18, 14, 18, 18, 23, 18, 23, 14, 18, 14, 18, 18, 23, 18, 23 }, + { 16, 20, 16, 20, 20, 25, 20, 25, 16, 20, 16, 20, 20, 25, 20, 25 }, + { 18, 23, 18, 23, 23, 29, 23, 29, 18, 23, 18, 23, 23, 29, 23, 29 } }; + +const UWORD8 gau1_ih264d_dequant8x8_zigzag_cavlc[4][6][16] = + { + { + { 20, 18, 24, 32, 19, 19, 18, 19, 19, 18, 18, 24, + 24, 25, 24, 18 }, /* for First subblock */ + { 22, 19, 26, 35, 21, 21, 19, 21, 21, 19, 19, 26, + 26, 28, 26, 19 }, + { 26, 23, 31, 42, 24, 24, 23, 24, 24, 23, 23, 31, + 31, 33, 31, 23 }, + { 28, 25, 33, 45, 26, 26, 25, 26, 26, 25, 25, 33, + 33, 35, 33, 25 }, + { 32, 28, 38, 51, 30, 30, 28, 30, 30, 28, 28, 38, + 38, 40, 38, 28 }, + { 36, 32, 43, 58, 34, 34, 32, 34, 34, 32, 32, 43, + 43, 46, 43, 32 } }, + { + { 19, 25, 19, 18, 24, 25, 25, 24, 24, 32, 32, 19, + 18, 18, 19, 24 }, /* for second subblock */ + { 21, 28, 21, 19, 26, 28, 28, 26, 26, 35, 35, + 21, 19, 19, 21, 26 }, + { 24, 33, 24, 23, 31, 33, 33, 31, 31, 42, 42, + 24, 23, 23, 24, 31 }, + { 26, 35, 26, 25, 33, 35, 35, 33, 33, 45, 45, + 26, 25, 25, 26, 33 }, + { 30, 40, 30, 28, 38, 40, 40, 38, 38, 51, 51, + 30, 28, 28, 30, 38 }, + { 34, 46, 34, 32, 43, 46, 46, 43, 43, 58, 58, + 34, 32, 32, 34, 43 } }, + { + { 19, 19, 20, 20, 24, 18, 18, 24, 24, 18, 18, 19, + 25, 19, 18, 24 }, /* for third subblock */ + { 21, 21, 22, 22, 26, 19, 19, 26, 26, 19, 19, + 21, 28, 21, 19, 
26 }, + { 24, 24, 26, 26, 31, 23, 23, 31, 31, 23, 23, + 24, 33, 24, 23, 31 }, + { 26, 26, 28, 28, 33, 25, 25, 33, 33, 25, 25, + 26, 35, 26, 25, 33 }, + { 30, 30, 32, 32, 38, 28, 28, 38, 38, 28, 28, + 30, 40, 30, 28, 38 }, + { 34, 34, 36, 36, 43, 32, 32, 43, 43, 32, 32, + 34, 46, 34, 32, 43 } }, + { + { 25, 24, 18, 19, 19, 25, 25, 19, 19, 20, 24, 24, + 18, 24, 32, 18 }, /* for fourth subblock */ + { 28, 26, 19, 21, 21, 28, 28, 21, 21, 22, 26, + 26, 19, 26, 35, 19 }, + { 33, 31, 23, 24, 24, 33, 33, 24, 24, 26, 31, + 31, 23, 31, 42, 23 }, + { 35, 33, 25, 26, 26, 35, 35, 26, 26, 28, 33, + 33, 25, 33, 45, 25 }, + { 40, 38, 28, 30, 30, 40, 40, 30, 30, 32, 38, + 38, 28, 38, 51, 28 }, + { 46, 43, 32, 34, 34, 46, 46, 34, 34, 36, 43, + 43, 32, 43, 58, 32 } } + + }; + +const UWORD16 gau1_ih264d_dequant8x8_cavlc[6][64] = + { + { 20, 19, 25, 19, 20, 19, 25, 19, 19, 18, 24, 18, 19, + 18, 24, 18, 25, 24, 32, 24, 25, 24, 32, 24, 19, 18, + 24, 18, 19, 18, 24, 18, 20, 19, 25, 19, 20, 19, 25, + 19, 19, 18, 24, 18, 19, 18, 24, 18, 25, 24, 32, 24, + 25, 24, 32, 24, 19, 18, 24, 18, 19, 18, 24, 18 }, + { 22, 21, 28, 21, 22, 21, 28, 21, 21, 19, 26, 19, 21, + 19, 26, 19, 28, 26, 35, 26, 28, 26, 35, 26, 21, 19, + 26, 19, 21, 19, 26, 19, 22, 21, 28, 21, 22, 21, 28, + 21, 21, 19, 26, 19, 21, 19, 26, 19, 28, 26, 35, 26, + 28, 26, 35, 26, 21, 19, 26, 19, 21, 19, 26, 19 }, + { 26, 24, 33, 24, 26, 24, 33, 24, 24, 23, 31, 23, 24, + 23, 31, 23, 33, 31, 42, 31, 33, 31, 42, 31, 24, 23, + 31, 23, 24, 23, 31, 23, 26, 24, 33, 24, 26, 24, 33, + 24, 24, 23, 31, 23, 24, 23, 31, 23, 33, 31, 42, 31, + 33, 31, 42, 31, 24, 23, 31, 23, 24, 23, 31, 23 }, + { 28, 26, 35, 26, 28, 26, 35, 26, 26, 25, 33, 25, 26, + 25, 33, 25, 35, 33, 45, 33, 35, 33, 45, 33, 26, 25, + 33, 25, 26, 25, 33, 25, 28, 26, 35, 26, 28, 26, 35, + 26, 26, 25, 33, 25, 26, 25, 33, 25, 35, 33, 45, 33, + 35, 33, 45, 33, 26, 25, 33, 25, 26, 25, 33, 25 }, + { 32, 30, 40, 30, 32, 30, 40, 30, 30, 28, 38, 28, 30, + 28, 38, 28, 40, 38, 51, 38, 40, 38, 
51, 38, 30, 28, + 38, 28, 30, 28, 38, 28, 32, 30, 40, 30, 32, 30, 40, + 30, 30, 28, 38, 28, 30, 28, 38, 28, 40, 38, 51, 38, + 40, 38, 51, 38, 30, 28, 38, 28, 30, 28, 38, 28 }, + { 36, 34, 46, 34, 36, 34, 46, 34, 34, 32, 43, 32, 34, + 32, 43, 32, 46, 43, 58, 43, 46, 43, 58, 43, 34, 32, + 43, 32, 34, 32, 43, 32, 36, 34, 46, 34, 36, 34, 46, + 34, 34, 32, 43, 32, 34, 32, 43, 32, 46, 43, 58, 43, + 46, 43, 58, 43, 34, 32, 43, 32, 34, 32, 43, 32 }, }; + +/****************DECODE SLICE TABLES ENDS *******************/ + +/****************MOTION VECTOR DECODING TABLES STARTS *******************/ + +/** + ************************************************************************** + * \brief This array is used to evaluate the condition when only one of + * predictor subMbs has a reference frame equal to that of E subMb. + ************************************************************************** + */ + +const WORD8 gau1_ih264d_mv_pred_condition[] = + { -1, 0, 1, -1, 2, -1, -1, -1 }; + +/** Number of subMbs for the 8x8 prediction mode */ +const UWORD8 gau1_ih264d_num_submb_part[] = + { 1, 2, 2, 4 }; + +/** Width of the 8x8 prediction mode in terms of subMbs */ +const UWORD8 gau1_ih264d_submb_partw[] = + { 2, 2, 1, 1 }; + +/** Height of the 8x8 prediction mode in terms of subMbs */ +const UWORD8 gau1_ih264d_submb_parth[] = + { 2, 1, 2, 1 }; + +/** Number of MB partitions for the MB prediction mode */ +const UWORD8 gau1_ih264d_num_mb_part[] = + { 1, 2, 2, 4 }; + +/** Width of the MB partition in terms of subMbs */ +const UWORD8 gau1_ih264d_mb_partw[] = + { 4, 4, 2, 2, 2 }; + +/** Height of the MB partition in terms of subMbs */ +const UWORD8 gau1_ih264d_mb_parth[] = + { 4, 2, 4, 2, 2 }; + +/** MB partition information is packed into a UWORD32 {0,number,width,height} */ +const UWORD32 gau4_ih264d_submb_part[] = + { 0x00010202, 0x00020201, 0x00020102, 0x00040101 }; + +const UWORD8 gau1_ih264d_submb_indx_mod[] = + { 0, 0, /* 16x16 */ + 0, 8, /* 16x8 */ + 0, 2, /* 8x16 */ + 0, 0, /* 
8x8 */ + 0, 4, /* 8x4 */ + 0, 1, /* 4x8 */ + 0, 1, 3, 1 /* 4x4 */ + }; + +/** This table is used to assign CBPs to Inter MBs. */ +const UWORD8 gau1_ih264d_cbp_inter[] = + { 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, 14, 6, 9, 31, 35, + 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, 17, 18, 20, 24, 19, 21, 26, + 28, 23, 27, 29, 30, 22, 25, 38, 41 }; + +/** Motion comp modes for P followed by B, + 0 to 4 : P Mbs + 5 to 27 : B Mbs + 28 to 30 : DIRECT */ +const UWORD8 gau1_ih264d_mb_mc_mode[] = + { + PRED_16x16, + PRED_16x8, PRED_8x16, PRED_8x8, PRED_8x8R0, + PRED_16x16, + PRED_16x16, PRED_16x16, PRED_16x16, PRED_16x8, PRED_8x16, + PRED_16x8, + PRED_8x16, PRED_16x8, PRED_8x16, PRED_16x8, PRED_8x16, + PRED_16x8, + PRED_8x16, PRED_16x8, PRED_8x16, PRED_16x8, PRED_8x16, + PRED_16x8, + PRED_8x16, PRED_16x8, PRED_8x16, PRED_8x8, + /* Self defined modes for B_SKIP and DIRECT16x16 */ + PRED_8x8, + PRED_8x8, PRED_8x8 }; + +const UWORD8 gau1_ih264d_submb_mc_mode[] = + { SUBMB_8x8, SUBMB_8x4, SUBMB_4x8, SUBMB_4x4, + SUBMB_8x8, + SUBMB_8x8, SUBMB_8x8, SUBMB_8x8, SUBMB_8x4, SUBMB_4x8, + SUBMB_8x4, + SUBMB_4x8, SUBMB_8x4, SUBMB_4x8, SUBMB_4x4, SUBMB_4x4, SUBMB_4x4, + /* Self defined modes B DIRECT8x8 */ + SUBMB_4x4, + SUBMB_4x4, SUBMB_4x4 }; + +/** Sub MB pred modes for B slice */ +const UWORD8 gau1_ih264d_submb_pred_modes[] = + { + PRED_L0, + PRED_L0, PRED_L0, PRED_L0, + B_DIRECT, + PRED_L0, PRED_L1, BI_PRED, PRED_L0, PRED_L0, PRED_L1, + PRED_L1, + BI_PRED, BI_PRED, PRED_L0, PRED_L1, BI_PRED, + /* Self defined modes for B DIRECT8x8 */ + BI_PRED, + PRED_L0, PRED_L1, }; + +/** MB pred modes for P and B slice */ +const WORD8 gau1_ih264d_mb_pred_modes[2][32] = + { + { PRED_L0, PRED_L0, PRED_L0, PRED_INVALID, PRED_INVALID, + B_DIRECT, + PRED_L0, PRED_L1, BI_PRED, PRED_L0, PRED_L0, PRED_L1, PRED_L1, + PRED_L0, + PRED_L0, PRED_L1, PRED_L1, PRED_L0, PRED_L0, PRED_L1, PRED_L1, + BI_PRED, + BI_PRED, BI_PRED, BI_PRED, BI_PRED, BI_PRED, PRED_INVALID, + /* Self defined modes for 
B_SKIP and DIRECT16x16 */ + BI_PRED, + PRED_L0, PRED_L1, }, + { PRED_INVALID, PRED_L0, PRED_L0, PRED_INVALID, PRED_INVALID, + PRED_INVALID, + PRED_INVALID, PRED_INVALID, PRED_INVALID, PRED_L0, PRED_L0, + PRED_L1, + PRED_L1, PRED_L1, PRED_L1, PRED_L0, PRED_L0, BI_PRED, BI_PRED, + BI_PRED, + BI_PRED, PRED_L0, PRED_L0, PRED_L1, PRED_L1, BI_PRED, BI_PRED, + PRED_INVALID, + /* Self defined modes for B_SKIP and DIRECT16x16 */ + PRED_INVALID, + PRED_INVALID, PRED_INVALID } }; + +/****************MOTION VECTOR DECODING TABLES ENDS *******************/ + +/****************CAVLC DECODING TABLES STARTS *******************/ + +/*****************************************************************************/ +/* 6 Bit table look for total zeros (totalcoeff = 2to10) as in Table 9.7 */ +/* of H264 standard. In each table entry, lower 4 bits represent total zeros */ +/* decoded while upper 4 bit represent the bits to be flushed from ps_bitstrm */ +/*****************************************************************************/ +const UWORD8 gau1_ih264d_table_total_zero_2to10[9][64] = + { + /* For total coeff = 2 */ + { 0x6E, 0x6D, 0x6C, 0x6B, 0x5A, 0x5A, 0x59, 0x59, 0x48, 0x48, 0x48, + 0x48, 0x47, 0x47, 0x47, 0x47, 0x46, 0x46, 0x46, 0x46, 0x45, 0x45, + 0x45, 0x45, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x33, + 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x32, 0x32, 0x32, 0x32, + 0x32, 0x32, 0x32, 0x32, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, + 0x31, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, }, + + /* For total coeff = 3 */ + { 0x6D, 0x6B, 0x5C, 0x5C, 0x5A, 0x5A, 0x59, 0x59, 0x48, 0x48, 0x48, + 0x48, 0x45, 0x45, 0x45, 0x45, 0x44, 0x44, 0x44, 0x44, 0x40, 0x40, + 0x40, 0x40, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, + 0x32, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, }, + + /* For total coeff = 4 */ + { 0x5C, 0x5C, 0x5B, 
0x5B, 0x5A, 0x5A, 0x50, 0x50, 0x49, 0x49, 0x49, + 0x49, 0x47, 0x47, 0x47, 0x47, 0x43, 0x43, 0x43, 0x43, 0x42, 0x42, + 0x42, 0x42, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, + 0x34, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, }, + + /* For total coeff = 5 */ + { 0x5B, 0x5B, 0x59, 0x59, 0x4A, 0x4A, 0x4A, 0x4A, 0x48, 0x48, 0x48, + 0x48, 0x42, 0x42, 0x42, 0x42, 0x41, 0x41, 0x41, 0x41, 0x40, 0x40, + 0x40, 0x40, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, + 0x34, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, }, + + /* For total coeff = 6 */ + { 0x6A, 0x60, 0x51, 0x51, 0x48, 0x48, 0x48, 0x48, 0x39, 0x39, 0x39, + 0x39, 0x39, 0x39, 0x39, 0x39, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, + 0x37, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x34, 0x34, 0x34, 0x34, + 0x34, 0x34, 0x34, 0x34, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, + 0x33, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, }, + + /* For total coeff = 7 */ + { 0x69, 0x60, 0x51, 0x51, 0x47, 0x47, 0x47, 0x47, 0x38, 0x38, 0x38, + 0x38, 0x38, 0x38, 0x38, 0x38, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x33, + 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x32, 0x32, 0x32, 0x32, + 0x32, 0x32, 0x32, 0x32, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, + 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, }, + + /* For total coeff = 8 */ + { 0x68, 0x60, 0x52, 0x52, 0x41, 0x41, 0x41, 0x41, 0x37, 0x37, 0x37, + 0x37, 0x37, 0x37, 0x37, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x25, + 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, + 0x25, 0x25, 0x25, 
0x25, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, + 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, }, + + /* For total coeff = 9 */ + { 0x61, 0x60, 0x57, 0x57, 0x42, 0x42, 0x42, 0x42, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x24, + 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, + 0x24, 0x24, 0x24, 0x24, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, + 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, }, + + /* For total coeff = 10 */ + { 0x51, 0x51, 0x50, 0x50, 0x46, 0x46, 0x46, 0x46, 0x32, 0x32, 0x32, + 0x32, 0x32, 0x32, 0x32, 0x32, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, + 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x24, + 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, + 0x24, 0x24, 0x24, 0x24, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, + 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, } + + }; + +/*****************************************************************************/ +/* 4 Bit table look for total zeros (totalcoeff = 11to15) as in Table 9.7 */ +/* of H264 standard. 
In each table entry, lower 4 bits represent total zeros */ +/* decoded while upper 4 bits represent the bits to be flushed from ps_bitstrm */ +/*****************************************************************************/ +const UWORD8 gau1_ih264d_table_total_zero_11to15[5][16] = + { + /* For total coeff = 11 */ + { 0x40, 0x41, 0x32, 0x32, 0x33, 0x33, 0x35, 0x35, 0x14, 0x14, 0x14, + 0x14, 0x14, 0x14, 0x14, 0x14, }, + + /* For total coeff = 12 */ + { 0x40, 0x41, 0x34, 0x34, 0x22, 0x22, 0x22, 0x22, 0x13, 0x13, 0x13, + 0x13, 0x13, 0x13, 0x13, 0x13, }, + + /* For total coeff = 13 */ + { 0x30, 0x30, 0x31, 0x31, 0x23, 0x23, 0x23, 0x23, 0x12, 0x12, 0x12, + 0x12, 0x12, 0x12, 0x12, 0x12, }, + + /* For total coeff = 14 */ + { 0x20, 0x20, 0x20, 0x20, 0x21, 0x21, 0x21, 0x21, 0x12, 0x12, 0x12, + 0x12, 0x12, 0x12, 0x12, 0x12, }, + + /* For total coeff = 15 */ + { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, }, }; + +/** Tables used to read "Run Before", Below tables are packed to reduce lookups */ +/** (Base address of Gx << 2) + (Max code length for that Gx) */ +const UWORD8 gau1_ih264d_table_run_before[64] = + { 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 1, 1, 1, 1, 10, 10, 6, 6, 1, 1, 1, 1, + 14, 14, 10, 10, 6, 6, 2, 2, 19, 15, 10, 10, 6, 6, 2, 2, 23, 19, 15, 11, 6, + 6, 2, 2, 7, 11, 19, 15, 27, 23, 2, 2, 27, 27, 23, 19, 15, 11, 7, 3 }; + +/*****************************************************************************/ +/* Lookup table for CAVLC 4x4 total_coeff,trailing_ones as per Table 9-5 */ +/* in the standard. Starting from lsb first 2 bits=flushbits, next 2bits= */ +/* trailing ones, next 5 bits=total_coeff. 
Total bits used = 9 out of 16 */ +/*****************************************************************************/ +const UWORD16 gau2_ih264d_code_gx[304] = + { + /* Lookup for 0 <= nC < 2 */ + 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0014, 0x0014, + 0x0014, 0x0014, 0x0014, 0x0014, 0x0014, 0x0014, 0x0028, 0x0028, 0x0028, + 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0026, 0x0026, 0x0012, 0x0012, + 0x003D, 0x003D, 0x003D, 0x003D, 0x005E, 0x005E, 0x003A, 0x003A, 0x004D, + 0x004D, 0x004D, 0x004D, 0x006E, 0x006E, 0x004A, 0x004A, 0x0036, 0x0036, + 0x0022, 0x0022, 0x007E, 0x007E, 0x005A, 0x005A, 0x0046, 0x0046, 0x0032, + 0x0032, 0x008E, 0x008E, 0x006A, 0x006A, 0x0056, 0x0056, 0x0042, 0x0042, + 0x009E, 0x009E, 0x007A, 0x007A, 0x0066, 0x0066, 0x0052, 0x0052, 0x0083, + 0x009B, 0x0087, 0x0073, 0x00AF, 0x008B, 0x0077, 0x0063, 0x00CF, 0x00BB, + 0x00A7, 0x00A3, 0x00BF, 0x00AB, 0x0097, 0x0093, 0x00EF, 0x00DB, 0x00C7, + 0x00C3, 0x00DF, 0x00CB, 0x00B7, 0x00B3, 0x010F, 0x00FB, 0x00F7, 0x00E3, + 0x00FF, 0x00EB, 0x00E7, 0x00D3, 0x0102, 0x0102, 0x010A, 0x010A, 0x0106, + 0x0106, 0x00F2, 0x00F2, 0x00D4, 0x00D4, 0x00D4, 0x00D4, 0x00D4, 0x00D4, + 0x00D4, 0x00D4, + + /* Lookup for 2 <= nC < 4 */ + 0x0015, + 0x0015, 0x0015, 0x0015, 0x0001, 0x0001, 0x0001, 0x0001, 0x004E, 0x004E, + 0x003E, 0x003E, 0x0029, 0x0029, 0x0029, 0x0029, 0x006F, 0x003B, 0x0037, + 0x0013, 0x005E, 0x005E, 0x0026, 0x0026, 0x007E, 0x007E, 0x004A, 0x004A, + 0x0046, 0x0046, 0x0022, 0x0022, 0x008E, 0x008E, 0x005A, 0x005A, 0x0056, + 0x0056, 0x0032, 0x0032, 0x0052, 0x0052, 0x006A, 0x006A, 0x0066, 0x0066, + 0x0042, 0x0042, 0x009E, 0x009E, 0x007A, 0x007A, 0x0076, 0x0076, 0x0062, + 0x0062, 0x00BF, 0x009B, 0x0097, 0x0083, 0x00AF, 0x008B, 0x0087, 0x0073, + 0x00B3, 0x00BB, 0x00B7, 0x00A3, 0x00CF, 0x00AB, 0x00A7, 0x0093, 0x00EF, + 0x00DB, 0x00D7, 0x00D3, 0x00DF, 0x00CB, 0x00C7, 0x00C3, 0x00F7, 0x00F3, + 0x00FB, 0x00E7, 0x00EA, 0x00EA, 0x00E2, 0x00E2, 0x010E, 0x010E, 0x010A, + 0x010A, 0x0106, 0x0106, 0x0102, 
0x0102, 0x00FC, 0x00FC, 0x00FC, 0x00FC, + 0x00FC, 0x00FC, 0x00FC, 0x00FC, + + /* Lookup for 4 <= nC < 8 */ + 0x007F, + 0x006F, 0x005F, 0x004F, 0x003F, 0x002B, 0x0017, 0x0003, 0x0057, 0x005B, + 0x0047, 0x004B, 0x0037, 0x008F, 0x003B, 0x0027, 0x0033, 0x007B, 0x0077, + 0x0023, 0x009F, 0x006B, 0x0067, 0x0013, 0x0073, 0x0063, 0x009B, 0x0053, + 0x00AF, 0x008B, 0x0087, 0x0043, 0x00CF, 0x00BB, 0x00A7, 0x0093, 0x00BF, + 0x00AB, 0x0097, 0x0083, 0x00C3, 0x00DB, 0x00C7, 0x00B3, 0x00DF, 0x00CB, + 0x00B7, 0x00A3, 0x00F7, 0x00E3, 0x00EF, 0x00EB, 0x00E7, 0x00D3, 0x00D6, + 0x00D6, 0x0106, 0x0106, 0x00F2, 0x00F2, 0x00FE, 0x00FE, 0x00FA, 0x00FA, + 0x010D, 0x010D, 0x010D, 0x010D, 0x0109, 0x0109, 0x0109, 0x0109, 0x0100, + 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100 }; + +/*****************************************************************************/ +/* Lookup table for CAVLC ChromaDC total_coeff,trailing_ones parsing as per */ +/* Table 9-5 in the standard. Starting from msb, First 4bits=total_coeff, */ +/* next 2bits=trailing_ones and last 2bits=flushbits-1 */ +/*****************************************************************************/ +const UWORD8 gau1_ih264d_cav_chromdc_vld[256] = + { 0x9E, 0x9E, 0x97, 0x8F, 0x76, 0x76, 0x6E, 0x6E, 0x85, 0x85, 0x85, 0x85, + 0x65, 0x65, 0x65, 0x65, 0x45, 0x45, 0x45, 0x45, 0x7D, 0x7D, 0x7D, 0x7D, + 0x4D, 0x4D, 0x4D, 0x4D, 0x25, 0x25, 0x25, 0x25, + + 0x52, + 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, + 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, + 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, + + 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 
0x01, + + 0x28, + 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, + 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, + 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, + 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, + 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, + 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, + 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, + 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, + 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, + 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, + 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, }; + +const UWORD16 gau2_ih264d_offset_num_vlc_tab[9] = + { 0, 0, 120, 120, 224, 224, 224, 224, 224 }; + +/*****************************************************************************/ +/* Function pointer u4_ofst table lookup for parsing 4x4 residual blocks in */ +/* CAVLC. 
The u4_ofst is dependent on total coeffs coded */ +/*****************************************************************************/ +const UWORD8 gau1_ih264d_total_coeff_fn_ptr_offset[16] = + { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2 }; + +/****************************************************************************/ +/* gai2_ih264d_trailing_one_level lookup tables based on trailing one bits */ +/* All zeroes are u2_dummy in the table are u2_dummy to keep 3 uniform elements */ +/****************************************************************************/ +const WORD16 gai2_ih264d_trailing_one_level[14][3] = + { + /* All zeroes are u2_dummy */ + /**********************************************************************/ + /* Levels for trailing ones = 1, bits read can be 0 or 1 */ + /**********************************************************************/ + { 1, 0, 0 }, /* 0 */ + { -1, 0, 0 }, /* 1 */ + + /**********************************************************************/ + /* Levels for trailing ones = 2, bits read can be 00, 01, 10 ,11 */ + /**********************************************************************/ + { 1, 1, 0 }, /* 00 */ + { 1, -1, 0 }, /* 01 */ + { -1, 1, 0 }, /* 10 */ + { -1, -1, 0 }, /* 11 */ + + /**********************************************************************/ + /* Levels for trailing ones = 3, bits read can be 000 - 111 */ + /**********************************************************************/ + { 1, 1, 1 }, /* 000 */ + { 1, 1, -1 }, /* 001 */ + { 1, -1, 1 }, /* 010 */ + { 1, -1, -1 }, /* 011 */ + { -1, 1, 1 }, /* 100 */ + { -1, 1, -1 }, /* 101 */ + { -1, -1, 1 }, /* 110 */ + { -1, -1, -1 }, /* 111 */ + }; +/****************CAVLC DECODING TABLES ENDS *******************/ + +/****************************************************************************/ +/* These are the codes used for error detection in intra pred4x4 modes */ +/****************************************************************************/ +const UWORD8 
gau1_ih264d_intra_pred_err_code[9] = + { 2, 1, 0, 2, 3, 3, 3, 2, 1 }; + +/* Number of users for top field , bottom field, which field needs to be */ +/* displayed first */ +const UWORD8 gau1_ih264d_sei_fld_usage[9][3] = + { + { 1, 1, DISP_FLD_FIRST_UNDEF }, + { 1, 0, DISP_TOP_FLD_FIRST }, + { 0, 1, DISP_BOT_FLD_FIRST }, + { 1, 1, DISP_TOP_FLD_FIRST }, + { 1, 1, DISP_BOT_FLD_FIRST }, + { 2, 1, DISP_TOP_FLD_FIRST }, + { 1, 2, DISP_BOT_FLD_FIRST }, + { 2, 2, DISP_FLD_FIRST_UNDEF }, + { 3, 3, DISP_FLD_FIRST_UNDEF } }; + +/*****************************************************************/ +/* Context increment for significant coefficient(CABAC) */ +/* Requires only 63 elements. But the last element with value -1 */ +/* is kept to make it 64 */ +/*****************************************************************/ +const UWORD8 gau1_ih264d_sigcoeff_context_inc_frame[64] = + { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, 4, 4, 4, 4, 3, 3, 6, 7, 7, + 7, 8, 9, 10, 9, 8, 7, 7, 6, 11, 12, 13, 11, 6, 7, 8, 9, 14, 10, 9, 8, 6, + 11, 12, 13, 11, 6, 9, 14, 10, 9, 11, 12, 13, 11, 14, 10, 12, -1 }; + +const UWORD8 gau1_ih264d_sigcoeff_context_inc_field[64] = + { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5, 6, 9, 10, 10, 8, 11, 12, + 11, 9, 9, 10, 10, 8, 11, 12, 11, 9, 9, 10, 10, 8, 11, 12, 11, 9, 9, 10, + 10, 8, 13, 13, 9, 9, 10, 10, 8, 13, 13, 9, 9, 10, 10, 14, 14, 14, 14, 14, + -1 }; + +const UWORD8 gau1_ih264d_lastcoeff_context_inc[64] = + { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, + 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, -1 }; + +/*! 
+ ************************************************************************** + * \brief gau1_ih264d_top_left_mb_part_indx_mod + * + * SubBlk number of the top left subBlk in each of the MB partition + * (16x16, 16x8, 8x16, 8x8) + ************************************************************************** + */ +const UWORD8 gau1_ih264d_top_left_mb_part_indx_mod[] = + { 0, 0 /* Junk */, /* 16x16 */ + 0, 8, /* 16x8 */ + 0, 2, /* 8x16 */ + 0, 2, 8, 10 /* 8x8 */ + }; + +/*! + ************************************************************************** + * \brief gau1_ih264d_submb_indx_mod_sp_drct + * + * Contains increments to the subBlk num in a given subMb partition. + ************************************************************************** + */ +const UWORD8 gau1_ih264d_submb_indx_mod_sp_drct[] = + { 0, 0 /* Junk */, /* 8x8 */ + 0, 4, /* 8x4 */ + 0, 1, /* 4x8 */ + 0, 1, 3, 1 /* 4x4 */ + }; diff --git a/decoder/ih264d_tables.h b/decoder/ih264d_tables.h new file mode 100755 index 0000000..04dfbd0 --- /dev/null +++ b/decoder/ih264d_tables.h @@ -0,0 +1,157 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +#ifndef _IH264D_TABLES_H_ +#define _IH264D_TABLES_H_ + +/** + ************************************************************************** + * \file ih264d_tables.h + * + * \brief + * Declaration of all tables used by h264 decoder + * + * \date + * 17/09/2004 + * + * \author MA + ************************************************************************** + */ +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_cabac.h" + +/*Deblocking Table declaration*/ +extern const UWORD8 gau1_ih264d_qp_scale_cr[]; +extern const UWORD8 gau1_ih264d_alpha_table[]; +extern const UWORD8 gau1_ih264d_clip_table_deblock[]; +extern const UWORD8 gau1_ih264d_beta_table[]; +extern const UWORD8 gau1_ih264d_clip_table[][4]; + +/*Parsing Table declaration*/ +extern const UWORD8 gau1_ih264d_cbp_tab[6]; +extern const UWORD32 gau4_ih264d_packed_bs2[16]; +extern const UWORD16 gau2_ih264d_4x4_v2h_reorder[16]; +extern const UWORD8 gau1_ih264d_subblk_offset[16]; +extern const UWORD8 gau1_ih264d_cbp_table[48][2]; + +/*Decode Slice Table declaration*/ +extern const UWORD8 gau1_ih264d_inv_scan[16]; +extern const UWORD8 gau1_ih264d_inv_scan_fld[16]; +extern const UWORD8 gau1_ih264d_dequant_matrix[6][16]; +extern const UWORD16 gau2_ih264_iquant_scale_4x4[6][16]; +extern const UWORD8 gau1_ih264d_dequant8x8_zigzag_cavlc[4][6][16]; +extern const UWORD16 gau1_ih264d_dequant8x8_cavlc[6][64]; + +extern const UWORD8 gau1_ih264d_inv_scan_prog8x8_cavlc[4][16]; +extern const UWORD8 gau1_ih264d_inv_scan_int8x8_cavlc[4][16]; +extern const UWORD8 gau1_ih264d_inv_scan_prog8x8_cabac[64]; +extern const UWORD8 gau1_ih264d_inv_scan_int8x8_cabac[64]; + +extern const UWORD8 gau1_ih264d_lastcoeff_context_inc[64]; +extern const UWORD8 gau1_ih264d_sigcoeff_context_inc_frame[64]; +extern const UWORD8 gau1_ih264d_sigcoeff_context_inc_field[64]; + +/* scaling related table declaration */ +extern const WORD16 gai2_ih264d_default_intra4x4[16]; +extern 
const WORD16 gai2_ih264d_default_inter4x4[16]; +extern const WORD16 gai2_ih264d_default_intra8x8[64]; +extern const WORD16 gai2_ih264d_default_inter8x8[64]; +extern const WORD16 gai2_ih264d_flat_4x4[16]; +extern const WORD16 gai2_ih264d_flat_8x8[64]; + +/*Decode MV Table declaration*/ +extern const WORD8 gau1_ih264d_mv_pred_condition[]; + +/** Number of subMbs for the 8x8 prediction mode */ +extern const UWORD8 gau1_ih264d_num_submb_part[]; + +/** Width of the 8x8 prediction mode in terms of subMbs */ +extern const UWORD8 gau1_ih264d_submb_partw[]; + +/** Height of the 8x8 prediction mode in terms of subMbs */ +extern const UWORD8 gau1_ih264d_submb_parth[]; + +/** Number of MB partitions for the MB prediction mode */ +extern const UWORD8 gau1_ih264d_num_mb_part[]; + +/** Width of the MB partition in terms of subMbs */ +extern const UWORD8 gau1_ih264d_mb_partw[]; + +/** Height of the MB partition in terms of subMbs */ +extern const UWORD8 gau1_ih264d_mb_parth[]; + +/** MB partition information is packed into a UWORD32 {0,number,width,height} */ +extern const UWORD32 gau4_ih264d_submb_part[]; + +extern const UWORD8 gau1_ih264d_submb_indx_mod[]; + +/** This table is used to assign CBPs to Inter MBs. 
*/ +extern const UWORD8 gau1_ih264d_cbp_inter[]; + +/** Motion comp modes for P followed by B, + 0 to 4 : P Mbs + 5 to 27 : B Mbs + 28 to 30 : DIRECT */ +extern const UWORD8 gau1_ih264d_mb_mc_mode[]; + +extern const UWORD8 gau1_ih264d_submb_mc_mode[]; + +/** Sub MB pred modes for B slice */ +extern const UWORD8 gau1_ih264d_submb_pred_modes[]; + +/** MB pred modes for P and B slice */ +extern const WORD8 gau1_ih264d_mb_pred_modes[2][32]; + +/*Decode CAVLC Table declaration*/ +extern const UWORD8 gau1_ih264d_table_total_zero_2to10[9][64]; +extern const UWORD8 gau1_ih264d_table_total_zero_11to15[5][16]; +extern const UWORD8 gau1_ih264d_table_run_before[64]; +extern const UWORD16 gau2_ih264d_code_gx[304]; +extern const UWORD8 gau1_ih264d_cav_chromdc_vld[256]; +extern const UWORD16 gau2_ih264d_offset_num_vlc_tab[9]; +extern const UWORD8 gau1_ih264d_total_coeff_fn_ptr_offset[16]; +extern const WORD16 gai2_ih264d_trailing_one_level[14][3]; + +/*Decode CABAC Table declaration*/ +extern const UWORD32 gau4_ih264d_cabac_table[]; + +/****************************************************************************/ +/* For error detection in intra pred4x4 modes */ +/****************************************************************************/ +extern const UWORD8 gau1_ih264d_intra_pred_err_code[9]; + +/*****************************************************************************/ +/* Cabac tables for context initialization depending upon type of Slice, */ +/* cabac init Idc value and Qp. 
*/ +/*****************************************************************************/ +extern const UWORD8 gau1_ih264d_cabac_ctxt_init_table[NUM_CAB_INIT_IDC_PLUS_ONE][QP_RANGE][NUM_CABAC_CTXTS]; + +/*****************************************************************************/ +/* SEI tables for field usage and which field first */ +/*****************************************************************************/ +extern const UWORD8 gau1_ih264d_sei_fld_usage[9][3]; + + +extern const UWORD8 gau1_ih264d_top_left_mb_part_indx_mod[]; +extern const UWORD8 gau1_ih264d_submb_indx_mod_sp_drct[]; + +#endif /* _IH264D_TABLES_H_ */ diff --git a/decoder/ih264d_thread_compute_bs.c b/decoder/ih264d_thread_compute_bs.c new file mode 100755 index 0000000..6812d57 --- /dev/null +++ b/decoder/ih264d_thread_compute_bs.c @@ -0,0 +1,802 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*! 
+ ************************************************************************** + * \file ih264d_thread_compute_bs.c + * + * \brief + * Contains boundary-strength and deblocking routines for the multi-threaded decoder + * + * Detailed_description + * + * \date + * 20/02/2012 + * + * \author ZR + ************************************************************************** + */ +#include "ih264d_error_handler.h" +#include "ih264d_debug.h" +#include <string.h> +#include "ih264d_defs.h" +#include "ih264d_debug.h" +#include "ih264d_tables.h" +#include "ih264d_structs.h" +#include "ih264d_defs.h" +#include "ih264d_mb_utils.h" + +#include "ih264d_thread_compute_bs.h" +#include "ithread.h" +#include "ih264d_deblocking.h" +#include "ih264d_mb_utils.h" +#include "ih264d_tables.h" +#include "ih264d_format_conv.h" +#include "ih264d_defs.h" +UWORD16 ih264d_update_csbp_8x8(UWORD16 u2_luma_csbp); +void ih264d_fill_bs2_horz_vert(UWORD32 *pu4_bs, /* Base pointer of BS table */ + WORD32 u4_left_mb_csbp, /* csbp of left mb */ + WORD32 u4_top_mb_csbp, /* csbp of top mb */ + WORD32 u4_cur_mb_csbp, /* csbp of current mb */ + const UWORD32 *pu4_packed_bs2, const UWORD16 *pu2_4x4_v2h_reorder); + +#define BS_MB_GROUP 4 +#define DEBLK_MB_GROUP 1 +#define FORMAT_CONV_MB_GROUP 4 + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_compute_bs_non_mbaff_thread */ +/* */ +/* Description : This function computes the pointers of left, top & current*/ +/* : Nnz, MvPred & deblk_mb_t and supplies them to the FillBs function for*/ +/* : Boundary Strength calculation. This function is used */ +/* : when BS is calculated in a separate thread */ +/* Inputs : pointer to decoder context,cur_mb_info,u4_mb_num */ +/* Processing : */ +/* */ +/* Outputs : Produces the Boundary Strength for Current Mb */ +/* Returns : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* ITTIAM */ 
+/*****************************************************************************/ + +void ih264d_compute_bs_non_mbaff_thread(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD32 u4_mb_num) +{ + /* Mvpred and Nnz for top and Courrent */ + mv_pred_t *ps_cur_mv_pred, *ps_top_mv_pred = NULL, *ps_left_mv_pred; + /* deblk_mb_t Params */ + deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */ + deblkmb_neighbour_t *ps_deblk_top_mb; + + /* Reference Index to POC mapping*/ + void ** apv_map_ref_idx_to_poc; + UWORD32 u4_leftmbtype; + + UWORD16 u2_left_csbp, u2_top_csbp, u2_cur_csbp; + + /* Set of flags */ + UWORD32 u4_cur_mb_intra, u1_top_mb_typ, u4_cur_mb_fld; + UWORD32 u1_cur_mb_type; + UWORD32 * pu4_bs_table; + + /* Neighbour availability */ + /* Initialization */ + const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx; + const UWORD32 u2_mby = ps_cur_mb_info->u2_mby; + const UWORD32 u1_pingpong = u2_mbx & 0x01; + ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + u2_mbx; + + /* Pointer assignment for Current DeblkMB, Current Mv Pred */ + ps_cur_mb_params = ps_dec->ps_deblk_pic + u4_mb_num; + ps_cur_mv_pred = ps_dec->s_cur_pic.ps_mv + (u4_mb_num << 4); + + apv_map_ref_idx_to_poc = + (void **)ps_dec->ps_computebs_cur_slice->ppv_map_ref_idx_to_poc + + 1; + u1_cur_mb_type = ps_cur_mb_params->u1_mb_type; + u1_top_mb_typ = ps_deblk_top_mb->u1_mb_type; + ps_deblk_top_mb->u1_mb_type = u1_cur_mb_type; + + { + ps_cur_mb_params->u1_topmb_qp = ps_deblk_top_mb->u1_mb_qp; + ps_deblk_top_mb->u1_mb_qp = ps_cur_mb_params->u1_mb_qp; + + ps_cur_mb_params->u1_left_mb_qp = ps_dec->deblk_left_mb[1].u1_mb_qp; + ps_dec->deblk_left_mb[1].u1_mb_qp = ps_cur_mb_params->u1_mb_qp; + + } + + /* if no deblocking required for current Mb then continue */ + /* Check next Mbs in Mb group */ + if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING) + { + void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc + + POC_LIST_L0_TO_L1_DIFF; + { + /* Store Parameter for Top MvPred 
refernce frame Address */ + + void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress; + WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame; + WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame; + + /* Store Left addresses for Next Mb */ + void ** ppv_left_mv_pred_addr = + ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add; + WORD8 * p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame; + + + ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]]; + ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]]; + + ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]]; + ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]]; + ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]]; + ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]]; + + ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]]; + ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]]; + //} + /* Storing the leftMbtype for next Mb */ + ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type; + } + + return; + } + + /* Flag for extra left Edge */ + ps_cur_mb_params->u1_single_call = 1; + + /* Update the Left deblk_mb_t and Left MvPred Parameters */ + if(!u2_mbx) + { + u4_leftmbtype = 0; + + /* Initialize the ps_left_mv_pred with Junk but Valid Location */ + /* to avoid invalid memory access */ + /* this is read only pointer */ + ps_left_mv_pred = ps_cur_mv_pred + 3; + } + else + { + u4_leftmbtype = ps_dec->deblk_left_mb[1].u1_mb_type; + + /* Come to Left Most Edge of the MB */ + ps_left_mv_pred = ps_cur_mv_pred - (1 << 4) + 3; + } + + if(!u2_mby) + u1_top_mb_typ = 0; + + /* MvPred Pointer Calculation */ + /* CHANGED CODE */ + ps_top_mv_pred = ps_cur_mv_pred - (ps_dec->u2_frm_wd_in_mbs << 4) + 12; + + u4_cur_mb_intra = u1_cur_mb_type & D_INTRA_MB; + u4_cur_mb_fld = !!(u1_cur_mb_type & D_FLD_MB); + /* Compute BS function */ + pu4_bs_table = ps_cur_mb_params->u4_bs_table; + + 
u2_cur_csbp = ps_cur_mb_info->ps_curmb->u2_luma_csbp; + u2_left_csbp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp; + u2_top_csbp = ps_cur_mb_info->ps_top_mb->u2_luma_csbp; + + /* Compute BS function */ + if(ps_dec->ps_cur_sps->u1_profile_idc == HIGH_PROFILE_IDC) + { + if(ps_cur_mb_info->u1_tran_form8x8 == 1) + { + u2_cur_csbp = ih264d_update_csbp_8x8( + ps_cur_mb_info->ps_curmb->u2_luma_csbp); + } + + if(ps_cur_mb_info->ps_left_mb->u1_tran_form8x8 == 1) + { + u2_left_csbp = ih264d_update_csbp_8x8( + ps_cur_mb_info->ps_left_mb->u2_luma_csbp); + } + + if(ps_cur_mb_info->ps_top_mb->u1_tran_form8x8 == 1) + { + u2_top_csbp = ih264d_update_csbp_8x8( + ps_cur_mb_info->ps_top_mb->u2_luma_csbp); + } + } + if(u4_cur_mb_intra) + { + + pu4_bs_table[4] = 0x04040404; + pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404; + pu4_bs_table[1] = 0x03030303; + pu4_bs_table[2] = 0x03030303; + pu4_bs_table[3] = 0x03030303; + pu4_bs_table[5] = 0x03030303; + pu4_bs_table[6] = 0x03030303; + pu4_bs_table[7] = 0x03030303; + } + else + { + UWORD32 u4_is_non16x16 = !!(u1_cur_mb_type & D_PRED_NON_16x16); + UWORD32 u4_is_b = + (ps_dec->ps_computebs_cur_slice->slice_type == B_SLICE); + + + + + + + ih264d_fill_bs2_horz_vert(pu4_bs_table, u2_left_csbp, u2_top_csbp, + u2_cur_csbp, gau4_ih264d_packed_bs2, + gau2_ih264d_4x4_v2h_reorder); + + if(u4_leftmbtype & D_INTRA_MB) + pu4_bs_table[4] = 0x04040404; + + if(u1_top_mb_typ & D_INTRA_MB) + pu4_bs_table[0] = u4_cur_mb_fld ? 
0x03030303 : 0x04040404; + + ps_dec->pf_fill_bs1[u4_is_b][u4_is_non16x16]( + ps_cur_mv_pred, ps_top_mv_pred, apv_map_ref_idx_to_poc, + pu4_bs_table, ps_left_mv_pred, + &(ps_dec->ps_left_mvpred_addr[u1_pingpong][1]), + ps_cur_mb_info->ps_top_mb->u4_pic_addrress, + (4 >> u4_cur_mb_fld)); + } + + { + void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc + + POC_LIST_L0_TO_L1_DIFF; + { + /* Store Parameter for Top MvPred refernce frame Address */ + + void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress; + WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame; + WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame; + + /* Store Left addresses for Next Mb */ + void ** ppv_left_mv_pred_addr = + ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add; + WORD8 * p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame; + + ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]]; + ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]]; + + ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]]; + ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]]; + ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]]; + ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]]; + + ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]]; + ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]]; + + /* Storing the leftMbtype for next Mb */ + ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type; + + } + } + + /* For transform 8x8 disable deblocking of the intrernal edges of a 8x8 block */ + if(ps_cur_mb_info->u1_tran_form8x8) + { + pu4_bs_table[1] = 0; + pu4_bs_table[3] = 0; + pu4_bs_table[5] = 0; + pu4_bs_table[7] = 0; + } +} + +void ih264d_check_mb_map_deblk(dec_struct_t *ps_dec, + UWORD32 deblk_mb_grp, + tfr_ctxt_t *ps_tfr_cxt) +{ + UWORD32 i = 0; + UWORD32 u4_mb_num; + UWORD32 u4_cur_mb, u4_right_mb; + volatile UWORD8 *mb_map = ps_dec->pu1_recon_mb_map; + UWORD32 
u4_mb_x, u4_mb_y, u4_image_wd_mb; + deblk_mb_t *ps_cur_mb = ps_dec->ps_cur_deblk_thrd_mb; + deblk_mb_t *ps_top_mb; + deblk_mb_t *ps_left_mb; + const WORD32 i4_cb_qp_idx_ofst = + ps_dec->ps_cur_pps->i1_chroma_qp_index_offset; + const WORD32 i4_cr_qp_idx_ofst = + ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset; + + UWORD32 u4_wd_y, u4_wd_uv; + UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; + + u4_mb_num = ps_dec->u4_cur_deblk_mb_num; + u4_mb_x = ps_dec->u4_deblk_mb_x; + u4_mb_y = ps_dec->u4_deblk_mb_y; + u4_image_wd_mb = ps_dec->u2_frm_wd_in_mbs; + u4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag; + u4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag; + ps_cur_mb = ps_dec->ps_cur_deblk_thrd_mb; + + for(i = 0; i < deblk_mb_grp; i++) + { + + //while(1) + //{ + CHECK_MB_MAP_BYTE(u4_mb_num, mb_map, u4_cur_mb); + + if(ps_dec->u4_cur_bs_mb_num <= u4_mb_num) + u4_cur_mb = 0; + + if(u4_mb_x < (u4_image_wd_mb - 1)) + { + CHECK_MB_MAP_BYTE((u4_mb_num + 1), mb_map, u4_right_mb); + } + else + u4_right_mb = 1; + + if((u4_cur_mb && u4_right_mb) == 0) + { + break; + } + else + { + + } + //} + + u4_mb_num++; + { + UWORD32 u4_deb_mode, u4_mbs_next; + u4_deb_mode = ps_cur_mb->u1_deblocking_mode; + if(!(u4_deb_mode & MB_DISABLE_FILTERING)) + { + + if(u4_mb_x) + { + ps_left_mb = ps_cur_mb - 1; + + } + else + { + ps_left_mb = NULL; + + } + if(u4_mb_y != 0) + { + ps_top_mb = ps_cur_mb - (u4_image_wd_mb); + } + else + { + ps_top_mb = NULL; + } + + if(u4_deb_mode & MB_DISABLE_LEFT_EDGE) + ps_left_mb = NULL; + if(u4_deb_mode & MB_DISABLE_TOP_EDGE) + ps_top_mb = NULL; + + ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt, + i4_cb_qp_idx_ofst, i4_cr_qp_idx_ofst, + ps_cur_mb, u4_wd_y, u4_wd_uv, + ps_top_mb, ps_left_mb); + + } + + ps_cur_mb++; + u4_mb_x++; + u4_mbs_next = u4_image_wd_mb - u4_mb_x; + + ps_tfr_cxt->pu1_mb_y += 16; + ps_tfr_cxt->pu1_mb_u += 8 * YUV420SP_FACTOR; + ps_tfr_cxt->pu1_mb_v += 8; + + if(!u4_mbs_next) + { + ps_tfr_cxt->pu1_mb_y += 
ps_tfr_cxt->u4_y_inc; + ps_tfr_cxt->pu1_mb_u += ps_tfr_cxt->u4_uv_inc; + ps_tfr_cxt->pu1_mb_v += ps_tfr_cxt->u4_uv_inc; + u4_mb_y++; + u4_mb_x = 0; + } + } + + } + + ps_dec->u4_cur_deblk_mb_num = u4_mb_num; + ps_dec->u4_deblk_mb_x = u4_mb_x; + ps_dec->u4_deblk_mb_y = u4_mb_y; + ps_dec->ps_cur_deblk_thrd_mb = ps_cur_mb; + +} + +void ih264d_check_mb_map_deblk_wait(dec_struct_t *ps_dec, + UWORD32 deblk_mb_grp, + tfr_ctxt_t *ps_tfr_cxt) +{ + UWORD32 i = 0; + UWORD32 u4_mb_num; + UWORD32 u4_cur_mb, u4_right_mb; + volatile UWORD8 *mb_map = ps_dec->pu1_recon_mb_map; + UWORD32 u4_mb_x, u4_mb_y, u4_image_wd_mb; + deblk_mb_t *ps_cur_mb = ps_dec->ps_cur_deblk_thrd_mb; + deblk_mb_t *ps_top_mb; + deblk_mb_t *ps_left_mb; + const WORD32 i4_cb_qp_idx_ofst = + ps_dec->ps_cur_pps->i1_chroma_qp_index_offset; + const WORD32 i4_cr_qp_idx_ofst = + ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset; + + UWORD32 u4_wd_y, u4_wd_uv; + UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; + + u4_mb_num = ps_dec->u4_cur_deblk_mb_num; + u4_mb_x = ps_dec->u4_deblk_mb_x; + u4_mb_y = ps_dec->u4_deblk_mb_y; + u4_image_wd_mb = ps_dec->u2_frm_wd_in_mbs; + u4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag; + u4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag; + ps_cur_mb = ps_dec->ps_cur_deblk_thrd_mb; + + for(i = 0; i < deblk_mb_grp; i++) + { + + while(1) + { + CHECK_MB_MAP_BYTE(u4_mb_num, mb_map, u4_cur_mb); + + if(ps_dec->u4_cur_bs_mb_num <= u4_mb_num) + u4_cur_mb = 0; + + if(u4_mb_x < (u4_image_wd_mb - 1)) + { + CHECK_MB_MAP_BYTE((u4_mb_num + 1), mb_map, u4_right_mb); + } + else + u4_right_mb = 1; + + if(ps_dec->u2_mb_skip_error) + { + ps_dec->u2_skip_deblock = 1; + break; + } + + + if(ps_dec->u2_skip_deblock == 1) + { + break; + } + if((u4_cur_mb && u4_right_mb) == 0) + { + + if(ps_dec->u4_output_present + && ps_dec->u4_fmt_conv_cur_row + < ps_dec->s_disp_frame_info.u4_y_ht) + { + ps_dec->u4_fmt_conv_num_rows = + MIN(ps_dec->u4_fmt_conv_num_rows, + 
(ps_dec->s_disp_frame_info.u4_y_ht + - ps_dec->u4_fmt_conv_cur_row)); + ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), + ps_dec->u4_fmt_conv_cur_row, + ps_dec->u4_fmt_conv_num_rows); + ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows; + } + else + NOP(32); + } + else + { + break; + } + } + + u4_mb_num++; + { + UWORD32 u4_deb_mode, u4_mbs_next; + u4_deb_mode = ps_cur_mb->u1_deblocking_mode; + if(!(u4_deb_mode & MB_DISABLE_FILTERING)) + { + + if(u4_mb_x) + { + ps_left_mb = ps_cur_mb - 1; + + } + else + { + ps_left_mb = NULL; + + } + if(u4_mb_y != 0) + { + ps_top_mb = ps_cur_mb - (u4_image_wd_mb); + } + else + { + ps_top_mb = NULL; + } + + if(u4_deb_mode & MB_DISABLE_LEFT_EDGE) + ps_left_mb = NULL; + if(u4_deb_mode & MB_DISABLE_TOP_EDGE) + ps_top_mb = NULL; + + ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt, + i4_cb_qp_idx_ofst, i4_cr_qp_idx_ofst, + ps_cur_mb, u4_wd_y, u4_wd_uv, + ps_top_mb, ps_left_mb); + } + + ps_cur_mb++; + u4_mb_x++; + u4_mbs_next = u4_image_wd_mb - u4_mb_x; + + ps_tfr_cxt->pu1_mb_y += 16; + ps_tfr_cxt->pu1_mb_u += 8 * YUV420SP_FACTOR; + ps_tfr_cxt->pu1_mb_v += 8; + + if(!u4_mbs_next) + { + ps_tfr_cxt->pu1_mb_y += ps_tfr_cxt->u4_y_inc; + ps_tfr_cxt->pu1_mb_u += ps_tfr_cxt->u4_uv_inc; + ps_tfr_cxt->pu1_mb_v += ps_tfr_cxt->u4_uv_inc; + u4_mb_y++; + u4_mb_x = 0; + } + } + + } + + ps_dec->u4_cur_deblk_mb_num = u4_mb_num; + ps_dec->u4_deblk_mb_x = u4_mb_x; + ps_dec->u4_deblk_mb_y = u4_mb_y; + ps_dec->ps_cur_deblk_thrd_mb = ps_cur_mb; + +} +void ih264d_computebs_deblk_slice(dec_struct_t *ps_dec, tfr_ctxt_t *ps_tfr_cxt) +{ + dec_mb_info_t *p_cur_mb; + UWORD32 u4_max_addr = ps_dec->ps_cur_sps->u2_max_mb_addr; + UWORD32 i; + UWORD32 u1_mb_aff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + UWORD16 u2_slice_num; + UWORD32 u4_mb_num; + + ps_dec->u4_cur_slice_bs_done = 0; + ps_dec->u4_bs_cur_slice_num_mbs = 0; + ps_dec->u4_cur_bs_mb_num = + (ps_dec->ps_computebs_cur_slice->u4_first_mb_in_slice) + << u1_mb_aff; + + 
while(ps_dec->u4_cur_slice_bs_done != 1) + { + UWORD32 bs_mb_grp = BS_MB_GROUP; + while(1) + { + + UWORD32 u4_cond = 0; + + u4_mb_num = ps_dec->u4_cur_bs_mb_num; + + /*introducing 1 MB delay*/ + if((u4_mb_num + BS_MB_GROUP) <= u4_max_addr) + u4_mb_num = u4_mb_num + BS_MB_GROUP; + else + { + bs_mb_grp = u4_max_addr - u4_mb_num + 1; + u4_mb_num = u4_max_addr; + + } + + CHECK_MB_MAP_BYTE(u4_mb_num, ps_dec->pu1_dec_mb_map, u4_cond); + if(u4_cond) + { + break; + } + + if(ps_dec->u2_skip_deblock == 0) + { + ih264d_check_mb_map_deblk(ps_dec, DEBLK_MB_GROUP, ps_tfr_cxt); + } + } + + GET_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, ps_dec->u4_cur_bs_mb_num, + u2_slice_num); + + if(u2_slice_num != ps_dec->u2_cur_slice_num_bs) + { + ps_dec->u4_cur_slice_bs_done = 1; + } + + /* Compute BS for NMB group*/ + for(i = 0; i < bs_mb_grp; i++) + { + GET_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, + ps_dec->u4_cur_bs_mb_num, u2_slice_num); + + if(u2_slice_num != ps_dec->u2_cur_slice_num_bs) + { + ps_dec->u4_cur_slice_bs_done = 1; + } + + if(ps_dec->u4_cur_slice_bs_done == 1) + break; + + p_cur_mb = &ps_dec->ps_frm_mb_info[ps_dec->u4_cur_bs_mb_num + & PD_MB_BUF_SIZE_MOD]; + + DEBUG_THREADS_PRINTF("ps_dec->u4_cur_bs_mb_num = %d\n",ps_dec->u4_cur_bs_mb_num); + ih264d_compute_bs_non_mbaff_thread(ps_dec, p_cur_mb, + ps_dec->u4_cur_bs_mb_num); + ps_dec->u4_cur_bs_mb_num++; + ps_dec->u4_bs_cur_slice_num_mbs++; + + } + + if(ps_dec->u4_cur_bs_mb_num > u4_max_addr) + { + ps_dec->u4_cur_slice_bs_done = 1; + } + + /*deblock MB group*/ + { + UWORD32 u4_num_mbs; + + if(ps_dec->u4_cur_bs_mb_num > ps_dec->u4_cur_deblk_mb_num) + + u4_num_mbs = ps_dec->u4_cur_bs_mb_num + - ps_dec->u4_cur_deblk_mb_num; + else + u4_num_mbs = 0; + + if(u4_num_mbs >= DEBLK_MB_GROUP) + u4_num_mbs = DEBLK_MB_GROUP; + if(ps_dec->u2_skip_deblock == 0) + { + ih264d_check_mb_map_deblk_wait(ps_dec, u4_num_mbs, ps_tfr_cxt); + } + } + + } +} + +void ih264d_computebs_deblk_thread(dec_struct_t *ps_dec) +{ + tfr_ctxt_t s_tfr_ctxt; + 
tfr_ctxt_t *ps_tfr_cxt = &s_tfr_ctxt; // = &ps_dec->s_tran_addrecon; + pad_mgr_t *ps_pad_mgr = &ps_dec->s_pad_mgr; + + UWORD32 yield_cnt = 0; + + ithread_set_name("ih264d_computebs_deblk_thread"); + + + // run the loop till all slices are decoded + + // 0: un-identified state, 1 - bs needed, 2 - bs not needed + while(1) + { + if(ps_dec->u4_start_bs_deblk == 0) + { + NOP(128); + NOP(128); + NOP(128); + NOP(128); + } + else + { + break; + } + } + + if(ps_dec->u4_start_bs_deblk == 1) + { + ps_dec->u4_cur_deblk_mb_num = 0; + ps_dec->u4_deblk_mb_x = 0; + ps_dec->u4_deblk_mb_y = 0; + + ih264d_init_deblk_tfr_ctxt(ps_dec, ps_pad_mgr, ps_tfr_cxt, + ps_dec->u2_frm_wd_in_mbs, 0); + + ps_tfr_cxt->pu1_mb_y = ps_tfr_cxt->pu1_src_y + 4; + ps_tfr_cxt->pu1_mb_u = ps_tfr_cxt->pu1_src_u + 4; + ps_tfr_cxt->pu1_mb_v = ps_tfr_cxt->pu1_src_v + 4; + + ps_dec->ps_cur_deblk_thrd_mb = ps_dec->ps_deblk_pic; + + while(1) + { + /*Complete all writes before processing next slice*/ + DATA_SYNC(); + /*wait untill all the slice params have been populated*/ + while(ps_dec->ps_computebs_cur_slice->slice_header_done == 0) + { + NOP(32); DEBUG_THREADS_PRINTF(" waiting for slice header at compute bs\n"); + } + + DEBUG_THREADS_PRINTF(" Entering compute bs slice\n"); + ih264d_computebs_deblk_slice(ps_dec, ps_tfr_cxt); + + DEBUG_THREADS_PRINTF(" Exit compute bs slice \n"); + + /*Complete all writes before processing next slice*/ + DATA_SYNC(); + + while(1) + { + volatile void * parse_addr, *computebs_addr; + volatile UWORD32 last_slice; + + parse_addr = (volatile void *)ps_dec->ps_parse_cur_slice; + computebs_addr = + (volatile void *)ps_dec->ps_computebs_cur_slice; + last_slice = + ps_dec->ps_computebs_cur_slice->last_slice_in_frame; + + if(last_slice == 1) + break; + + if(parse_addr != computebs_addr) + break; + + DEBUG_THREADS_PRINTF("Waiting at compute bs for next slice or end of frame\n"); + + NOP(32); + + } + + DEBUG_THREADS_PRINTF("CBS thread:Got next slice/end of frame signal \n "); + + if((void 
*)ps_dec->ps_parse_cur_slice + > (void *)ps_dec->ps_computebs_cur_slice) + { + ps_dec->ps_computebs_cur_slice++; + ps_dec->u2_cur_slice_num_bs++; + } + else + { + /*Last slice in frame*/ + break; + } + + } + + /*deblock remaining MBs*/ + { + UWORD32 u4_num_mbs; + + u4_num_mbs = ps_dec->ps_cur_sps->u2_max_mb_addr + - ps_dec->u4_cur_deblk_mb_num + 1; + + DEBUG_PERF_PRINTF("mbs left for deblocking= %d \n",u4_num_mbs); + + if(u4_num_mbs != 0) + if(ps_dec->u2_skip_deblock == 0) + ih264d_check_mb_map_deblk_wait(ps_dec, u4_num_mbs, + ps_tfr_cxt); + } + } + + ps_dec->u4_start_bs_deblk = 0; + ithread_exit(0); +} + + diff --git a/decoder/ih264d_thread_compute_bs.h b/decoder/ih264d_thread_compute_bs.h new file mode 100755 index 0000000..1bef07f --- /dev/null +++ b/decoder/ih264d_thread_compute_bs.h @@ -0,0 +1,34 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/* + * ih264d_thread_compute_bs.h + * + * Declarations for the boundary-strength (BS) computation and deblocking thread. + * + * Created on: Feb 21, 2012 + * Author: 100492 + */ + +#ifndef _IH264D_THREAD_COMPUTE_BS_H_ +#define _IH264D_THREAD_COMPUTE_BS_H_ +/* Computes BS for a single macroblock of a non-MBAFF picture; the BS thread calls it once per macroblock with that macroblock's number in u4_mb_num. */ +void ih264d_compute_bs_non_mbaff_thread(dec_struct_t * ps_dec, + dec_mb_info_t * ps_cur_mb_info, + UWORD32 u4_mb_num); + +/* Entry point of the BS computation / deblocking worker thread. */ +void ih264d_computebs_deblk_thread(dec_struct_t *ps_dec); +#endif /* _IH264D_THREAD_COMPUTE_BS_H_ */ diff --git a/decoder/ih264d_thread_parse_decode.c b/decoder/ih264d_thread_parse_decode.c new file mode 100755 index 0000000..be3cb01 --- /dev/null +++ b/decoder/ih264d_thread_parse_decode.c @@ -0,0 +1,732 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! 
+ ************************************************************************** + * \file ih264d_thread_parse_decode.c + * + * \brief + * Contains routines that for multi-thread decoder + * + * Detailed_description + * + * \date + * 20/02/2012 + * + * \author ZR + ************************************************************************** + */ + +#include "ih264d_error_handler.h" +#include "ih264d_debug.h" +#include "ithread.h" +#include <string.h> +#include "ih264d_defs.h" +#include "ih264d_debug.h" +#include "ih264d_tables.h" +#include "ih264d_structs.h" +#include "ih264d_defs.h" +#include "ih264d_mb_utils.h" +#include "ih264d_thread_parse_decode.h" +#include "ih264d_inter_pred.h" + +#include "ih264d_process_pslice.h" +#include "ih264d_process_intra_mb.h" +#include "ih264d_deblocking.h" +#include "ih264d_format_conv.h" + +void ih264d_deblock_mb_level(dec_struct_t *ps_dec, + dec_mb_info_t *ps_cur_mb_info, + UWORD32 nmb_index); + +void ih264d_copy_intra_pred_line(dec_struct_t *ps_dec, + dec_mb_info_t *ps_cur_mb_info, + UWORD32 nmb_index); + +void ih264d_parse_tfr_nmb(dec_struct_t * ps_dec, + UWORD8 u1_mb_idx, + UWORD8 u1_num_mbs, + UWORD8 u1_num_mbs_next, + UWORD8 u1_tfr_n_mb, + UWORD8 u1_end_of_row) +{ + WORD32 i, u4_mb_num; + + const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + UWORD32 u4_n_mb_start; + + UNUSED(u1_mb_idx); + UNUSED(u1_num_mbs_next); + if(u1_tfr_n_mb) + { + + + u4_n_mb_start = (ps_dec->u2_cur_mb_addr + 1) - u1_num_mbs; + + // copy into s_frmMbInfo + + u4_mb_num = u4_n_mb_start; + ps_dec->ps_parse_cur_slice->u4_num_mbs_done_in_slice += u1_num_mbs; + u4_mb_num = (ps_dec->u2_cur_mb_addr + 1) - u1_num_mbs; + + for(i = 0; i < u1_num_mbs; i++) + { + DATA_SYNC(); + UPDATE_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, u4_mb_num, + ps_dec->u2_cur_slice_num); + UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_dec_mb_map, u4_mb_num); + + u4_mb_num++; + } + + DATA_SYNC(); + /****************************************************************/ + /* Check for End Of 
Row in Next iteration */ + /****************************************************************/ + + /****************************************************************/ + /* Transfer the Following things */ + /* N-Mb DeblkParams Data ( To Ext DeblkParams Buffer ) */ + /* N-Mb Recon Data ( To Ext Frame Buffer ) */ + /* N-Mb Intrapredline Data ( Updated Internally) */ + /* N-Mb MV Data ( To Ext MV Buffer ) */ + /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers) */ + /****************************************************************/ + + /* Swap top and current pointers */ + + ps_dec->s_tran_addrecon_parse.pu1_dest_y += + ps_dec->s_tran_addrecon_parse.u4_inc_y[u1_end_of_row]; + ps_dec->s_tran_addrecon_parse.pu1_dest_u += + ps_dec->s_tran_addrecon_parse.u4_inc_uv[u1_end_of_row]; + ps_dec->s_tran_addrecon_parse.pu1_dest_v += + ps_dec->s_tran_addrecon_parse.u4_inc_uv[u1_end_of_row]; + + if(u1_end_of_row) + { + UWORD16 u2_mb_y; + UWORD32 u4_frame_stride, y_offset; + + ps_dec->ps_top_mb_row = ps_dec->ps_cur_mb_row; + ps_dec->ps_cur_mb_row += ((ps_dec->u2_frm_wd_in_mbs) << u1_mbaff); + + u2_mb_y = ps_dec->u2_mby + (1 + u1_mbaff); + u4_frame_stride = ps_dec->u2_frm_wd_y + << ps_dec->ps_cur_slice->u1_field_pic_flag; + y_offset = (u2_mb_y * u4_frame_stride) << 4; + ps_dec->s_tran_addrecon_parse.pu1_dest_y = + ps_dec->s_cur_pic.pu1_buf1 + y_offset; + + u4_frame_stride = ps_dec->u2_frm_wd_uv + << ps_dec->ps_cur_slice->u1_field_pic_flag; + y_offset = (u2_mb_y * u4_frame_stride) << 3; + ps_dec->s_tran_addrecon_parse.pu1_dest_u = + ps_dec->s_cur_pic.pu1_buf2 + y_offset; + ps_dec->s_tran_addrecon_parse.pu1_dest_v = + ps_dec->s_cur_pic.pu1_buf3 + y_offset; + + } + + ps_dec->ps_deblk_mbn += u1_num_mbs; + + /* + * The Slice boundary is also a valid condition to transfer. So recalculate + * the Left increment, in case the number of MBs is lesser than the + * N MB value. c_numMbs will be equal to N of N MB if the entire N Mb is + * decoded. 
+ */ + ps_dec->s_tran_addrecon.u2_mv_left_inc = ((u1_num_mbs >> u1_mbaff) - 1) + << (4 + u1_mbaff); + ps_dec->s_tran_addrecon.u2_mv_top_left_inc = (u1_num_mbs << 2) - 1 + - (u1_mbaff << 2); + + /* reassign left MV and cur MV pointers */ + ps_dec->ps_mv_left = ps_dec->ps_mv_cur + + ps_dec->s_tran_addrecon.u2_mv_left_inc; + + + + + + ps_dec->ps_mv_cur += (u1_num_mbs << 4); + ps_dec->u4_num_mbs_prev_nmb = u1_num_mbs; + + + ps_dec->u4_dma_buf_idx = 0; + + } +} + +void ih264d_decode_tfr_nmb(dec_struct_t * ps_dec, + UWORD8 u1_num_mbs, + UWORD8 u1_num_mbs_next, + UWORD8 u1_end_of_row) +{ + + UWORD32 u1_end_of_row_next; + + const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + + /****************************************************************/ + /* Check for End Of Row in Next iteration */ + /****************************************************************/ + u1_end_of_row_next = + u1_num_mbs_next + && ((u1_num_mbs_next) + <= (ps_dec->u1_recon_mb_grp + >> u1_mbaff)); + + /****************************************************************/ + /* Transfer the Following things */ + /* N-Mb DeblkParams Data ( To Ext DeblkParams Buffer ) */ + /* N-Mb Recon Data ( To Ext Frame Buffer ) */ + /* N-Mb Intrapredline Data ( Updated Internally) */ + /* N-Mb MV Data ( To Ext MV Buffer ) */ + /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers) */ + /****************************************************************/ + if(u1_end_of_row) + { + ps_dec->i2_dec_thread_mb_y += (1 << u1_mbaff); + } + ih264d_transfer_mb_group_data(ps_dec, u1_num_mbs, u1_end_of_row, + u1_end_of_row_next); + + if(u1_end_of_row) + { + /* Reset the N-Mb Recon Buf Index to default Values */ + ps_dec->u2_mb_group_cols_y1 = ps_dec->u2_mb_group_cols_y; + ps_dec->u2_mb_group_cols_cr1 = ps_dec->u2_mb_group_cols_cr; + } + /* If next N-Mb Group is the EndOfRow, set the N-Mb Recon Buf Index */ + else if(u1_end_of_row_next) + { + ps_dec->u2_mb_group_cols_y1 = (u1_num_mbs_next << 4) + 8; + 
ps_dec->u2_mb_group_cols_cr1 = (u1_num_mbs_next << 3) + 8; + } +} + +WORD32 ih264d_decode_recon_tfr_nmb_thread(dec_struct_t * ps_dec, UWORD8 u1_num_mbs, // number of MBs loop should run + UWORD8 u1_num_mbs_next, + UWORD8 u1_end_of_row) +{ + WORD32 i,j; + dec_mb_info_t * ps_cur_mb_info; + UWORD32 u4_update_mbaff = 0; + const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + UWORD32 u1_slice_type, u1_B; + WORD32 u1_skip_th; + UWORD32 u1_ipcm_th; + UWORD32 u4_cond; + UWORD16 u2_slice_num,u2_cur_dec_mb_num; + WORD32 ret; + + u1_slice_type = ps_dec->ps_decode_cur_slice->slice_type; + + u1_B = (u1_slice_type == B_SLICE); + + u1_skip_th = + ((u1_slice_type != I_SLICE) ? + (u1_B ? B_8x8 : PRED_8x8R0) : -1); + + u1_ipcm_th = ((u1_slice_type != I_SLICE) ? (u1_B ? 23 : 5) : 0); + + u2_cur_dec_mb_num = ps_dec->cur_dec_mb_num; + + /* N Mb MC Loop */ + for(i = 0; i < u1_num_mbs; i++) + { + DATA_SYNC(); + + // check dec_mb_map + UWORD32 yield_cnt = 0, u4_max_addr; + + u4_max_addr = ps_dec->ps_cur_sps->u2_max_mb_addr; + while(1) + { + UWORD32 u4_mb_num = u2_cur_dec_mb_num; + + /*introducing 1 MB delay*/ + if(u4_mb_num < u4_max_addr) + u4_mb_num = u4_mb_num + 1; + + CHECK_MB_MAP_BYTE(u4_mb_num, ps_dec->pu1_dec_mb_map, u4_cond); + if(u4_cond) + { + break; + } + else + { + + { + NOP(128); + + } + + DEBUG_THREADS_PRINTF("waiting for mb mapcur_dec_mb_num = %d,ps_dec->u2_cur_mb_addr = %d\n",u2_cur_dec_mb_num, + ps_dec->u2_cur_mb_addr); + + } + } + + GET_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, u2_cur_dec_mb_num, + u2_slice_num); + + if(u2_slice_num != ps_dec->u2_cur_slice_num_dec_thread) + { + ps_dec->u4_cur_slice_decode_done = 1; + break; + } + + ps_cur_mb_info = &ps_dec->ps_frm_mb_info[u2_cur_dec_mb_num + & PD_MB_BUF_SIZE_MOD]; + + ps_dec->u4_dma_buf_idx = 0; + ps_dec->u4_pred_info_idx = 0; + + if(ps_cur_mb_info->u1_mb_type <= u1_skip_th) + { + + { + WORD32 pred_cnt = 0; + pred_info_pkd_t *ps_pred_pkd; + UWORD32 u4_pred_info_pkd_idx; + WORD8 i1_pred; + + 
u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx; + + while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts) + { + + ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx; + + + ps_dec->p_form_mb_part_info_thread(ps_pred_pkd,ps_dec, + ps_cur_mb_info->u2_mbx,ps_cur_mb_info->u2_mby,(i >> u1_mbaff), + ps_cur_mb_info); + + u4_pred_info_pkd_idx++; + pred_cnt++; + + } + } + ps_dec->p_mc_dec_thread(ps_dec, ps_cur_mb_info); + } + else if(ps_cur_mb_info->u1_mb_type == MB_SKIP) + { + { + WORD32 pred_cnt = 0; + pred_info_pkd_t *ps_pred_pkd; + UWORD32 u4_pred_info_pkd_idx; + WORD8 i1_pred; + + u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx; + + + + while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts) + { + + ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx; + + + ps_dec->p_form_mb_part_info_thread(ps_pred_pkd,ps_dec, + ps_cur_mb_info->u2_mbx,ps_cur_mb_info->u2_mby,(i >> u1_mbaff), + ps_cur_mb_info); + + + u4_pred_info_pkd_idx++; + pred_cnt++; + } + } + /* Decode MB skip */ + ps_dec->p_mc_dec_thread(ps_dec, ps_cur_mb_info); + } + + u2_cur_dec_mb_num++; + } + + /* N Mb IQ IT RECON Loop */ + for(j = 0; j < i; j++) + { + DATA_SYNC(); + + + ps_cur_mb_info = &ps_dec->ps_frm_mb_info[ps_dec->cur_dec_mb_num + & PD_MB_BUF_SIZE_MOD]; + + + if(ps_cur_mb_info->u1_mb_type <= u1_skip_th) + { + ih264d_process_inter_mb(ps_dec, ps_cur_mb_info, j); + } + else if(ps_cur_mb_info->u1_mb_type != MB_SKIP) + { + if((u1_ipcm_th + 25) != ps_cur_mb_info->u1_mb_type) + { + ps_cur_mb_info->u1_mb_type -= (u1_skip_th + 1); + ret = ih264d_process_intra_mb(ps_dec, ps_cur_mb_info, j); + if(ret != OK) + return ret; + } + } + + if(ps_dec->u4_mb_level_deblk == 1) + { + + ih264d_deblock_mb_level(ps_dec, ps_cur_mb_info, j); + } + + if((ps_dec->u4_num_cores >= 3) && (u1_mbaff == 0)) + ih264d_copy_intra_pred_line(ps_dec, ps_cur_mb_info, j); + if(u1_mbaff) + { + if(u4_update_mbaff) + { + UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx + + ps_dec->u2_frm_wd_in_mbs + * (ps_cur_mb_info->u2_mby >> 
1); + UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num); + u4_update_mbaff = 0; + } + else + { + u4_update_mbaff = 1; + } + } + else + { + UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx + + ps_dec->u2_frm_wd_in_mbs * ps_cur_mb_info->u2_mby; + UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num); + } + ps_dec->cur_dec_mb_num++; + } + + + /*handle the last mb in picture case*/ + if(ps_dec->cur_dec_mb_num > ps_dec->ps_cur_sps->u2_max_mb_addr) + ps_dec->u4_cur_slice_decode_done = 1; + + if(i != u1_num_mbs) + { + u1_end_of_row = 0; + /*Number of MB's left in row*/ + u1_num_mbs_next = u1_num_mbs_next + ((u1_num_mbs - i) >> u1_mbaff); + } + + ih264d_decode_tfr_nmb(ps_dec, (i), u1_num_mbs_next, u1_end_of_row); + + return OK; +} + +WORD32 ih264d_decode_slice_thread(dec_struct_t *ps_dec /* Decoder parameters */ +) +{ + UWORD8 u1_num_mbs_next, u1_num_mbsleft, u1_end_of_row = 0; //, u1_slice_end, u1_tfr_n_mb, u1_decode_nmb; + const UWORD32 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs; + UWORD8 u1_mbaff, u1_num_mbs; //,uc_more_data_flag,u1_mb_idx; + + UWORD16 u2_first_mb_in_slice; + + /*dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm; + UWORD32 * pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;*/ + + UWORD16 i16_mb_x, i16_mb_y; + UWORD8 u1_field_pic; + UWORD32 u4_frame_stride, x_offset, y_offset; + WORD32 ret; + + tfr_ctxt_t *ps_trns_addr; + + if(ps_dec->ps_decode_cur_slice->slice_header_done != 2) + return ERROR_INV_SLICE_HDR_T; + + + + u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; + + u2_first_mb_in_slice = ps_dec->ps_decode_cur_slice->u4_first_mb_in_slice; + + i16_mb_x = MOD(u2_first_mb_in_slice, i2_pic_wdin_mbs); + i16_mb_y = DIV(u2_first_mb_in_slice, i2_pic_wdin_mbs); + i16_mb_y <<= u1_mbaff; + ps_dec->i2_dec_thread_mb_y = i16_mb_y; + + /*if((i16_mb_x > (i2_pic_wdin_mbs - 1)) + || (i16_mb_y > ps_dec->u2_frm_ht_in_mbs - 1)) + { + }*/ + if(ps_dec->cur_dec_mb_num == u2_first_mb_in_slice << u1_mbaff) + 
{ + ps_dec->u2_mb_skip_error = 0; + } + else + { + ps_dec->u2_mb_skip_error = 1; + } + ps_dec->cur_dec_mb_num = u2_first_mb_in_slice << u1_mbaff; + + // recalculate recon pointers + u1_field_pic = ps_dec->ps_cur_slice->u1_field_pic_flag; + u4_frame_stride = ps_dec->u2_frm_wd_y << u1_field_pic; + x_offset = i16_mb_x << 4; + y_offset = (i16_mb_y * u4_frame_stride) << 4; + + ps_trns_addr = &(ps_dec->s_tran_addrecon); + + ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + x_offset + y_offset; + + u4_frame_stride = ps_dec->u2_frm_wd_uv << u1_field_pic; + x_offset >>= 1; + y_offset = (i16_mb_y * u4_frame_stride) << 3; + + x_offset *= YUV420SP_FACTOR; + + ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + x_offset + y_offset; + ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + x_offset + y_offset; + + ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y; + ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u; + ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v; + + if(ps_dec->u4_mb_level_deblk == 1) + { + /*If it is not the first mb in row,the previous MB which needs to be deblocked + * as there is delay of 1 MB*/ + if(i16_mb_x != 0) + { + ps_trns_addr->pu1_mb_y -= MB_SIZE; + ps_trns_addr->pu1_mb_u -= BLK8x8SIZE * YUV420SP_FACTOR; + ps_trns_addr->pu1_mb_v -= BLK8x8SIZE; + } + } + + /**********Number of Mbs in Slice**********/ + + ps_dec->ps_deblk_mbn_dec_thrd = ps_dec->ps_deblk_pic + + (u2_first_mb_in_slice << u1_mbaff); + + /* Initialise MC and formMbPartInfo fn ptrs one time based on profile_idc */ + + { + ps_dec->p_mc_dec_thread = ih264d_motion_compensate_bp; + ps_dec->p_form_mb_part_info_thread = ih264d_form_mb_part_info_bp; + } + { + UWORD8 uc_nofield_nombaff; + uc_nofield_nombaff = ((ps_dec->ps_cur_slice->u1_field_pic_flag == 0) + && (ps_dec->ps_cur_slice->u1_mbaff_frame_flag == 0) + && (ps_dec->ps_decode_cur_slice->slice_type != B_SLICE) + && (ps_dec->ps_cur_pps->u1_wted_pred_flag == 0)); + + if(uc_nofield_nombaff == 0) + { + ps_dec->p_mc_dec_thread = 
ih264d_motion_compensate_mp; + ps_dec->p_form_mb_part_info_thread = ih264d_form_mb_part_info_mp; + } + + } + + ps_dec->u4_cur_slice_decode_done = 0; + + + while(ps_dec->u4_cur_slice_decode_done != 1) + { + + u1_num_mbsleft = ((i2_pic_wdin_mbs - i16_mb_x) << u1_mbaff); + + if(u1_num_mbsleft <= ps_dec->u1_recon_mb_grp) + { + u1_num_mbs = u1_num_mbsleft; + + /*Indicate number of mb's left in a row*/ + u1_num_mbs_next = 0; + u1_end_of_row = 1; + i16_mb_x = 0; + } + else + { + u1_num_mbs = ps_dec->u1_recon_mb_grp; + + /*Indicate number of mb's left in a row*/ + u1_num_mbs_next = i2_pic_wdin_mbs - i16_mb_x + - (ps_dec->u1_recon_mb_grp >> u1_mbaff); + i16_mb_x += (u1_num_mbs >> u1_mbaff); + u1_end_of_row = 0; + + } + ret = ih264d_decode_recon_tfr_nmb_thread(ps_dec, u1_num_mbs, u1_num_mbs_next, + u1_end_of_row); + if(ret != OK) + return ret; + } + return OK; +} + +void ih264d_decode_picture_thread(dec_struct_t *ps_dec ) +{ + volatile WORD32 i4_err_status; + + + ithread_set_name("ih264d_decode_picture_thread"); + + + + // run the loop till all slices are decoded + + while(1) + { + if(ps_dec->u4_start_frame_decode) + { + break; + } + else + { + NOP(32); + + } + } + + DEBUG_THREADS_PRINTF("Got start of frame u4_flag\n"); + + if(ps_dec->u4_start_frame_decode == 1) + { + while(1) + { + /*Complete all writes before processing next slice*/ + DATA_SYNC(); + /*wait untill all the slice params have been populated*/ + while(ps_dec->ps_decode_cur_slice->slice_header_done == 0) + { + NOP(32); DEBUG_THREADS_PRINTF(" waiting for slice header \n"); + } + + DEBUG_THREADS_PRINTF(" Entering decode slice\n"); + + ih264d_decode_slice_thread(ps_dec); + DEBUG_THREADS_PRINTF(" Exit ih264d_decode_slice_thread \n"); + + /*Complete all writes before processing next slice*/ + DATA_SYNC(); + + while(1) + { + volatile void * parse_addr, *dec_addr; + volatile UWORD32 last_slice; + + parse_addr = (volatile void *)ps_dec->ps_parse_cur_slice; + dec_addr = (volatile void *)ps_dec->ps_decode_cur_slice; + 
last_slice = ps_dec->ps_decode_cur_slice->last_slice_in_frame; + + if(last_slice == 1) + break; + + if(parse_addr != dec_addr) + break; + + DEBUG_THREADS_PRINTF("Waiting for next slice or end of frame\n"); + + NOP(32); + if(i4_err_status != 0) + { + /*In the case of error set decode Mb number ,so that the + parse thread does not wait because of mb difference being + greated the 32*/ + ps_dec->cur_dec_mb_num = ps_dec->u2_cur_mb_addr - 1; + } + } + + DEBUG_THREADS_PRINTF("Got next slice/end of frame signal \n "); + + if((void *)ps_dec->ps_parse_cur_slice + > (void *)ps_dec->ps_decode_cur_slice) + { + ps_dec->ps_decode_cur_slice++; + ps_dec->u2_cur_slice_num_dec_thread++; + } + else + { + /*Last slice in frame*/ + break; + } + + } + } + + if(ps_dec->u4_output_present) + { + while(1) + { + volatile UWORD32 *u4_flag = &(ps_dec->as_fmt_conv_part[1].u4_flag); + + DEBUG_THREADS_PRINTF(" Format conversion loop in decode *u4_flag = %d\n",*u4_flag); + if(2 == *u4_flag) + { + if(ps_dec->as_fmt_conv_part[1].u4_num_rows_y) + ih264d_format_convert( + ps_dec, &(ps_dec->s_disp_op), + ps_dec->as_fmt_conv_part[1].u4_start_y, + ps_dec->as_fmt_conv_part[1].u4_num_rows_y); + + break; + } + else if(1 == *u4_flag) + { + NOP(32); + + } + else + break; + + } + } + + ithread_exit(0); + +} + +void ih264d_signal_decode_thread(dec_struct_t *ps_dec) +{ + if(ps_dec->u4_dec_thread_created == 1) + { + + if(ps_dec->u4_start_frame_decode == 1) + ps_dec->ps_parse_cur_slice->last_slice_in_frame = 1; + else + /*to indicate frame in error*/ + ps_dec->u4_start_frame_decode = 2; + + ithread_join(ps_dec->pv_dec_thread_handle, NULL); + ps_dec->u4_dec_thread_created = 0; + } +} +void ih264d_signal_bs_deblk_thread(dec_struct_t *ps_dec) +{ + if(ps_dec->u4_bs_deblk_thread_created) + { + /*signal error*/ + if(ps_dec->u4_start_bs_deblk == 0) + ps_dec->u4_start_bs_deblk = 2; + + ithread_join(ps_dec->pv_bs_deblk_thread_handle, NULL); + ps_dec->u4_bs_deblk_thread_created = 0; + } + +} diff --git 
a/decoder/ih264d_thread_parse_decode.h b/decoder/ih264d_thread_parse_decode.h new file mode 100755 index 0000000..013b14f --- /dev/null +++ b/decoder/ih264d_thread_parse_decode.h @@ -0,0 +1,48 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/* + * ih264d_thread_parse_decode.h + * + * Declarations for the parse-side and decode-side routines of the multi-threaded decoder. + * + * Created on: Feb 21, 2012 + * Author: 100492 + */ + +#ifndef _IH264D_THREAD_PARSE_DECPDE_H_ +#define _IH264D_THREAD_PARSE_DECPDE_H_ +/* Parse side: when u1_tfr_n_mb is non-zero, records the last u1_num_mbs macroblocks in the slice-number map and the decoded-MB map and advances the parse-side buffer pointers. u1_mb_idx and u1_num_mbs_next are unused. */ +void ih264d_parse_tfr_nmb(dec_struct_t *ps_dec, + UWORD8 u1_mb_idx, + UWORD8 u1_num_mbs, + UWORD8 u1_num_mbs_next, + UWORD8 u1_tfr_n_mb, + UWORD8 u1_end_of_row); +/* Decode side: transfers a decoded MB group with ih264d_transfer_mb_group_data() and updates the MB-group column counters. */ +void ih264d_decode_tfr_nmb(dec_struct_t *ps_dec, + UWORD8 u1_num_mbs, + UWORD8 u1_num_mbs_next, + UWORD8 u1_end_of_row); +/* Motion-compensates and then reconstructs up to u1_num_mbs macroblocks of the current slice; returns OK or the error reported by ih264d_process_intra_mb(). */ +WORD32 ih264d_decode_recon_tfr_nmb_thread(dec_struct_t *ps_dec, + UWORD8 u1_num_mbs, + UWORD8 u1_num_mbs_next, + UWORD8 u1_end_of_row); +/* Entry point of the decode worker thread. */ +void ih264d_decode_picture_thread(dec_struct_t *ps_dec); +/* Decodes the slice referenced by ps_dec->ps_decode_cur_slice; returns ERROR_INV_SLICE_HDR_T when its slice_header_done is not 2, otherwise OK or the first error from ih264d_decode_recon_tfr_nmb_thread(). */ +WORD32 ih264d_decode_slice_thread(dec_struct_t *ps_dec); + + + +#endif /* _IH264D_THREAD_PARSE_DECPDE_H_ */ diff --git a/decoder/ih264d_transfer_address.h b/decoder/ih264d_transfer_address.h new file mode 100755 index 0000000..aa64b85 --- /dev/null +++ b/decoder/ih264d_transfer_address.h @@ -0,0 +1,45 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +#ifndef _IH264D_TRANSFER_ADDRESS_H_ +#define _IH264D_TRANSFER_ADDRESS_H_ + +/* + * Pointers and step sizes used while walking the Y/U/V picture buffers. + * The decode path keeps one instance (ps_dec->s_tran_addrecon), the parse + * path another (ps_dec->s_tran_addrecon_parse), and the BS/deblock thread + * uses a local instance. + */ +typedef struct +{ + /* Plane base pointers; the BS/deblock thread starts pu1_mb_* at pu1_src_* + 4 */ + UWORD8 *pu1_src_y; + UWORD8 *pu1_src_u; + UWORD8 *pu1_src_v; + /* Current position in the Y/U/V buffers of the picture being decoded */ + UWORD8 *pu1_dest_y; + UWORD8 *pu1_dest_u; + UWORD8 *pu1_dest_v; + /* Steps added to pu1_dest_* after an MB group: index 1 at end of row, index 0 otherwise */ + UWORD32 u4_inc_y[2]; + UWORD32 u4_inc_uv[2]; + /* NOTE(review): presumably luma / chroma frame widths - not referenced by the thread code, confirm */ + UWORD16 u2_frm_wd_y; + UWORD16 u2_frm_wd_uv; + /* Per-macroblock pointers; the deblock loops advance them by 16, 8 * YUV420SP_FACTOR and 8 per macroblock */ + UWORD8 *pu1_mb_y; + UWORD8 *pu1_mb_u; + UWORD8 *pu1_mb_v; + /* MV buffer offsets derived from the MB-group size in ih264d_parse_tfr_nmb() */ + UWORD16 u2_mv_left_inc; + UWORD16 u2_mv_top_left_inc; + /* Extra steps added to pu1_mb_* when the deblock loops move to the next macroblock row */ + UWORD32 u4_y_inc; + UWORD32 u4_uv_inc; + +} tfr_ctxt_t; + +#endif diff --git a/decoder/ih264d_utils.c b/decoder/ih264d_utils.c new file mode 100755 index 0000000..f60d99c --- /dev/null +++ b/decoder/ih264d_utils.c @@ -0,0 +1,2625 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*! 
+ ************************************************************************** + * \file ih264d_utils.c + * + * \brief + * Contains routines that handle of start and end of pic processing + * + * \date + * 19/12/2002 + * + * \author AI + ************************************************************************** + */ + +#include <string.h> +#include "ih264_typedefs.h" +#include "ithread.h" +#include "ih264d_deblocking.h" +#include "ih264d_parse_slice.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_dpb_manager.h" +#include "ih264d_defs.h" +#include "ih264d_structs.h" +#include "ih264d_mem_request.h" +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_tables.h" +#include "ih264d_debug.h" +#include "ih264d_mb_utils.h" +#include "ih264d_error_handler.h" +#include "ih264d_dpb_manager.h" +#include "ih264d_utils.h" +#include "ih264d_defs.h" +#include "ih264d_tables.h" +#include "ih264d_inter_pred.h" +#include "ih264d_dpb_manager.h" +#include "iv.h" +#include "ivd.h" +#include "ih264d_format_conv.h" +#include "ih264_error.h" +#include "ih264_disp_mgr.h" +#include "ih264_buf_mgr.h" +#include "ih264d_utils.h" + +/*! + ************************************************************************** + * \if Function name : ih264d_is_end_of_pic \endif + * + * \brief + * Determines whether current slice is first slice of a new picture as + * defined in 7.4.1.2.4 of 14496-10. 
+ * + * \return + * Return 1 if current slice is first slice of a new picture + * Otherwise it returns 0 + ************************************************************************** + */ +UWORD8 ih264d_is_end_of_pic(UWORD16 u2_frame_num, + UWORD8 u1_nal_ref_idc, + pocstruct_t *ps_cur_poc, + pocstruct_t *ps_prev_poc, + dec_slice_params_t * ps_prev_slice, /*!< Previous slice parameters*/ + UWORD8 u1_pic_order_cnt_type, + UWORD8 u1_nal_unit_type, + UWORD32 u4_idr_pic_id, + UWORD8 u1_field_pic_flag, + UWORD8 u1_bottom_field_flag) +{ + WORD8 i1_is_end_of_pic; + WORD8 a, b, c, d, e, f, g, h; + + a = b = c = d = e = f = g = h = 0; + a = (ps_prev_slice->u2_frame_num != u2_frame_num); + b = (ps_prev_slice->u1_field_pic_flag != u1_field_pic_flag); + if(u1_field_pic_flag && ps_prev_slice->u1_field_pic_flag) + c = (u1_bottom_field_flag != ps_prev_slice->u1_bottom_field_flag); + d = + (u1_nal_ref_idc == 0 && ps_prev_slice->u1_nal_ref_idc != 0) + || (u1_nal_ref_idc != 0 + && ps_prev_slice->u1_nal_ref_idc + == 0); + if(!a) + { + if((u1_pic_order_cnt_type == 0) + && (ps_prev_slice->u1_pic_order_cnt_type == 0)) + { + e = + ((ps_cur_poc->i4_pic_order_cnt_lsb + != ps_prev_poc->i4_pic_order_cnt_lsb) + || (ps_cur_poc->i4_delta_pic_order_cnt_bottom + != ps_prev_poc->i4_delta_pic_order_cnt_bottom)); + } + + if((u1_pic_order_cnt_type == 1) + && (ps_prev_slice->u1_pic_order_cnt_type == 1)) + { + f = + ((ps_cur_poc->i4_delta_pic_order_cnt[0] + != ps_prev_poc->i4_delta_pic_order_cnt[0]) + || (ps_cur_poc->i4_delta_pic_order_cnt[1] + != ps_prev_poc->i4_delta_pic_order_cnt[1])); + } + } + + if((u1_nal_unit_type == IDR_SLICE_NAL) + && (ps_prev_slice->u1_nal_unit_type == IDR_SLICE_NAL)) + { + g = (u4_idr_pic_id != ps_prev_slice->u4_idr_pic_id); + } + + if((u1_nal_unit_type == IDR_SLICE_NAL) + && (ps_prev_slice->u1_nal_unit_type != IDR_SLICE_NAL)) + { + h = 1; + } + i1_is_end_of_pic = a + b + c + d + e + f + g + h; + return (i1_is_end_of_pic); +} + +/*! 
+ ************************************************************************** + * \if Function name : ih264d_decode_pic_order_cnt \endif + * + * \brief + * Calculates picture order count of picture. + * + * \return + * Returns the pic order count of the picture to which current + * Slice belongs. + * + ************************************************************************** + */ +WORD32 ih264d_decode_pic_order_cnt(UWORD8 u1_is_idr_slice, + UWORD32 u2_frame_num, + pocstruct_t *ps_prev_poc, + pocstruct_t *ps_cur_poc, + dec_slice_params_t *ps_cur_slice, /*!< Pointer to current slice Params*/ + dec_pic_params_t * ps_pps, + UWORD8 u1_nal_ref_idc, + UWORD8 u1_bottom_field_flag, + UWORD8 u1_field_pic_flag, + WORD32 *pi4_poc) +{ + WORD16 i1_pic_msb; + WORD32 i4_top_field_order_cnt = 0, i4_bottom_field_order_cnt = 0; + dec_seq_params_t *ps_seq = ps_pps->ps_sps; + WORD32 i4_prev_frame_num_ofst; + + switch(ps_seq->u1_pic_order_cnt_type) + { + case 0: + /* POC TYPE 0 */ + if(u1_is_idr_slice) + { + ps_prev_poc->i4_pic_order_cnt_msb = 0; + ps_prev_poc->i4_pic_order_cnt_lsb = 0; + } + if(ps_prev_poc->u1_mmco_equalto5) + { + if(ps_prev_poc->u1_bot_field != 1) + { + ps_prev_poc->i4_pic_order_cnt_msb = 0; + ps_prev_poc->i4_pic_order_cnt_lsb = + ps_prev_poc->i4_top_field_order_count; + } + else + { + ps_prev_poc->i4_pic_order_cnt_msb = 0; + ps_prev_poc->i4_pic_order_cnt_lsb = 0; + } + } + + if((ps_cur_poc->i4_pic_order_cnt_lsb + < ps_prev_poc->i4_pic_order_cnt_lsb) + && ((ps_prev_poc->i4_pic_order_cnt_lsb + - ps_cur_poc->i4_pic_order_cnt_lsb) + >= (ps_seq->i4_max_pic_order_cntLsb + >> 1))) + { + i1_pic_msb = ps_prev_poc->i4_pic_order_cnt_msb + + ps_seq->i4_max_pic_order_cntLsb; + } + else if((ps_cur_poc->i4_pic_order_cnt_lsb + > ps_prev_poc->i4_pic_order_cnt_lsb) + && ((ps_cur_poc->i4_pic_order_cnt_lsb + - ps_prev_poc->i4_pic_order_cnt_lsb) + >= (ps_seq->i4_max_pic_order_cntLsb + >> 1))) + { + i1_pic_msb = ps_prev_poc->i4_pic_order_cnt_msb + - ps_seq->i4_max_pic_order_cntLsb; + } + 
else + { + i1_pic_msb = ps_prev_poc->i4_pic_order_cnt_msb; + } + + if(!u1_field_pic_flag || !u1_bottom_field_flag) + i4_top_field_order_cnt = i1_pic_msb + + ps_cur_poc->i4_pic_order_cnt_lsb; + + if(!u1_field_pic_flag) + { + i4_bottom_field_order_cnt = i4_top_field_order_cnt + + ps_cur_poc->i4_delta_pic_order_cnt_bottom; + } + else if(u1_bottom_field_flag) + { + i4_bottom_field_order_cnt = i1_pic_msb + + ps_cur_poc->i4_pic_order_cnt_lsb; + } + ps_cur_poc->i4_pic_order_cnt_msb = i1_pic_msb; + break; + + case 1: + { + /* POC TYPE 1 */ + UWORD8 i; + WORD32 prev_frame_num; + WORD32 frame_num_ofst; + WORD32 abs_frm_num; + WORD32 poc_cycle_cnt, frame_num_in_poc_cycle; + WORD32 expected_delta_poc_cycle; + WORD32 expected_poc; + + prev_frame_num = (WORD32)ps_cur_slice->u2_frame_num; + if(!u1_is_idr_slice) + { + if(ps_cur_slice->u1_mmco_equalto5) + { + prev_frame_num = 0; + i4_prev_frame_num_ofst = 0; + } + else + { + i4_prev_frame_num_ofst = ps_prev_poc->i4_prev_frame_num_ofst; + } + } + else + i4_prev_frame_num_ofst = 0; + + /* 1. Derivation for FrameNumOffset */ + if(u1_is_idr_slice) + { + frame_num_ofst = 0; + ps_cur_poc->i4_delta_pic_order_cnt[0] = 0; + ps_cur_poc->i4_delta_pic_order_cnt[1] = 0; + } + else if(prev_frame_num > ((WORD32)u2_frame_num)) + { + frame_num_ofst = i4_prev_frame_num_ofst + + ps_seq->u2_u4_max_pic_num_minus1 + 1; + } + else + frame_num_ofst = i4_prev_frame_num_ofst; + + /* 2. Derivation for absFrameNum */ + if(0 != ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle) + abs_frm_num = frame_num_ofst + u2_frame_num; + else + abs_frm_num = 0; + if((u1_nal_ref_idc == 0) && (abs_frm_num > 0)) + abs_frm_num = abs_frm_num - 1; + + /* 4. expectedDeltaPerPicOrderCntCycle is derived as */ + expected_delta_poc_cycle = 0; + for(i = 0; i < ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle; + i++) + { + expected_delta_poc_cycle += + ps_seq->i4_ofst_for_ref_frame[i]; + } + + /* 3. 
When absFrameNum > 0, picOrderCntCycleCnt and + frame_num_in_poc_cycle are derived as : */ + /* 5. expectedPicOrderCnt is derived as : */ + if(abs_frm_num > 0) + { + poc_cycle_cnt = + DIV((abs_frm_num - 1), + ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle); + frame_num_in_poc_cycle = + MOD((abs_frm_num - 1), + ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle); + + expected_poc = poc_cycle_cnt + * expected_delta_poc_cycle; + for(i = 0; i <= frame_num_in_poc_cycle; i++) + { + expected_poc = expected_poc + + ps_seq->i4_ofst_for_ref_frame[i]; + } + } + else + expected_poc = 0; + + if(u1_nal_ref_idc == 0) + { + expected_poc = expected_poc + + ps_seq->i4_ofst_for_non_ref_pic; + } + + /* 6. TopFieldOrderCnt or BottomFieldOrderCnt are derived as */ + if(!u1_field_pic_flag) + { + i4_top_field_order_cnt = expected_poc + + ps_cur_poc->i4_delta_pic_order_cnt[0]; + i4_bottom_field_order_cnt = i4_top_field_order_cnt + + ps_seq->i4_ofst_for_top_to_bottom_field + + ps_cur_poc->i4_delta_pic_order_cnt[1]; + } + else if(!u1_bottom_field_flag) + { + i4_top_field_order_cnt = expected_poc + + ps_cur_poc->i4_delta_pic_order_cnt[0]; + } + else + { + i4_bottom_field_order_cnt = expected_poc + + ps_seq->i4_ofst_for_top_to_bottom_field + + ps_cur_poc->i4_delta_pic_order_cnt[0]; + } + /* Copy the current POC info into Previous POC structure */ + ps_cur_poc->i4_prev_frame_num_ofst = frame_num_ofst; + } + + break; + case 2: + { + /* POC TYPE 2 */ + WORD32 prev_frame_num; + WORD32 frame_num_ofst; + WORD32 tmp_poc; + + prev_frame_num = (WORD32)ps_cur_slice->u2_frame_num; + if(!u1_is_idr_slice) + { + if(ps_cur_slice->u1_mmco_equalto5) + { + prev_frame_num = 0; + i4_prev_frame_num_ofst = 0; + } + else + i4_prev_frame_num_ofst = ps_prev_poc->i4_prev_frame_num_ofst; + } + else + i4_prev_frame_num_ofst = 0; + + /* 1. 
Derivation for FrameNumOffset */ + if(u1_is_idr_slice) + { + frame_num_ofst = 0; + ps_cur_poc->i4_delta_pic_order_cnt[0] = 0; + ps_cur_poc->i4_delta_pic_order_cnt[1] = 0; + } + else if(prev_frame_num > ((WORD32)u2_frame_num)) + { + frame_num_ofst = i4_prev_frame_num_ofst + + ps_seq->u2_u4_max_pic_num_minus1 + 1; + } + else + frame_num_ofst = i4_prev_frame_num_ofst; + + /* 2. Derivation for tempPicOrderCnt */ + if(u1_is_idr_slice) + tmp_poc = 0; + else if(u1_nal_ref_idc == 0) + tmp_poc = ((frame_num_ofst + u2_frame_num) << 1) + - 1; + else + tmp_poc = ((frame_num_ofst + u2_frame_num) << 1); + + /* 6. TopFieldOrderCnt or BottomFieldOrderCnt are derived as */ + if(!u1_field_pic_flag) + { + i4_top_field_order_cnt = tmp_poc; + i4_bottom_field_order_cnt = tmp_poc; + } + else if(!u1_bottom_field_flag) + i4_top_field_order_cnt = tmp_poc; + else + i4_bottom_field_order_cnt = tmp_poc; + + /* Copy the current POC info into Previous POC structure */ + ps_prev_poc->i4_prev_frame_num_ofst = frame_num_ofst; + ps_cur_poc->i4_prev_frame_num_ofst = frame_num_ofst; + } + break; + default: + return ERROR_INV_POC_TYPE_T; + break; + } + + if(!u1_field_pic_flag) // or a complementary field pair + { + *pi4_poc = MIN(i4_top_field_order_cnt, i4_bottom_field_order_cnt); + ps_pps->i4_top_field_order_cnt = i4_top_field_order_cnt; + ps_pps->i4_bottom_field_order_cnt = i4_bottom_field_order_cnt; + } + else if(!u1_bottom_field_flag) + { + *pi4_poc = i4_top_field_order_cnt; + ps_pps->i4_top_field_order_cnt = i4_top_field_order_cnt; + } + else + { + *pi4_poc = i4_bottom_field_order_cnt; + ps_pps->i4_bottom_field_order_cnt = i4_bottom_field_order_cnt; + } + + ps_pps->i4_avg_poc = *pi4_poc; + + return OK; +} + +/*! + ************************************************************************** + * \if Function name : ih264d_end_of_pic_processing \endif + * + * \brief + * Performs the end of picture processing. 
+ * + * It performs deblocking on the current picture and sets the i4_status of + * current picture as decoded. + * + * \return + * 0 on Success and Error code otherwise. + ************************************************************************** + */ +WORD32 ih264d_end_of_pic_processing(dec_struct_t *ps_dec) +{ + UWORD8 u1_pic_type, u1_nal_ref_idc; + dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice; + WORD32 ret; + + /* If nal_ref_idc is equal to 0 for one slice or slice data partition NAL + unit of a particular picture, it shall be equal to 0 for all slice and + slice data partition NAL units of the picture. nal_ref_idc greater + than 0 indicates that the content of the NAL unit belongs to a decoded + picture that is stored and marked for use as a reference picture in the + decoded picture buffer. */ + + /* 1. Do MMCO + 2. Add Cur Pic to list of reference pics. + */ + + /* Call MMCO */ + u1_pic_type = 0; + u1_nal_ref_idc = ps_cur_slice->u1_nal_ref_idc; + + if(u1_nal_ref_idc) + { + if(ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL) + { + if(ps_dec->ps_dpb_cmds->u1_long_term_reference_flag == 0) + { + ih264d_reset_ref_bufs(ps_dec->ps_dpb_mgr); + + { + ret = ih264d_insert_st_node(ps_dec->ps_dpb_mgr, + ps_dec->ps_cur_pic, + ps_dec->u1_pic_buf_id, + ps_cur_slice->u2_frame_num); + if(ret != OK) + return ret; + } + } + else + { + /* Equivalent of inserting a pic directly as longterm Pic */ + + { + ret = ih264d_insert_st_node(ps_dec->ps_dpb_mgr, + ps_dec->ps_cur_pic, + ps_dec->u1_pic_buf_id, + ps_cur_slice->u2_frame_num); + if(ret != OK) + return ret; + /* Set longTermIdx = 0, MaxLongTermFrameIdx = 0 */ + ret = ih264d_delete_st_node_or_make_lt( + ps_dec->ps_dpb_mgr, + ps_cur_slice->u2_frame_num, 0, + ps_cur_slice->u1_field_pic_flag); + if(ret != OK) + return ret; + ps_dec->ps_dpb_mgr->u1_max_lt_pic_idx_plus1 = 1; + } + } + } + else + { + + { + UWORD16 u2_pic_num = ps_cur_slice->u2_frame_num; + + + + ret = ih264d_do_mmco_buffer( + ps_dec->ps_dpb_cmds, 
ps_dec->ps_dpb_mgr, + ps_dec->ps_cur_sps->u1_num_ref_frames, + u2_pic_num, + (ps_dec->ps_cur_sps->u2_u4_max_pic_num_minus1), + ps_dec->u1_nal_unit_type, ps_dec->ps_cur_pic, + ps_dec->u1_pic_buf_id, + ps_cur_slice->u1_field_pic_flag, + ps_dec->e_dec_status); + if(ret != OK) + return ret; + } + } + ih264d_update_default_index_list(ps_dec->ps_dpb_mgr); + } + + if(ps_cur_slice->u1_field_pic_flag) + { + if(ps_cur_slice->u1_bottom_field_flag) + { + if(u1_nal_ref_idc) + u1_pic_type = u1_pic_type | BOT_REF; + u1_pic_type = u1_pic_type | BOT_FLD; + } + else + { + if(u1_nal_ref_idc) + u1_pic_type = u1_pic_type | TOP_REF; + u1_pic_type = u1_pic_type | TOP_FLD; + } + } + else + u1_pic_type = TOP_REF | BOT_REF; + ps_dec->ps_cur_pic->u1_pic_type |= u1_pic_type; + +#if ROW_ACCESSES_STAT + { + H264_DEC_DEBUG_PRINT("Row_Accesses_BeforeBB = %6d, Row_Accesses_AfterBB = %6d \n\n", + gui_Row_Accesses_BeforeBB, gui_Row_Accesses_AfterBB); + gui_Row_Accesses_BeforeBBTotal += gui_Row_Accesses_BeforeBB; + gui_Row_Accesses_AfterBBTotal += gui_Row_Accesses_AfterBB; + gui_Row_Accesses_AfterBB = 0; + gui_Row_Accesses_BeforeBB = 0; + } +#endif + + if(ps_cur_slice->u1_field_pic_flag) + { + H264_DEC_DEBUG_PRINT("Toggling secondField\n"); + ps_dec->u1_second_field = 1 - ps_dec->u1_second_field; + } + + return OK; +} + +/*****************************************************************************/ +/* */ +/* Function Name : init_dpb_size */ +/* */ +/* Description : This function calculates the DBP i4_size in frames */ +/* Inputs : ps_seq - current sequence params */ +/* */ +/* Globals : None */ +/* */ +/* Outputs : None */ +/* */ +/* Returns : DPB in frames */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 04 2005 NS Draft */ +/* */ +/*****************************************************************************/ +WORD32 ih264d_get_dpb_size(dec_seq_params_t *ps_seq, dec_struct_t *ps_dec) +{ + WORD32 i4_size; + 
UWORD8 u1_level_idc; + + + u1_level_idc = ps_seq->u1_level_idc; //harcode for the time being + +#if DPB_HACK + u1_level_idc = (u1_level_idc < 30) ? 30 : u1_level_idc; + u1_level_idc = (u1_level_idc > 30) ? 30 : u1_level_idc; +#endif + + u1_level_idc = MIN(u1_level_idc, ps_dec->u4_level_at_init); + //DPB_HACK + + + switch(u1_level_idc) + { + case 10: + i4_size = 152064; + break; + case 11: + i4_size = 345600; + break; + case 12: + i4_size = 912384; + break; + case 13: + i4_size = 912384; + break; + case 20: + i4_size = 912384; + break; + case 21: + i4_size = 1824768; + break; + case 22: + i4_size = 3110400; + break; + case 30: + i4_size = 3110400; + break; + case 31: + i4_size = 6912000; + break; + case 32: + i4_size = 7864320; + break; + case 40: + i4_size = 12582912; + break; + case 41: + i4_size = 12582912; + break; + case 42: + i4_size = 12582912; + break; + case 50: + i4_size = 42393600; + break; + case 51: + i4_size = 70778880; + break; + default: + i4_size = 6912000; + break; + /* + * Not calling the error handler if the level has come wrong. 
+ */ + /*{ + UWORD32 i4_error_code; + i4_error_code = ERROR_UNKNOWN_LEVEL ; + + } + break;*/ + } + + /* Temporary hack to run Tractor Cav/Cab/MbAff Profiler ps_bitstrm */ +#if DPB_HACK + i4_size = 6912000; +#endif + + i4_size = + i4_size + / (ps_seq->u2_frm_wd_in_mbs + * (ps_seq->u2_frm_ht_in_mbs + << (1 + - ps_seq->u1_frame_mbs_only_flag))); + i4_size = i4_size / 384; // temp / (256 * 1.5) + i4_size = MIN(i4_size, 16); + i4_size = MAX(i4_size, 1); + return (i4_size); +} + +WORD32 ih264d_get_dpb_size_new(UWORD32 u4_level_idc, + UWORD32 u2_frm_wd_in_mbs, + UWORD32 u2_frm_ht_in_mbs) +{ + + UWORD32 i4_size = 0; + + switch(u4_level_idc) + { + case 10: + i4_size = 152064; + break; + case 11: + i4_size = 345600; + break; + case 12: + i4_size = 912384; + break; + case 13: + i4_size = 912384; + break; + case 20: + i4_size = 912384; + break; + case 21: + i4_size = 1824768; + break; + case 22: + i4_size = 3110400; + break; + case 30: + i4_size = 3110400; + break; + case 31: + i4_size = 6912000; + break; + case 32: + i4_size = 7864320; + break; + case 40: + i4_size = 12582912; + break; + case 41: + i4_size = 12582912; + break; + case 42: + i4_size = 12582912; + break; + case 50: + i4_size = 42393600; + break; + case 51: + i4_size = 70778880; + break; + default: + { + return -1; + } + break; + } + + i4_size = i4_size / (u2_frm_wd_in_mbs * (u2_frm_ht_in_mbs)); + i4_size = (i4_size + 383) / 384; + i4_size = MIN(i4_size, 16); + i4_size = MAX(i4_size, 1); + return (i4_size); +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_max_possible_ref_pics */ +/* */ +/* Description : This function returns the maximum number of */ +/* reference buffers corresponding to the current Level */ +/* in accordance to "Table A-1 Level limits" in standard. 
*/ +/* Please refer to Annex A - Profiles and Levels */ +/* Maximum Number of reference buffers are derived from */ +/* the dbpsize and max_mbs_in frame given in the table */ +/* Inputs : level number */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 19 05 2005 SWRN Draft */ +/* */ +/*****************************************************************************/ + +UWORD8 ih264d_max_possible_ref_pics(UWORD8 u1_level) +{ + switch(u1_level) + { + case H264_LEVEL_1_0: + return (MAX_REF_LEVEL_1_0); + case H264_LEVEL_1_1: + return (MAX_REF_LEVEL_1_1); + case H264_LEVEL_1_2: + return (MAX_REF_LEVEL_1_2); + case H264_LEVEL_1_3: + return (MAX_REF_LEVEL_1_3); + case H264_LEVEL_2_0: + return (MAX_REF_LEVEL_2_0); + case H264_LEVEL_2_1: + return (MAX_REF_LEVEL_2_1); + case H264_LEVEL_2_2: + return (MAX_REF_LEVEL_2_2); + case H264_LEVEL_3_0: + return (MAX_REF_LEVEL_3_0); + } + + return (H264_MAX_REF_PICS); +} + +/***************************************************************************/ +/* If change in Level or the required PicBuffers i4_size is more than the */ +/* current one FREE the current PicBuffers and allocate affresh */ +/***************************************************************************/ +UWORD8 ih264d_is_sps_changed(prev_seq_params_t * ps_prv, + dec_seq_params_t * ps_cur) +{ + + if((ps_prv->u2_frm_wd_in_mbs != ps_cur->u2_frm_wd_in_mbs) + || (ps_prv->u1_level_idc != ps_cur->u1_level_idc) + || (ps_prv->u1_profile_idc != ps_cur->u1_profile_idc) + || (ps_cur->u2_frm_ht_in_mbs != ps_prv->u2_frm_ht_in_mbs) + || (ps_cur->u1_frame_mbs_only_flag + != ps_prv->u1_frame_mbs_only_flag) + || (ps_cur->u1_direct_8x8_inference_flag + != ps_prv->u1_direct_8x8_inference_flag)) + return 1; + + return 0; +} + +/**************************************************************************/ +/* This function initialises the value of ps_dec->u1_recon_mb_grp */ +/* ps_dec->u1_recon_mb_grp must satisfy the following criteria 
*/ +/* - multiple of 2 (required for N/2 parse-mvpred design) */ +/* - multiple of 4 (if it is not a frame_mbs_only sequence), */ +/* in this case N/2 itself needs to be even for mbpair processing */ +/* - lesser than ps_dec->u2_frm_wd_in_mbs/2 (at least 3 N-Chunks */ +/* should make a row to ensure proper MvTop transferring) */ +/**************************************************************************/ +WORD32 ih264d_init_dec_mb_grp(dec_struct_t *ps_dec) +{ + dec_seq_params_t *ps_seq = ps_dec->ps_cur_sps; + UWORD8 u1_frm = ps_seq->u1_frame_mbs_only_flag; + + ps_dec->u1_recon_mb_grp = PARSE_MB_GROUP_4; + + //NMB set to width in MBs for non-mbaff cases + if(0 == ps_seq->u1_mb_aff_flag) + ps_dec->u1_recon_mb_grp = ps_dec->u2_frm_wd_in_mbs; + + ps_dec->u1_recon_mb_grp_pair = ps_dec->u1_recon_mb_grp >> 1; + + if(!ps_dec->u1_recon_mb_grp) + { + return ERROR_MB_GROUP_ASSGN_T; + } + + ps_dec->u4_num_mbs_prev_nmb = ps_dec->u1_recon_mb_grp; + + return OK; +} + +/*! + ************************************************************************** + * \if Function name : get_numbuf_dpb_bank \endif + * + * \brief + * Initializes the picture. + * + * \return + * 0 on Success and Error code otherwise + * + * \note + * This function is called when first slice of the + * NON -IDR picture is encountered. + ************************************************************************** + */ +static WORD32 get_numbuf_dpb_bank(dec_struct_t *ps_dec) +{ + WORD32 i4_DPB_size; + WORD32 i4_pic_size; + WORD32 i4_num_buf_alloc; + UWORD32 Ysize; + UWORD32 UVsize; + UWORD32 one_frm_size; + UWORD32 luma_height; + + luma_height = ps_dec->u2_pic_ht; + + i4_DPB_size = ps_dec->ps_mem_tab[MEM_REC_REF_PIC].u4_mem_size; + + Ysize = (ps_dec->u2_frm_wd_y) * (luma_height + (PAD_LEN_Y_V << 2)); + + UVsize = Ysize >> 2; + + { + if(ps_dec->u4_share_disp_buf == 1) + { + /* In case of buffers getting shared between application and library + there is no need of reference memtabs. 
Instead of setting the i4_size + to zero, it is reduced to a small i4_size to ensure that changes + in the code are minimal */ + if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV) + || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU) + || (ps_dec->u1_chroma_format == IV_YUV_420P)) + { + Ysize = 64; + } + if(ps_dec->u1_chroma_format == IV_YUV_420SP_UV) + { + UVsize = 64; + } + + } + } + + one_frm_size = (((Ysize + 127) >> 7) << 7) + + ((((UVsize << 1) + 127) >> 7) << 7); + i4_num_buf_alloc = i4_DPB_size / (one_frm_size); + + return i4_num_buf_alloc; +} +/*! + ************************************************************************** + * \if Function name : ih264d_init_pic \endif + * + * \brief + * Initializes the picture. + * + * \return + * 0 on Success and Error code otherwise + * + * \note + * This function is called when first slice of the + * NON -IDR picture is encountered. + ************************************************************************** + */ +WORD32 ih264d_init_pic(dec_struct_t *ps_dec, + UWORD16 u2_frame_num, + WORD32 i4_poc, + dec_pic_params_t *ps_pps) +{ + dec_seq_params_t *ps_seq = ps_pps->ps_sps; + prev_seq_params_t * ps_prev_seq_params = &ps_dec->s_prev_seq_params; + WORD32 i4_pic_bufs; + WORD32 ret; + + ps_dec->ps_cur_slice->u2_frame_num = u2_frame_num; + ps_dec->ps_cur_slice->i4_poc = i4_poc; + ps_dec->ps_cur_pps = ps_pps; + ps_dec->ps_cur_pps->pv_codec_handle = ps_dec; + + ps_dec->ps_cur_sps = ps_seq; + ps_dec->ps_dpb_mgr->i4_max_frm_num = ps_seq->u2_u4_max_pic_num_minus1 + + 1; + + ps_dec->ps_dpb_mgr->u2_pic_ht = ps_dec->u2_pic_ht; + ps_dec->ps_dpb_mgr->u2_pic_wd = ps_dec->u2_pic_wd; + ps_dec->i4_pic_type = -1; + ps_dec->i4_frametype = -1; + ps_dec->i4_content_type = -1; + + /*--------------------------------------------------------------------*/ + /* Get the value of MaxMbAddress and frmheight in Mbs */ + /*--------------------------------------------------------------------*/ + ps_seq->u2_max_mb_addr = + (ps_seq->u2_frm_wd_in_mbs + * 
(ps_dec->u2_pic_ht + >> (4 + + ps_dec->ps_cur_slice->u1_field_pic_flag))) + - 1; + ps_dec->u2_frm_ht_in_mbs = (ps_dec->u2_pic_ht + >> (4 + ps_dec->ps_cur_slice->u1_field_pic_flag)); + + + /***************************************************************************/ + /* If change in Level or the required PicBuffers i4_size is more than the */ + /* current one FREE the current PicBuffers and allocate affresh */ + /***************************************************************************/ + if(!ps_dec->u1_init_dec_flag + || ih264d_is_sps_changed(ps_prev_seq_params, ps_seq)) + { + + + ivd_video_decode_ip_t *ps_dec_in = ps_dec->pv_dec_in; + ivd_video_decode_op_t *ps_dec_out = ps_dec->pv_dec_out; + + if(ps_dec->u4_share_disp_buf == 0) + { + i4_pic_bufs = get_numbuf_dpb_bank(ps_dec); + } + else + { + i4_pic_bufs = (WORD32)ps_dec->u4_num_disp_bufs; + } + + ps_dec->u1_pic_bufs = CLIP_U8(i4_pic_bufs); + + if(ps_dec->u4_share_disp_buf == 0) + ps_dec->u1_pic_bufs = MIN(ps_dec->u1_pic_bufs, + (H264_MAX_REF_PICS * 2)); + + ps_dec->u1_max_dec_frame_buffering = ih264d_get_dpb_size(ps_seq, + ps_dec); + + if(ps_dec->u4_share_disp_buf) + ps_dec->u1_max_dec_frame_buffering = MAX( + ps_dec->u1_max_dec_frame_buffering, 5); + + ps_dec->u1_max_dec_frame_buffering = MIN( + ps_dec->u1_max_dec_frame_buffering, + ps_dec->u4_num_ref_frames_at_init); + ps_dec->u1_max_dec_frame_buffering = MIN( + ps_dec->u1_max_dec_frame_buffering, + ps_dec->u1_pic_bufs); + +// ps_dec->u1_pic_bufs = ps_dec->i1_max_dec_frame_buffering; + + /* Fix is for handling one pic in and one pic out incase of */ + /* MMCO 5 or IDR */ + + ps_dec->i4_display_delay = MIN(ps_dec->u4_num_reorder_frames_at_init, + ps_dec->u1_max_dec_frame_buffering); + + if(1 == ps_seq->u1_vui_parameters_present_flag) + { + if(ps_seq->u1_frame_mbs_only_flag == 1) + ps_dec->i4_display_delay = MIN( + (UWORD32 )ps_dec->i4_display_delay, + ((UWORD32 )ps_seq->s_vui.u4_num_reorder_frames + + 1)); + else + ps_dec->i4_display_delay = MIN( + (UWORD32 
)ps_dec->i4_display_delay, + ((UWORD32 )ps_seq->s_vui.u4_num_reorder_frames + + 1) * 2); + } + + /* Temporary hack to run Tractor Cav/Cab/MbAff Profiler streams also for CAFI1_SVA_C.264 in conformance*/ + if(ps_dec->u1_init_dec_flag) + { + ih264d_release_pics_in_dpb((void *)ps_dec, + ps_dec->u1_pic_bufs); + ih264d_release_display_bufs(ps_dec); + ih264d_reset_ref_bufs(ps_dec->ps_dpb_mgr); + } + + /*********************************************************************/ + /* Configuring decoder parameters based on level and then */ + /* fresh pointer initialisation in decoder scratch and state buffers */ + /*********************************************************************/ + if(!ps_dec->u1_init_dec_flag || + ((ps_seq->u1_level_idc < H264_LEVEL_3_0) ^ (ps_prev_seq_params->u1_level_idc < H264_LEVEL_3_0))) + { + ret = ih264d_init_dec_mb_grp(ps_dec); + if(ret != OK) + return ret; + } + + ret = ih264d_create_pic_buffers(ps_dec->u1_pic_bufs, + ps_dec); + if(ret != OK) + return ret; + + ih264d_get_memory_dec_params(ps_dec); + + ret = ih264d_create_mv_bank(ps_dec, ps_dec->u2_pic_wd, + ps_dec->u2_pic_ht); + if(ret != OK) + return ret; + + /* In shared mode, set all of them as used by display */ + if(ps_dec->u4_share_disp_buf == 1) + { + WORD32 i; + + for(i = 0; i < ps_dec->u1_pic_bufs; i++) + { + ih264_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, i, + BUF_MGR_IO); + } + } + + ps_dec->u1_init_dec_flag = 1; + ps_prev_seq_params->u2_frm_wd_in_mbs = ps_seq->u2_frm_wd_in_mbs; + ps_prev_seq_params->u1_level_idc = ps_seq->u1_level_idc; + ps_prev_seq_params->u1_profile_idc = ps_seq->u1_profile_idc; + ps_prev_seq_params->u2_frm_ht_in_mbs = ps_seq->u2_frm_ht_in_mbs; + ps_prev_seq_params->u1_frame_mbs_only_flag = + ps_seq->u1_frame_mbs_only_flag; + ps_prev_seq_params->u1_direct_8x8_inference_flag = + ps_seq->u1_direct_8x8_inference_flag; + + ps_dec->i4_cur_display_seq = 0; + ps_dec->i4_prev_max_display_seq = 0; + ps_dec->i4_max_poc = 0; + + { + /* 0th entry of CtxtIncMbMap 
will be always be containing default values + for CABAC context representing MB not available */ + ctxt_inc_mb_info_t *p_DefCtxt = ps_dec->p_ctxt_inc_mb_map - 1; + UWORD8 *pu1_temp; + WORD8 i; + p_DefCtxt->u1_mb_type = CAB_SKIP; + + p_DefCtxt->u1_cbp = 0x0f; + p_DefCtxt->u1_intra_chroma_pred_mode = 0; + + p_DefCtxt->u1_yuv_dc_csbp = 0x7; + + p_DefCtxt->u1_transform8x8_ctxt = 0; + + pu1_temp = (UWORD8*)p_DefCtxt->i1_ref_idx; + for(i = 0; i < 4; i++, pu1_temp++) + (*pu1_temp) = 0; + pu1_temp = (UWORD8*)p_DefCtxt->u1_mv; + for(i = 0; i < 16; i++, pu1_temp++) + (*pu1_temp) = 0; + ps_dec->ps_def_ctxt_mb_info = p_DefCtxt; + } + + } + /* reset DBP commands read u4_flag */ + ps_dec->ps_dpb_cmds->u1_dpb_commands_read = 0; + + return OK; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_get_next_display_field */ +/* */ +/* Description : Application calls this module to get the next field */ +/* to be displayed */ +/* */ +/* Inputs : 1. IBUFAPI_Handle Hnadle to the Display buffer */ +/* 2. 
IH264DEC_DispUnit Pointer to the display struct */ +/* */ +/* Globals : */ +/* */ +/* */ +/* Processing : None */ +/* Outputs : None */ +/* Returns : None */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 27 05 2005 Ittiam Draft */ +/* */ +/*****************************************************************************/ + +WORD32 ih264d_get_next_display_field(dec_struct_t * ps_dec, + ivd_out_bufdesc_t *ps_out_buffer, + ivd_get_display_frame_op_t *pv_disp_op) +{ + pic_buffer_t *pic_buf; + + UWORD8 i1_cur_fld; + WORD32 u4_api_ret = -1; + WORD32 i4_disp_buf_id; + iv_yuv_buf_t *ps_op_frm; + + + + ps_op_frm = &(ps_dec->s_disp_frame_info); + H264_MUTEX_LOCK(&ps_dec->process_disp_mutex); + pic_buf = (pic_buffer_t *)ih264_disp_mgr_get( + (disp_mgr_t *)ps_dec->pv_disp_buf_mgr, &i4_disp_buf_id); + ps_dec->u4_num_fld_in_frm = 0; + u4_api_ret = -1; + pv_disp_op->u4_ts = -1; + pv_disp_op->e_output_format = ps_dec->u1_chroma_format; + + pv_disp_op->s_disp_frm_buf.pv_y_buf = ps_out_buffer->pu1_bufs[0]; + pv_disp_op->s_disp_frm_buf.pv_u_buf = ps_out_buffer->pu1_bufs[1]; + pv_disp_op->s_disp_frm_buf.pv_v_buf = ps_out_buffer->pu1_bufs[2]; + if(pic_buf != NULL) + { + pv_disp_op->e4_fld_type = 0; + pv_disp_op->u4_disp_buf_id = i4_disp_buf_id; + + ps_op_frm->u4_y_ht = pic_buf->u2_disp_height << 1; + ps_op_frm->u4_u_ht = ps_op_frm->u4_v_ht = ps_op_frm->u4_y_ht >> 1; + ps_op_frm->u4_y_wd = pic_buf->u2_disp_width; + + ps_op_frm->u4_u_wd = ps_op_frm->u4_v_wd = ps_op_frm->u4_y_wd >> 1; + + ps_op_frm->u4_y_strd = pic_buf->u2_frm_wd_y; + ps_op_frm->u4_u_strd = ps_op_frm->u4_v_strd = pic_buf->u2_frm_wd_uv; + + /* ! 
*/ + pv_disp_op->u4_ts = pic_buf->u4_ts; + + /* set the start of the Y, U and V buffer pointer for display */ + ps_op_frm->pv_y_buf = pic_buf->pu1_buf1 + pic_buf->u2_crop_offset_y; + ps_op_frm->pv_u_buf = pic_buf->pu1_buf2 + pic_buf->u2_crop_offset_uv; + ps_op_frm->pv_v_buf = pic_buf->pu1_buf3 + pic_buf->u2_crop_offset_uv; + ps_dec->u4_num_fld_in_frm++; + ps_dec->u4_num_fld_in_frm++; + u4_api_ret = 0; + + if(pic_buf->u1_picturetype == 0) + pv_disp_op->u4_progressive_frame_flag = 1; + else + pv_disp_op->u4_progressive_frame_flag = 0; + + } H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex); + pv_disp_op->u4_error_code = u4_api_ret; + pv_disp_op->e_pic_type = 0xFFFFFFFF; //Junk; + + if(u4_api_ret) + { + pv_disp_op->u4_error_code = 1; //put a proper error code here + } + else + { + + //Release the buffer if being sent for display + UWORD32 temp; + UWORD32 dest_inc_Y = 0, dest_inc_UV = 0; + + pv_disp_op->s_disp_frm_buf.u4_y_wd = temp = MIN(ps_op_frm->u4_y_wd, + ps_op_frm->u4_y_strd); + pv_disp_op->s_disp_frm_buf.u4_u_wd = pv_disp_op->s_disp_frm_buf.u4_y_wd + >> 1; + pv_disp_op->s_disp_frm_buf.u4_v_wd = pv_disp_op->s_disp_frm_buf.u4_y_wd + >> 1; + + pv_disp_op->s_disp_frm_buf.u4_y_ht = ps_op_frm->u4_y_ht; + pv_disp_op->s_disp_frm_buf.u4_u_ht = pv_disp_op->s_disp_frm_buf.u4_y_ht + >> 1; + pv_disp_op->s_disp_frm_buf.u4_v_ht = pv_disp_op->s_disp_frm_buf.u4_y_ht + >> 1; + if(0 == ps_dec->u4_share_disp_buf) + { + pv_disp_op->s_disp_frm_buf.u4_y_strd = + pv_disp_op->s_disp_frm_buf.u4_y_wd; + pv_disp_op->s_disp_frm_buf.u4_u_strd = + pv_disp_op->s_disp_frm_buf.u4_y_wd >> 1; + pv_disp_op->s_disp_frm_buf.u4_v_strd = + pv_disp_op->s_disp_frm_buf.u4_y_wd >> 1; + + } + else + { + pv_disp_op->s_disp_frm_buf.u4_y_strd = ps_op_frm->u4_y_strd; + } + + if(ps_dec->u4_app_disp_width) + { + pv_disp_op->s_disp_frm_buf.u4_y_strd = MAX( + ps_dec->u4_app_disp_width, + pv_disp_op->s_disp_frm_buf.u4_y_strd); + } + + pv_disp_op->u4_error_code = 0; + if(pv_disp_op->e_output_format == IV_YUV_420P) + { + 
UWORD32 i; + pv_disp_op->s_disp_frm_buf.u4_u_strd = + pv_disp_op->s_disp_frm_buf.u4_y_strd >> 1; + pv_disp_op->s_disp_frm_buf.u4_v_strd = + pv_disp_op->s_disp_frm_buf.u4_y_strd >> 1; + + pv_disp_op->s_disp_frm_buf.u4_u_wd = ps_op_frm->u4_y_wd >> 1; + pv_disp_op->s_disp_frm_buf.u4_v_wd = ps_op_frm->u4_y_wd >> 1; + + if(1 == ps_dec->u4_share_disp_buf) + { + pv_disp_op->s_disp_frm_buf.pv_y_buf = ps_op_frm->pv_y_buf; + + for(i = 0; i < MAX_DISP_BUFS_NEW; i++) + { + UWORD8 *buf = ps_dec->disp_bufs[i].buf[0]; + buf += ps_dec->disp_bufs[i].u4_ofst[0]; + if(((UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf + - pic_buf->u2_crop_offset_y) == buf) + { + buf = ps_dec->disp_bufs[i].buf[1]; + buf += ps_dec->disp_bufs[i].u4_ofst[1]; + pv_disp_op->s_disp_frm_buf.pv_u_buf = buf + + pic_buf->u2_crop_offset_uv; + + buf = ps_dec->disp_bufs[i].buf[2]; + buf += ps_dec->disp_bufs[i].u4_ofst[2]; + pv_disp_op->s_disp_frm_buf.pv_v_buf = buf + + pic_buf->u2_crop_offset_uv; + } + } + } + + } + else if((pv_disp_op->e_output_format == IV_YUV_420SP_UV) + || (pv_disp_op->e_output_format == IV_YUV_420SP_VU)) + { + pv_disp_op->s_disp_frm_buf.u4_u_strd = + pv_disp_op->s_disp_frm_buf.u4_y_strd; + pv_disp_op->s_disp_frm_buf.u4_v_strd = 0; + + if(1 == ps_dec->u4_share_disp_buf) + { + UWORD32 i; + + pv_disp_op->s_disp_frm_buf.pv_y_buf = ps_op_frm->pv_y_buf; + + for(i = 0; i < MAX_DISP_BUFS_NEW; i++) + { + UWORD8 *buf = ps_dec->disp_bufs[i].buf[0]; + buf += ps_dec->disp_bufs[i].u4_ofst[0]; + if((UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf + - pic_buf->u2_crop_offset_y == buf) + { + buf = ps_dec->disp_bufs[i].buf[1]; + buf += ps_dec->disp_bufs[i].u4_ofst[1]; + pv_disp_op->s_disp_frm_buf.pv_u_buf = buf + + pic_buf->u2_crop_offset_uv; + ; + + buf = ps_dec->disp_bufs[i].buf[2]; + buf += ps_dec->disp_bufs[i].u4_ofst[2]; + pv_disp_op->s_disp_frm_buf.pv_v_buf = buf + + pic_buf->u2_crop_offset_uv; + ; + } + } + } + pv_disp_op->s_disp_frm_buf.u4_u_wd = + pv_disp_op->s_disp_frm_buf.u4_y_wd; + 
pv_disp_op->s_disp_frm_buf.u4_v_wd = 0;
+
+        }
+        else if((pv_disp_op->e_output_format == IV_RGB_565)
+                        || (pv_disp_op->e_output_format == IV_YUV_422ILE))
+        {
+
+            pv_disp_op->s_disp_frm_buf.u4_u_strd = 0;
+            pv_disp_op->s_disp_frm_buf.u4_v_strd = 0;
+            pv_disp_op->s_disp_frm_buf.u4_u_wd = 0;
+            pv_disp_op->s_disp_frm_buf.u4_v_wd = 0;
+            pv_disp_op->s_disp_frm_buf.u4_u_ht = 0;
+            pv_disp_op->s_disp_frm_buf.u4_v_ht = 0;
+
+        }
+
+
+    }
+
+    return u4_api_ret;
+}
+
+
+/*****************************************************************************/
+/*  Function Name : ih264d_release_display_field                             */
+/*                                                                           */
+/*  Description   : Releases the display buffer described by pv_disp_op, or, */
+/*                  when pv_disp_op reports error code 1 while a flush is    */
+/*                  pending, releases every mapped display buffer.           */
+/*  Inputs        : ps_dec     - Decoder parameters                          */
+/*                  pv_disp_op - Display output descriptor; u4_error_code    */
+/*                               and u4_disp_buf_id are read                 */
+/*  Globals       : None                                                     */
+/*  Processing    : u4_error_code == 1 and u1_flushfrm == 1:                 */
+/*                    in shared display-buffer mode every buffer whose       */
+/*                    u4_disp_buf_mapping entry is 1 is released with        */
+/*                    BUF_MGR_IO, u4_disp_buf_to_be_freed is cleared and the */
+/*                    first u1_pic_bufs mapping entries are set to 1;        */
+/*                    u1_flushfrm is then cleared.                           */
+/*                  Otherwise:                                               */
+/*                    non-shared mode releases u4_disp_buf_id with           */
+/*                    BUF_MGR_IO, shared mode sets its mapping entry to 1.   */
+/*  Outputs       : Buffer manager state, u4_disp_buf_mapping, u1_flushfrm   */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         27 04 2005   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+void ih264d_release_display_field(dec_struct_t *ps_dec,
+                                  ivd_get_display_frame_op_t *pv_disp_op)
+{
+    if(1 == pv_disp_op->u4_error_code)
+    {
+        if(1 == ps_dec->u1_flushfrm)
+        {
+            UWORD32 i;
+
+            if(1 == ps_dec->u4_share_disp_buf)
+            {
+                /* Give back every buffer currently marked in the mapping */
+                H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
+                for(i = 0; i < (MAX_DISP_BUFS_NEW); i++)
+                {
+                    if(1 == ps_dec->u4_disp_buf_mapping[i])
+                    {
+                        ih264_buf_mgr_release(
+                                        (buf_mgr_t *)ps_dec->pv_pic_buf_mgr, i,
+                                        BUF_MGR_IO);
+                        ps_dec->u4_disp_buf_mapping[i] = 0;
+                    }
+                }
+                H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+
+                /* NOTE(review): the two updates below run after the mutex  */
+                /* has been dropped - confirm that this is intentional.     */
+                memset(ps_dec->u4_disp_buf_to_be_freed, 0,
+                       (MAX_DISP_BUFS_NEW) * sizeof(UWORD32));
+                for(i = 0; i < ps_dec->u1_pic_bufs; i++)
+                    ps_dec->u4_disp_buf_mapping[i] = 1;
+            }
+            ps_dec->u1_flushfrm = 0;
+
+        }
+    }
+    else
+    {
+        H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
+
+        if(0 == ps_dec->u4_share_disp_buf)
+        {
+            ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+                                  pv_disp_op->u4_disp_buf_id,
+                                  BUF_MGR_IO);
+
+        }
+        else
+        {
+            ps_dec->u4_disp_buf_mapping[pv_disp_op->u4_disp_buf_id] = 1;
+        }
+        H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+
+    }
+}
+/*****************************************************************************/
+/*  Function Name : ih264d_assign_display_seq                                */
+/*                                                                           */
+/*  Description   : This function implements the bumping process. Every      */
+/*                  outgoing frame from DPB is assigned a display sequence   */
+/*                  number which increases monotonically. System looks for   */
+/*                  this number to display a frame.                          */
+/*  Inputs        : ps_dec - Decoder parameters                              */
+/*  Globals       : None                                                     */
+/*  Processing    : Refer bumping process in the standard. Runs only when    */
+/*                  the number of POC map entries has reached                */
+/*                  i4_display_delay; the displayable entry with the         */
+/*                  smallest POC is handed to the display manager and its    */
+/*                  map slot is freed.                                       */
+/*  Outputs       : Assigns display sequence number.                         */
+/*  Returns       : OK, or ERROR_GAPS_IN_FRM_NUM                             */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         27 04 2005   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_assign_display_seq(dec_struct_t *ps_dec)
+{
+    WORD32 i;
+    WORD32 i4_min_poc;
+    WORD32 i4_min_poc_buf_id;
+    WORD32 i4_min_index;
+    dpb_manager_t *ps_dpb_mgr = ps_dec->ps_dpb_mgr;
+    WORD32 (*i4_poc_buf_id_map)[3] = ps_dpb_mgr->ai4_poc_buf_id_map;
+
+    i4_min_poc = 0x7fffffff;
+    i4_min_poc_buf_id = -1;
+    i4_min_index = -1;
+
+    if(ps_dpb_mgr->i1_poc_buf_id_entries >= ps_dec->i4_display_delay)
+    {
+        /* Column 0 of the map is the buffer id, column 1 the POC */
+        for(i = 0; i < MAX_FRAMES; i++)
+        {
+            if((i4_poc_buf_id_map[i][0] != -1)
+                            && (DO_NOT_DISP
+                                            != ps_dpb_mgr->ai4_poc_buf_id_map[i][0]))
+            {
+                if(i4_poc_buf_id_map[i][1] < i4_min_poc)
+                {
+                    i4_min_poc = i4_poc_buf_id_map[i][1];
+                    i4_min_poc_buf_id = i4_poc_buf_id_map[i][0];
+                    i4_min_index = i;
+                }
+            }
+        }
+
+        if((i4_min_index != -1) && (DO_NOT_DISP != i4_min_poc_buf_id))
+        {
+            ps_dec->i4_cur_display_seq++;
+            ih264_disp_mgr_add(
+                            (disp_mgr_t *)ps_dec->pv_disp_buf_mgr,
+                            i4_min_poc_buf_id, ps_dec->i4_cur_display_seq,
+                            ps_dec->apv_buf_id_pic_buf_map[i4_min_poc_buf_id]);
+            i4_poc_buf_id_map[i4_min_index][0] = -1;
+            i4_poc_buf_id_map[i4_min_index][1] = 0x7fffffff;
+            ps_dpb_mgr->i1_poc_buf_id_entries--;
+        }
+        else if(DO_NOT_DISP == i4_min_poc_buf_id)
+        {
+            /* NOTE(review): the scan above skips DO_NOT_DISP entries, so   */
+            /* this branch looks reachable only if DO_NOT_DISP equals -1 -  */
+            /* confirm against the macro definition.                        */
+            WORD32 i4_error_code;
+            i4_error_code = ERROR_GAPS_IN_FRM_NUM;
+//          i4_error_code |= 1<<IVD_CORRUPTEDDATA;
+            return i4_error_code;
+        }
+    }
+    return OK;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_release_display_bufs                              */
+/*                                                                           */
+/*  Description   : This function implements the bumping process when        */
+/*                  mmco = 5. Each outgoing frame from DPB is assigned a     */
+/*                  display sequence number which increases monotonically.   */
+/*                  System looks for this number to display a frame.         */
+/*  Inputs        : ps_dec - Decoder parameters                              */
+/*  Globals       : None                                                     */
+/*  Processing    : Refer bumping process in the standard for mmco = 5.      */
+/*                  Entries are drained in increasing POC order; entries     */
+/*                  whose buffer id is DO_NOT_DISP are dropped without being */
+/*                  queued for display. The display-sequence base is then    */
+/*                  advanced and i4_max_poc is reset.                        */
+/*  Outputs       : Assigns display sequence number.                         */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         27 04 2005   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+void ih264d_release_display_bufs(dec_struct_t *ps_dec)
+{
+    WORD32 i, j;
+    WORD32 i4_min_poc;
+    WORD32 i4_min_poc_buf_id;
+    WORD32 i4_min_index;
+    long long i8_next_base;
+    dpb_manager_t *ps_dpb_mgr = ps_dec->ps_dpb_mgr;
+    WORD32 (*i4_poc_buf_id_map)[3] = ps_dpb_mgr->ai4_poc_buf_id_map;
+
+    ih264d_delete_nonref_nondisplay_pics(ps_dpb_mgr);
+
+    for(j = 0; j < ps_dpb_mgr->i1_poc_buf_id_entries; j++)
+    {
+        /* Restart the search on every pass so that a pass which finds no  */
+        /* candidate cannot reuse the slot picked by the previous pass     */
+        i4_min_poc = 0x7fffffff;
+        i4_min_poc_buf_id = -1;
+        i4_min_index = -1;
+        for(i = 0; i < MAX_FRAMES; i++)
+        {
+            if(i4_poc_buf_id_map[i][0] != -1)
+            {
+                if(i4_poc_buf_id_map[i][1] < i4_min_poc)
+                {
+                    i4_min_poc = i4_poc_buf_id_map[i][1];
+                    i4_min_poc_buf_id = i4_poc_buf_id_map[i][0];
+                    i4_min_index = i;
+                }
+            }
+        }
+
+        /* Nothing left to bump: the entry count was larger than the number */
+        /* of selectable slots. Stop instead of indexing the map with -1.   */
+        if(-1 == i4_min_index)
+            break;
+
+        if(DO_NOT_DISP != i4_min_poc_buf_id)
+        {
+            ps_dec->i4_cur_display_seq++;
+            ih264_disp_mgr_add(
+                            (disp_mgr_t *)ps_dec->pv_disp_buf_mgr,
+                            i4_min_poc_buf_id, ps_dec->i4_cur_display_seq,
+                            ps_dec->apv_buf_id_pic_buf_map[i4_min_poc_buf_id]);
+        }
+
+        /* Free the slot whether or not the picture was queued for display */
+        i4_poc_buf_id_map[i4_min_index][0] = -1;
+        i4_poc_buf_id_map[i4_min_index][1] = 0x7fffffff;
+        i4_poc_buf_id_map[i4_min_index][2] = 0;
+    }
+    ps_dpb_mgr->i1_poc_buf_id_entries = 0;
+
+    /* Advance the display-sequence base in 64-bit arithmetic; if the new  */
+    /* base does not fit in 32 bits, restart it from 0 instead of          */
+    /* overflowing a signed integer                                        */
+    i8_next_base = (long long)ps_dec->i4_prev_max_display_seq
+                    + ps_dec->i4_max_poc
+                    + ps_dec->u1_max_dec_frame_buffering + 1;
+    if((i8_next_base > 0x7fffffffLL) || (i8_next_base < (-0x7fffffffLL - 1)))
+        ps_dec->i4_prev_max_display_seq = 0;
+    else
+        ps_dec->i4_prev_max_display_seq = (WORD32)i8_next_base;
+    ps_dec->i4_max_poc = 0;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_assign_pic_num                                    */
+/*                                                                           */
+/*  Description   : This function assigns pic num to each reference frame    */
+/*                  depending on the cur_frame_num as specified in section   */
+/*                  8.2.4.1                                                  */
+/*                                                                           */
+/*  Inputs        : ps_dec                                                   */
+/*                                                                           */
+/*  Globals       : NO globals used                                          */
+/*                                                                           */
+/*  Processing    : for all ST pictures                                      */
+/*                    if( FrameNum > cur_frame_num)                          */
+/*                    PicNum = FrameNum - MaxFrameNum                        */
+/*                    else                                                   */
+/*                    PicNum = FrameNum                                      */
+/*                  The same wrap correction is applied to the recorded      */
+/*                  start/end frame numbers of gaps when gaps are allowed    */
+/*                  by the SPS and at least one gap is recorded.             */
+/*                                                                           */
+/*  Returns       : void                                                     */
+/*                                                                           */
+/*  Issues        : NO                                                       */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         13 07 2002   Jay             Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+void ih264d_assign_pic_num(dec_struct_t *ps_dec)
+{
+    dpb_manager_t *ps_dpb_mgr;
+    struct dpb_info_t *ps_next_dpb;
+    WORD8 i;
+    WORD32 i4_cur_frame_num, i4_max_frame_num;
+    WORD32 i4_ref_frame_num;
+    UWORD8 u1_fld_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
+
+    i4_max_frame_num = ps_dec->ps_cur_sps->u2_u4_max_pic_num_minus1 + 1;
+    i4_cur_frame_num = ps_dec->ps_cur_pic->i4_frame_num;
+    ps_dpb_mgr = ps_dec->ps_dpb_mgr;
+
+    /* Start from ST head */
+    ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+    for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
+    {
+        WORD32 i4_pic_num;
+
+        i4_ref_frame_num = ps_next_dpb->ps_pic_buf->i4_frame_num;
+        if(i4_ref_frame_num > i4_cur_frame_num)
+        {
+            /* RefPic Buf frame_num is before Current frame_num in decode order */
+            i4_pic_num = i4_ref_frame_num - i4_max_frame_num;
+        }
+        else
+        {
+            /* frame_num does not exceed the current one: PicNum = FrameNum */
+            i4_pic_num = i4_ref_frame_num;
+        }
+
+        ps_next_dpb->ps_pic_buf->i4_pic_num = i4_pic_num;
+        ps_next_dpb->i4_frame_num = i4_pic_num;
+        ps_next_dpb->ps_pic_buf->u1_long_term_frm_idx = MAX_REF_BUFS + 1;
+        if(u1_fld_pic_flag)
+        {
+            /* Assign the pic num to top fields and bot fields */
+
+            ps_next_dpb->s_top_field.i4_pic_num = i4_pic_num * 2
+                            + !(ps_dec->ps_cur_slice->u1_bottom_field_flag);
+            ps_next_dpb->s_bot_field.i4_pic_num = i4_pic_num * 2
+                            + ps_dec->ps_cur_slice->u1_bottom_field_flag;
+        }
+        /* Chase the next link */
+        ps_next_dpb = ps_next_dpb->ps_prev_short;
+    }
+
+    if(ps_dec->ps_cur_sps->u1_gaps_in_frame_num_value_allowed_flag
+                    && ps_dpb_mgr->u1_num_gaps)
+    {
+        WORD32 i4_start_frm, i4_end_frm;
+        /* Assign pic numbers for gaps */
+        for(i = 0; i < MAX_FRAMES; i++)
+        {
+            i4_start_frm = ps_dpb_mgr->ai4_gaps_start_frm_num[i];
+            if(i4_start_frm != INVALID_FRAME_NUM)
+            {
+                if(i4_start_frm > i4_cur_frame_num)
+                {
+                    /* gap's frame_num is before Current frame_num in
+                     decode order */
+                    i4_start_frm -= i4_max_frame_num;
+                }
+                ps_dpb_mgr->ai4_gaps_start_frm_num[i] = i4_start_frm;
+                i4_end_frm = ps_dpb_mgr->ai4_gaps_end_frm_num[i];
+
+                if(i4_end_frm > i4_cur_frame_num)
+                {
+                    /* gap's frame_num is before Current frame_num in
+                     decode order */
+                    i4_end_frm -= i4_max_frame_num;
+                }
+                ps_dpb_mgr->ai4_gaps_end_frm_num[i] = i4_end_frm;
+            }
+        }
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_update_qp \endif
+ *
+ * \brief
+ *    Adds the QP delta i1_qp to the decoder's current QP (modulo 52) and
+ *    refreshes everything derived from it: the div-6 / rem-6 values for the
+ *    Y, U and V components and the three quant-scale pointers, which are
+ *    rows of gau2_ih264_iquant_scale_4x4 selected by the rem-6 values.
+ *
+ * \param ps_dec : decoder context whose QP state is updated
+ * \param i1_qp  : QP delta; accepted range is [-26, 25]
+ *
+ * \return
+ *    OK on success, ERROR_INV_RANGE_QP_T when the delta or the resulting
+ *    QP is out of range (decoder state is left untouched in that case)
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_update_qp(dec_struct_t * ps_dec, const WORD8 i1_qp)
+{
+    WORD32 i4_val;
+    WORD32 i4_scaled_qp;
+
+    /* Candidate QP, reduced modulo 52 */
+    i4_val = (ps_dec->u1_qp + i1_qp + 52) % 52;
+
+    if((i1_qp < -26) || (i1_qp > 25) || (i4_val < 0) || (i4_val > 51))
+    {
+        return ERROR_INV_RANGE_QP_T;
+    }
+
+    /* Y component */
+    ps_dec->u1_qp = i4_val;
+    ps_dec->u1_qp_y_div6 = ps_dec->u1_qp / 6;
+    ps_dec->u1_qp_y_rem6 = ps_dec->u1_qp % 6;
+
+    /* U component: QP shifted by the first chroma offset, clipped to 0..51 */
+    /* and mapped through gau1_ih264d_qp_scale_cr (table index is biased by */
+    /* 12)                                                                  */
+    i4_val = CLIP3(0, 51, ps_dec->u1_qp + ps_dec->ps_cur_pps->i1_chroma_qp_index_offset);
+    i4_scaled_qp = gau1_ih264d_qp_scale_cr[12 + i4_val];
+    ps_dec->u1_qp_u_div6 = DIV(i4_scaled_qp, 6);
+    ps_dec->u1_qp_u_rem6 = MOD(i4_scaled_qp, 6);
+
+    /* V component: same mapping with the second chroma offset */
+    i4_val = CLIP3(0, 51, ps_dec->u1_qp + ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset);
+    i4_scaled_qp = gau1_ih264d_qp_scale_cr[12 + i4_val];
+    ps_dec->u1_qp_v_div6 = DIV(i4_scaled_qp, 6);
+    ps_dec->u1_qp_v_rem6 = MOD(i4_scaled_qp, 6);
+
+    /* Scale-table rows selected by the per-component rem-6 values */
+    ps_dec->pu2_quant_scale_v =
+                    gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_v_rem6];
+    ps_dec->pu2_quant_scale_u =
+                    gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_u_rem6];
+    ps_dec->pu2_quant_scale_y =
+                    gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_y_rem6];
+
+    return OK;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_decode_gaps_in_frame_num                          */
+/*                                                                           */
+/*  Description   : This function decodes gaps in frame number               */
+/*                                                                           */
+/*  Inputs        : ps_dec          Decoder parameters                       */
+/*                  u2_frame_num    current frame number                     */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*  Processing    : This functionality needs to be implemented               */
+/*  Outputs       : None                                                     */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : Not implemented                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes
made)  */
+/*         06 05 2002   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+/*
+ * ih264d_decode_gaps_in_frame_num
+ *
+ * Walks every frame_num that is missing between the successor of
+ * ps_dec->u2_prev_ref_frame_num (wrapped at max frame num) and u2_frame_num.
+ * For each missing value a DO_NOT_DISP placeholder is inserted into the
+ * display list and ih264d_do_mmco_for_gaps() is run. The covered range and
+ * the number of placeholders are recorded in the first free slot of
+ * ai4_gaps_start_frm_num / ai4_gaps_end_frm_num / ai1_gaps_per_seq.
+ *
+ * Returns 0 without doing anything when:
+ *   - the current slice is a field picture and u2_frame_num equals the
+ *     previous reference frame_num,
+ *   - u2_frame_num is the direct successor of the previous frame_num,
+ *   - the NAL is an IDR slice and the successor is >= u2_frame_num.
+ * Returns ERROR_DBP_MANAGER_T when no free gap slot exists, any error
+ * reported by the helpers it calls, and OK otherwise.
+ *
+ * Side effect: clears ps_dec->ps_cur_slice->u1_field_pic_flag once a gap has
+ * been detected.
+ *
+ * Display-sequence arithmetic is carried out in 64 bits; if a value does not
+ * fit in a signed 32-bit integer the display-sequence base restarts from 0
+ * instead of overflowing.
+ */
+WORD32 ih264d_decode_gaps_in_frame_num(dec_struct_t *ps_dec,
+                                       UWORD16 u2_frame_num)
+{
+    UWORD32 u4_next_frm_num, u4_start_frm_num;
+    UWORD32 u4_max_frm_num;
+    pocstruct_t s_tmp_poc;
+    WORD32 i4_poc;
+    dec_slice_params_t *ps_cur_slice;
+
+    dec_pic_params_t *ps_pic_params;
+    WORD8 i1_gap_idx;
+    WORD32 *i4_gaps_start_frm_num;
+    dpb_manager_t *ps_dpb_mgr;
+    WORD8 *pi1_gaps_per_seq;
+    WORD32 ret;
+
+    ps_cur_slice = ps_dec->ps_cur_slice;
+    if(ps_cur_slice->u1_field_pic_flag)
+    {
+        if(ps_dec->u2_prev_ref_frame_num == u2_frame_num)
+            return 0;
+    }
+
+    u4_next_frm_num = ps_dec->u2_prev_ref_frame_num + 1;
+    u4_max_frm_num = ps_dec->ps_cur_sps->u2_u4_max_pic_num_minus1 + 1;
+
+    /* Wrap the expected frame_num at max frame num */
+    if(u4_next_frm_num >= u4_max_frm_num)
+    {
+        u4_next_frm_num -= u4_max_frm_num;
+    }
+
+    /* No gap: the current frame_num is the expected one */
+    if(u4_next_frm_num == u2_frame_num)
+    {
+        return (0);
+    }
+
+    /* IDR slice whose frame_num does not exceed the expected one */
+    if((ps_dec->u1_nal_unit_type == IDR_SLICE_NAL)
+                    && (u4_next_frm_num >= u2_frame_num))
+    {
+        return (0);
+    }
+    u4_start_frm_num = u4_next_frm_num;
+
+    s_tmp_poc.i4_pic_order_cnt_lsb = 0;
+    s_tmp_poc.i4_delta_pic_order_cnt_bottom = 0;
+    s_tmp_poc.i4_delta_pic_order_cnt[0] = 0;
+    s_tmp_poc.i4_delta_pic_order_cnt[1] = 0;
+
+    ps_pic_params = ps_dec->ps_cur_pps;
+    ps_cur_slice->u1_field_pic_flag = 0;
+
+    ps_dpb_mgr = ps_dec->ps_dpb_mgr;
+
+    /* Find a empty slot to store gap seqn info */
+    i4_gaps_start_frm_num = ps_dpb_mgr->ai4_gaps_start_frm_num;
+    for(i1_gap_idx = 0; i1_gap_idx < MAX_FRAMES; i1_gap_idx++)
+    {
+        if(INVALID_FRAME_NUM == i4_gaps_start_frm_num[i1_gap_idx])
+            break;
+    }
+    if(MAX_FRAMES == i1_gap_idx)
+    {
+        return ERROR_DBP_MANAGER_T;
+    }
+
+    i4_poc = 0;
+    i4_gaps_start_frm_num[i1_gap_idx] = u4_start_frm_num;
+    ps_dpb_mgr->ai4_gaps_end_frm_num[i1_gap_idx] = u2_frame_num - 1;
+    pi1_gaps_per_seq = ps_dpb_mgr->ai1_gaps_per_seq;
+    pi1_gaps_per_seq[i1_gap_idx] = 0;
+    while(u4_next_frm_num != u2_frame_num)
+    {
+        long long i8_display_poc;
+
+        ih264d_delete_nonref_nondisplay_pics(ps_dpb_mgr);
+        if(ps_pic_params->ps_sps->u1_pic_order_cnt_type)
+        {
+            /* allocate a picture buffer and insert it as ST node */
+            ret = ih264d_decode_pic_order_cnt(0, u4_next_frm_num,
+                                              &ps_dec->s_prev_pic_poc,
+                                              &s_tmp_poc, ps_cur_slice,
+                                              ps_pic_params, 1, 0, 0,
+                                              &i4_poc);
+            if(ret != OK)
+                return ret;
+
+            /* Display seq no calculations */
+            if(i4_poc >= ps_dec->i4_max_poc)
+                ps_dec->i4_max_poc = i4_poc;
+            /* IDR Picture or POC wrap around */
+            if(i4_poc == 0)
+            {
+                long long i8_next_base;
+
+                i8_next_base = (long long)ps_dec->i4_prev_max_display_seq
+                                + ps_dec->i4_max_poc
+                                + ps_dec->u1_max_dec_frame_buffering
+                                + 1;
+                /* Restart the base from 0 if it no longer fits in 32 bits */
+                if((i8_next_base > 0x7fffffffLL)
+                                || (i8_next_base < (-0x7fffffffLL - 1)))
+                    ps_dec->i4_prev_max_display_seq = 0;
+                else
+                    ps_dec->i4_prev_max_display_seq = (WORD32)i8_next_base;
+                ps_dec->i4_max_poc = 0;
+            }
+
+            ps_cur_slice->u1_mmco_equalto5 = 0;
+            ps_cur_slice->u2_frame_num = u4_next_frm_num;
+        }
+
+        /* Bump one picture out when the display list is already full */
+        if(ps_dpb_mgr->i1_poc_buf_id_entries
+                        >= ps_dec->u1_max_dec_frame_buffering)
+        {
+            ret = ih264d_assign_display_seq(ps_dec);
+            if(ret != OK)
+                return ret;
+        }
+
+        /* Display POC of the placeholder, guarded against 32-bit overflow */
+        i8_display_poc = (long long)ps_dec->i4_prev_max_display_seq + i4_poc;
+        if((i8_display_poc > 0x7fffffffLL)
+                        || (i8_display_poc < (-0x7fffffffLL - 1)))
+        {
+            ps_dec->i4_prev_max_display_seq = 0;
+            i8_display_poc = i4_poc;
+        }
+
+        ret = ih264d_insert_pic_in_display_list(
+                        ps_dec->ps_dpb_mgr, (WORD8) DO_NOT_DISP,
+                        (WORD32)i8_display_poc,
+                        u4_next_frm_num);
+        if(ret != OK)
+            return ret;
+
+        pi1_gaps_per_seq[i1_gap_idx]++;
+        ret = ih264d_do_mmco_for_gaps(ps_dpb_mgr,
+                                ps_dec->ps_cur_sps->u1_num_ref_frames);
+        if(ret != OK)
+            return ret;
+
+        ih264d_delete_nonref_nondisplay_pics(ps_dpb_mgr);
+
+        /* Advance to the next missing frame_num, wrapping at the maximum */
+        u4_next_frm_num++;
+        if(u4_next_frm_num >= u4_max_frm_num)
+        {
+            u4_next_frm_num -= u4_max_frm_num;
+        }
+    }
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_create_pic_buffers \endif
+ *
+ * \brief
+ *    This function creates Picture Buffers.
+ *
+ *    The display manager, the picture buffer manager and an array of
+ *    pic_buffer_t descriptors are carved, in that order, out of the
+ *    MEM_REC_PIC_BUF_MGR mem tab. For each of the u1_num_of_buf buffers a
+ *    luma and a chroma region are reserved in the MEM_REC_REF_PIC mem tab,
+ *    the plane pointers are set up (offset past the padding), and the
+ *    descriptor is registered with the buffer manager and stored in
+ *    apv_buf_id_pic_buf_map.
+ *
+ * \param u1_num_of_buf : number of picture buffers to set up
+ * \param ps_dec        : decoder context
+ *
+ * \return
+ *    OK on success, ERROR_BUF_MGR on error (also stored in
+ *    ps_dec->i4_error_code)
+ **************************************************************************
+ */
+WORD32 ih264d_create_pic_buffers(UWORD8 u1_num_of_buf,
+                                 dec_struct_t *ps_dec)
+{
+    struct pic_buffer_t *ps_pic_buf;
+    UWORD8 i;
+    UWORD32 u4_luma_size, u4_chroma_size;
+    /* NOTE(review): u1_frm and j are not referenced anywhere below */
+    UWORD8 u1_frm = ps_dec->ps_cur_sps->u1_frame_mbs_only_flag;
+    WORD32 j;
+    UWORD32 u4_pic_buf_mem_used, u4_ref_buf_mem_used;
+    UWORD8 *pu1_pic_buf_mem_base, *pu1_ref_buf_mem_base;
+
+    u4_pic_buf_mem_used = 0;
+    pu1_pic_buf_mem_base = ps_dec->ps_mem_tab[MEM_REC_PIC_BUF_MGR].pv_base;
+
+    /* Display manager sits at the start of the mem tab */
+    ps_dec->pv_disp_buf_mgr = (void *)(pu1_pic_buf_mem_base
+                    + u4_pic_buf_mem_used);
+    u4_pic_buf_mem_used += sizeof(disp_mgr_t);
+    ih264_disp_mgr_init((disp_mgr_t *)ps_dec->pv_disp_buf_mgr);
+
+    /* Followed by the picture buffer manager; space for a mutex is */
+    /* reserved together with it                                    */
+    ps_dec->pv_pic_buf_mgr =
+                    (void *)(pu1_pic_buf_mem_base + u4_pic_buf_mem_used);
+    u4_pic_buf_mem_used += sizeof(buf_mgr_t) + ithread_get_mutex_lock_size();
+    ih264_buf_mgr_init((buf_mgr_t *)ps_dec->pv_pic_buf_mgr);
+
+    /* Followed by H264_MAX_REF_PICS * 2 picture descriptors */
+    ps_pic_buf = (pic_buffer_t *)(pu1_pic_buf_mem_base + u4_pic_buf_mem_used);
+    u4_pic_buf_mem_used += sizeof(struct pic_buffer_t)
+                    * (H264_MAX_REF_PICS * 2);
+
+    u4_luma_size = ps_dec->u2_frm_wd_y * ps_dec->u2_frm_ht_y;
+    u4_chroma_size = ps_dec->u2_frm_wd_uv * ps_dec->u2_frm_ht_uv;
+
+    {
+        if(ps_dec->u4_share_disp_buf == 1)
+        {
+            /* In case of buffers getting shared between application and library
+             there is no need of reference memtabs. Instead of setting the i4_size
+             to zero, it is reduced to a small i4_size to ensure that changes
+             in the code are minimal */
+            if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
+                            || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU)
+                            || (ps_dec->u1_chroma_format == IV_YUV_420P))
+            {
+                u4_luma_size = 64;
+            }
+
+            /* Only the 420SP_UV format shrinks the chroma region */
+            if(ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
+
+            {
+                u4_chroma_size = 64;
+            }
+
+        }
+    }
+
+    pu1_ref_buf_mem_base = ps_dec->ps_mem_tab[MEM_REC_REF_PIC].pv_base;
+    u4_ref_buf_mem_used = 0;
+
+    /* Allocate memory for reference buffers */
+    for(i = 0; i < u1_num_of_buf; i++)
+    {
+        UWORD32 u4_offset;
+        WORD32 buf_ret;
+        UWORD8 *pu1_luma, *pu1_chroma;
+
+        /* Luma region first, chroma region directly after it */
+        pu1_luma = pu1_ref_buf_mem_base + u4_ref_buf_mem_used;
+        u4_ref_buf_mem_used += u4_luma_size;
+        pu1_chroma = pu1_ref_buf_mem_base + u4_ref_buf_mem_used;
+        u4_ref_buf_mem_used += u4_chroma_size;
+
+        /* Offset to the start of the pic from the top left corner of the frame
+         buffer */
+
+        if((0 == ps_dec->u4_share_disp_buf)
+                        || (NULL == ps_dec->disp_bufs[i].buf[0]))
+        {
+            /* Decoder-owned memory: planes point into the regions reserved */
+            /* above                                                        */
+            UWORD32 pad_len_h, pad_len_v;
+
+            u4_offset = ps_dec->u2_frm_wd_y * (PAD_LEN_Y_V << 1) + PAD_LEN_Y_H;
+            ps_pic_buf->pu1_buf1 = (UWORD8 *)(pu1_luma) + u4_offset;
+
+            pad_len_h = MAX(PAD_LEN_UV_H, (PAD_LEN_Y_H >> 1));
+            pad_len_v = MAX(PAD_LEN_UV_V, PAD_LEN_Y_V);
+
+            u4_offset = ps_dec->u2_frm_wd_uv * pad_len_v + pad_len_h;
+
+            ps_pic_buf->pu1_buf2 = (UWORD8 *)(pu1_chroma) + u4_offset;
+            /* NOTE(review): pu1_buf3 is derived from a NULL base, so it is */
+            /* not a usable pointer - confirm that no consumer reads it in  */
+            /* this mode                                                    */
+            ps_pic_buf->pu1_buf3 = (UWORD8 *)(NULL) + u4_offset;
+
+        }
+        else
+        {
+            /* Application-supplied display buffer: luma points into        */
+            /* disp_bufs[i] and the applied offsets are remembered in       */
+            /* u4_ofst                                                      */
+            UWORD32 pad_len_h, pad_len_v;
+            u4_offset = ps_dec->u2_frm_wd_y * (PAD_LEN_Y_V << 1) + PAD_LEN_Y_H;
+            ps_pic_buf->pu1_buf1 = (UWORD8 *)ps_dec->disp_bufs[i].buf[0]
+                            + u4_offset;
+
+            ps_dec->disp_bufs[i].u4_ofst[0] = u4_offset;
+
+            if(ps_dec->u1_chroma_format == IV_YUV_420P)
+            {
+                /* 420P: chroma stays in the decoder's own chroma region */
+                pad_len_h = MAX(PAD_LEN_UV_H * YUV420SP_FACTOR,
+                                (PAD_LEN_Y_H >> 1));
+                pad_len_v = MAX(PAD_LEN_UV_V, PAD_LEN_Y_V);
+
+                u4_offset = ps_dec->u2_frm_wd_uv * pad_len_v + pad_len_h;
+                ps_pic_buf->pu1_buf2 = (UWORD8 *)(pu1_chroma) + u4_offset;
+                /* NOTE(review): NULL-based pointer, as in the branch above */
+                ps_pic_buf->pu1_buf3 = (UWORD8 *)(NULL) + u4_offset;
+
+                ps_dec->disp_bufs[i].u4_ofst[1] = u4_offset;
+                ps_dec->disp_bufs[i].u4_ofst[2] = u4_offset;
+
+            }
+            else
+            {
+                /* Other formats: both chroma pointers refer to the         */
+                /* application's second plane, disp_bufs[i].buf[1]          */
+                pad_len_h = MAX(PAD_LEN_UV_H * YUV420SP_FACTOR,
+                                (PAD_LEN_Y_H >> 1));
+                pad_len_v = MAX(PAD_LEN_UV_V, PAD_LEN_Y_V);
+
+                u4_offset = ps_dec->u2_frm_wd_uv * pad_len_v + pad_len_h;
+                ps_pic_buf->pu1_buf2 = (UWORD8 *)(ps_dec->disp_bufs[i].buf[1])
+                                + u4_offset;
+                ps_pic_buf->pu1_buf3 = (UWORD8 *)(ps_dec->disp_bufs[i].buf[1])
+                                + u4_offset;
+
+                ps_dec->disp_bufs[i].u4_ofst[1] = u4_offset;
+                ps_dec->disp_bufs[i].u4_ofst[2] = u4_offset;
+
+            }
+
+        }
+
+        ps_pic_buf->u2_frm_ht_y = ps_dec->u2_frm_ht_y;
+        ps_pic_buf->u2_frm_ht_uv = ps_dec->u2_frm_ht_uv;
+        ps_pic_buf->u2_frm_wd_y = ps_dec->u2_frm_wd_y;
+        ps_pic_buf->u2_frm_wd_uv = ps_dec->u2_frm_wd_uv;
+
+        ps_pic_buf->u1_pic_buf_id = i;
+
+        /* Register the descriptor with the buffer manager under id i */
+        buf_ret = ih264_buf_mgr_add((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+                                    ps_pic_buf, i);
+        if(0 != buf_ret)
+        {
+            ps_dec->i4_error_code = ERROR_BUF_MGR;
+            return ERROR_BUF_MGR;
+        }
+
+        ps_dec->apv_buf_id_pic_buf_map[i] = (void *)ps_pic_buf;
+        ps_pic_buf++;
+    }
+
+    /* NOTE(review): the capacity of both mem tabs is checked only here, */
+    /* after the descriptors above have already been written             */
+    if((u4_ref_buf_mem_used > ps_dec->ps_mem_tab[MEM_REC_REF_PIC].u4_mem_size) ||
+                    (u4_pic_buf_mem_used > ps_dec->ps_mem_tab[MEM_REC_PIC_BUF_MGR].u4_mem_size))
+    {
+        ps_dec->i4_error_code = ERROR_BUF_MGR;
+        return ERROR_BUF_MGR;
+    }
+
+    /* Shared mode: set the mapping entry of every buffer created above */
+    if(1 == ps_dec->u4_share_disp_buf)
+    {
+        for(i = 0; i < u1_num_of_buf; i++)
+            ps_dec->u4_disp_buf_mapping[i] = 1;
+    }
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_get_memory_dec_params \endif
+ *
+ * \brief
+ *    This function allocates memory required by Decoder.
+ *
+ * \param ps_dec: Pointer to dec_struct_t.
+ *
+ * \return
+ *    Returns i4_status as returned by MemManager.
+ * + ************************************************************************** + */ +//WORD16 i16_res_coeff[2 * 3600 * (MB_LUM_SIZE + 2 * MB_CHROM_SIZE)]; +//pred_info_t s_pred_frame[4000 * 60]; +//pred_info_t *ps_pred_frame; + +WORD16 ih264d_get_memory_dec_params(dec_struct_t * ps_dec) +{ + struct MemReq s_MemReq; + struct MemBlock *p_MemBlock; + + pred_info_t *ps_pred_frame; + dec_mb_info_t *ps_frm_mb_info; + dec_slice_struct_t *ps_dec_slice_buf; + UWORD8 *pu1_dec_mb_map, *pu1_recon_mb_map; + UWORD16 *pu2_slice_num_map; + + WORD16 *pi16_res_coeff; + WORD16 i16_status = 0; + UWORD8 uc_frmOrFld = (1 - ps_dec->ps_cur_sps->u1_frame_mbs_only_flag); + UWORD16 u4_luma_wd = ps_dec->u2_frm_wd_y; + UWORD16 u4_chroma_wd = ps_dec->u2_frm_wd_uv; + WORD8 c_i = 0; + dec_seq_params_t *ps_sps = ps_dec->ps_cur_sps; + UWORD32 u4_total_mbs = ps_sps->u2_total_num_of_mbs << uc_frmOrFld; + UWORD32 u4_wd_mbs = ps_dec->u2_frm_wd_in_mbs; + UWORD32 u4_ht_mbs = ps_dec->u2_frm_ht_in_mbs; + UWORD32 u4_blk_wd; + UWORD32 ui_size = 0; + UWORD32 u4_int_scratch_size = 0, u4_ref_pred_size = 0; + UWORD8 *pu1_buf; + + ps_dec->ps_deblk_pic = ps_dec->ps_mem_tab[MEM_REC_DEBLK_MB_INFO].pv_base; + + ps_dec->pu1_dec_mb_map = ps_dec->ps_mem_tab[MEM_REC_PARSE_MAP].pv_base; + + ps_dec->pu1_recon_mb_map = ps_dec->ps_mem_tab[MEM_REC_PROC_MAP].pv_base; + + ps_dec->pu2_slice_num_map = + ps_dec->ps_mem_tab[MEM_REC_SLICE_NUM_MAP].pv_base; + + ps_dec->ps_dec_slice_buf = ps_dec->ps_mem_tab[MEM_REC_SLICE_HDR].pv_base; + pu1_buf = (UWORD8 *)ps_dec->ps_dec_slice_buf; + pu1_buf += sizeof(dec_slice_struct_t) * u4_total_mbs; + ps_dec->pv_map_ref_idx_to_poc_buf = (void *)pu1_buf; + + ps_dec->ps_frm_mb_info = ps_dec->ps_mem_tab[MEM_REC_MB_INFO].pv_base; + memset(ps_dec->ps_frm_mb_info, 0, ps_dec->ps_mem_tab[MEM_REC_MB_INFO].u4_mem_size); + + ps_dec->ps_pred = ps_dec->ps_mem_tab[MEM_REC_PRED_INFO].pv_base; + + ps_dec->pi2_coeff_data = ps_dec->ps_mem_tab[MEM_REC_COEFF_DATA].pv_base; + + ps_dec->pv_pic_tu_coeff_data = (void 
*)(ps_dec->pi2_coeff_data + MB_LUM_SIZE); + + /*scratch memory allocations*/ + { + UWORD8 *pu1_scratch_mem_base; + UWORD32 u4_scratch_mem_used; + + pu1_scratch_mem_base = + ps_dec->ps_mem_tab[MEM_REC_INTERNAL_SCRATCH].pv_base; + u4_scratch_mem_used = 0; + + ps_dec->ppv_map_ref_idx_to_poc = (void *)(pu1_scratch_mem_base + + u4_scratch_mem_used); + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + u4_scratch_mem_used += ((TOTAL_LIST_ENTRIES + PAD_MAP_IDX_POC) + * sizeof(void *)); + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + memset(ps_dec->ppv_map_ref_idx_to_poc, 0, (TOTAL_LIST_ENTRIES + PAD_MAP_IDX_POC) + * sizeof(void *)); + + ps_dec->p_cabac_ctxt_table_t = (void *)(pu1_scratch_mem_base + + u4_scratch_mem_used); + u4_scratch_mem_used += (sizeof(bin_ctxt_model_t) * NUM_CABAC_CTXTS); + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + + ps_dec->ps_left_mb_ctxt_info = (void *)(pu1_scratch_mem_base + + u4_scratch_mem_used); + u4_scratch_mem_used += sizeof(ctxt_inc_mb_info_t); + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + + ps_dec->pu4_defI_wts_ofsts = (void *)(pu1_scratch_mem_base + + u4_scratch_mem_used); + u4_scratch_mem_used += + sizeof(UWORD32) + * (ps_sps->u1_num_ref_frames + * ps_sps->u1_num_ref_frames); + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + + ps_dec->pu1_ref_buff = (void *)(pu1_scratch_mem_base + + u4_scratch_mem_used); + u4_scratch_mem_used += MAX_REF_BUF_SIZE; + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + ps_dec->pi2_pred1 = + (void *)(pu1_scratch_mem_base + u4_scratch_mem_used); + u4_scratch_mem_used += ((sizeof(WORD16)) * PRED_BUFFER_WIDTH + * PRED_BUFFER_HEIGHT); + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + + ps_dec->pu1_temp_mc_buffer = (void *)(pu1_scratch_mem_base + + u4_scratch_mem_used); + u4_scratch_mem_used += sizeof(UWORD8) * (MB_LUM_SIZE); + + ps_dec->ps_parse_mb_data = (void *)(pu1_scratch_mem_base + + u4_scratch_mem_used); + u4_scratch_mem_used += sizeof(parse_pmbarams_t) + * 
(ps_dec->u1_recon_mb_grp); + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + + ps_dec->ps_parse_part_params = (void *)(pu1_scratch_mem_base + + u4_scratch_mem_used); + u4_scratch_mem_used += sizeof(parse_part_params_t) + * ((ps_dec->u1_recon_mb_grp) << 4); + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + + ps_dec->ps_dpb_mgr->ps_init_dpb[0][0] = + (struct pic_buffer_t*)(pu1_scratch_mem_base + + u4_scratch_mem_used); + u4_scratch_mem_used += 2 * MAX_REF_BUFS * sizeof(struct pic_buffer_t); + + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + ps_dec->ps_dpb_mgr->ps_init_dpb[1][0] = + (struct pic_buffer_t*)(pu1_scratch_mem_base + u4_scratch_mem_used); + u4_scratch_mem_used += 2 * MAX_REF_BUFS * sizeof(struct pic_buffer_t); + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + ps_dec->pu4_mbaff_wt_mat = (UWORD32 *)(pu1_scratch_mem_base + u4_scratch_mem_used); + + u4_scratch_mem_used += (sizeof(UWORD32) * 3 + * (MAX_FRAMES * MAX_FRAMES)) + << 3; + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + + ps_dec->pu4_wts_ofsts_mat = (UWORD32 *)(pu1_scratch_mem_base + u4_scratch_mem_used); + u4_scratch_mem_used += sizeof(UWORD32) * 2 * 3 + * (MAX_FRAMES * MAX_FRAMES); + u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); + } + /********************************************************************/ + /* check whether deblk memory used is less than the scratch buffer */ + /* and assign deblocking pointers in the the reference buffers */ + /********************************************************************/ + { + /************************************************************/ + /* Post allocation Initialisations */ + /************************************************************/ + memset(ps_dec->ppv_map_ref_idx_to_poc, 0, + (TOTAL_LIST_ENTRIES + PAD_MAP_IDX_POC) * sizeof(void *)); + ps_dec->ppv_map_ref_idx_to_poc += OFFSET_MAP_IDX_POC; + + { + UWORD32 u4_ref_size; + u4_ref_size = MAX_REF_BUF_SIZE; + + { + + ps_dec->ps_parse_cur_slice = 
&(ps_dec->ps_dec_slice_buf[0]); + ps_dec->ps_decode_cur_slice = &(ps_dec->ps_dec_slice_buf[0]); + ps_dec->ps_computebs_cur_slice = &(ps_dec->ps_dec_slice_buf[0]); + ps_dec->ps_parse_cur_slice->slice_header_done = 0; + + ps_dec->ps_pred_start = ps_dec->ps_pred; + ps_dec->u4_ref_buf_size = u4_ref_size; + } + } + + { + UWORD8 i; + struct pic_buffer_t *ps_init_dpb; + ps_init_dpb = ps_dec->ps_dpb_mgr->ps_init_dpb[0][0]; + for(i = 0; i < 2 * MAX_REF_BUFS; i++) + { + ps_init_dpb->pu1_buf1 = NULL; + ps_init_dpb->u1_long_term_frm_idx = MAX_REF_BUFS + 1; + ps_dec->ps_dpb_mgr->ps_init_dpb[0][i] = ps_init_dpb; + ps_dec->ps_dpb_mgr->ps_mod_dpb[0][i] = ps_init_dpb; + ps_init_dpb++; + } + + ps_init_dpb = ps_dec->ps_dpb_mgr->ps_init_dpb[1][0]; + for(i = 0; i < 2 * MAX_REF_BUFS; i++) + { + ps_init_dpb->pu1_buf1 = NULL; + ps_init_dpb->u1_long_term_frm_idx = MAX_REF_BUFS + 1; + ps_dec->ps_dpb_mgr->ps_init_dpb[1][i] = ps_init_dpb; + ps_dec->ps_dpb_mgr->ps_mod_dpb[1][i] = ps_init_dpb; + ps_init_dpb++; + } + } + } + + /*persistent memory allocations*/ + + { + UWORD8 *pu1_persitent_mem_base; + UWORD32 u4_persistent_mem_used; + + pu1_persitent_mem_base = + ps_dec->ps_mem_tab[MEM_REC_INTERNAL_PERSIST].pv_base; + u4_persistent_mem_used = 0; + + ps_dec->ps_deblk_top_mb = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += ((u4_wd_mbs + * sizeof(deblkmb_neighbour_t)) << uc_frmOrFld); + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->ps_left_mvpred_addr = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += (sizeof(neighbouradd_t) << 2); + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->p_ctxt_inc_mb_map = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += ((sizeof(ctxt_inc_mb_info_t)) + * (((u4_wd_mbs + 1) << uc_frmOrFld) + 1)); + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->ps_mv_p[0] = (void 
*)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += (sizeof(mv_pred_t) * ps_dec->u1_recon_mb_grp + * 16); + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->ps_mv_p[1] = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += (sizeof(mv_pred_t) * ps_dec->u1_recon_mb_grp + * 16); + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + { + UWORD8 i; + for(i = 0; i < MV_SCRATCH_BUFS; i++) + { + + ps_dec->ps_mv_top_p[i] = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += (sizeof(mv_pred_t) + * ps_dec->u1_recon_mb_grp * 4); + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + } + } + + { + UWORD32 u4_numRows = MB_SIZE << 1; + + /* Allocate memory for ping, pong and left reconstruction buffers */ + u4_blk_wd = ((ps_dec->u1_recon_mb_grp << 4) >> 1) + 8; + + ps_dec->pu1_y_scratch[0] = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->pu1_y_scratch[1] = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + u4_numRows = BLK8x8SIZE << 1; + u4_blk_wd = ((ps_dec->u1_recon_mb_grp << 3) >> 1) + 8; + + ps_dec->pu1_u_scratch[0] = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->pu1_v_scratch[0] = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->pu1_u_scratch[1] = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += 
sizeof(UWORD8) * u4_numRows * u4_blk_wd; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->pu1_v_scratch[1] = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd; + u4_persistent_mem_used += 32; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + } + + ps_dec->pu1_y_intra_pred_line = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += sizeof(UWORD8) * (u4_luma_wd + 16) * 2; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->pu1_u_intra_pred_line = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += sizeof(UWORD8) * (u4_chroma_wd + 16) * 2 + * YUV420SP_FACTOR; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->pu1_v_intra_pred_line = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += sizeof(UWORD8) * (u4_chroma_wd + 16) * 2; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->ps_nbr_mb_row = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + if(ps_dec->u1_separate_parse) + { + u4_persistent_mem_used += sizeof(mb_neigbour_params_t) + * ((u4_wd_mbs + 1) * u4_ht_mbs); + memset(ps_dec->ps_nbr_mb_row, 0, sizeof(mb_neigbour_params_t) + * ((u4_wd_mbs + 1) * u4_ht_mbs)); + } + else + { + u4_persistent_mem_used += sizeof(mb_neigbour_params_t) + * ((u4_wd_mbs + 1) << uc_frmOrFld); + memset(ps_dec->ps_nbr_mb_row, 0, sizeof(mb_neigbour_params_t) + * ((u4_wd_mbs + 1) << uc_frmOrFld)); + + } + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + ps_dec->s_pad_mgr.pu1_row_y = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += ps_dec->u2_frm_wd_y; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->s_pad_mgr.pu1_row_u = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += 
ps_dec->u2_frm_wd_uv; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->s_pad_mgr.pu1_row_v = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += ps_dec->u2_frm_wd_uv; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->s_pad_mgr.pu1_mb_y = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += ((MB_SIZE + 4) << uc_frmOrFld) * PAD_LEN_Y_H; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->s_pad_mgr.pu1_mb_u = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += ((BLK8x8SIZE + 2) << uc_frmOrFld) + * PAD_LEN_UV_H; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + + ps_dec->s_pad_mgr.pu1_mb_v = (void *)(pu1_persitent_mem_base + + u4_persistent_mem_used); + u4_persistent_mem_used += ((BLK8x8SIZE + 2) << uc_frmOrFld) + * PAD_LEN_UV_H; + u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); + } + + /*Post allocation initializations*/ + memset(ps_dec->pu1_y_intra_pred_line, 0, + sizeof(UWORD8) * u4_luma_wd + PAD_LEN_Y_H); + memset(ps_dec->pu1_u_intra_pred_line, 0, + sizeof(UWORD8) * u4_chroma_wd + PAD_LEN_UV_H); + memset(ps_dec->pu1_v_intra_pred_line, 0, + sizeof(UWORD8) * u4_chroma_wd + PAD_LEN_UV_H); + + /* 0th entry of CtxtIncMbMap will be always be containing default values + for CABAC context representing MB not available */ + ps_dec->p_ctxt_inc_mb_map += 1; + /* Post allocation Increment Actions */ + + /***************************************************************************/ + /*Initialize cabac context pointers for every SE that has fixed contextIdx */ + /***************************************************************************/ + { + bin_ctxt_model_t * const p_cabac_ctxt_table_t = + ps_dec->p_cabac_ctxt_table_t; + bin_ctxt_model_t * * p_coeff_abs_level_minus1_t = + ps_dec->p_coeff_abs_level_minus1_t; + bin_ctxt_model_t * * p_cbf_t = ps_dec->p_cbf_t; + + 
ps_dec->p_mb_field_dec_flag_t = p_cabac_ctxt_table_t + + MB_FIELD_DECODING_FLAG; + ps_dec->p_prev_intra4x4_pred_mode_flag_t = p_cabac_ctxt_table_t + + PREV_INTRA4X4_PRED_MODE_FLAG; + ps_dec->p_rem_intra4x4_pred_mode_t = p_cabac_ctxt_table_t + + REM_INTRA4X4_PRED_MODE; + ps_dec->p_intra_chroma_pred_mode_t = p_cabac_ctxt_table_t + + INTRA_CHROMA_PRED_MODE; + ps_dec->p_mb_qp_delta_t = p_cabac_ctxt_table_t + MB_QP_DELTA; + ps_dec->p_ref_idx_t = p_cabac_ctxt_table_t + REF_IDX; + ps_dec->p_mvd_x_t = p_cabac_ctxt_table_t + MVD_X; + ps_dec->p_mvd_y_t = p_cabac_ctxt_table_t + MVD_Y; + p_cbf_t[0] = p_cabac_ctxt_table_t + CBF + 0; + p_cbf_t[1] = p_cabac_ctxt_table_t + CBF + 4; + p_cbf_t[2] = p_cabac_ctxt_table_t + CBF + 8; + p_cbf_t[3] = p_cabac_ctxt_table_t + CBF + 12; + p_cbf_t[4] = p_cabac_ctxt_table_t + CBF + 16; + ps_dec->p_cbp_luma_t = p_cabac_ctxt_table_t + CBP_LUMA; + ps_dec->p_cbp_chroma_t = p_cabac_ctxt_table_t + CBP_CHROMA; + + p_coeff_abs_level_minus1_t[LUMA_DC_CTXCAT] = p_cabac_ctxt_table_t + + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_0_OFFSET; + + p_coeff_abs_level_minus1_t[LUMA_AC_CTXCAT] = p_cabac_ctxt_table_t + + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_1_OFFSET; + + p_coeff_abs_level_minus1_t[LUMA_4X4_CTXCAT] = p_cabac_ctxt_table_t + + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_2_OFFSET; + + p_coeff_abs_level_minus1_t[CHROMA_DC_CTXCAT] = p_cabac_ctxt_table_t + + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_3_OFFSET; + + p_coeff_abs_level_minus1_t[CHROMA_AC_CTXCAT] = p_cabac_ctxt_table_t + + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_4_OFFSET; + + p_coeff_abs_level_minus1_t[LUMA_8X8_CTXCAT] = p_cabac_ctxt_table_t + + COEFF_ABS_LEVEL_MINUS1_8X8 + + COEFF_ABS_LEVEL_CAT_5_OFFSET; + + /********************************************************/ + /* context for the high profile related syntax elements */ + /* This is maintained seperately in s_high_profile */ + /********************************************************/ + { + + 
ps_dec->s_high_profile.ps_transform8x8_flag = p_cabac_ctxt_table_t + + TRANSFORM_SIZE_8X8_FLAG; + + ps_dec->s_high_profile.ps_sigcoeff_8x8_frame = p_cabac_ctxt_table_t + + SIGNIFICANT_COEFF_FLAG_8X8_FRAME; + + ps_dec->s_high_profile.ps_last_sigcoeff_8x8_frame = + p_cabac_ctxt_table_t + + LAST_SIGNIFICANT_COEFF_FLAG_8X8_FRAME; + + ps_dec->s_high_profile.ps_coeff_abs_levelminus1 = + p_cabac_ctxt_table_t + COEFF_ABS_LEVEL_MINUS1_8X8; + + ps_dec->s_high_profile.ps_sigcoeff_8x8_field = p_cabac_ctxt_table_t + + SIGNIFICANT_COEFF_FLAG_8X8_FIELD; + + ps_dec->s_high_profile.ps_last_sigcoeff_8x8_field = + p_cabac_ctxt_table_t + + LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD; + + } + + } + return (i16_status); +} + +/*! + ************************************************************************** + * \if Function name : ih264d_create_mv_bank \endif + * + * \brief + * This function creates the MV bank. It initialises the MV buffer manager + * at the start of the MEM_REC_MV_BUF_MGR memory record and then, for each + * of (u1_max_dec_frame_buffering + 1) pictures, registers one col_mv_buf_t + * holding a col-zero-flag buffer (taken from MEM_REC_MV_BUF_MGR) and an + * mv_pred_t array (taken from MEM_REC_MVBANK). + * + * \param pv_dec : Decoder context; used as a dec_struct_t pointer. + * \param ui_width : Frame width; sizes the per-picture buffers. + * \param ui_height : Frame height; sizes the per-picture buffers. 
+ * + * \return + * OK on success. ERROR_BUF_MGR (also stored in ps_dec->i4_error_code) + * if ih264_buf_mgr_add() fails or if either memory record is smaller + * than the space consumed. + * + ************************************************************************** + */ +WORD32 ih264d_create_mv_bank(void *pv_dec, + UWORD32 ui_width, + UWORD32 ui_height) +{ + UWORD8 i; + UWORD32 col_flag_buffer_size, mvpred_buffer_size; + UWORD8 *pu1_mv_buf_mgr_base, *pu1_mv_bank_base; + UWORD32 u4_mv_buf_mgr_mem_used, u4_mv_bank_mem_used; + col_mv_buf_t *ps_col_mv; + mv_pred_t *ps_mv; + UWORD8 *pu1_col_zero_flag_buf; + dec_struct_t *ps_dec = (dec_struct_t *)pv_dec; + WORD32 buf_ret; + + pu1_mv_buf_mgr_base = ps_dec->ps_mem_tab[MEM_REC_MV_BUF_MGR].pv_base; + u4_mv_buf_mgr_mem_used = 0; + /* Col-zero-flag storage per picture: one byte per 16 pixels */ col_flag_buffer_size = ((ui_width * ui_height) >> 4); + + pu1_mv_bank_base = ps_dec->ps_mem_tab[MEM_REC_MVBANK].pv_base; + u4_mv_bank_mem_used = 0; + /* MV storage per picture: one mv_pred_t per 16 pixels over (height + PAD_MV_BANK_ROW) rows */ mvpred_buffer_size = sizeof(mv_pred_t) + * ((ui_width * (ui_height + PAD_MV_BANK_ROW)) >> 4); + + /* The buffer manager sits at the start of the record; sizeof(buf_mgr_t) plus the mutex lock size is reserved for it */ ps_dec->pv_mv_buf_mgr = (void *)(pu1_mv_buf_mgr_base + u4_mv_buf_mgr_mem_used); + u4_mv_buf_mgr_mem_used += sizeof(buf_mgr_t) + ithread_get_mutex_lock_size(); + ih264_buf_mgr_init((buf_mgr_t *)ps_dec->pv_mv_buf_mgr); + + /* Next comes a table of H264_MAX_REF_PICS * 2 col_mv_buf_t descriptors; the running offset is then aligned with ALIGN128 */ ps_col_mv = (col_mv_buf_t *)(pu1_mv_buf_mgr_base + u4_mv_buf_mgr_mem_used); + u4_mv_buf_mgr_mem_used += sizeof(col_mv_buf_t) * (H264_MAX_REF_PICS * 2); + u4_mv_buf_mgr_mem_used = ALIGN128(u4_mv_buf_mgr_mem_used); + + /* NOTE(review): the loop count comes from u1_max_dec_frame_buffering while the descriptor table above has H264_MAX_REF_PICS * 2 
entries - confirm the former can never exceed the latter */ for(i = 0 ; i < ps_dec->u1_max_dec_frame_buffering + 1; i++) + { + pu1_col_zero_flag_buf = pu1_mv_buf_mgr_base + u4_mv_buf_mgr_mem_used; + u4_mv_buf_mgr_mem_used += col_flag_buffer_size; + + ps_mv = (mv_pred_t *)(pu1_mv_bank_base + u4_mv_bank_mem_used); + u4_mv_bank_mem_used += mvpred_buffer_size; + + /* Zero the first OFFSET_MV_BANK_ROW rows' worth of MVs, then point pv_mv just past them */ memset(ps_mv, 0, ((ui_width*OFFSET_MV_BANK_ROW) >> 4) * sizeof(mv_pred_t)); + ps_mv += (ui_width*OFFSET_MV_BANK_ROW) >> 4; + + ps_col_mv->pv_col_zero_flag = (void *)pu1_col_zero_flag_buf; + ps_col_mv->pv_mv = (void *)ps_mv; + /* Register this descriptor with the MV buffer manager under id i */ buf_ret = ih264_buf_mgr_add((buf_mgr_t *)ps_dec->pv_mv_buf_mgr, ps_col_mv, i); + if(0 != buf_ret) + { + ps_dec->i4_error_code = 
ERROR_BUF_MGR; + return ERROR_BUF_MGR; + } + ps_col_mv++; + } + + /* NOTE(review): this size check runs only after the loop above has already carved out and memset the buffers, so an undersized memory record would have been written past before it is detected - consider validating the sizes first */ if((u4_mv_buf_mgr_mem_used > ps_dec->ps_mem_tab[MEM_REC_MV_BUF_MGR].u4_mem_size) || + (u4_mv_bank_mem_used > ps_dec->ps_mem_tab[MEM_REC_MVBANK].u4_mem_size)) + { + ps_dec->i4_error_code = ERROR_BUF_MGR; + return ERROR_BUF_MGR; + } + + return OK; + +} + + +/* Scatters the packed levels in ps_tu_4x4->ai2_level into pi2_out_coeff_data. Set bits of u2_sig_coeff_map are visited from the highest set bit downwards; each bit index is mapped through pu1_inv_scan to the output position, and the next packed level is stored there. Output positions whose bit is clear are not written. */ void ih264d_unpack_coeff4x4_dc_4x4blk(tu_sblk4x4_coeff_data_t *ps_tu_4x4, + WORD16 *pi2_out_coeff_data, + UWORD8 *pu1_inv_scan) +{ + UWORD16 u2_sig_coeff_map = ps_tu_4x4->u2_sig_coeff_map; + WORD32 idx; + WORD16 *pi2_coeff_data = &ps_tu_4x4->ai2_level[0]; + + while(u2_sig_coeff_map) + { + /* assumes CLZ counts leading zeros over 32 bits, so 31 - CLZ is the index of the highest set bit - TODO confirm against the platform macro */ idx = CLZ(u2_sig_coeff_map); + + idx = 31 - idx; + RESET_BIT(u2_sig_coeff_map,idx); + + idx = pu1_inv_scan[idx]; + pi2_out_coeff_data[idx] = *pi2_coeff_data++; + + } +} diff --git a/decoder/ih264d_utils.h b/decoder/ih264d_utils.h new file mode 100755 index 0000000..a1a64d5 --- /dev/null +++ b/decoder/ih264d_utils.h @@ -0,0 +1,101 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef _IH264D_UTILS_H_ +#define _IH264D_UTILS_H_ +/*! 
+************************************************************************** +* \file ih264d_utils.h +* +* \brief +* Contains declaration of routines +* that handle of start and end of pic processing +* +* \date +* 19/12/2002 +* +* \author AI +************************************************************************** +*/ +#include "ih264d_defs.h" +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_structs.h" +#include "ih264d_parse_cavlc.h" + +void pad_frm_buff_vert(dec_struct_t *ps_dec); + +UWORD8 ih264d_is_end_of_pic(UWORD16 u2_frame_num, + UWORD8 u1_nal_ref_idc, + pocstruct_t *ps_cur_poc, + pocstruct_t *ps_prev_poc, + dec_slice_params_t * ps_prev_slice, + UWORD8 u1_pic_order_cnt_type, + UWORD8 u1_nal_unit_type, + UWORD32 u4_idr_pic_id, + UWORD8 u1_field_pic_flag, + UWORD8 u1_bottom_field_flag); + +WORD32 ih264d_end_of_pic_processing(dec_struct_t * ps_dec); + +WORD32 ih264d_init_pic(dec_struct_t *ps_dec, + UWORD16 u2_frame_num, + WORD32 i4_poc, + dec_pic_params_t * ps_pps); + +WORD32 ih264d_end_of_pic_processing(dec_struct_t * ps_dec); +WORD32 ih264d_decode_pic_order_cnt(UWORD8 u1_is_idr_slice, + UWORD32 u2_frame_num, + pocstruct_t *ps_prev_poc, + pocstruct_t *ps_cur_poc, + dec_slice_params_t *ps_cur_slice, + dec_pic_params_t * ps_pps, + UWORD8 u1_nal_ref_idc, + UWORD8 u1_bottom_field_flag, + UWORD8 u1_field_pic_flag, + WORD32 *pi4_poc); +void ih264d_release_display_bufs(dec_struct_t *ps_dec); +WORD32 ih264d_assign_display_seq(dec_struct_t *ps_dec); +void ih264d_assign_pic_num(dec_struct_t *ps_dec); + +void ih264d_unpack_coeff4x4_dc_4x4blk(tu_sblk4x4_coeff_data_t *ps_tu_4x4, + WORD16 *pi2_out_coeff_data, + UWORD8 *pu1_inv_scan); + +WORD32 ih264d_update_qp(dec_struct_t * ps_dec, const WORD8 i1_qp); +WORD32 ih264d_decode_gaps_in_frame_num(dec_struct_t *ps_dec, + UWORD16 u2_frame_num); + +WORD32 ih264d_get_next_display_field(dec_struct_t * ps_dec, + ivd_out_bufdesc_t *ps_out_buffer, + 
ivd_get_display_frame_op_t *pv_disp_op); + +void ih264d_release_display_field(dec_struct_t *ps_dec, + ivd_get_display_frame_op_t *pv_disp_op); +void ih264d_close_video_decoder(iv_obj_t *iv_obj_t); +WORD32 ih264d_get_dpb_size_new(UWORD32 u4_level_idc, + UWORD32 width, + UWORD32 height); +WORD32 ih264d_get_next_nal_unit(UWORD8 *pu1_buf, + UWORD32 u4_cur_pos, + UWORD32 u4_max_ofst, + UWORD32 *pu4_length_of_start_code); + +#endif /* _IH264D_UTILS_H_ */ diff --git a/decoder/ih264d_vui.c b/decoder/ih264d_vui.c new file mode 100755 index 0000000..87276bd --- /dev/null +++ b/decoder/ih264d_vui.c @@ -0,0 +1,233 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* */ +/* File Name : ih264d_vui.c */ +/* */ +/* Description : This file contains routines to parse the VUI and HRD */ +/* parameters */ +/* */ +/* List of Functions : ih264d_parse_hrd_parametres */ +/* ih264d_parse_vui_parametres */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 25 05 2005 NS Draft */ +/* */ +/*****************************************************************************/ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_vui.h" +#include "ih264d_bitstrm.h" +#include "ih264d_parse_cavlc.h" +#include "ih264d_structs.h" +#include "ih264d_error_handler.h" + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_hrd_parametres */ +/* */ +/* Description : This function parses the HRD parameters into an hrd_t */ +/* Inputs : ps_hrd pointer to HRD params (filled by this call) */ +/* ps_bitstrm Bitstream */ +/* Globals : None */ +/* Processing : Reads the HRD fields in bitstream order */ +/* Outputs : Fields of *ps_hrd */ +/* Returns : OK on success, ERROR_INV_SPS_PPS_T if the decoded */ +/* cpb count is greater than 31 */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 06 05 2002 NS Draft */ +/* */ +/*****************************************************************************/ + +WORD32 ih264d_parse_hrd_parametres(hrd_t *ps_hrd, + dec_bit_stream_t *ps_bitstrm) +{ + UWORD8 u1_index; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + + /* The stored count is the coded ue(v) value plus one */ ps_hrd->u4_cpb_cnt = 1 + + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf); + /* Reject counts above 
31 before they are used to index the per-CPB arrays of hrd_t below */ if(ps_hrd->u4_cpb_cnt > 31) + return ERROR_INV_SPS_PPS_T; + ps_hrd->u1_bit_rate_scale = ih264d_get_bits_h264(ps_bitstrm, 4); + ps_hrd->u1_cpb_size_scale = ih264d_get_bits_h264(ps_bitstrm, 4); + + /* One bit-rate / cpb-size / cbr-flag triple per CPB */ for(u1_index = 0; u1_index < (UWORD8)ps_hrd->u4_cpb_cnt; u1_index++) + { + 
ps_hrd->u4_bit_rate[u1_index] = 1 + + ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_hrd->u4_cpb_size[u1_index] = 1 + + ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_hrd->u1_cbr_flag[u1_index] = ih264d_get_bits_h264(ps_bitstrm, 1); + } + + /* The next three fields are stored as the 5-bit coded value plus one; the last one is stored as read */ ps_hrd->u1_initial_cpb_removal_delay = 1 + + ih264d_get_bits_h264(ps_bitstrm, 5); + ps_hrd->u1_cpb_removal_delay_length = 1 + + ih264d_get_bits_h264(ps_bitstrm, 5); + ps_hrd->u1_dpb_output_delay_length = 1 + + ih264d_get_bits_h264(ps_bitstrm, 5); + ps_hrd->u1_time_offset_length = ih264d_get_bits_h264(ps_bitstrm, 5); + + return OK; +} + +/*****************************************************************************/ +/* */ +/* Function Name : ih264d_parse_vui_parametres */ +/* */ +/* Description : This function parses the VUI parameters into a vui_t */ +/* Inputs : ps_vu4 pointer to VUI params (filled by this call) */ +/* ps_bitstrm Bitstream */ +/* Globals : None */ +/* Processing : Reads each 1-bit presence flag and, when it is set, */ +/* the fields it guards; stores them in *ps_vu4 */ +/* Outputs : Fields of *ps_vu4 */ +/* Returns : OK on success, otherwise the error returned by */ +/* ih264d_parse_hrd_parametres */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 06 05 2002 NS Draft */ +/* */ +/*****************************************************************************/ + +WORD32 ih264d_parse_vui_parametres(vui_t *ps_vu4, + dec_bit_stream_t *ps_bitstrm) +{ + /* Despite the u4_ prefix this scratch variable is 8 bits wide; only 1-bit and 8-bit reads are stored in it */ UWORD8 u4_bits; + UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst; + UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer; + WORD32 ret; + + /* Flag guarding the aspect-ratio fields */ u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1); + if(u4_bits) + { + u4_bits = 
ih264d_get_bits_h264(ps_bitstrm, 8); + ps_vu4->u1_aspect_ratio_idc = (UWORD8)u4_bits; + /* Explicit SAR width/height follow only for the VUI_EXTENDED_SAR idc value */ if(VUI_EXTENDED_SAR == u4_bits) + { + ps_vu4->u2_sar_width = ih264d_get_bits_h264(ps_bitstrm, 16); + ps_vu4->u2_sar_height = ih264d_get_bits_h264(ps_bitstrm, 16); + } + } + + /* Flag guarding the overscan field */ u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1); + if(u4_bits) + { + ps_vu4->u1_overscan_appropriate_flag = ih264d_get_bits_h264( + ps_bitstrm, 1); + } + /* Flag guarding the video-format / colour-description fields */ u4_bits = 
ih264d_get_bits_h264(ps_bitstrm, 1); + if(u4_bits) + { + ps_vu4->u1_video_format = ih264d_get_bits_h264(ps_bitstrm, 3); + ps_vu4->u1_video_full_range_flag = ih264d_get_bits_h264(ps_bitstrm, + 1); + /* Nested flag guarding the three 8-bit colour fields */ u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1); + if(u4_bits) + { + ps_vu4->u1_colour_primaries = ih264d_get_bits_h264(ps_bitstrm, + 8); + ps_vu4->u1_tfr_chars = ih264d_get_bits_h264(ps_bitstrm, 8); + ps_vu4->u1_matrix_coeffs = ih264d_get_bits_h264(ps_bitstrm, 8); + } + } + + /* Flag guarding the two ue(v) u1_cr_* fields; the ue(v) results are narrowed to UWORD8 on store */ u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1); + if(u4_bits) + { + ps_vu4->u1_cr_top_field = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_vu4->u1_cr_bottom_field = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + } + + /* Flag guarding the timing fields */ u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1); + if(u4_bits) + { + ps_vu4->u4_num_units_in_tick = ih264d_get_bits_h264(ps_bitstrm, 32); + ps_vu4->u4_time_scale = ih264d_get_bits_h264(ps_bitstrm, 32); + ps_vu4->u1_fixed_frame_rate_flag = ih264d_get_bits_h264(ps_bitstrm, + 1); + } + + /* NAL HRD parameters: presence flag is recorded, then the HRD block is parsed if present */ u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1); + ps_vu4->u1_nal_hrd_params_present = u4_bits; + if(u4_bits) + { + ret = ih264d_parse_hrd_parametres(&ps_vu4->s_nal_hrd, ps_bitstrm); + if(ret != OK) + return ret; + } + /* VCL HRD parameters: same pattern */ u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1); + ps_vu4->u1_vcl_hrd_params_present = u4_bits; + if(u4_bits) + { + ret = ih264d_parse_hrd_parametres(&ps_vu4->s_vcl_hrd, ps_bitstrm); + if(ret != OK) + return ret; + } + + /* u4_bits still holds the VCL HRD presence flag here, so this reads the low-delay flag when either HRD block was present */ if(ps_vu4->u1_nal_hrd_params_present || u4_bits) + { + ps_vu4->u1_low_delay_hrd_flag = ih264d_get_bits_h264(ps_bitstrm, 1); + } + 
ps_vu4->u1_pic_struct_present_flag = ih264d_get_bits_h264(ps_bitstrm, 1); + + /* Flag guarding the restriction fields (MV limits, reorder frames, frame buffering) */ u4_bits = ih264d_get_bits_h264(ps_bitstrm, 1); + if(u4_bits) + { + ps_vu4->u1_mv_over_pic_boundaries_flag = ih264d_get_bits_h264( + ps_bitstrm, 1); + ps_vu4->u4_max_bytes_per_pic_denom = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_vu4->u4_max_bits_per_mb_denom = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_vu4->u4_log2_max_mv_length_horz = 
ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_vu4->u4_log2_max_mv_length_vert = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_vu4->u4_num_reorder_frames = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + ps_vu4->u4_max_dec_frame_buffering = ih264d_uev(pu4_bitstrm_ofst, + pu4_bitstrm_buf); + } + else + { + /* Setting this to a large value if not present. NOTE(review): u4_max_dec_frame_buffering is not assigned on this path and keeps whatever value *ps_vu4 already held */ + ps_vu4->u4_num_reorder_frames = 64; + } + + return OK; +} diff --git a/decoder/ih264d_vui.h b/decoder/ih264d_vui.h new file mode 100755 index 0000000..e380a5b --- /dev/null +++ b/decoder/ih264d_vui.h @@ -0,0 +1,96 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* */ +/* File Name : ih264d_vui.h */ +/* */ +/* Description : This file contains the structures and the prototype */ +/* used to parse VUI parameters */ +/* */ +/* List of Functions : ih264d_parse_vui_parametres */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 25 05 2005 NS Draft */ +/* */ +/*****************************************************************************/ + +#ifndef _IH264D_VUI_H_ +#define _IH264D_VUI_H_ + +#include "ih264_typedefs.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264d_bitstrm.h" + +/* aspect_ratio_idc value for which the parser reads explicit 16-bit SAR width and height */ #define VUI_EXTENDED_SAR 255 + +/* HRD parameters as stored by ih264d_parse_hrd_parametres */ typedef struct +{ + /* coded ue(v) value plus one; the parser rejects values above 31 */ UWORD32 u4_cpb_cnt; + /* 4-bit fields */ UWORD8 u1_bit_rate_scale; + UWORD8 u1_cpb_size_scale; + /* per-CPB entries, indexed 0 .. u4_cpb_cnt - 1; bit rate and cpb size are stored as coded ue(v) value plus one */ UWORD32 u4_bit_rate[32]; + UWORD32 u4_cpb_size[32]; + UWORD8 u1_cbr_flag[32]; + /* the next three are stored as the 5-bit coded value plus one */ UWORD8 u1_initial_cpb_removal_delay; + UWORD8 u1_cpb_removal_delay_length; + UWORD8 u1_dpb_output_delay_length; + /* 5-bit field, stored as read */ UWORD8 u1_time_offset_length; +} hrd_t; + +/* VUI parameters as stored by ih264d_parse_vui_parametres. Fields guarded by a presence flag in the bitstream are written only when that flag is set. 
*/ typedef struct +{ + UWORD8 u1_aspect_ratio_idc; + /* written only when u1_aspect_ratio_idc == VUI_EXTENDED_SAR */ UWORD16 u2_sar_width; + UWORD16 u2_sar_height; + UWORD8 u1_overscan_appropriate_flag; + UWORD8 u1_video_format; + UWORD8 u1_video_full_range_flag; + UWORD8 u1_colour_primaries; + UWORD8 u1_tfr_chars; + UWORD8 u1_matrix_coeffs; + /* ue(v) values narrowed to 8 bits by the parser */ UWORD8 u1_cr_top_field; + UWORD8 u1_cr_bottom_field; + UWORD32 u4_num_units_in_tick; + UWORD32 u4_time_scale; + UWORD8 u1_fixed_frame_rate_flag; + /* presence flags are always written; the matching hrd_t is filled only when the flag is set */ UWORD8 u1_nal_hrd_params_present; + hrd_t s_nal_hrd; + UWORD8 u1_vcl_hrd_params_present; + hrd_t s_vcl_hrd; + /* written only when at least one of the two HRD blocks is present */ UWORD8 u1_low_delay_hrd_flag; + /* always written */ UWORD8 u1_pic_struct_present_flag; + UWORD8 u1_mv_over_pic_boundaries_flag; + UWORD32 u4_max_bytes_per_pic_denom; + UWORD32 u4_max_bits_per_mb_denom; + UWORD32 u4_log2_max_mv_length_horz; + UWORD32 u4_log2_max_mv_length_vert; + /* set to 64 by the parser when the restriction fields are absent */ UWORD32 u4_num_reorder_frames; + /* written only when the restriction fields are present */ UWORD32 u4_max_dec_frame_buffering; +} vui_t; + +/* Parses the VUI parameters from ps_bitstrm into *ps_vu4; returns OK or the error from the HRD parser */ WORD32 
ih264d_parse_vui_parametres(vui_t *ps_vu4, + dec_bit_stream_t *ps_bitstrm); +#endif /* _IH264D_VUI_H_ */ + diff --git a/decoder/iv.h b/decoder/iv.h new file mode 100755 index 0000000..3a2ebf5 --- /dev/null +++ b/decoder/iv.h @@ -0,0 +1,420 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* iv.h +* +* @brief +* This file contains all the necessary structure and enumeration +* definitions needed for the Application Program Interface(API) of the +* Ittiam Video and Image codecs +* +* @author +* 100239(RCY) +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + + +#ifndef _IV_H +#define _IV_H + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + + +/*****************************************************************************/ +/* Typedefs */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ + + +/* IV_API_CALL_STATUS_T:This is only to return the FAIL/PASS status to the */ +/* application for the current API call */ + +typedef enum { + IV_STATUS_NA = 0x7FFFFFFF, + IV_SUCCESS = 0x0, + IV_FAIL = 0x1, +}IV_API_CALL_STATUS_T; + +/* IV_MEM_TYPE_T: This Enumeration defines the type of memory (Internal/Ext */ +/* -ernal) along with the cacheable/non-cacheable attributes */ + +typedef enum { + IV_NA_MEM_TYPE = 0x7FFFFFFF, + IV_INTERNAL_CACHEABLE_PERSISTENT_MEM = 0x1, + IV_INTERNAL_CACHEABLE_SCRATCH_MEM = 0x2, + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM = 0x3, + IV_EXTERNAL_CACHEABLE_SCRATCH_MEM = 0x4, + IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x5, + IV_INTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x6, + IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x7, + IV_EXTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x8 +}IV_MEM_TYPE_T; + +/* IV_COLOR_FORMAT_T: This enumeration lists all the color formats which */ +/* finds usage in video/image codecs */ + 
+typedef enum { + IV_CHROMA_NA = 0x7FFFFFFF, + IV_YUV_420P = 0x1, + IV_YUV_422P = 0x2, + IV_420_UV_INTL = 0x3, + IV_YUV_422IBE = 0x4, + IV_YUV_422ILE = 0x5, + IV_YUV_444P = 0x6, + IV_YUV_411P = 0x7, + IV_GRAY = 0x8, + IV_RGB_565 = 0x9, + IV_RGB_24 = 0xa, + IV_YUV_420SP_UV = 0xb, + IV_YUV_420SP_VU = 0xc, + IV_RGBA_8888 = 0xd +}IV_COLOR_FORMAT_T; + +/* IV_PICTURE_CODING_TYPE_T: VOP/Frame coding type Enumeration */ + +typedef enum { + IV_NA_FRAME = 0x7FFFFFFF, + IV_I_FRAME = 0x0, + IV_P_FRAME = 0x1, + IV_B_FRAME = 0x2, + IV_IDR_FRAME = 0x3, + IV_II_FRAME = 0x4, + IV_IP_FRAME = 0x5, + IV_IB_FRAME = 0x6, + IV_PI_FRAME = 0x7, + IV_PP_FRAME = 0x8, + IV_PB_FRAME = 0x9, + IV_BI_FRAME = 0xa, + IV_BP_FRAME = 0xb, + IV_BB_FRAME = 0xc, + IV_MBAFF_I_FRAME = 0xd, + IV_MBAFF_P_FRAME = 0xe, + IV_MBAFF_B_FRAME = 0xf, + IV_MBAFF_IDR_FRAME = 0x10, + IV_NOT_CODED_FRAME = 0x11, + IV_FRAMETYPE_DEFAULT = IV_I_FRAME +}IV_PICTURE_CODING_TYPE_T; + +/* IV_FLD_TYPE_T: field type Enumeration */ + +typedef enum { + IV_NA_FLD = 0x7FFFFFFF, + IV_TOP_FLD = 0x0, + IV_BOT_FLD = 0x1, + IV_FLD_TYPE_DEFAULT = IV_TOP_FLD +}IV_FLD_TYPE_T; + +/* IV_CONTENT_TYPE_T: Video content type */ + +typedef enum { + IV_CONTENTTYPE_NA = 0x7FFFFFFF, + IV_PROGRESSIVE = 0x0, + IV_INTERLACED = 0x1, + IV_PROGRESSIVE_FRAME = 0x2, + IV_INTERLACED_FRAME = 0x3, + IV_INTERLACED_TOPFIELD = 0x4, + IV_INTERLACED_BOTTOMFIELD = 0x5, + IV_CONTENTTYPE_DEFAULT = IV_PROGRESSIVE, +}IV_CONTENT_TYPE_T; + +/* IV_API_COMMAND_TYPE_T:API command type */ +typedef enum { + IV_CMD_NA = 0x7FFFFFFF, + IV_CMD_GET_NUM_MEM_REC = 0x0, + IV_CMD_FILL_NUM_MEM_REC = 0x1, + IV_CMD_RETRIEVE_MEMREC = 0x2, + IV_CMD_INIT = 0x3, + IV_CMD_DUMMY_ELEMENT = 0x4, +}IV_API_COMMAND_TYPE_T; + +/*****************************************************************************/ +/* Structure */ +/*****************************************************************************/ + +/* IV_OBJ_T: This structure defines the handle for the codec instance */ + +typedef struct { + /** + 
* u4_size of the structure + */ + UWORD32 u4_size; + + /** + * Pointer to the API function pointer table of the codec + */ + void *pv_fxns; + + /** + * Pointer to the handle of the codec + */ + void *pv_codec_handle; +}iv_obj_t; + +/* iv_mem_rec_t: This structure defines the memory record holder which will */ +/* be used by the codec to communicate its memory requirements to the */ +/* application through appropriate API functions */ + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * Pointer to the memory allocated by the application + */ + void *pv_base; + + /** + * u4_size of the memory to be allocated + */ + UWORD32 u4_mem_size; + + /** + * Alignment of the memory pointer + */ + UWORD32 u4_mem_alignment; + /** + * Nature of the memory to be allocated + */ + IV_MEM_TYPE_T e_mem_type; +}iv_mem_rec_t; + +/* IV_YUV_BUF_T: This structure defines attributes for the yuv buffer */ + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * Pointer to Luma (Y) Buffer + */ + + void *pv_y_buf; + /** + * Pointer to Chroma (Cb) Buffer + */ + void *pv_u_buf; + + /** + * Pointer to Chroma (Cr) Buffer + */ + void *pv_v_buf; + + /** + * Width of the Luma (Y) Buffer + */ + UWORD32 u4_y_wd; + + /** + * Height of the Luma (Y) Buffer + */ + UWORD32 u4_y_ht; + + /** + * Stride/Pitch of the Luma (Y) Buffer + */ + UWORD32 u4_y_strd; + + /** + * Width of the Chroma (Cb) Buffer + */ + UWORD32 u4_u_wd; + + /** + * Height of the Chroma (Cb) Buffer + */ + UWORD32 u4_u_ht; + + /** + * Stride/Pitch of the Chroma (Cb) Buffer + */ + UWORD32 u4_u_strd; + + /** + * Width of the Chroma (Cr) Buffer + */ + UWORD32 u4_v_wd; + + /** + * Height of the Chroma (Cr) Buffer + */ + UWORD32 u4_v_ht; + + /** + * Stride/Pitch of the Chroma (Cr) Buffer + */ + UWORD32 u4_v_strd; +}iv_yuv_buf_t; + +/*****************************************************************************/ +/* Get Number of Memory Records */ 
+/*****************************************************************************/ + +/* IV_API_COMMAND_TYPE_T::e_cmd = IV_CMD_GET_NUM_MEM_REC */ + + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * cmd + */ + IV_API_COMMAND_TYPE_T e_cmd; +}iv_num_mem_rec_ip_t; + + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error code + */ + UWORD32 u4_error_code; + + /** + * num_mem_rec + */ + UWORD32 u4_num_mem_rec; +}iv_num_mem_rec_op_t; + + +/*****************************************************************************/ +/* Fill Memory Records */ +/*****************************************************************************/ + +/* IV_API_COMMAND_TYPE_T::e_cmd = IV_CMD_FILL_NUM_MEM_REC */ + + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * cmd + */ + IV_API_COMMAND_TYPE_T e_cmd; + + /** + * pointer to array of memrecords structures should be filled by codec + with details of memory resource requirements + */ + iv_mem_rec_t *pv_mem_rec_location; + + /** + * maximum width for which codec should request memory requirements + */ + UWORD32 u4_max_frm_wd; + + /** + * maximum height for which codec should request memory requirements + */ + UWORD32 u4_max_frm_ht; +}iv_fill_mem_rec_ip_t; + + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error_code + */ + UWORD32 u4_error_code; + + /** + * no of memory record structures which are filled by codec + */ + UWORD32 u4_num_mem_rec_filled; +}iv_fill_mem_rec_op_t; + + +/*****************************************************************************/ +/* Retrieve Memory Records */ +/*****************************************************************************/ + +/* IV_API_COMMAND_TYPE_T::e_cmd = IV_CMD_RETRIEVE_MEMREC */ + + + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * cmd + */ + IV_API_COMMAND_TYPE_T e_cmd; + + /** + * array of 
structures where codec should fill with all resources(memory) with it + */ + iv_mem_rec_t *pv_mem_rec_location; +}iv_retrieve_mem_rec_ip_t; + + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error_code + */ + UWORD32 u4_error_code; + + /** + * no of memory records filled by codec + */ + UWORD32 u4_num_mem_rec_filled; +}iv_retrieve_mem_rec_op_t; + + + +#endif /* _IV_H */ + diff --git a/decoder/ivd.h b/decoder/ivd.h new file mode 100755 index 0000000..955b81f --- /dev/null +++ b/decoder/ivd.h @@ -0,0 +1,585 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ivd.h */ +/* */ +/* Description : This file contains all the necessary structure and */ +/* enumeration definitions needed for the Application */ +/* Program Interface(API) of the Ittiam Video Decoders */ +/* */ +/* List of Functions : None */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 26 08 2010 100239(RCY) Draft */ +/* */ +/*****************************************************************************/ + +#ifndef _IVD_H +#define _IVD_H + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ +#define IVD_VIDDEC_MAX_IO_BUFFERS 64 +/*****************************************************************************/ +/* Typedefs */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ + +/* IVD_ARCH_T: Architecture Enumeration */ +typedef enum +{ + ARCH_NA = 0x7FFFFFFF, + ARCH_ARM_NONEON = 0x0, + ARCH_ARM_A9Q, + ARCH_ARM_A9A, + ARCH_ARM_A9, + ARCH_ARM_A7, + ARCH_ARM_A5, + ARCH_ARM_A15, + ARCH_ARM_NEONINTR, + ARCH_ARMV8_GENERIC, + ARCH_X86_GENERIC = 0x100, + ARCH_X86_SSSE3, + ARCH_X86_SSE42, + ARCH_X86_AVX2, + ARCH_MIPS_GENERIC = 0x200, + ARCH_MIPS_32 +}IVD_ARCH_T; + +/* IVD_SOC_T: SOC Enumeration */ +typedef enum +{ + SOC_NA = 0x7FFFFFFF, + SOC_GENERIC = 0x0, + SOC_HISI_37X = 0x100, +}IVD_SOC_T; + +/* IVD_FRAME_SKIP_MODE_T:Skip mode Enumeration */ + +typedef enum { + IVD_SKIP_NONE = 0x7FFFFFFF, + IVD_SKIP_P = 0x1, + IVD_SKIP_B = 0x2, + IVD_SKIP_I = 0x3, + IVD_SKIP_IP = 0x4, + IVD_SKIP_IB = 0x5, + IVD_SKIP_PB = 0x6, + 
IVD_SKIP_IPB = 0x7, + IVD_SKIP_IDR = 0x8, + IVD_SKIP_DEFAULT = IVD_SKIP_NONE, +}IVD_FRAME_SKIP_MODE_T; + +/* IVD_VIDEO_DECODE_MODE_T: Set decoder to decode either frame worth of data */ +/* or only header worth of data */ + +typedef enum { + IVD_DECODE_MODE_NA = 0x7FFFFFFF, + + /* This enables the codec to process all decodable units */ + IVD_DECODE_FRAME = 0x0, + + /* This enables the codec to decode header only */ + IVD_DECODE_HEADER = 0x1, + + + +}IVD_VIDEO_DECODE_MODE_T; + + +/* IVD_DISPLAY_FRAME_OUT_MODE_T: Video Display Frame Output Mode */ + +typedef enum { + + IVD_DISPLAY_ORDER_NA = 0x7FFFFFFF, + /* To set codec to fill output buffers in display order */ + IVD_DISPLAY_FRAME_OUT = 0x0, + + /* To set codec to fill output buffers in decode order */ + IVD_DECODE_FRAME_OUT = 0x1, +}IVD_DISPLAY_FRAME_OUT_MODE_T; + + +/* IVD_API_COMMAND_TYPE_T:API command type */ +typedef enum { + IVD_CMD_VIDEO_NA = 0x7FFFFFFF, + IVD_CMD_VIDEO_CTL = IV_CMD_DUMMY_ELEMENT + 1, + IVD_CMD_VIDEO_DECODE, + IVD_CMD_GET_DISPLAY_FRAME, + IVD_CMD_REL_DISPLAY_FRAME, + IVD_CMD_SET_DISPLAY_FRAME +}IVD_API_COMMAND_TYPE_T; + +/* IVD_CONTROL_API_COMMAND_TYPE_T: Video Control API command type */ + +typedef enum { + IVD_CMD_NA = 0x7FFFFFFF, + IVD_CMD_CTL_GETPARAMS = 0x0, + IVD_CMD_CTL_SETPARAMS = 0x1, + IVD_CMD_CTL_RESET = 0x2, + IVD_CMD_CTL_SETDEFAULT = 0x3, + IVD_CMD_CTL_FLUSH = 0x4, + IVD_CMD_CTL_GETBUFINFO = 0x5, + IVD_CMD_CTL_GETVERSION = 0x6, + IVD_CMD_CTL_CODEC_SUBCMD_START = 0x7 +}IVD_CONTROL_API_COMMAND_TYPE_T; + + +/* IVD_ERROR_BITS_T: A UWORD32 container will be used for reporting the error*/ +/* code to the application. The first 8 bits starting from LSB have been */ +/* reserved for the codec to report internal error details. The rest of the */ +/* bits will be generic for all video decoders and each bit has an associated*/ +/* meaning as mentioned below. 
The unused bit fields are reserved for future */ +/* extensions and will be zero in the current implementation */ + +typedef enum { + /* Bit 8 - Applied concealment. */ + IVD_APPLIEDCONCEALMENT = 0x8, + /* Bit 9 - Insufficient input data. */ + IVD_INSUFFICIENTDATA = 0x9, + /* Bit 10 - Data problem/corruption. */ + IVD_CORRUPTEDDATA = 0xa, + /* Bit 11 - Header problem/corruption. */ + IVD_CORRUPTEDHEADER = 0xb, + /* Bit 12 - Unsupported feature/parameter in input. */ + IVD_UNSUPPORTEDINPUT = 0xc, + /* Bit 13 - Unsupported input parameter or configuration. */ + IVD_UNSUPPORTEDPARAM = 0xd, + /* Bit 14 - Fatal error (stop the codec). If there is an */ + /* error and this bit is not set, the error is a recoverable one. */ + IVD_FATALERROR = 0xe, + /* Bit 15 - Invalid bitstream. Applies when Bitstream/YUV frame */ + /* buffer for encode/decode call is made with non-valid or zero u4_size */ + /* data */ + IVD_INVALID_BITSTREAM = 0xf, + /* Bit 16 */ + IVD_INCOMPLETE_BITSTREAM = 0x10, + IVD_ERROR_BITS_T_DUMMY_ELEMENT = 0x7FFFFFFF +}IVD_ERROR_BITS_T; + + +/* IVD_ERROR_CODES_T: Video decoder API error codes */ +typedef enum { + IVD_ERROR_NONE = 0x0, + IVD_NUM_MEM_REC_FAILED = 0x1, + IVD_NUM_REC_NOT_SUFFICIENT = 0x2, + IVD_FILL_MEM_REC_FAILED = 0x3, + IVD_REQUESTED_WIDTH_NOT_SUPPPORTED = 0x4, + IVD_REQUESTED_HEIGHT_NOT_SUPPPORTED = 0x5, + IVD_INIT_DEC_FAILED = 0x6, + IVD_INIT_DEC_NOT_SUFFICIENT = 0x7, + IVD_INIT_DEC_WIDTH_NOT_SUPPPORTED = 0x8, + IVD_INIT_DEC_HEIGHT_NOT_SUPPPORTED = 0x9, + IVD_INIT_DEC_MEM_NOT_ALIGNED = 0xa, + IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED = 0xb, + IVD_INIT_DEC_MEM_REC_NOT_SUFFICIENT = 0xc, + IVD_GET_VERSION_DATABUFFER_SZ_INSUFFICIENT = 0xd, + IVD_BUFFER_SIZE_SET_TO_ZERO = 0xe, + IVD_UNEXPECTED_END_OF_STREAM = 0xf, + IVD_SEQUENCE_HEADER_NOT_DECODED = 0x10, + IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED = 0x11, + IVD_MAX_FRAME_LIMIT_REACHED = 0x12, + IVD_IP_API_STRUCT_SIZE_INCORRECT = 0x13, + IVD_OP_API_STRUCT_SIZE_INCORRECT = 0x14, + IVD_HANDLE_NULL 
= 0x15, + IVD_HANDLE_STRUCT_SIZE_INCORRECT = 0x16, + IVD_INVALID_HANDLE_NULL = 0x17, + IVD_INVALID_API_CMD = 0x18, + IVD_UNSUPPORTED_API_CMD = 0x19, + IVD_MEM_REC_STRUCT_SIZE_INCORRECT = 0x1a, + IVD_DISP_FRM_ZERO_OP_BUFS = 0x1b, + IVD_DISP_FRM_OP_BUF_NULL = 0x1c, + IVD_DISP_FRM_ZERO_OP_BUF_SIZE = 0x1d, + IVD_DEC_FRM_BS_BUF_NULL = 0x1e, + IVD_SET_CONFG_INVALID_DEC_MODE = 0x1f, + IVD_SET_CONFG_UNSUPPORTED_DISP_WIDTH = 0x20, + IVD_RESET_FAILED = 0x21, + IVD_INIT_DEC_MEM_REC_OVERLAP_ERR = 0x22, + IVD_INIT_DEC_MEM_REC_BASE_NULL = 0x23, + IVD_INIT_DEC_MEM_REC_ALIGNMENT_ERR = 0x24, + IVD_INIT_DEC_MEM_REC_INSUFFICIENT_SIZE = 0x25, + IVD_INIT_DEC_MEM_REC_INCORRECT_TYPE = 0x26, + IVD_DEC_NUMBYTES_INV = 0x27, + IVD_DEC_REF_BUF_NULL = 0x28, + IVD_DEC_FRM_SKIPPED = 0x29, + IVD_RES_CHANGED = 0x2a, + IVD_DUMMY_ELEMENT_FOR_CODEC_EXTENSIONS = 0x300, +}IVD_ERROR_CODES_T; + + +/*****************************************************************************/ +/* Structure */ +/*****************************************************************************/ +/* structure for passing output buffers to codec during get display buffer */ +/* call */ +typedef struct { + + /* number of output buffers */ + UWORD32 u4_num_bufs; + + /* list of pointers to output buffers */ + UWORD8 *pu1_bufs[IVD_VIDDEC_MAX_IO_BUFFERS]; + + /* sizes of each output buffer */ + UWORD32 u4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS]; + +}ivd_out_bufdesc_t; + +/*****************************************************************************/ +/* Initialize decoder */ +/*****************************************************************************/ + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_INIT */ + + +typedef struct { + /* u4_size of the structure */ + UWORD32 u4_size; + IVD_API_COMMAND_TYPE_T e_cmd; + /* number of memrecords which are allocated on request of codec through + fill mem records */ + UWORD32 u4_num_mem_rec; + /* maximum width for which codec should be initialized */ + UWORD32 u4_frm_max_wd; + /* maximum height 
for which codec should be initialized */ + UWORD32 u4_frm_max_ht; + /* format in which codec has to give out frame data for display */ + IV_COLOR_FORMAT_T e_output_format; + /* pointer to memrecord array, which contains allocated resources */ + iv_mem_rec_t *pv_mem_rec_location; +}ivd_init_ip_t; + + +typedef struct{ + /* u4_size of the structure */ + UWORD32 u4_size; + UWORD32 u4_error_code; +}ivd_init_op_t; + + +/*****************************************************************************/ +/* Video Decode */ +/*****************************************************************************/ + + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_DECODE */ + + +typedef struct { + /* u4_size of the structure */ + UWORD32 u4_size; + IVD_API_COMMAND_TYPE_T e_cmd; + UWORD32 u4_ts; + UWORD32 u4_num_Bytes; + void *pv_stream_buffer; + + /* output buffer desc */ + ivd_out_bufdesc_t s_out_buffer; + +}ivd_video_decode_ip_t; + + +typedef struct{ + /* u4_size of the structure */ + UWORD32 u4_size; + UWORD32 u4_error_code; + UWORD32 u4_num_bytes_consumed; + UWORD32 u4_pic_wd; + UWORD32 u4_pic_ht; + IV_PICTURE_CODING_TYPE_T e_pic_type; + UWORD32 u4_frame_decoded_flag; + UWORD32 u4_new_seq; + + UWORD32 u4_output_present; + UWORD32 u4_progressive_frame_flag; + UWORD32 u4_is_ref_flag; + IV_COLOR_FORMAT_T e_output_format; + iv_yuv_buf_t s_disp_frm_buf; + IV_FLD_TYPE_T e4_fld_type; + UWORD32 u4_ts; + UWORD32 u4_disp_buf_id; +}ivd_video_decode_op_t; + + +/*****************************************************************************/ +/* Get Display Frame */ +/*****************************************************************************/ + + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_GET_DISPLAY_FRAME */ + +typedef struct +{ + /* u4_size of the structure */ + UWORD32 u4_size; + + IVD_API_COMMAND_TYPE_T e_cmd; + + /* output buffer desc */ + ivd_out_bufdesc_t s_out_buffer; + +}ivd_get_display_frame_ip_t; + + +typedef struct +{ + /* u4_size of the structure */ + UWORD32 u4_size; + UWORD32 
u4_error_code; + UWORD32 u4_progressive_frame_flag; + IV_PICTURE_CODING_TYPE_T e_pic_type; + UWORD32 u4_is_ref_flag; + IV_COLOR_FORMAT_T e_output_format; + iv_yuv_buf_t s_disp_frm_buf; + IV_FLD_TYPE_T e4_fld_type; + UWORD32 u4_ts; + UWORD32 u4_disp_buf_id; +}ivd_get_display_frame_op_t; + +/*****************************************************************************/ +/* Set Display Frame */ +/*****************************************************************************/ + + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_SET_DISPLAY_FRAME */ + +typedef struct +{ + /* u4_size of the structure */ + UWORD32 u4_size; + + IVD_API_COMMAND_TYPE_T e_cmd; + + UWORD32 num_disp_bufs; + + /* output buffer desc */ + ivd_out_bufdesc_t s_disp_buffer[IVD_VIDDEC_MAX_IO_BUFFERS]; + +}ivd_set_display_frame_ip_t; + + +typedef struct +{ + /* u4_size of the structure */ + UWORD32 u4_size; + UWORD32 u4_error_code; +}ivd_set_display_frame_op_t; + + +/*****************************************************************************/ +/* Release Display Frame */ +/*****************************************************************************/ + + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_REL_DISPLAY_FRAME */ + +typedef struct +{ + /* u4_size of the structure */ + UWORD32 u4_size; + IVD_API_COMMAND_TYPE_T e_cmd; + UWORD32 u4_disp_buf_id; +}ivd_rel_display_frame_ip_t; + + +typedef struct +{ + /* u4_size of the structure */ + UWORD32 u4_size; + UWORD32 u4_error_code; +}ivd_rel_display_frame_op_t; + +/*****************************************************************************/ +/* Video control Flush */ +/*****************************************************************************/ +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd = IVD_CMD_ctl_FLUSH */ + + + +typedef struct{ + /* u4_size of the structure */ + UWORD32 u4_size; + IVD_API_COMMAND_TYPE_T e_cmd; + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; +}ivd_ctl_flush_ip_t; + + +typedef struct{ + 
/* u4_size of the structure */ + UWORD32 u4_size; + UWORD32 u4_error_code; +}ivd_ctl_flush_op_t; + +/*****************************************************************************/ +/* Video control reset */ +/*****************************************************************************/ +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd = IVD_CMD_ctl_RESET */ + + +typedef struct{ + /* u4_size of the structure */ + UWORD32 u4_size; + IVD_API_COMMAND_TYPE_T e_cmd; + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; +}ivd_ctl_reset_ip_t; + + +typedef struct{ + /* u4_size of the structure */ + UWORD32 u4_size; + UWORD32 u4_error_code; +}ivd_ctl_reset_op_t; + + +/*****************************************************************************/ +/* Video control Set Params */ +/*****************************************************************************/ +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_SETPARAMS */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_SETDEFAULT */ + + + +typedef struct { + /* u4_size of the structure */ + UWORD32 u4_size; + IVD_API_COMMAND_TYPE_T e_cmd; + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + IVD_VIDEO_DECODE_MODE_T e_vid_dec_mode; + UWORD32 u4_disp_wd; + IVD_FRAME_SKIP_MODE_T e_frm_skip_mode; + IVD_DISPLAY_FRAME_OUT_MODE_T e_frm_out_mode; +}ivd_ctl_set_config_ip_t; + + +typedef struct{ + /* u4_size of the structure */ + UWORD32 u4_size; + UWORD32 u4_error_code; +}ivd_ctl_set_config_op_t; + +/*****************************************************************************/ +/* Video control:Get Buf Info */ +/*****************************************************************************/ + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_GETBUFINFO */ + + +typedef struct{ + /* u4_size of the structure */ + UWORD32 u4_size; + IVD_API_COMMAND_TYPE_T e_cmd; + 
IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; +}ivd_ctl_getbufinfo_ip_t; + + +typedef struct{ + /* u4_size of the structure */ + UWORD32 u4_size; + UWORD32 u4_error_code; + /* no of display buffer sets required by codec */ + UWORD32 u4_num_disp_bufs; + /* no of input buffers required for codec */ + UWORD32 u4_min_num_in_bufs; + /* no of output buffers required for codec */ + UWORD32 u4_min_num_out_bufs; + /* sizes of each input buffer required */ + UWORD32 u4_min_in_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS]; + /* sizes of each output buffer required */ + UWORD32 u4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS]; +}ivd_ctl_getbufinfo_op_t; + + +/*****************************************************************************/ +/* Video control:Getstatus Call */ +/*****************************************************************************/ + + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_GETPARAMS */ + + +typedef struct{ + /* u4_size of the structure */ + UWORD32 u4_size; + IVD_API_COMMAND_TYPE_T e_cmd; + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; +}ivd_ctl_getstatus_ip_t; + + +typedef struct{ + UWORD32 u4_size; + UWORD32 u4_error_code; + /* no of display buffer sets required by codec */ + UWORD32 u4_num_disp_bufs; + UWORD32 u4_pic_ht; + UWORD32 u4_pic_wd; + UWORD32 u4_frame_rate; + UWORD32 u4_bit_rate; + IV_CONTENT_TYPE_T e_content_type; + IV_COLOR_FORMAT_T e_output_chroma_format; + /* no of input buffers required for codec */ + UWORD32 u4_min_num_in_bufs; + /* no of output buffers required for codec */ + UWORD32 u4_min_num_out_bufs; + /* sizes of each input buffer required */ + UWORD32 u4_min_in_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS]; + /* sizes of each output buffer required */ + UWORD32 u4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS]; +}ivd_ctl_getstatus_op_t; + + +/*****************************************************************************/ +/* Video control:Get Version Info */ 
+/*****************************************************************************/ + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_GETVERSION */ + + +typedef struct{ + /* u4_size of the structure */ + UWORD32 u4_size; + IVD_API_COMMAND_TYPE_T e_cmd; + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + void *pv_version_buffer; + UWORD32 u4_version_buffer_size; +}ivd_ctl_getversioninfo_ip_t; + + +typedef struct{ + /* u4_size of the structure */ + UWORD32 u4_size; + UWORD32 u4_error_code; +}ivd_ctl_getversioninfo_op_t; + +#endif /* _IVD_H */ + diff --git a/decoder/mips/ih264d_function_selector.c b/decoder/mips/ih264d_function_selector.c new file mode 100755 index 0000000..13680ed --- /dev/null +++ b/decoder/mips/ih264d_function_selector.c @@ -0,0 +1,66 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264d_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in the H.264 decoder +* +* @author +* Naveen +* +* @par List of Functions: ih264d_init_function_ptr, ih264d_init_arch +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv.h" +#include "ivd.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264_error.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" + +#include "ih264d_structs.h" +#include "ih264d_function_selector.h" + +void ih264d_init_function_ptr(dec_struct_t *ps_codec) +{ + ih264d_init_function_ptr_generic(ps_codec); +} +void ih264d_init_arch(dec_struct_t *ps_codec) +{ + ps_codec->e_processor_arch = ARCH_NA; +} diff --git a/decoder/x86/ih264d_function_selector.c b/decoder/x86/ih264d_function_selector.c new file mode 100755 index 0000000..9fc5c39 --- /dev/null +++ b/decoder/x86/ih264d_function_selector.c @@ -0,0 +1,94 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264d_function_selector.c +* +* @brief +* Contains functions to initialize the x86 function pointers used in the H.264 decoder +* +* @author +* Naveen +* +* @par List of Functions: ih264d_init_function_ptr, ih264d_init_arch +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv.h" +#include "ivd.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264_error.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" + +#include "ih264d_structs.h" +#include "ih264d_function_selector.h" + +void ih264d_init_function_ptr(dec_struct_t *ps_codec) +{ /* install the generic pointers first, then the SIMD ones selected by e_processor_arch */ + + ih264d_init_function_ptr_generic(ps_codec); + switch(ps_codec->e_processor_arch) + { + case ARCH_X86_GENERIC: + ih264d_init_function_ptr_generic(ps_codec); /* repeats the unconditional call above */ + break; + case ARCH_X86_SSSE3: + ih264d_init_function_ptr_ssse3(ps_codec); + break; + case ARCH_X86_SSE42: + default: /* also reached for ARCH_X86_AVX2, which has no case of its own */ + ih264d_init_function_ptr_ssse3(ps_codec); + ih264d_init_function_ptr_sse42(ps_codec); /* runs after the SSSE3 init */ + break; + } +} +void ih264d_init_arch(dec_struct_t *ps_codec) +{ /* pick e_processor_arch from the build-time DEFAULT_ARCH macro; SSE4.2 when it is not defined */ +#ifdef DEFAULT_ARCH +#if DEFAULT_ARCH == D_ARCH_X86_SSE42 + ps_codec->e_processor_arch = ARCH_X86_SSE42; +#elif DEFAULT_ARCH == D_ARCH_X86_SSSE3 + ps_codec->e_processor_arch = ARCH_X86_SSSE3; +#elif DEFAULT_ARCH == D_ARCH_X86_AVX2 + ps_codec->e_processor_arch = 
ARCH_X86_AVX2; /* IVD_ARCH_T enumerator, not the D_ARCH_* build macro */ +#else + ps_codec->e_processor_arch = ARCH_X86_GENERIC; +#endif +#else + ps_codec->e_processor_arch = ARCH_X86_SSE42; +#endif + +} diff --git a/decoder/x86/ih264d_function_selector_sse42.c b/decoder/x86/ih264d_function_selector_sse42.c new file mode 100755 index 0000000..0c493d2 --- /dev/null +++ b/decoder/x86/ih264d_function_selector_sse42.c @@ -0,0 +1,95 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264d_function_selector_sse42.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* Ittiam +* +* @par List of Functions: +* - ih264d_init_function_ptr_sse42 +* +* @remarks +* None +* +******************************************************************************* +*/ + + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv.h" +#include "ivd.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264_error.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" + +#include "ih264d_structs.h" + + +/** +******************************************************************************* +* +* @brief Install the SSE4.2 weighted-prediction, 4x4 inverse quant/transform +* and inverse Hadamard function pointers in the codec context +* +* @par Description: assigns the _sse42 routines to the corresponding pf_* +* members of the codec context; no other members are written here +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264d_init_function_ptr_sse42(dec_struct_t *ps_codec) +{ + ps_codec->pf_default_weighted_pred_luma = ih264_default_weighted_pred_luma_sse42; + ps_codec->pf_default_weighted_pred_chroma = ih264_default_weighted_pred_chroma_sse42; + ps_codec->pf_weighted_pred_luma = ih264_weighted_pred_luma_sse42; + ps_codec->pf_weighted_pred_chroma = ih264_weighted_pred_chroma_sse42; + ps_codec->pf_weighted_bi_pred_luma = ih264_weighted_bi_pred_luma_sse42; + 
ps_codec->pf_weighted_bi_pred_chroma = ih264_weighted_bi_pred_chroma_sse42; + + ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4_sse42; + ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_sse42; + ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_sse42; + return; +} diff --git a/decoder/x86/ih264d_function_selector_ssse3.c b/decoder/x86/ih264d_function_selector_ssse3.c new file mode 100755 index 0000000..1786213 --- /dev/null +++ b/decoder/x86/ih264d_function_selector_ssse3.c @@ -0,0 +1,181 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264d_function_selector_ssse3.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* Ittiam +* +* @par List of Functions: +* - ih264d_init_function_ptr_ssse3 +* +* @remarks +* None +* +******************************************************************************* +*/ + + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv.h" +#include "ivd.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264_error.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" + +#include "ih264d_structs.h" + + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context based on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264d_init_function_ptr_ssse3(dec_struct_t *ps_codec) +{ + + + + /* Init function pointers for intra pred leaf level functions luma + * Intra 16x16 */ + ps_codec->apf_intra_pred_luma_16x16[0] = ih264_intra_pred_luma_16x16_mode_vert_ssse3; + ps_codec->apf_intra_pred_luma_16x16[1] = ih264_intra_pred_luma_16x16_mode_horz_ssse3; + ps_codec->apf_intra_pred_luma_16x16[2] = ih264_intra_pred_luma_16x16_mode_dc_ssse3; + ps_codec->apf_intra_pred_luma_16x16[3] = 
ih264_intra_pred_luma_16x16_mode_plane_ssse3; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 4x4 */ + ps_codec->apf_intra_pred_luma_4x4[0] = ih264_intra_pred_luma_4x4_mode_vert_ssse3; + ps_codec->apf_intra_pred_luma_4x4[1] = ih264_intra_pred_luma_4x4_mode_horz_ssse3; + ps_codec->apf_intra_pred_luma_4x4[2] = ih264_intra_pred_luma_4x4_mode_dc_ssse3; + ps_codec->apf_intra_pred_luma_4x4[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3; + ps_codec->apf_intra_pred_luma_4x4[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3; + ps_codec->apf_intra_pred_luma_4x4[5] = ih264_intra_pred_luma_4x4_mode_vert_r_ssse3; + ps_codec->apf_intra_pred_luma_4x4[6] = ih264_intra_pred_luma_4x4_mode_horz_d_ssse3; + ps_codec->apf_intra_pred_luma_4x4[7] = ih264_intra_pred_luma_4x4_mode_vert_l_ssse3; + ps_codec->apf_intra_pred_luma_4x4[8] = ih264_intra_pred_luma_4x4_mode_horz_u_ssse3; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 8x8 */ + ps_codec->apf_intra_pred_luma_8x8[0] = ih264_intra_pred_luma_8x8_mode_vert_ssse3; + ps_codec->apf_intra_pred_luma_8x8[1] = ih264_intra_pred_luma_8x8_mode_horz_ssse3; + ps_codec->apf_intra_pred_luma_8x8[2] = ih264_intra_pred_luma_8x8_mode_dc_ssse3; + ps_codec->apf_intra_pred_luma_8x8[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3; + ps_codec->apf_intra_pred_luma_8x8[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3; + ps_codec->apf_intra_pred_luma_8x8[5] = ih264_intra_pred_luma_8x8_mode_vert_r_ssse3; + ps_codec->apf_intra_pred_luma_8x8[6] = ih264_intra_pred_luma_8x8_mode_horz_d_ssse3; + ps_codec->apf_intra_pred_luma_8x8[7] = ih264_intra_pred_luma_8x8_mode_vert_l_ssse3; + ps_codec->apf_intra_pred_luma_8x8[8] = ih264_intra_pred_luma_8x8_mode_horz_u_ssse3; + + ps_codec->pf_intra_pred_ref_filtering = ih264_intra_pred_luma_8x8_mode_ref_filtering; + + /* Init function pointers for intra pred leaf level functions chroma + * Intra 8x8 */ + ps_codec->apf_intra_pred_chroma[0] = 
ih264_intra_pred_chroma_8x8_mode_vert_ssse3; + ps_codec->apf_intra_pred_chroma[1] = ih264_intra_pred_chroma_8x8_mode_horz_ssse3; + ps_codec->apf_intra_pred_chroma[2] = ih264_intra_pred_chroma_8x8_mode_dc; + ps_codec->apf_intra_pred_chroma[3] = ih264_intra_pred_chroma_8x8_mode_plane_ssse3; + + + ps_codec->pf_pad_left_luma = ih264_pad_left_luma_ssse3; + ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_ssse3; + ps_codec->pf_pad_right_luma = ih264_pad_right_luma_ssse3; + ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_ssse3; + + + ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4_ssse3; + ps_codec->pf_iquant_itrans_recon_luma_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_ssse3; + ps_codec->pf_iquant_itrans_recon_luma_8x8 = ih264_iquant_itrans_recon_8x8_ssse3; + ps_codec->pf_iquant_itrans_recon_luma_8x8_dc = ih264_iquant_itrans_recon_8x8_dc_ssse3; + + ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3; + + /* Init fn ptr luma deblocking */ + ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_ssse3; + ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_ssse3; + ps_codec->pf_deblk_luma_vert_bs4_mbaff = ih264_deblk_luma_vert_bs4_mbaff_ssse3; + ps_codec->pf_deblk_luma_vert_bslt4_mbaff = ih264_deblk_luma_vert_bslt4_mbaff_ssse3; + + ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_ssse3; + ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_ssse3; + + /* Init fn ptr chroma deblocking */ + ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_ssse3; + ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_ssse3; + ps_codec->pf_deblk_chroma_vert_bs4_mbaff = ih264_deblk_chroma_vert_bs4_mbaff_ssse3; + ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_ssse3; + ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_ssse3; + ps_codec->pf_deblk_chroma_vert_bslt4_mbaff = 
ih264_deblk_chroma_vert_bslt4_mbaff_ssse3; + + /* Inter pred leaf level functions */ + + ps_codec->apf_inter_pred_luma[0] = ih264_inter_pred_luma_copy_ssse3; + ps_codec->apf_inter_pred_luma[1] = ih264_inter_pred_luma_horz_qpel_ssse3; + ps_codec->apf_inter_pred_luma[2] = ih264_inter_pred_luma_horz_ssse3; + ps_codec->apf_inter_pred_luma[3] = ih264_inter_pred_luma_horz_qpel_ssse3; + ps_codec->apf_inter_pred_luma[4] = ih264_inter_pred_luma_vert_qpel_ssse3; + ps_codec->apf_inter_pred_luma[5] = ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3; + ps_codec->apf_inter_pred_luma[6] = ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3; + ps_codec->apf_inter_pred_luma[7] = ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3; + ps_codec->apf_inter_pred_luma[8] = ih264_inter_pred_luma_vert_ssse3; + ps_codec->apf_inter_pred_luma[9] = ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3; + ps_codec->apf_inter_pred_luma[10] = ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3; + ps_codec->apf_inter_pred_luma[11] = ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3; + ps_codec->apf_inter_pred_luma[12] = ih264_inter_pred_luma_vert_qpel_ssse3; + ps_codec->apf_inter_pred_luma[13] = ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3; + ps_codec->apf_inter_pred_luma[14] = ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3; + ps_codec->apf_inter_pred_luma[15] = ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3; + + ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_ssse3; + + + return; +} diff --git a/encoder.arm.mk b/encoder.arm.mk new file mode 100755 index 0000000..81ed22f --- /dev/null +++ b/encoder.arm.mk @@ -0,0 +1,47 @@ +libavce_inc_dir_arm += $(LOCAL_PATH)/encoder/arm +libavce_inc_dir_arm += $(LOCAL_PATH)/common/arm + +libavce_cflags_arm += -DDISABLE_NEONINTR -DARM -DARMGCC + +libavce_srcs_c_arm += encoder/arm/ih264e_function_selector.c + +ifeq ($(ARCH_ARM_HAVE_NEON),true) +libavce_srcs_c_arm += encoder/arm/ih264e_function_selector_a9q.c + +libavce_srcs_asm_arm += 
common/arm/ih264_resi_trans_quant_a9.s +libavce_srcs_asm_arm += common/arm/ih264_iquant_itrans_recon_a9.s +libavce_srcs_asm_arm += common/arm/ih264_iquant_itrans_recon_dc_a9.s +libavce_srcs_asm_arm += common/arm/ih264_ihadamard_scaling_a9.s +libavce_srcs_asm_arm += common/arm/ih264_deblk_chroma_a9.s +libavce_srcs_asm_arm += common/arm/ih264_deblk_luma_a9.s +libavce_srcs_asm_arm += common/arm/ih264_intra_pred_chroma_a9q.s +libavce_srcs_asm_arm += common/arm/ih264_intra_pred_luma_16x16_a9q.s +libavce_srcs_asm_arm += common/arm/ih264_intra_pred_luma_4x4_a9q.s +libavce_srcs_asm_arm += common/arm/ih264_intra_pred_luma_8x8_a9q.s +libavce_srcs_asm_arm += common/arm/ih264_inter_pred_chroma_a9q.s +libavce_srcs_asm_arm += common/arm/ih264_inter_pred_filters_luma_horz_a9q.s +libavce_srcs_asm_arm += common/arm/ih264_inter_pred_filters_luma_vert_a9q.s +libavce_srcs_asm_arm += common/arm/ih264_inter_pred_luma_bilinear_a9q.s +libavce_srcs_asm_arm += common/arm/ih264_inter_pred_luma_copy_a9q.s +libavce_srcs_asm_arm += common/arm/ih264_padding_neon.s +libavce_srcs_asm_arm += common/arm/ih264_mem_fns_neon.s +libavce_srcs_asm_arm += common/arm/ih264_arm_memory_barrier.s + +libavce_srcs_asm_arm += encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s +libavce_srcs_asm_arm += encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s +libavce_srcs_asm_arm += encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s +libavce_srcs_asm_arm += encoder/arm/ih264e_half_pel.s +libavce_srcs_asm_arm += encoder/arm/ih264e_fmt_conv.s + +#ME +libavce_srcs_asm_arm += encoder/arm/ime_distortion_metrics_a9q.s + +libavce_cflags_arm += -DDEFAULT_ARCH=D_ARCH_ARM_A9Q + +else #No Neon +libavce_cflags_arm += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON +endif #Neon check + +LOCAL_SRC_FILES_arm += $(libavce_srcs_c_arm) $(libavce_srcs_asm_arm) +LOCAL_C_INCLUDES_arm += $(libavce_inc_dir_arm) +LOCAL_CFLAGS_arm += $(libavce_cflags_arm) diff --git a/encoder.arm64.mk b/encoder.arm64.mk new file mode 100755 index 
0000000..845b481 --- /dev/null +++ b/encoder.arm64.mk @@ -0,0 +1,48 @@ +libavce_cflags_arm64 += -DARMV8 +libavce_cflags_arm64 += -DDISABLE_NEONINTR -DARM -DARMGCC + +libavce_inc_dir_arm64 += $(LOCAL_PATH)/encoder/arm +libavce_inc_dir_arm64 += $(LOCAL_PATH)/encoder/armv8 +libavce_inc_dir_arm64 += $(LOCAL_PATH)/common/armv8 + +libavce_srcs_c_arm64 += encoder/arm/ih264e_function_selector.c + +ifeq ($(ARCH_ARM_HAVE_NEON),true) +libavce_srcs_c_arm64 += encoder/arm/ih264e_function_selector_av8.c + +libavce_srcs_asm_arm64 += common/armv8/ih264_resi_trans_quant_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_iquant_itrans_recon_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_iquant_itrans_recon_dc_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_ihadamard_scaling_av8.s + +libavce_srcs_asm_arm64 += common/armv8/ih264_intra_pred_chroma_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_16x16_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_4x4_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_8x8_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_copy_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_inter_pred_chroma_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_inter_pred_filters_luma_horz_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_inter_pred_filters_luma_vert_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_padding_neon_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_mem_fns_neon_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_deblk_luma_av8.s +libavce_srcs_asm_arm64 += common/armv8/ih264_deblk_chroma_av8.s + +libavce_srcs_asm_arm64 += encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s +libavce_srcs_asm_arm64 += encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s +libavce_srcs_asm_arm64 += encoder/armv8/ih264e_half_pel_av8.s + +#ME +libavce_srcs_asm_arm64 += encoder/armv8/ime_distortion_metrics_av8.s + +libavce_cflags_arm64 += 
-DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC +else +libavce_cflags_arm64 += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON +endif + + + + +LOCAL_SRC_FILES_arm64 += $(libavce_srcs_c_arm64) $(libavce_srcs_asm_arm64) +LOCAL_C_INCLUDES_arm64 += $(libavce_inc_dir_arm64) +LOCAL_CFLAGS_arm64 += $(libavce_cflags_arm64) diff --git a/encoder.mips.mk b/encoder.mips.mk new file mode 100755 index 0000000..92ae5de --- /dev/null +++ b/encoder.mips.mk @@ -0,0 +1,7 @@ +libavce_inc_dir_mips += $(LOCAL_PATH)/common/mips +libavce_inc_dir_mips += $(LOCAL_PATH)/encoder/mips + +libavce_srcs_c_mips += encoder/mips/ih264e_function_selector.c + +LOCAL_C_INCLUDES_mips += $(libavce_inc_dir_mips) +LOCAL_SRC_FILES_mips += $(libavce_srcs_c_mips) diff --git a/encoder.mips64.mk b/encoder.mips64.mk new file mode 100755 index 0000000..5181fd9 --- /dev/null +++ b/encoder.mips64.mk @@ -0,0 +1,7 @@ +libavce_inc_dir_mips64 += $(LOCAL_PATH)/common/mips +libavce_inc_dir_mips64 += $(LOCAL_PATH)/encoder/mips + +libavce_srcs_c_mips64 += encoder/mips/ih264e_function_selector.c + +LOCAL_C_INCLUDES_mips64 += $(libavce_inc_dir_mips64) +LOCAL_SRC_FILES_mips64 += $(libavce_srcs_c_mips64) diff --git a/encoder.mk b/encoder.mk new file mode 100755 index 0000000..5829118 --- /dev/null +++ b/encoder.mk @@ -0,0 +1,90 @@ +LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +libavce_source_dir := $(LOCAL_PATH) + +## Arch-common settings +LOCAL_MODULE := libavcenc +#LOCAL_32_BIT_ONLY := true + +LOCAL_MODULE_CLASS := STATIC_LIBRARIES + +LOCAL_CFLAGS += -D_LIB -DMULTICORE -DANDROID -DNDEBUG -UHP_PL -DN_MB_ENABLE -URC_FIXED_POINT -fPIC +LOCAL_CFLAGS += -O3 -DANDROID + +LOCAL_C_INCLUDES := $(LOCAL_PATH)/encoder $(LOCAL_PATH)/common + +libavce_srcs_c += common/ih264_resi_trans_quant.c +libavce_srcs_c += common/ih264_iquant_itrans_recon.c +libavce_srcs_c += common/ih264_ihadamard_scaling.c +libavce_srcs_c += common/ih264_inter_pred_filters.c +libavce_srcs_c += common/ih264_luma_intra_pred_filters.c +libavce_srcs_c += 
common/ih264_chroma_intra_pred_filters.c +libavce_srcs_c += common/ih264_padding.c +libavce_srcs_c += common/ih264_mem_fns.c +libavce_srcs_c += common/ih264_deblk_edge_filters.c +libavce_srcs_c += common/ih264_deblk_tables.c +libavce_srcs_c += common/ih264_cavlc_tables.c +libavce_srcs_c += common/ih264_cabac_tables.c +libavce_srcs_c += common/ih264_common_tables.c +libavce_srcs_c += common/ih264_trans_data.c +libavce_srcs_c += common/ih264_buf_mgr.c +libavce_srcs_c += common/ih264_dpb_mgr.c +libavce_srcs_c += common/ih264_list.c + + +libavce_srcs_c += common/ithread.c + +libavce_srcs_c += encoder/ih264e_globals.c +libavce_srcs_c += encoder/ih264e_intra_modes_eval.c +libavce_srcs_c += encoder/ih264e_half_pel.c +libavce_srcs_c += encoder/ih264e_mc.c +libavce_srcs_c += encoder/ih264e_me.c +libavce_srcs_c += encoder/ih264e_rc_mem_interface.c +libavce_srcs_c += encoder/ih264e_time_stamp.c +libavce_srcs_c += encoder/ih264e_modify_frm_rate.c +libavce_srcs_c += encoder/ih264e_rate_control.c +libavce_srcs_c += encoder/ih264e_core_coding.c +libavce_srcs_c += encoder/ih264e_deblk.c +libavce_srcs_c += encoder/ih264e_api.c +libavce_srcs_c += encoder/ih264e_process.c +libavce_srcs_c += encoder/ih264e_encode.c +libavce_srcs_c += encoder/ih264e_utils.c +libavce_srcs_c += encoder/ih264e_version.c +libavce_srcs_c += encoder/ih264e_bitstream.c +libavce_srcs_c += encoder/ih264e_cavlc.c +libavce_srcs_c += encoder/ih264e_encode_header.c +libavce_srcs_c += encoder/ih264e_function_selector_generic.c +libavce_srcs_c += encoder/ih264e_fmt_conv.c + +#Rate Control +libavce_srcs_c += encoder/irc_rate_control_api.c +libavce_srcs_c += encoder/irc_bit_allocation.c +libavce_srcs_c += encoder/irc_cbr_buffer_control.c +libavce_srcs_c += encoder/irc_est_sad.c +libavce_srcs_c += encoder/irc_fixed_point_error_bits.c +libavce_srcs_c += encoder/irc_frame_info_collector.c +libavce_srcs_c += encoder/irc_mb_model_based.c +libavce_srcs_c += encoder/irc_picture_type.c +libavce_srcs_c += encoder/irc_rd_model.c 
+libavce_srcs_c += encoder/irc_vbr_storage_vbv.c +libavce_srcs_c += encoder/irc_vbr_str_prms.c + +#ME files +libavce_srcs_c += encoder/ime.c +libavce_srcs_c += encoder/ime_distortion_metrics.c + + + +LOCAL_SRC_FILES := $(libavce_srcs_c) $(libavce_srcs_asm) + + +# Load the arch-specific settings +include $(LOCAL_PATH)/encoder.arm.mk +include $(LOCAL_PATH)/encoder.arm64.mk +include $(LOCAL_PATH)/encoder.x86.mk +include $(LOCAL_PATH)/encoder.x86_64.mk +include $(LOCAL_PATH)/encoder.mips.mk +include $(LOCAL_PATH)/encoder.mips64.mk + +include $(BUILD_STATIC_LIBRARY) diff --git a/encoder.x86.mk b/encoder.x86.mk new file mode 100755 index 0000000..e9b6a5f --- /dev/null +++ b/encoder.x86.mk @@ -0,0 +1,37 @@ +libavce_cflags_x86 += -DX86 -DDISABLE_AVX2 -m32 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42 + +libavce_inc_dir_x86 += $(LOCAL_PATH)/encoder/x86 +libavce_inc_dir_x86 += $(LOCAL_PATH)/common/x86 + +libavce_srcs_c_x86 += encoder/x86/ih264e_function_selector.c +libavce_srcs_c_x86 += encoder/x86/ih264e_function_selector_sse42.c +libavce_srcs_c_x86 += encoder/x86/ih264e_function_selector_ssse3.c + +libavce_srcs_c_x86 += common/x86/ih264_iquant_itrans_recon_ssse3.c +libavce_srcs_c_x86 += common/x86/ih264_iquant_itrans_recon_dc_ssse3.c +libavce_srcs_c_x86 += common/x86/ih264_ihadamard_scaling_ssse3.c +libavce_srcs_c_x86 += common/x86/ih264_inter_pred_filters_ssse3.c +libavce_srcs_c_x86 += common/x86/ih264_mem_fns_ssse3.c +libavce_srcs_c_x86 += common/x86/ih264_padding_ssse3.c +libavce_srcs_c_x86 += common/x86/ih264_luma_intra_pred_filters_ssse3.c +libavce_srcs_c_x86 += common/x86/ih264_chroma_intra_pred_filters_ssse3.c +libavce_srcs_c_x86 += common/x86/ih264_deblk_chroma_ssse3.c +libavce_srcs_c_x86 += common/x86/ih264_deblk_luma_ssse3.c +libavce_srcs_c_x86 += common/x86/ih264_iquant_itrans_recon_sse42.c +libavce_srcs_c_x86 += common/x86/ih264_ihadamard_scaling_sse42.c +libavce_srcs_c_x86 += common/x86/ih264_resi_trans_quant_sse42.c +libavce_srcs_c_x86 += 
common/x86/ih264_weighted_pred_sse42.c + +libavce_srcs_c_x86 += encoder/x86/ih264e_half_pel_ssse3.c +libavce_srcs_c_x86 += encoder/x86/ih264e_intra_modes_eval_ssse3.c +libavce_srcs_c_x86 += encoder/x86/ime_distortion_metrics_sse42.c + + + + + + +LOCAL_SRC_FILES_x86 += $(libavce_srcs_c_x86) $(libavce_srcs_asm_x86) +LOCAL_C_INCLUDES_x86 += $(libavce_inc_dir_x86) +LOCAL_CFLAGS_x86 += $(libavce_cflags_x86) + diff --git a/encoder.x86_64.mk b/encoder.x86_64.mk new file mode 100755 index 0000000..deb004b --- /dev/null +++ b/encoder.x86_64.mk @@ -0,0 +1,35 @@ +libavce_cflags_x86_64 += -DX86 -DDISABLE_AVX2 -m64 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42 + +libavce_inc_dir_x86_64 += $(LOCAL_PATH)/encoder/x86 +libavce_inc_dir_x86_64 += $(LOCAL_PATH)/common/x86 + +libavce_srcs_c_x86_64 += encoder/x86/ih264e_function_selector.c +libavce_srcs_c_x86_64 += encoder/x86/ih264e_function_selector_sse42.c +libavce_srcs_c_x86_64 += encoder/x86/ih264e_function_selector_ssse3.c + +libavce_srcs_c_x86_64 += common/x86/ih264_iquant_itrans_recon_ssse3.c +libavce_srcs_c_x86_64 += common/x86/ih264_iquant_itrans_recon_dc_ssse3.c +libavce_srcs_c_x86_64 += common/x86/ih264_ihadamard_scaling_ssse3.c +libavce_srcs_c_x86_64 += common/x86/ih264_inter_pred_filters_ssse3.c +libavce_srcs_c_x86_64 += common/x86/ih264_mem_fns_ssse3.c +libavce_srcs_c_x86_64 += common/x86/ih264_padding_ssse3.c +libavce_srcs_c_x86_64 += common/x86/ih264_luma_intra_pred_filters_ssse3.c +libavce_srcs_c_x86_64 += common/x86/ih264_chroma_intra_pred_filters_ssse3.c +libavce_srcs_c_x86_64 += common/x86/ih264_deblk_chroma_ssse3.c +libavce_srcs_c_x86_64 += common/x86/ih264_deblk_luma_ssse3.c +libavce_srcs_c_x86_64 += common/x86/ih264_iquant_itrans_recon_sse42.c +libavce_srcs_c_x86_64 += common/x86/ih264_ihadamard_scaling_sse42.c +libavce_srcs_c_x86_64 += common/x86/ih264_resi_trans_quant_sse42.c +libavce_srcs_c_x86_64 += common/x86/ih264_weighted_pred_sse42.c + +libavce_srcs_c_x86_64 += encoder/x86/ih264e_half_pel_ssse3.c 
+libavce_srcs_c_x86_64 += encoder/x86/ih264e_intra_modes_eval_ssse3.c +libavce_srcs_c_x86_64 += encoder/x86/ime_distortion_metrics_sse42.c + + +LOCAL_SRC_FILES_x86_64 += $(libavce_srcs_c_x86_64) $(libavce_srcs_asm_x86_64) +LOCAL_C_INCLUDES_x86_64 += $(libavce_inc_dir_x86_64) +LOCAL_CFLAGS_x86_64 += $(libavce_cflags_x86_64) + + + diff --git a/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s new file mode 100755 index 0000000..fe0ce17 --- /dev/null +++ b/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s @@ -0,0 +1,313 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** + +@/** +@****************************************************************************** +@* +@* @brief :Evaluate best intra 16x16 mode (among VERT, HORZ and DC ) +@* and do the prediction. +@* +@* @par Description +@* This function evaluates first three 16x16 modes and compute corresponding sad +@* and return the buffer predicted with best mode. 
+@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[in] pu1_ngbr_pels_i16 +@* UWORD8 pointer to neighbouring pels +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] u4_n_avblty +@* availability of neighbouring pixels +@* +@* @param[out] u4_intra_mode +@* Pointer to the variable in which best mode is returned +@* +@* @param[out] pu4_sadmin +@* Pointer to the variable in which minimum sad is returned +@* +@* @param[in] u4_valid_intra_modes +@* Bit mask of valid modes (bit 0: VERT, bit 1: HORZ, bit 2: DC) +@* +@* +@* @return none +@* +@****************************************************************************** +@*/ +@ +@void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src, +@ UWORD8 *pu1_ngbr_pels_i16, +@ UWORD8 *pu1_dst, +@ UWORD32 src_strd, +@ UWORD32 dst_strd, +@ WORD32 u4_n_avblty, +@ UWORD32 *u4_intra_mode, +@ WORD32 *pu4_sadmin, +@ UWORD32 u4_valid_intra_modes) +@ +.text +.p2align 2 + + .global ih264e_evaluate_intra16x16_modes_a9q + +ih264e_evaluate_intra16x16_modes_a9q: + +@r0 = pu1_src, +@r1 = pu1_ngbr_pels_i16, +@r2 = pu1_dst, +@r3 = src_strd, +@r4 = dst_strd, +@r5 = u4_n_avblty, +@r6 = u4_intra_mode, +@r7 = pu4_sadmin + + + + stmfd sp!, {r4-r12, r14} @store register values to stack + ldr r5, [sp, #44] + + + vpush {d8-d15} + vld1.32 {q4}, [r1]! + sub r6, r1, #1 + add r1, r1, #1 + mov r10, #0 + vld1.32 {q5}, [r1]! + mov r11, #0 + mov r4, #0 + @/* Left available ???? + ands r7, r5, #01 + movne r10, #1 + + @/* Top available ???? 
+ ands r8, r5, #04 + lsl r9, r10, #3 + movne r11, #1 + lsl r12, r11, #3 + adds r8, r9, r12 + + + @/* None available :( + moveq r4, #128 + + + +@/fINDING dc val*/ + @---------------------- + vaddl.u8 q15, d8, d9 + + vaddl.u8 q14, d10, d11 + + vadd.u16 q15, q14, q15 + @ VLD1.32 {q2},[r0],r3;row 2 + vadd.u16 d30, d31, d30 + vpadd.u16 d30, d30 + @ VLD1.32 {q3},[r0],r3 ;row 3 + vpadd.u16 d30, d30 + @--------------------- + + + vmov.u16 r7, d30[0] + add r7, r7, r8 + add r11, r11, #3 + add r8, r10, r11 + + lsr r7, r8 + add r7, r4, r7 + vld1.32 {q0}, [r0], r3 @ source r0w 0 + vdup.8 q15, r7 @dc val + +@/* computing SADs for all three modes*/ + ldrb r7, [r6] + vdup.8 q10, r7 @/HORIZONTAL VALUE ROW=0; + @/vertical row 0; + vabdl.u8 q8, d0, d10 + vabdl.u8 q9, d1, d11 + sub r6, r6, #1 + @/HORZ row 0; + vabdl.u8 q13, d0, d20 + vabdl.u8 q14, d1, d21 + mov r1, #15 + @/dc row 0; + vabdl.u8 q11, d0, d30 + vabdl.u8 q12, d1, d31 + + +loop: + vld1.32 {q1}, [r0], r3 @row i + @/dc row i; + vabal.u8 q11, d2, d30 + ldrb r7, [r6] + vabal.u8 q12, d3, d31 + + @/vertical row i; + vabal.u8 q8, d2, d10 + vdup.8 q10, r7 @/HORIZONTAL VALUE ROW=i; + sub r6, r6, #1 + vabal.u8 q9, d3, d11 + + subs r1, r1, #1 + @/HORZ row i; + vabal.u8 q13, d2, d20 + vabal.u8 q14, d3, d21 + bne loop + + @------------------------------------------------------------------------------ + + vadd.i16 q9, q9, q8 @/VERT + vadd.i16 d18, d19, d18 @/VERT + vpaddl.u16 d18, d18 @/VERT + vadd.i16 q14, q13, q14 @/HORZ + vadd.i16 d28, d29, d28 @/HORZ + vpaddl.u32 d18, d18 @/VERT + vpaddl.u16 d28, d28 @/HORZ + + vpaddl.u32 d28, d28 @/HORZ + vmov.u32 r8, d18[0] @ vert + vadd.i16 q12, q11, q12 @/DC + vmov.u32 r9, d28[0] @horz + mov r11, #1 + vadd.i16 d24, d24, d25 @/DC + lsl r11 , #30 + + @----------------------- + ldr r0, [sp, #120] @ u4_valid_intra_modes + @-------------------------------------------- + ands r7, r0, #01 @ vert mode valid???????????? 
+ moveq r8, r11 + vpaddl.u16 d24, d24 @/DC + + ands r6, r0, #02 @ horz mode valid???????????? + moveq r9, r11 + vpaddl.u32 d24, d24 @/DC + + vmov.u32 r10, d24[0] @dc +@-------------------------------- + ldr r4, [sp, #104] @r4 = dst_strd, + ldr r7, [sp, #116] @r7 = pu4_sadmin +@---------------------------------------------- + ands r6, r0, #04 @ dc mode valid???????????? + moveq r10, r11 + + @--------------------------- + ldr r6, [sp, #112] @ R6 =MODE + @-------------------------- + + cmp r8, r9 + bgt not_vert + cmp r8, r10 + bgt do_dc + + @/---------------------- + @DO VERTICAL PREDICTION + str r8 , [r7] @MIN SAD + mov r8, #0 + str r8 , [r6] @ MODE + vmov q15, q5 + + b do_dc_vert + @----------------------------- +not_vert: + cmp r9, r10 + bgt do_dc + + @/---------------------- + @DO HORIZONTAL + vdup.8 q5, d9[7] @0 + str r9 , [r7] @MIN SAD + vdup.8 q6, d9[6] @1 + mov r9, #1 + vdup.8 q7, d9[5] @2 + vst1.32 {d10, d11} , [r2], r4 @0 + vdup.8 q8, d9[4] @3 + str r9 , [r6] @ MODE + vdup.8 q9, d9[3] @4 + vst1.32 {d12, d13} , [r2], r4 @1 + vdup.8 q10, d9[2] @5 + vst1.32 {d14, d15} , [r2], r4 @2 + vdup.8 q11, d9[1] @6 + vst1.32 {d16, d17} , [r2], r4 @3 + vdup.8 q12, d9[0] @7 + vst1.32 {d18, d19} , [r2], r4 @4 + vdup.8 q13, d8[7] @8 + vst1.32 {d20, d21} , [r2], r4 @5 + vdup.8 q14, d8[6] @9 + vst1.32 {d22, d23} , [r2], r4 @6 + vdup.8 q15, d8[5] @10 + vst1.32 {d24, d25} , [r2], r4 @7 + vdup.8 q1, d8[4] @11 + vst1.32 {d26, d27} , [r2], r4 @8 + vdup.8 q2, d8[3] @12 + vst1.32 {d28, d29} , [r2], r4 @9 + vdup.8 q3, d8[2] @13 + vst1.32 {d30, d31}, [r2], r4 @10 + vdup.8 q5, d8[1] @14 + vst1.32 {d2, d3} , [r2], r4 @11 + vdup.8 q6, d8[0] @15 + vst1.32 {d4, d5} , [r2], r4 @12 + + vst1.32 {d6, d7} , [r2], r4 @13 + + vst1.32 {d10, d11} , [r2], r4 @14 + + vst1.32 {d12, d13} , [r2], r4 @15 + b end_func + + + @/----------------------------- + +do_dc: @/--------------------------------- + @DO DC + str r10 , [r7] @MIN SAD + mov r10, #2 + str r10 , [r6] @ MODE +do_dc_vert: + vst1.32 {d30, d31}, 
[r2], r4 @0 + vst1.32 {d30, d31}, [r2], r4 @1 + vst1.32 {d30, d31}, [r2], r4 @2 + vst1.32 {d30, d31}, [r2], r4 @3 + vst1.32 {d30, d31}, [r2], r4 @4 + vst1.32 {d30, d31}, [r2], r4 @5 + vst1.32 {d30, d31}, [r2], r4 @6 + vst1.32 {d30, d31}, [r2], r4 @7 + vst1.32 {d30, d31}, [r2], r4 @8 + vst1.32 {d30, d31}, [r2], r4 @9 + vst1.32 {d30, d31}, [r2], r4 @10 + vst1.32 {d30, d31}, [r2], r4 @11 + vst1.32 {d30, d31}, [r2], r4 @12 + vst1.32 {d30, d31}, [r2], r4 @13 + vst1.32 {d30, d31}, [r2], r4 @14 + vst1.32 {d30, d31}, [r2], r4 @15 + @/------------------ +end_func: + vpop {d8-d15} + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + diff --git a/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s new file mode 100755 index 0000000..568e623 --- /dev/null +++ b/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s @@ -0,0 +1,529 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +@*/ +@/** + +@/** + +.data +.p2align 2 + +scratch_intrapred_luma_4x4_prediction: + .long ver, hor, d_c, dia_dl + .long dia_dr, ver_r, hor_d, ver_l + .long hor_u + + +.text +.p2align 2 + +scratch_intrapred_luma_4x4_prediction_addr1: + .long scratch_intrapred_luma_4x4_prediction - scrintra_4x4 - 8 + + + +@/** +@/** +@****************************************************************************** +@* +@* @brief :Evaluate best intra 4x4 mode +@* and do the prediction. +@* +@* @par Description +@* This function evaluates the 4x4 modes, computes the corresponding cost of each +@* and returns the buffer predicted with the best mode. +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[in] pu1_ngbr_pels +@* UWORD8 pointer to neighbouring pels +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* @param[in] u4_n_avblty +@* availability of neighbouring pixels +@* +@* @param[out] u4_intra_mode +@* Pointer to the variable in which best mode is returned +@* +@* @param[out] pu4_sadmin +@* Pointer to the variable in which minimum cost is returned +@* +@* @param[in] u4_valid_intra_modes +@* Bit mask of the modes that are valid +@* +@* @param[in] u4_lambda +@* Lambda value for computing cost from SAD +@* +@* @param[in] u4_predictd_mode +@* Predicted mode for cost computation +@* +@* +@* +@* @return none +@* +@****************************************************************************** +@*/ +@void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src, +@ UWORD8 *pu1_ngbr_pels, +@ UWORD8 *pu1_dst, +@ UWORD32 src_strd, +@ UWORD32 dst_strd, +@ WORD32 u4_n_avblty, +@ UWORD32 *u4_intra_mode, +@ WORD32 *pu4_sadmin, +@ UWORD32 u4_valid_intra_modes, +@ UWORD32 u4_lambda, +@ UWORD32 u4_predictd_mode) + + + + .global ih264e_evaluate_intra_4x4_modes_a9q + +ih264e_evaluate_intra_4x4_modes_a9q: + +@r0 = pu1_src, +@r1 = pu1_ngbr_pels, +@r2 = 
pu1_dst, +@r3 
= src_strd, +@r4 = dst_strd, +@r5 = u4_n_avblty, +@r6 = u4_intra_mode, +@r7 = pu4_sadmin +@r8 = u4_valid_intra_modes +@r0 =u4_lambda +@r1 = u4_predictd_mode + + + stmfd sp!, {r4-r12, r14} @store register values to stack + +@-------------------- + ldr r5, [sp, #44] @r5 = u4_n_avblty, +@---------------------- + vpush {d8-d15} +@Loading neighbours + vld1.32 {q0}, [r1] + add r4, r1, #12 + vld1.8 d1[5], [r4] + vld1.8 d1[7], [r1] + @-------------------------------- + ldr r8, [sp, #120] @u4_valid_intra_modes +@---------------------------------------------- + + + +@ LOADING pu1_src + vld1.32 {d20[0]}, [r0], r3 + vext.8 q1, q0, q0, #1 + vld1.32 {d20[1]}, [r0], r3 + mov r11, #1 + vld1.32 {d21[0]}, [r0], r3 + lsl r11, r11, #30 + vld1.32 {d21[1]}, [r0], r3 + + + +@-------------------------------- + ldr r0, [sp, #124] @r0 =u4_lambda + ldr r1, [sp, #128] @r1 = u4_predictd_mode +@------ + + +vert: + ands r10, r8, #01 @VERT sad ?? + beq horz + vdup.32 q2, d2[1] + vabdl.u8 q14, d4, d20 + vabal.u8 q14, d4, d21 + vadd.i16 d28, d29, d28 + subs r6, r1, #0 + vpaddl.u16 d28, d28 @ + lslne r6, r0, #2 + vpaddl.u32 d28, d28 @/ + moveq r6, r0 @ + vmov.u32 r9, d28[0] @ vert + add r9, r6, r9 + + subs r6, r11, r9 + movgt r11, r9 + movgt r12, #0 + +horz: + ands r10, r8, #02 @HORZ sad ?? + beq dc + vdup.32 q3, d0[0] + vmov.32 q4, q3 + vtrn.8 q3, q4 + vtrn.16 d7, d6 + vtrn.16 d9, d8 + vtrn.32 d9, d7 + vtrn.32 d8, d6 + vabdl.u8 q14, d6, d20 + subs r6, r1, #1 + vabal.u8 q14, d7, d21 + vadd.i16 d28, d29, d28 + lslne r6, r0, #2 + vpaddl.u16 d28, d28 @ + vpaddl.u32 d28, d28 @/ + vmov.u32 r9, d28[0] @ + moveq r6, r0 @ + add r9, r6, r9 + + subs r6, r11, r9 + movgt r11, r9 + movgt r12, #1 + +dc: + ands r10, r8, #04 @DC sad ?? 
+ beq diags + vext.8 q4, q0, q0, #5 + vaddl.u8 q4, d0, d8 + vpaddl.u16 d8, d8 @ + vpaddl.u32 d8, d8 @/ + vmov.u32 r4, d8[0] @ + mov r14, #1 + ands r10, r5, #1 + addne r4, r4, #2 + addne r14, r14, #1 + ands r10, r5, #4 + addne r4, r4, #2 + addne r14, r14, #1 + ands r10, r5, #5 + moveq r4, #128 + moveq r14, #0 + subs r6, r1, #2 + lsr r4, r4, r14 + vdup.8 q4, r4 + lslne r6, r0, #2 + vabdl.u8 q14, d8, d20 + vabal.u8 q14, d9, d21 + vadd.i16 d28, d29, d28 + vpaddl.u16 d28, d28 @ + vpaddl.u32 d28, d28 @/ + vmov.u32 r9, d28[0] @ + + moveq r6, r0 @ + add r9, r6, r9 + + subs r6, r11, r9 + movgt r11, r9 + movgt r12, #2 + +diags: + ands r10, r8, #504 @/* if modes other than VERT, HORZ and DC are valid ????*/ + beq pred + @/* Performing FILT11 and FILT121 operation for all neighbour values*/ + vext.8 q5, q0, q0, #2 + vaddl.u8 q6, d0, d2 + vaddl.u8 q7, d1, d3 + vaddl.u8 q8, d10, d2 + vaddl.u8 q9, d11, d3 + vadd.u16 q12, q10, q11 + vqrshrun.s16 d10, q6, #1 + vqrshrun.s16 d11, q7, #1 + vadd.u16 q11, q6, q8 + vadd.u16 q12, q7, q9 + vqrshrun.s16 d12, q11, #2 + vqrshrun.s16 d13, q12, #2 + mov r14, #0 + vdup.32 q13 , r14 + mov r14, #-1 + vmov.i32 d26[0], r14 + +diag_dl: + ands r10, r8, #0x08 @DIAG_DL sad ?? + beq diag_dr + + vext.8 q15, q6, q6, #5 + vbit.32 d14, d30, d26 + vext.8 q15, q6, q6, #15 + vbit.32 d15, d31, d26 + vext.8 q15, q6, q6, #2 + vext.32 q14, q13, q13, #3 + vbit.32 d14, d30, d28 + vext.8 q15, q6, q6, #4 + vbit.32 d15, d30, d28 + vabdl.u8 q14, d14, d20 + subs r6, r1, #3 + vabal.u8 q14, d15, d21 + vadd.i16 d28, d29, d28 + vpaddl.u16 d28, d28 @ + lslne r6, r0, #2 + vpaddl.u32 d28, d28 @/ + vmov.u32 r9, d28[0] @ + + moveq r6, r0 @ + add r9, r6, r9 + + subs r6, r11, r9 + movgt r11, r9 + movgt r12, #3 + +diag_dr: + ands r10, r8, #16 @DIAG_DR sad ?? 
+ beq vert_r + + vext.8 q15, q6, q6, #3 + vbit.32 d16, d30, d26 + vext.8 q15, q6, q6, #1 + vbit.32 d17, d30, d26 + vext.8 q15, q6, q6, #4 + vext.32 q14, q13, q13, #3 + vbit.32 d17, d31, d28 + vext.8 q15, q6, q6, #6 + vbit.32 d16, d31, d28 + vabdl.u8 q14, d16, d20 + subs r6, r1, #4 + vabal.u8 q14, d17, d21 + vadd.i16 d28, d29, d28 + vpaddl.u16 d28, d28 @ + lslne r6, r0, #2 + vpaddl.u32 d28, d28 @/ + vmov.u32 r9, d28[0] @ + + moveq r6, r0 @ + add r9, r6, r9 + + subs r6, r11, r9 + movgt r11, r9 + movgt r12, #4 + +vert_r: + ands r10, r8, #32 @VERT_R sad ?? + beq horz_d + vext.8 q15, q5, q5, #4 + vbit.32 d18, d30, d26 + vext.8 q15, q5, q5, #3 + vbit.32 d19, d30, d26 + vext.32 q14, q13, q13, #3 + vext.8 q15, q6, q6, #15 + vbit.32 d18, d30, d28 + vext.8 q15, q6, q6, #14 + vbit.32 d19, d30, d28 + mov r14, #0 + vdup.32 q14 , r14 + mov r14, #0xff + vmov.i8 d28[0], r14 + vext.8 q15, q6, q6, #2 + vbit.32 d19, d30, d28 + vext.32 q14, q14, q14, #3 + subs r6, r1, #5 + vext.8 q15, q6, q6, #13 + vbit.32 d19, d30, d28 + lslne r6, r0, #2 + vabdl.u8 q14, d18, d20 + vabal.u8 q14, d19, d21 + vadd.i16 d28, d29, d28 + vpaddl.u16 d28, d28 @ + vpaddl.u32 d28, d28 @/ + vmov.u32 r9, d28[0] @ + + + moveq r6, r0 @ + add r9, r6, r9 + + subs r6, r11, r9 + movgt r11, r9 + movgt r12, #5 + +horz_d: + vmov.8 q1, q5 + vmov.8 q15, q6 + vzip.8 q1, q15 + + ands r10, r8, #64 @HORZ_D sad ?? 
+ beq vert_l + vext.8 q15, q6, q6, #2 + vbit.32 d8, d30, d26 + mov r14, #0 + vdup.32 q14 , r14 + mov r14, #0xff + vmov.i8 d28[0], r14 + vext.8 q15, q5, q5, #3 + vbit.32 d8, d30, d28 + vext.8 q15, q1, q1, #2 + vbit.32 d9, d30, d26 + vext.32 q14, q13, q13, #3 + vbit.32 d8, d2, d28 + subs r6, r1, #6 + vext.8 q15, q1, q1, #12 + vbit.32 d9, d30, d28 + vabdl.u8 q14, d8, d20 + vabal.u8 q14, d9, d21 + vadd.i16 d28, d29, d28 + vpaddl.u16 d28, d28 @ + lslne r6, r0, #2 + vpaddl.u32 d28, d28 @/ + vmov.u32 r9, d28[0] @ + + + moveq r6, r0 @ + add r9, r6, r9 + + subs r6, r11, r9 + movgt r11, r9 + movgt r12, #6 +vert_l: + ands r10, r8, #128 @VERT_L sad ?? + beq horz_u + vext.8 q15, q5, q5, #5 + vbit.32 d24, d30, d26 + vext.8 q15, q15, q15, #1 + vbit.32 d25, d30, d26 + vext.8 q15, q6, q6, #1 + vext.32 q14, q13, q13, #3 + vbit.32 d24, d30, d28 + vext.8 q15, q15, q15, #1 + subs r6, r1, #7 + vbit.32 d25, d30, d28 + vabdl.u8 q14, d24, d20 + vabal.u8 q14, d25, d21 + vadd.i16 d28, d29, d28 + vpaddl.u16 d28, d28 @ + lslne r6, r0, #2 + vpaddl.u32 d28, d28 @/ + vmov.u32 r9, d28[0] @ + + moveq r6, r0 @ + add r9, r6, r9 + + subs r6, r11, r9 + movgt r11, r9 + movgt r12, #7 + +horz_u: + ands r10, r8, #256 @HORZ_U sad ?? 
+ beq pred + vrev64.8 q5, q1 + vdup.8 q1, d0[0] + vext.8 q6, q6, #7 + mov r14, #0 + vdup.32 q14 , r14 + mov r14, #0xff + vmov.i8 d28[0], r14 + vbit.32 d11, d13, d28 + movw r14, #0xffff + vmov.i16 d28[0], r14 + vext.8 q6, q5, q5, #7 + subs r6, r1, #8 + vbit.32 d3, d12, d28 + vext.8 q6, q5, q5, #3 + vbit.32 d2, d12, d26 + vext.32 q14, q13, q13, #3 + vext.8 q6, q5, q5, #1 + vbit.32 d2, d12, d28 + vabdl.u8 q14, d2, d20 + vabal.u8 q14, d3, d21 + vadd.i16 d28, d29, d28 + vpaddl.u16 d28, d28 @ + lslne r6, r0, #2 + vpaddl.u32 d28, d28 @/ + vmov.u32 r9, d28[0] @ + + + moveq r6, r0 @ + add r9, r6, r9 + + subs r6, r11, r9 + movgt r11, r9 + movgt r12, #8 + +pred: @/*dOING FINAL PREDICTION*/ +@--------------------------- + ldr r7, [sp, #116] @r7 = pu4_sadmin + ldr r6, [sp, #112] @ R6 =MODE +@-------------------------- + str r11, [r7] @/STORING MIN SAD*/ + str r12, [r6] @/FINAL MODE*/ + + + ldr r3, scratch_intrapred_luma_4x4_prediction_addr1 +scrintra_4x4: + add r3, r3, pc + lsl r12, r12, #2 + add r3, r3, r12 + + ldr r5, [r3] + and r5, r5, #0xfffffffe + + bx r5 + + +ver: + vext.8 q0, q0, q0, #1 + vdup.32 q15, d0[1] + b store + +hor: + vmov.32 q15, q3 + b store + +d_c: + vdup.8 q15, r4 + b store + +dia_dl: + vmov.32 q15, q7 + b store + +dia_dr: + vmov.32 q15, q8 + b store + +ver_r: + vmov.32 q15, q9 + b store + +hor_d: + vmov.32 q15, q4 + b store + +ver_l: + vmov.32 q15, q12 + b store + +hor_u: + vmov.32 q15, q1 + +store: @/* storing to pu1_dst*/ + + ldr r4, [sp, #104] @r4 = dst_strd, + + vst1.32 {d30[0]}, [r2], r4 + vst1.32 {d30[1]}, [r2], r4 + vst1.32 {d31[0]}, [r2], r4 + vst1.32 {d31[1]}, [r2], r4 + + +end_func: + vpop {d8-d15} + ldmfd sp!, {r4-r12, pc} @Restoring registers from stack + + + + + diff --git a/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s new file mode 100755 index 0000000..e4dfca8 --- /dev/null +++ b/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s @@ -0,0 +1,346 @@ 
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+@******************************************************************************
+@*
+@* @brief :Evaluate best intra chroma mode (among VERT, HORZ and DC )
+@*  and do the prediction.
+@*
+@* @par Description
+@*   This function evaluates the first three intra chroma modes, computes the SAD of each,
+@*   and writes the block predicted with the best mode to pu1_dst.
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source
+@*
+@* @param[in] pu1_ngbr_pels_i16
+@*  UWORD8 pointer to neighbouring pels
+@*
+@* @param[out] pu1_dst
+@*  UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@*  integer source stride
+@*
+@* @param[in] dst_strd
+@*  integer destination stride
+@*
+@* @param[in] u4_n_avblty
+@*  availability of neighbouring pixels (only bits 0 and 2 are tested here)
+@*
+@* @param[out] u4_intra_mode
+@*  Pointer to the variable in which best mode is returned (0 = DC, 1 = HORZ, 2 = VERT)
+@*
+@* @param[out] pu4_sadmin
+@*  Pointer to the variable in which minimum sad is returned
+@*
+@* @param[in] u4_valid_intra_modes
+@*  Says what all modes are valid (bit 0 = DC, bit 1 = HORZ, bit 2 = VERT)
+@*
+@*
+@* @return      none
+@*
+@******************************************************************************
+@*/
+@
+@void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
+@                                      UWORD8 *pu1_ngbr_pels_i16,
+@                                      UWORD8 *pu1_dst,
+@                                      UWORD32 src_strd,
+@                                      UWORD32 dst_strd,
+@                                      WORD32 u4_n_avblty,
+@                                      UWORD32 *u4_intra_mode,
+@                                      WORD32 *pu4_sadmin,
+@                                      UWORD32 u4_valid_intra_modes)
+@
+.text
+.p2align 2
+
+    .global ih264e_evaluate_intra_chroma_modes_a9q
+
+ih264e_evaluate_intra_chroma_modes_a9q:
+
+@r0 = pu1_src,
+@r1 = pu1_ngbr_pels_i16,
+@r2 = pu1_dst,
+@r3 = src_strd,
+@r4 = dst_strd,          (stack argument, loaded into r4 further down)
+@r5 = u4_n_avblty,       (stack argument, loaded into r5 further down)
+@r6 = u4_intra_mode,     (stack argument, loaded into r6 further down)
+@r7 = pu4_sadmin         (stack argument, loaded into r7 further down)
+
+
+
+    stmfd         sp!, {r4-r12, r14}    @store register values to stack (10 registers = 40 bytes)
+    @-----------------------
+    ldr           r5, [sp, #44]         @r5 = u4_n_avblty, (second stack argument: 40 + 4)
+    @-------------------------
+    mov           r12, r1               @ keep the neighbour pointer; it is reloaded from at sad:
+    vpush         {d8-d15}              @ 64 more bytes: stack arguments are now at sp + 104 onwards
+    vld1.32       {q4}, [r1]!           @ first 16 neighbour bytes
+    add           r1, r1, #2            @ skip 2 bytes between the two 16-byte loads
+    vld1.32       {q5}, [r1]!           @ second 16 neighbour bytes
+
+    vuzp.u8       q4, q5                @ de-interleave even/odd bytes
+
+    vpaddl.u8     d8, d8
+    vpadd.u16     d8, d8
+
+    vpaddl.u8     d9, d9
+    vpadd.u16     d9, d9
+
+    vpaddl.u8     d10, d10
+    vpadd.u16     d10, d10
+
+    vpaddl.u8     d11, d11
+
+    and           r7, r5, #5            @ keep availability bits 0 and 2 only
+    vpadd.u16     d11, d11
+    subs          r8, r7, #5            @ both bits set?
+    beq           all_available
+    subs          r8, r7, #4            @ only bit 2 set?
+    beq           top_available
+    subs          r8, r7, #1            @ only bit 0 set?
+    beq           left_available
+    mov           r10, #128             @ neither bit set: both DC registers are filled with 128
+    vdup.8        q14, r10
+    vdup.8        q15, r10
+    b             sad
+
+all_available:
+    vzip.u16      q4, q5
+    vext.16       q6, q4, q4, #2
+    vadd.u16      q7, q5, q6
+    vqrshrn.u16   d14, q7, #3
+    vqrshrn.u16   d15, q4, #2
+    vqrshrn.u16   d16, q5, #2
+    vdup.16       d28, d14[0]
+    vdup.16       d29, d16[1]
+    vdup.16       d30, d15[0]
+    vdup.16       d31, d14[1]
+    b             sad
+top_available:
+    vzip.u16      q4, q5
+    vqrshrn.u16   d16, q5, #2
+    vdup.16       d28, d16[0]
+    vdup.16       d29, d16[1]
+    vdup.16       d30, d16[0]
+    vdup.16       d31, d16[1]
+    b             sad
+left_available:
+    vzip.u16      q4, q5
+    vqrshrn.u16   d16, q4, #2
+    vdup.16       d28, d16[3]
+    vdup.16       d29, d16[3]
+    vdup.16       d30, d16[2]
+    vdup.16       d31, d16[2]
+
+
+sad:
+    vld1.32       {q4}, [r12]!          @ reload the first 16 neighbour bytes (q4 was overwritten above)
+    sub           r8, r12, #2           @ r8 -> last halfword of those 16 bytes
+    add           r12, r12, #2
+    vld1.32       {q5}, [r12]!          @ reload the second 16 neighbour bytes; q5 is the VERT predictor
+    add           r12, r0, r3, lsl #2   @ r12 = pu1_src + 4 * src_strd
+    sub           r10, r8, #8           @ r10 -> halfword used for row 4
+    vld1.32       {q0}, [r0], r3
+    ldrh          r9, [r8]
+    vdup.16       q10, r9               @ row 0
+
+    @/vertical row 0;
+    vabdl.u8      q8, d0, d10
+    vabdl.u8      q9, d1, d11
+    sub           r8, r8, #2
+    vld1.32       {q1}, [r12], r3
+
+    @/HORZ row 0;
+    vabdl.u8      q13, d0, d20
+    vabdl.u8      q7, d1, d21
+    ldrh          r9, [r10]
+    @/dc row 0;
+    vabdl.u8      q11, d0, d28
+    vabdl.u8      q12, d1, d29
+
+
+    vdup.16       q10, r9               @ row 4
+    @/vertical row 4;
+    vabal.u8      q8, d2, d10
+    vabal.u8      q9, d3, d11
+    sub           r10, r10, #2
+
+    @/HORZ row 4;
+    vabal.u8      q13, d2, d20
+    vabal.u8      q7, d3, d21
+    @/dc row 4;
+    vabal.u8      q11, d2, d30
+    vabal.u8      q12, d3, d31
+
+    mov           r11, #3               @ three more iterations: rows 1..3 together with rows 5..7
+
+loop:
+    vld1.32       {q0}, [r0], r3
+    ldrh          r9, [r8]
+
+
+    @/vertical row i;
+    vabal.u8      q8, d0, d10
+    vabal.u8      q9, d1, d11
+
+    vdup.16       q10, r9               @ row i
+    vld1.32       {q1}, [r12], r3
+    sub           r8, r8, #2
+    @/HORZ row i;
+    vabal.u8      q13, d0, d20
+    vabal.u8      q7, d1, d21
+    ldrh          r9, [r10]
+    @/dc row i;
+    vabal.u8      q11, d0, d28
+    vabal.u8      q12, d1, d29
+    sub           r10, r10, #2
+
+    vdup.16       q10, r9               @ row i+4
+    @/vertical row i+4;
+    vabal.u8      q8, d2, d10
+    vabal.u8      q9, d3, d11
+    subs          r11, r11, #1
+
+    @/HORZ row i+4;
+    vabal.u8      q13, d2, d20
+    vabal.u8      q7, d3, d21
+    @/dc row i+4;
+    vabal.u8      q11, d2, d30
+    vabal.u8      q12, d3, d31
+    bne           loop
+
+
+
+@-------------------------------------------
+
+    vadd.i16      q9, q9, q8            @/VERT
+    vadd.i16      q7, q13, q7           @/HORZ
+    vadd.i16      q12, q11, q12         @/DC
+    vadd.i16      d18, d19, d18         @/VERT
+    vadd.i16      d14, d15, d14         @/HORZ
+    vadd.i16      d24, d24, d25         @/DC
+    vpaddl.u16    d18, d18              @/VERT
+    vpaddl.u16    d14, d14              @/HORZ
+    vpaddl.u16    d24, d24              @/DC
+    vpaddl.u32    d18, d18              @/VERT
+    vpaddl.u32    d14, d14              @/HORZ
+    vpaddl.u32    d24, d24              @/DC
+
+
+
+    vmov.u32      r8, d18[0]            @ vert
+    vmov.u32      r9, d14[0]            @horz
+    vmov.u32      r10, d24[0]           @dc
+
+    mov           r11, #1
+@-----------------------
+    ldr           r0, [sp, #120]        @ u4_valid_intra_modes (40 + 64 + 16)
+@--------------------------------------------
+
+
+    lsl           r11 , #30             @ r11 = 1 << 30: SAD substituted for a mode that is not enabled
+
+    ands          r7, r0, #04           @ vert mode valid????????????
+    moveq         r8, r11
+
+    ands          r6, r0, #02           @ horz mode valid????????????
+    moveq         r9, r11
+
+    ands          r6, r0, #01           @ dc mode valid????????????
+    moveq         r10, r11
+
+
+    @---------------------------
+    ldr           r4, [sp, #104]        @r4 = dst_strd,
+    ldr           r6, [sp, #112]        @ R6 =MODE
+    ldr           r7, [sp, #116]        @r7 = pu4_sadmin
+
+    @--------------------------
+
+    cmp           r10, r9               @ DC SAD against HORZ SAD (signed compare)
+    bgt           not_dc
+    cmp           r10, r8               @ DC SAD against VERT SAD
+    bgt           do_vert
+
+    @/----------------------
+    @DO DC PREDICTION
+    str           r10 , [r7]            @MIN SAD
+    mov           r10, #0
+    str           r10 , [r6]            @ MODE
+    b             do_dc_vert
+    @-----------------------------
+
+not_dc:
+    cmp           r9, r8                @ HORZ SAD against VERT SAD
+    bgt           do_vert
+    @/----------------------
+    @DO HORIZONTAL
+
+    vdup.16       q10, d9[3]            @/HORIZONTAL VALUE ROW=0;
+    str           r9 , [r7]             @MIN SAD
+    mov           r9, #1
+    vdup.16       q11, d9[2]            @/HORIZONTAL VALUE ROW=1;
+    str           r9 , [r6]             @ MODE
+    vdup.16       q12, d9[1]            @/HORIZONTAL VALUE ROW=2;
+    vst1.32       {d20, d21} , [r2], r4 @0
+    vdup.16       q13, d9[0]            @/HORIZONTAL VALUE ROW=3;
+    vst1.32       {d22, d23} , [r2], r4 @1
+    vdup.16       q14, d8[3]            @/HORIZONTAL VALUE ROW=4;
+    vst1.32       {d24, d25} , [r2], r4 @2
+    vdup.16       q15, d8[2]            @/HORIZONTAL VALUE ROW=5;
+    vst1.32       {d26, d27} , [r2], r4 @3
+    vdup.16       q1, d8[1]             @/HORIZONTAL VALUE ROW=6;
+    vst1.32       {d28, d29} , [r2], r4 @4
+    vdup.16       q2, d8[0]             @/HORIZONTAL VALUE ROW=7;
+    vst1.32       {d30, d31} , [r2], r4 @5
+    vst1.32       {d2, d3} , [r2], r4   @6
+    vst1.32       {d4, d5} , [r2], r4   @7
+    b             end_func
+
+do_vert:
+    @DO VERTICAL PREDICTION
+    str           r8 , [r7]             @MIN SAD
+    mov           r8, #2
+    str           r8 , [r6]             @ MODE
+    vmov          q15, q5               @ rows 4..7 are stored from q15 below
+    vmov          q14, q5               @ rows 0..3 are stored from q14 below
+
+do_dc_vert:
+    vst1.32       {d28, d29} , [r2], r4 @0
+    vst1.32       {d28, d29} , [r2], r4 @1
+    vst1.32       {d28, d29} , [r2], r4 @2
+    vst1.32       {d28, d29} , [r2], r4 @3
+    vst1.32       {d30, d31} , [r2], r4 @4
+    vst1.32       {d30, d31} , [r2], r4 @5
+    vst1.32       {d30, d31} , [r2], r4 @6
+    vst1.32       {d30, d31} , [r2], r4 @7
+
+
+end_func:
+    vpop          {d8-d15}
+    ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack
+
+
+
diff --git a/encoder/arm/ih264e_fmt_conv.s b/encoder/arm/ih264e_fmt_conv.s
new file mode 100755
index 0000000..2bf1479
---
/dev/null
+++ b/encoder/arm/ih264e_fmt_conv.s
@@ -0,0 +1,329 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+.text
+.p2align 2
+@/**
+
+@/*****************************************************************************
+@*                                                                            *
+@*  Function Name    : ih264e_fmt_conv_420p_to_420sp_a9q()                    *
+@*                                                                            *
+@*  Description      : This function converts the image from YUV420P color   *
+@*                     space to 420SP color space(UV interleaved).            *
+@*                                                                            *
+@*  Arguments        : R0           pu1_y                                     *
+@*                     R1           pu1_u                                     *
+@*                     R2           pu1_v                                     *
+@*                     R3           pu1_dest_y                                *
+@*                     [R13 #40]    pu1_dest_uv                               *
+@*                     [R13 #44]    u2_height                                 *
+@*                     [R13 #48]    u2_width                                  *
+@*                     [R13 #52]    u2_stridey                                *
+@*                     [R13 #56]    u2_strideu                                *
+@*                     [R13 #60]    u2_stridev                                *
+@*                     [R13 #64]    u2_dest_stride_y                          *
+@*                     [R13 #68]    u2_dest_stride_uv                         *
+@*                     [R13 #72]    convert_uv_only                           *
+@*                     (offsets are valid after the 40-byte core register push)*
+@*  Values Returned  : None                                                   *
+@*                                                                            *
+@*  Register Usage   : R0 - R14                                               *
+@*                                                                            *
+@*  Stack Usage      : 104 Bytes (40 core registers + 64 for d8-d15)          *
+@*                                                                            *
+@*  Interruptibility : Interruptible                                          *
+@*                                                                            *
+@*  Known Limitations                                                         *
+@*       Assumptions: Image Width:     Assumed to be multiple of 16 and       *
+@*                     greater than or equal to 16                            *
+@*                     Image Height:    Assumed to be even.                   *
+@*                                                                            *
+@*  Revision History :                                                        *
+@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
+@*         07 06 2010   Varshita        Draft                                 *
+@*         07 06 2010   Naveen Kr T     Completed                             *
+@*                                                                            *
+@*****************************************************************************/
+    .global ih264e_fmt_conv_420p_to_420sp_a9q
+
+ih264e_fmt_conv_420p_to_420sp_a9q:
+
+    @// push the registers on the stack
+    stmfd         sp!, {r4-r12, lr}
+
+    ldr           r4, [sp, #72]         @// Load convert_uv_only
+
+    cmp           r4, #1
+    beq           yuv420sp_uv_chroma    @// convert_uv_only == 1: skip the luma copy
+    @/* Do the preprocessing before the main loops start */
+    @// Load the parameters from stack
+    ldr           r4, [sp, #44]         @// Load u2_height from stack
+    ldr           r5, [sp, #48]         @// Load u2_width from stack
+    ldr           r7, [sp, #52]         @// Load u2_stridey from stack
+    ldr           r8, [sp, #64]         @// Load u2_dest_stride_y from stack
+    sub           r7, r7, r5            @// Source increment
+    sub           r8, r8, r5            @// Destination increment
+
+    @// No vpush here: this path uses d0/d1 only and falls through to the chroma path, which reads [sp, #40..#68] and does the single vpush
+yuv420sp_uv_row_loop_y:
+    mov           r6, r5
+
+yuv420sp_uv_col_loop_y:
+    pld           [r0, #128]
+    vld1.8        {d0, d1}, [r0]!
+    vst1.8        {d0, d1}, [r3]!
+    sub           r6, r6, #16
+    cmp           r6, #15
+    bgt           yuv420sp_uv_col_loop_y
+
+    cmp           r6, #0
+    beq           yuv420sp_uv_row_loop_end_y
+    @//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
+    @//Ex if width is 162, above loop will process 160 pixels. And
+    @//Both source and destination will point to 146th pixel and then 16 bytes will be read
+    @// and written using VLD1 and VST1
+    rsb           r6, r6, #16
+    sub           r0, r0, r6
+    sub           r3, r3, r6
+
+    vld1.8        {d0, d1}, [r0]!
+    vst1.8        {d0, d1}, [r3]!
+
+yuv420sp_uv_row_loop_end_y:
+    add           r0, r0, r7
+    add           r3, r3, r8
+    subs          r4, r4, #1
+    bgt           yuv420sp_uv_row_loop_y
+
+yuv420sp_uv_chroma:
+
+    ldr           r3, [sp, #40]         @// Load pu1_dest_uv from stack
+
+    ldr           r4, [sp, #44]         @// Load u2_height from stack
+
+    ldr           r5, [sp, #48]         @// Load u2_width from stack
+
+
+    ldr           r7, [sp, #56]         @// Load u2_strideu from stack
+
+    ldr           r8, [sp, #68]         @// Load u2_dest_stride_uv from stack
+
+    sub           r7, r7, r5, lsr #1    @// Source increment
+
+    sub           r8, r8, r5            @// Destination increment
+
+    mov           r5, r5, lsr #1        @// chroma width  = u2_width / 2
+    mov           r4, r4, lsr #1        @// chroma height = u2_height / 2
+    ldr           r3, [sp, #40]         @// Load pu1_dest_uv from stack
+    vpush         {d8-d15}              @// the only NEON push; paired with the vpop before the return
+yuv420sp_uv_row_loop_uv:
+    mov           r6, r5
+
+
+yuv420sp_uv_col_loop_uv:
+    pld           [r1, #128]
+    pld           [r2, #128]
+    vld1.8        d0, [r1]!
+    vld1.8        d1, [r2]!
+    vst2.8        {d0, d1}, [r3]!
+    sub           r6, r6, #8
+    cmp           r6, #7
+    bgt           yuv420sp_uv_col_loop_uv
+
+    cmp           r6, #0
+    beq           yuv420sp_uv_row_loop_end_uv
+    @//If the chroma width is not a multiple of 8, go back by a few bytes so that 8 bytes can be read
+    @//from each of U and V. Ex: if the chroma width is 81, the loop above processes 80 samples;
+    @//the U and V pointers move back to sample 73 and the UV destination moves back twice as far,
+    @// then 8 U + 8 V bytes are read and 16 interleaved bytes are written
+    rsb           r6, r6, #8
+    sub           r1, r1, r6
+    sub           r2, r2, r6
+    sub           r3, r3, r6, lsl #1
+
+    vld1.8        d0, [r1]!
+    vld1.8        d1, [r2]!
+    vst2.8        {d0, d1}, [r3]!
+
+yuv420sp_uv_row_loop_end_uv:
+    add           r1, r1, r7
+    add           r2, r2, r7
+    add           r3, r3, r8
+    subs          r4, r4, #1
+    bgt           yuv420sp_uv_row_loop_uv
+    @//POP THE REGISTERS
+    vpop          {d8-d15}
+    ldmfd         sp!, {r4-r12, pc}
+
+
+
+
+
+@ /**
+@ *******************************************************************************
+@ *
+@ * @brief ih264e_fmt_conv_422i_to_420sp_a9q
+@ *     Function used from format conversion or frame copy
+@ *
+@ *
+@ *
+@ *Inputs      :   r0 - pu1_y              -   UWORD8 pointer to y plane.
+@ *                 r1 - pu1_u              -   UWORD8 pointer to u plane.
+@ *                 r2 - pu1_v              -   UWORD8 pointer to v plane.
+@ *                 r3 - pu2_yuv422i        -   UWORD16 pointer to yuv422i image.
+@ *                 stack + 40 - u4_width   -   Width of the Y plane.
+@ *                 44 - u4_height          -   Height of the Y plane.
+@ *                 48 - u4_stride_y        -   Stride in pixels of Y plane.
+@ *                 52 - u4_stride_u        -   Stride in pixels of U plane.
+@ *                 56 - u4_stride_v        -   Stride in pixels of V plane.
+@ *                 60 - u4_stride_yuv422i  -   Stride in pixels of yuv422i image.
+@ * NOTE(review): the @param/@returns text below does not match the Inputs listed above - confirm against the C prototype
+@ * @par   Description
+@ * Function used from copying or converting a reference frame to display buffer
+@ * in non shared mode
+@ *
+@ * @param[in] pu1_y_dst
+@ *   Output Y pointer
+@ *
+@ * @param[in] pu1_u_dst
+@ *   Output U/UV pointer ( UV is interleaved in the same format as that of input)
+@ *
+@ * @param[in] pu1_v_dst
+@ *   Output V pointer ( used in 420P output case)
+@ *
+@ * @param[in] u4_dst_y_strd
+@ *   Stride of destination Y buffer
+@ *
+@ * @param[in] u4_dst_u_strd
+@ *   Stride of destination  U/V buffer
+@ *
+@ *
+@ * @param[in] blocking
+@ *   To indicate whether format conversion should wait till frame is reconstructed
+@ *   and then return after complete copy is done. To be set to 1 when called at the
+@ *   end of frame processing and set to 0 when called between frame processing modules
+@ *   in order to utilize available MCPS
+@ *
+@ * @returns Error from IH264E_ERROR_T
+@ *
+@ * @remarks
+@ * Assumes that the stride of U and V buffers are same.
+@ * This is correct in most cases
+@ * If a case comes where this is not true we need to modify the fmt conversion functions called inside also
+@ * Since we read 4 pixels at a time the width should be aligned to 4
+@ * In assembly width should be aligned to 16 and height to 2.
+@ *
+@ *
+@ * Revision History :
+@ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+@ *         07 06 2010   Harinarayanan K K       Adapted to 422p
+@ *
+@ *******************************************************************************
+@ */
+
+@//`
+@*/
+    .global ih264e_fmt_conv_422i_to_420sp_a9q
+ih264e_fmt_conv_422i_to_420sp_a9q:
+    stmfd         sp!, {r4-r12, lr}     @// Back up the registers which are used (40 bytes)
+
+
+
+    @/* Do the preprocessing before the main loops start */
+    @// Load the parameters from stack
+    ldr           r4, [sp, #48]         @// Load u4_stride_y from stack
+
+    ldr           r5, [sp, #60]         @// Load u4_stride_yuv422i from stack
+    add           r6, r0, r4            @// pu1_y_nxt_row = pu1_y + u4_stride_y
+
+    ldr           r7, [sp, #40]         @// Load u4_width from stack
+    add           r8, r3, r5, lsl #1    @// pu2_yuv422i_nxt_row = pu2_yuv422i_y + u4_stride_yuv422i(2 Bytes for each pixel)
+
+    ldr           r9, [sp, #52]         @// Load u4_stride_u from stack
+    sub           r12, r4, r7           @// u2_offset1 = u4_stride_y - u4_width
+
+@LDR       r10,[sp,#56]                    ;// Load u4_stride_v from stack
+    sub           r14, r5, r7           @// u2_offset_yuv422i = u4_stride_yuv422i - u4_width
+
+    ldr           r11, [sp, #44]        @// Load u4_height from stack
+    sub           r9, r9, r7            @// u2_offset2 = u4_stride_u - u4_width (the code subtracts the full width)
+
+@ SUB       r10,r10,r7,ASR #1               ;// u2_offset3 = u4_stride_v - u4_width >> 1
+    mov           r14, r14, lsl #1      @// u2_offset_yuv422i = u2_offset_yuv422i * 2
+
+    mov           r7, r7, asr #4        @// u4_width = u4_width / 16 (u4_width >> 4)
+    mov           r11, r11, asr #1      @// u4_height = u4_height / 2 (u4_height >> 1)
+
+    add           r4, r12, r4           @// u2_offset1 = u2_offset1 + u4_stride_y
+    add           r5, r14, r5, lsl #1   @// u2_offset_yuv422i = u2_offset_yuv422i + 2 * u4_stride_yuv422i
+
+    vpush         {d8-d15}              @// all stack arguments were loaded above, before this push
+
+@// Register Assignment
+@// pu1_y               - r0
+@// pu1_y_nxt_row       - r6
+@// pu1_u               - r1
+@// pu1_v               - r2  (not referenced by any instruction in this routine)
+@// pu2_yuv422i         - r3
+@// pu2_yuv422i_nxt_row - r8
+@// u2_offset1          - r4
+@// u2_offset2          - r9
+@// u2_offset3          - r10 (its load is commented out above)
+@// u2_offset_yuv422i   - r5
+@// u4_width / 16       - r7
+@// u4_height / 2       - r11
+@// inner loop count    - r12
+yuv420_to_yuv422i_hight_loop:
+
+    mov           r12, r7               @// Inner loop count = u4_width / 16
+
+yuv420_to_yuv422i_width_loop:
+    vld4.8        {d0, d1, d2, d3}, [r3]! @// Load the 16 elements of row 1
+    vld4.8        {d4, d5, d6, d7}, [r8]! @// Load the 16 elements of row 2
+    subs          r12, r12, #1
+
+    vrhadd.u8     d0, d0, d4            @// rounding average of byte 0 of each 4-byte group, row 1 with row 2
+    vrhadd.u8     d2, d2, d6            @// rounding average of byte 2 of each 4-byte group, row 1 with row 2
+
+    vst2.8        {d1, d3}, [r0]!       @// Store the 16 elements of row1 Y
+    vst2.8        {d5, d7}, [r6]!       @// Store the 16 elements of row2 Y
+
+    vst2.8        {d0, d2}, [r1]!       @// Store the 8 averaged pairs (16 bytes, interleaved) through pu1_u
+
+    bgt           yuv420_to_yuv422i_width_loop
+
+    @// Update the buffer pointer so that they will refer to next pair of rows
+    add           r0, r0, r4            @// pu1_y = pu1_y + u2_offset1
+    add           r6, r6, r4            @// pu1_y_nxt_row = pu1_y_nxt_row + u2_offset1
+
+    add           r1, r1, r9            @// pu1_u = pu1_u + u2_offset2
+    subs          r11, r11, #1
+
+    add           r3, r3, r5            @// pu2_yuv422i = pu2_yuv422i + u2_offset_yuv422i
+
+    add           r8, r8, r5            @// pu2_yuv422i_nxt_row = pu2_yuv422i_nxt_row + u2_offset_yuv422i
+    bgt           yuv420_to_yuv422i_hight_loop
+    vpop          {d8-d15}
+    ldmfd         sp!, {r4-r12, pc}     @// Restore the registers which are used
+
+
+
diff --git a/encoder/arm/ih264e_function_selector.c b/encoder/arm/ih264e_function_selector.c
new file mode 100755
index 0000000..bb181c1
--- /dev/null
+++ b/encoder/arm/ih264e_function_selector.c
@@ -0,0 +1,170 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_function_selector.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in h264
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  - ih264e_init_function_ptr, ih264e_default_arch
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+*  codec context
+*
+* @par Description: the current routine first installs the generic function
+*  pointers and then, unless the configured architecture is ARCH_ARM_NONEON, the NEON ones
+*
+* @param[in] pv_codec
+*  Codec context pointer (cast to codec_t * inside the function)
+*
+* @returns  none
+*
+* @remarks Two definitions follow; the ARMV8 macro selects which one is compiled
+*
+*******************************************************************************
+*/
+#ifdef ARMV8
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    codec_t *ps_codec = (codec_t *)pv_codec;
+    ih264e_init_function_ptr_generic(ps_codec);
+    switch(ps_codec->s_cfg.e_arch)
+    {
+        case ARCH_ARM_NONEON: /* keep the generic pointers installed above */
+            break;
+        case ARCH_ARM_A53:
+        case ARCH_ARM_A57:
+        case ARCH_ARM_V8_NEON:
+            ih264e_init_function_ptr_neon_av8(ps_codec);
+            break;
+        default: /* any other value gets the same NEON initialisation */
+            ih264e_init_function_ptr_neon_av8(ps_codec);
+            break;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns a fixed architecture value chosen
+*  at compile time: ARCH_ARM_V8_NEON when ARMV8 is defined, else ARCH_ARM_A9Q
+*
+* @param[in] void
+*
+* @returns  IV_ARCH_T
+*  architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void)
+{
+    return ARCH_ARM_V8_NEON;
+}
+
+#else
+
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    codec_t *ps_codec = (codec_t *)pv_codec;
+    ih264e_init_function_ptr_generic(ps_codec);
+    switch(ps_codec->s_cfg.e_arch)
+    {
+        case ARCH_ARM_NONEON: /* keep the generic pointers installed above */
+            break;
+        case ARCH_ARM_A9Q:
+        case ARCH_ARM_A9A:
+        case ARCH_ARM_A9:
+        case ARCH_ARM_A7:
+        case ARCH_ARM_A5:
+        case ARCH_ARM_A15:
+            ih264e_init_function_ptr_neon_a9q(ps_codec);
+            break;
+        default: /* any other value gets the same NEON initialisation */
+            ih264e_init_function_ptr_neon_a9q(ps_codec);
+            break;
+    }
+}
+
+IV_ARCH_T ih264e_default_arch(void)
+{
+    return ARCH_ARM_A9Q;
+}
+
+#endif
diff --git a/encoder/arm/ih264e_function_selector_a9q.c b/encoder/arm/ih264e_function_selector_a9q.c
new file mode 100755
index 0000000..8b2879b
--- /dev/null
+++ b/encoder/arm/ih264e_function_selector_a9q.c
@@ -0,0 +1,252 @@
+/******************************************************************************
+ * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264e_function_selector_generic.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* Ittiam +* +* @par List of Functions: +* - ih264e_init_function_ptr_generic +* +* @remarks +* None +* +******************************************************************************* +*/ + + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264e_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include 
"ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" + +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_platform_macros.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264e_defs.h" +#include "ih264e_structs.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264e_core_coding.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_cavlc.h" +#include "ih264_padding.h" +#include "ih264e_intra_modes_eval.h" +#include "ih264_mem_fns.h" +#include "ih264e_fmt_conv.h" +#include "ih264e_half_pel.h" + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec) +{ + WORD32 i= 0; + + /* curr proc ctxt */ + process_ctxt_t *ps_proc = NULL; + me_ctxt_t *ps_me_ctxt = NULL; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 16x16 */ + ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q; + ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q; + ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q; + ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 4x4 */ + ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q; + ps_codec->apf_intra_pred_4_l[1] = 
ih264_intra_pred_luma_4x4_mode_horz_a9q; + ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q; + ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q; + ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q; + ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q; + ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q; + ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q; + ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 8x8 */ + ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q; + ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q; + ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q; + ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q; + ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q; + ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q; + ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q; + ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q; + + /* Init function pointers for intra pred leaf level functions chroma + * Intra 8x8 */ + ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_a9q; + ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q; + ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_a9q; + ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q; + + /* Init forward transform fn ptr */ + ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8; + ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_a9; + ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_a9; + ps_codec->pf_hadamard_quant_4x4 = 
ih264_hadamard_quant_4x4_a9; + ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_a9; + + /* Init inverse transform fn ptr */ + ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8; + ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_a9; + ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_a9; + ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_a9; + ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_a9; + ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9; + ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9; + ps_codec->pf_interleave_copy = ih264_interleave_copy_a9; + + /* Init fn ptr luma core coding */ + ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16; + ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4; + ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16; + + /* Init fn ptr chroma core coding */ + ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8; + ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8; + + /* Init fn ptr luma deblocking */ + ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9; + ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9; + ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9; + ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9; + + /* Init fn ptr chroma deblocking */ + ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9; + ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9; + ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9; + ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9; + + /* write mb syntax layer */ + ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb; + 
ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb; + + /* Padding Functions */ + ps_codec->pf_pad_top = ih264_pad_top_a9q; + ps_codec->pf_pad_bottom = ih264_pad_bottom; + ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q; + ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q; + ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q; + ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q; + + /* Inter pred leaf level functions */ + ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_a9q; + ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_a9q; + ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_a9q; + ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear_a9q; + ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q; + + /* sad me level functions */ + ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q; + ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q; + ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q; + + /* memor handling operations */ + ps_codec->pf_mem_cpy = ih264_memcpy_a9q; + ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q; + ps_codec->pf_mem_set = ih264_memset_a9q; + ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_a9q; + + /* sad me level functions */ + for(i = 0; i < (MAX_PROCESS_CTXT); i++) + { + ps_proc = &ps_codec->as_process[i]; + ps_me_ctxt = &ps_proc->s_me_ctxt; + ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q; + ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q; + ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_a9q; + ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_a9q; + ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_a9q; + ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_a9q; + ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_a9q; + ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = 
ime_compute_satqd_16x16_lumainter_a9q; + } + + /* intra mode eval -encoder level function */ + ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_a9q; + ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_a9q; + ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_a9q; + + /* csc */ + ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp_a9q; + ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp_a9q; + + /* Half pel generation function - encoder level*/ + ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_a9q; + ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_a9q; + + return ; + } + diff --git a/encoder/arm/ih264e_function_selector_av8.c b/encoder/arm/ih264e_function_selector_av8.c new file mode 100755 index 0000000..173c2d5 --- /dev/null +++ b/encoder/arm/ih264e_function_selector_av8.c @@ -0,0 +1,259 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264e_function_selector_av8.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* Ittiam +* +* @par List of Functions: +* - ih264e_init_function_ptr_neon_av8 +* +* @remarks +* None +* +******************************************************************************* +*/ + + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264e_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" + +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_platform_macros.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264e_defs.h" +#include "ih264e_structs.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264e_core_coding.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_cavlc.h" +#include "ih264_padding.h" +#include 
"ih264e_intra_modes_eval.h" +#include "ih264_mem_fns.h" +#include "ih264e_fmt_conv.h" +#include "ih264e_half_pel.h" + + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec) +{ + + WORD32 i= 0; + + /* curr proc ctxt */ + process_ctxt_t *ps_proc = NULL; + me_ctxt_t *ps_me_ctxt = NULL; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 16x16 */ + ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_av8; + ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_av8; + ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_av8; + ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_av8; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 4x4 */ + ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_av8; + ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_av8; + ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_av8; + ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_av8; + ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_av8; + ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_av8; + ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_av8; + ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_av8; + ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_av8; + + /* Init 
function pointers for intra pred leaf level functions luma + * Intra 8x8 */ + ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_av8; + ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_av8; + ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_av8; + ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_av8; + ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_av8; + ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_av8; + ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_av8; + ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_av8; + + /* Init function pointers for intra pred leaf level functions chroma + * Intra 8x8 */ + ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_av8; + ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_av8; + ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_av8; + ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_av8; + + + /* Init forward transform fn ptr */ + ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8; + ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_av8; + ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_av8; + ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_av8; + ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_av8; + + /* Init inverse transform fn ptr */ + ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8_av8; + ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_av8; + ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_av8; + ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_av8; + ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_av8; + ps_codec->pf_ihadamard_scaling_4x4 = 
ih264_ihadamard_scaling_4x4_av8; + ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_av8; + ps_codec->pf_interleave_copy = ih264_interleave_copy_av8; + + /* Init fn ptr luma core coding */ + ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16; + ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4; + ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16; + + /* Init fn ptr chroma core coding */ + ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8; + ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8; + + /* Init fn ptr luma deblocking */ + ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_av8; + ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_av8; + ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_av8; + ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_av8; + + /* Init fn ptr chroma deblocking */ + ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_av8; + ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_av8; + ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_av8; + ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_av8; + + /* write mb syntax layer */ + ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb; + ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb; + + /* Padding Functions */ + ps_codec->pf_pad_top = ih264_pad_top_av8; + ps_codec->pf_pad_bottom = ih264_pad_bottom; + ps_codec->pf_pad_left_luma = ih264_pad_left_luma_av8; + ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_av8; + ps_codec->pf_pad_right_luma = ih264_pad_right_luma_av8; + ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_av8; + + /* Inter pred leaf level functions */ + ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_av8; + ps_codec->pf_inter_pred_luma_horz = 
ih264_inter_pred_luma_horz_av8; + ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_av8; + ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear; + ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_av8; + + /* sad me level functions */ + ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_av8; + ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8; + ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_av8; + + /* memory handling operations */ + ps_codec->pf_mem_cpy = ih264_memcpy_av8; + ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_av8; + ps_codec->pf_mem_set = ih264_memset_av8; + ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_av8; + + /* sad me level functions */ + for(i = 0; i < (MAX_PROCESS_CTXT); i++) + { + ps_proc = &ps_codec->as_process[i]; + ps_me_ctxt = &ps_proc->s_me_ctxt; + ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_av8; + ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8; + ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_av8; + ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_av8; + ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_av8; + ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_av8; + ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_av8; + ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_av8; + } + + /* intra mode eval -encoder level function */ + ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_av8; + ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_av8; + ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes; + + /* csc */ + ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp; + ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp; + + /* Half pel generation function - encoder level*/ + 
ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_av8; + ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_av8; + + return ; + } + diff --git a/encoder/arm/ih264e_half_pel.s b/encoder/arm/ih264e_half_pel.s new file mode 100755 index 0000000..1b9a87a --- /dev/null +++ b/encoder/arm/ih264e_half_pel.s @@ -0,0 +1,951 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +@*/ +@/** +@ ******************************************************************************* +@ * @file +@ * ih264e_half_pel.s +@ * +@ * @brief +@ * +@ * +@ * @author +@ * Ittiam +@ * +@ * @par List of Functions: +@ * ih264e_sixtapfilter_horz +@ * ih264e_sixtap_filter_2dvh_vert +@ +@ * +@ * @remarks +@ * None +@ * +@ ******************************************************************************* +@ */ + + +.text +.p2align 2 + +@ /** +@/******************************************************************************* +@* +@* @brief +@* Interprediction luma filter for horizontal input(Filter run for width = 17 and height =16) +@* +@* @par Description: +@* Applies a 6 tap horizontal filter .The output is clipped to 8 bits +@* sec 8.4.2.2.1 titled "Luma sample interpolation process" +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride +@* +@* +@* @returns +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ +@void ih264e_sixtapfilter_horz(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst, +@ WORD32 src_strd, +@ WORD32 dst_strd); + + +.equ HALFPEL_WIDTH , 17 + 1 @( make it even, two rows are processed at a time) + + + .global ih264e_sixtapfilter_horz_a9q +ih264e_sixtapfilter_horz_a9q: + stmfd sp!, {lr} + + vmov.i8 d0, #5 + sub r0, r0, #2 + + vmov.i8 d1, #20 + mov r14, #HALFPEL_WIDTH + vpush {d8-d15} + +filter_horz_loop: + + + vld1.8 {d2, d3, d4}, [r0], r2 @// Load row0 + vld1.8 {d5, d6, d7}, [r0], r2 @// Load row1 + + @// Processing row0 and row1 + + vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0) + vext.8 d30, d3, d4, #5 @//extract a[5] (column2,row0) + + vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row0) + vext.8 d29, d4, d4, #5 @//extract a[5] (column3,row0) + vaddl.u8 q5, d30, d3 @// a0 + a5 (column2,row0) + vext.8 
d28, d5, d6, #5 @//extract a[5] (column1,row1) + vaddl.u8 q6, d29, d4 @// a0 + a5 (column3,row0) + vext.8 d27, d6, d7, #5 @//extract a[5] (column2,row1) + vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row1) + vext.8 d26, d7, d7, #5 @//extract a[5] (column3,row1) + + vaddl.u8 q8, d27, d6 @// a0 + a5 (column2,row1) + vext.8 d31, d2, d3, #2 @//extract a[2] (column1,row0) + vaddl.u8 q9, d26, d7 @// a0 + a5 (column3,row1) + vext.8 d30, d3, d4, #2 @//extract a[2] (column2,row0) + vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 (column1,row0) + vext.8 d29, d4, d4, #2 @//extract a[2] (column3,row0) + vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 (column2,row0) + vext.8 d28, d5, d6, #2 @//extract a[2] (column1,row1) + vmlal.u8 q6, d29, d1 @// a0 + a5 + 20a2 (column3,row0) + vext.8 d27, d6, d7, #2 @//extract a[2] (column2,row1) + vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 (column1,row1) + vext.8 d26, d7, d7, #2 @//extract a[2] (column3,row1) + + vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 (column2,row1) + vext.8 d31, d2, d3, #3 @//extract a[3] (column1,row0) + vmlal.u8 q9, d26, d1 @// a0 + a5 + 20a2 (column3,row1) + vext.8 d30, d3, d4, #3 @//extract a[3] (column2,row0) + vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0) + vext.8 d29, d4, d4, #3 @//extract a[3] (column3,row0) + vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0) + vext.8 d28, d5, d6, #3 @//extract a[3] (column1,row1) + vmlal.u8 q6, d29, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0) + vext.8 d27, d6, d7, #3 @//extract a[3] (column2,row1) + vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row1) + vext.8 d26, d7, d7, #3 @//extract a[3] (column3,row1) + + vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row1) + vext.8 d31, d2, d3, #1 @//extract a[1] (column1,row0) + vmlal.u8 q9, d26, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row1) + vext.8 d30, d3, d4, #1 @//extract a[1] (column2,row0) + vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vext.8 d29, d4, d4, #1 @//extract a[1] 
(column3,row0) + vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + vext.8 d28, d5, d6, #1 @//extract a[1] (column1,row1) + vmlsl.u8 q6, d29, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) + vext.8 d27, d6, d7, #1 @//extract a[1] (column2,row1) + vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1) + vext.8 d26, d7, d7, #1 @//extract a[1] (column3,row1) + + vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1) + vext.8 d31, d2, d3, #4 @//extract a[4] (column1,row0) + vmlsl.u8 q9, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row1) + vext.8 d30, d3, d4, #4 @//extract a[4] (column2,row0) + vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + vext.8 d29, d4, d4, #4 @//extract a[4] (column3,row0) + vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + vext.8 d28, d5, d6, #4 @//extract a[4] (column1,row1) + vmlsl.u8 q6, d29, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) + vext.8 d27, d6, d7, #4 @//extract a[4] (column2,row1) + vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1) + vext.8 d26, d7, d7, #4 @//extract a[4] (column3,row1) + + vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1) + vmlsl.u8 q9, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row1) + + vqrshrun.s16 d20, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + vqrshrun.s16 d21, q5, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + vqrshrun.s16 d22, q6, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + vqrshrun.s16 d23, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1) + vqrshrun.s16 d24, q8, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1) + vqrshrun.s16 d25, q9, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row1) + + vst1.8 {d20, d21, d22}, [r1], r3 @//Store dest row0 + vst1.8 {d23, d24, d25}, [r1], r3 @//Store dest row1 + + 
subs r14, r14, #2 @ decrement counter + + bne filter_horz_loop + + vpop {d8-d15} + ldmfd sp!, {pc} + + + + + + + + + +@/** +@******************************************************************************* +@* +@* @brief +@* This function implements a two stage cascaded six tap filter. It +@* applies the six tap filter in the vertical direction on the +@* predictor values, followed by applying the same filter in the +@* horizontal direction on the output of the first stage. The six tap +@* filtering operation is described in sec 8.4.2.2.1 titled "Luma sample +@* interpolation process" +@* (Filter run for width = 17 and height =17) +@* @par Description: +@* The function interpolates +@* the predictors first in the vertical direction and then in the +@* horizontal direction to output the (1/2,1/2). The output of the first +@* stage of the filter is stored in the buffer pointed to by pi16_pred1(only in C) +@* in 16 bit precision. +@* +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_dst1 +@* UWORD8 pointer to the destination(vertical filtered output) +@* +@* @param[out] pu1_dst2 +@* UWORD8 pointer to the destination(out put after applying horizontal filter to the intermediate vertical output) +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] dst_strd +@* integer destination stride of pu1_dst +@* +@* @param[in]pi16_pred1 +@* Pointer to 16bit intermediate buffer(used only in c) +@* +@* @param[in] pi16_pred1_strd +@* integer destination stride of pi16_pred1 +@* +@* +@* @returns +@* +@* @remarks +@* None +@* +@******************************************************************************* +@*/ +@void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src, +@ UWORD8 *pu1_dst1, +@ UWORD8 *pu1_dst2, +@ WORD32 src_strd, +@ WORD32 dst_strd, +@ WORD32 *pi16_pred1,/* Pointer to 16bit intermmediate buffer (used only in c)*/ +@ WORD32 pi16_pred1_strd) + + + + + .global ih264e_sixtap_filter_2dvh_vert_a9q + 
+ih264e_sixtap_filter_2dvh_vert_a9q: + stmfd sp!, {r10, r11, r12, lr} + +@//r0 - pu1_ref +@//r3 - u4_ref_width + vpush {d8-d15} + @// Load six rows for vertical interpolation + lsl r12, r3, #1 + sub r0, r0, r12 + sub r0, r0, #2 + vld1.8 {d2, d3, d4}, [r0], r3 + vld1.8 {d5, d6, d7}, [r0], r3 + vld1.8 {d8, d9, d10}, [r0], r3 + mov r12, #5 + vld1.8 {d11, d12, d13}, [r0], r3 + mov r14, #20 + vld1.8 {d14, d15, d16}, [r0], r3 + vmov.16 d0[0], r12 + vmov.16 d0[1], r14 + vld1.8 {d17, d18, d19}, [r0], r3 + vmov.i8 d1, #20 + +@// r12 - u2_buff1_width +@// r14 - u2_buff2_width + ldr r12, [sp, #80] + add r11, r1, #6 + + mov r14, r12 + + mov r10, #3 @loop counter + + +filter_2dvh_loop: + + @// ////////////// ROW 1 /////////////////////// + +@// Process first vertical interpolated row +@// each column is + vaddl.u8 q10, d2, d17 @// a0 + a5 (column1,row0) + vmov.i8 d31, #5 + vmlal.u8 q10, d8, d1 @// a0 + a5 + 20a2 (column1,row0) + vmlal.u8 q10, d11, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0) + vmlsl.u8 q10, d5, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vmlsl.u8 q10, d14, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + + + vaddl.u8 q11, d3, d18 @// a0 + a5 (column2,row0) + vmlal.u8 q11, d9, d1 @// a0 + a5 + 20a2 (column2,row0) + vmlal.u8 q11, d12, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0) + vmlsl.u8 q11, d6, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + vmlsl.u8 q11, d15, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + vext.16 d30, d20, d21, #2 @//extract a[2] (set1) + + vaddl.u8 q12, d4, d19 @// a0 + a5 (column3,row0) + vext.16 d29, d20, d21, #3 @//extract a[3] (set1) + vmlal.u8 q12, d10, d1 @// a0 + a5 + 20a2 (column3,row0) + vmlal.u8 q12, d13, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0) + vmlsl.u8 q12, d7, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) + vmlsl.u8 q12, d16, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) + + vqrshrun.s16 d2, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + vext.16 
d31, d21, d22, #1 @//extract a[5] (set1) + vqrshrun.s16 d3, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + vext.16 d28, d20, d21, #1 @//extract a[1] (set1) + + vaddl.s16 q13, d31, d20 @// a0 + a5 (set1) + vext.16 d31, d22, d23, #1 @//extract a[5] (set2) + vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1) + vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1) + vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1) + vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1) + vext.16 d30, d21, d22, #2 @//extract a[2] (set2) + + vqrshrun.s16 d4, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + vext.16 d29, d21, d22, #3 @//extract a[3] (set2) + + vext.16 d28, d21, d22, #1 @//extract a[1] (set2) + vaddl.s16 q10, d31, d21 @// a0 + a5 (set2) + vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2) + vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2) + vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2) + vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2) + vext.16 d31, d23, d24, #1 @//extract a[5] (set3) + + vext.8 d2, d2, d3, #2 + vst1.8 {d3, d4}, [r11], r12 @// store row1 - 1,1/2 grid + vst1.8 {d2}, [r1], r12 @// store row1 - 1,1/2 grid + + vext.16 d30, d22, d23, #2 @//extract a[2] (set3) + vext.16 d29, d22, d23, #3 @//extract a[3] (set3) + + vaddl.s16 q1, d31, d22 @// a0 + a5 (set3) + vext.16 d28, d22, d23, #1 @//extract a[1] (set3) + vmlal.s16 q1, d30, d0[1] @// a0 + a5 + 20a2 (set3) + vmlal.s16 q1, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3) + vmlsl.s16 q1, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3) + vmlsl.s16 q1, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3) + vext.16 d31, d24, d25, #1 @//extract a[5] (set4) + + vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2) + vext.16 d30, d23, d24, #2 @//extract a[2] (set4) + vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more 
with rounding (set1) + vext.16 d29, d23, d24, #3 @//extract a[3] (set4) + + vaddl.s16 q13, d31, d23 @// a0 + a5 (set4) + vext.16 d28, d23, d24, #1 @//extract a[1] (set4) + vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid + vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4) + vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4) + vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4) + vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4) + vext.16 d30, d24, d25, #2 @//extract a[2] (set5) + + vaddl.s16 q11, d31, d24 @// a0 + a5 (set5) + vext.16 d29, d24, d25, #3 @//extract a[3] (set5) + + vext.16 d31, d24, d25, #1 @//extract a[1] (set5) + vshrn.s32 d28, q1, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3) + + vld1.8 {d2, d3, d4}, [r0], r3 @// Load next Row data + vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5) + vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5) + vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5) + vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5) + vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4) + vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2 + + + @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4 + @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5) + + @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5 + + @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values + @// ////////////// ROW 2 /////////////////////// + +@// Process first vertical interpolated row +@// each column is + vaddl.u8 q10, d5, d2 @// a0 + a5 (column1,row0) + vmov.i8 d31, #5 + vmlal.u8 q10, d11, d1 @// a0 + a5 + 20a2 (column1,row0) + vmlal.u8 q10, d14, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0) + vmlsl.u8 q10, d8, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vmlsl.u8 q10, d17, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 
(column1,row0) + + vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4 + vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5) + + vaddl.u8 q11, d6, d3 @// a0 + a5 (column2,row0) + vmlal.u8 q11, d12, d1 @// a0 + a5 + 20a2 (column2,row0) + vmlal.u8 q11, d15, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0) + vmlsl.u8 q11, d9, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + vmlsl.u8 q11, d18, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + + vqrshrun.s16 d28, q14, #2 @// half,half gird set5 + vext.16 d30, d20, d21, #2 @//extract a[2] (set1) + + vaddl.u8 q12, d7, d4 @// a0 + a5 (column3,row0) + vext.16 d29, d20, d21, #3 @//extract a[3] (set1) + vmlal.u8 q12, d13, d1 @// a0 + a5 + 20a2 (column3,row0) + vmlal.u8 q12, d16, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0) + vmlsl.u8 q12, d10, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) + vmlsl.u8 q12, d19, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) + vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values + + vqrshrun.s16 d5, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + vext.16 d31, d21, d22, #1 @//extract a[5] (set1) + vqrshrun.s16 d6, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + vext.16 d28, d20, d21, #1 @//extract a[1] (set1) + + vaddl.s16 q13, d31, d20 @// a0 + a5 (set1) + vext.16 d31, d22, d23, #1 @//extract a[5] (set2) + vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1) + vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1) + vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1) + vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1) + vext.16 d30, d21, d22, #2 @//extract a[2] (set2) + + vqrshrun.s16 d7, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + vext.16 d29, d21, d22, #3 @//extract a[3] (set2) + + vext.16 d28, d21, d22, #1 @//extract a[1] (set2) + vaddl.s16 q10, d31, d21 @// a0 + a5 (set2) + vmlal.s16 q10, d30, d0[1] 
@// a0 + a5 + 20a2 (set2) + vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2) + vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2) + vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2) + vext.16 d31, d23, d24, #1 @//extract a[5] (set3) + + vext.8 d5, d5, d6, #2 + vst1.8 {d6, d7}, [r11], r12 @// store row1 - 1,1/2 grid + vst1.8 {d5}, [r1], r12 @// store row1 - 1,1/2 grid + + vext.16 d30, d22, d23, #2 @//extract a[2] (set3) + vext.16 d29, d22, d23, #3 @//extract a[3] (set3) + + vaddl.s16 q3, d31, d22 @// a0 + a5 (set3) + vext.16 d28, d22, d23, #1 @//extract a[1] (set3) + vmlal.s16 q3, d30, d0[1] @// a0 + a5 + 20a2 (set3) + vmlal.s16 q3, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3) + vmlsl.s16 q3, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3) + vmlsl.s16 q3, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3) + vext.16 d31, d24, d25, #1 @//extract a[5] (set4) + + vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2) + vext.16 d30, d23, d24, #2 @//extract a[2] (set4) + vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1) + vext.16 d29, d23, d24, #3 @//extract a[3] (set4) + + vaddl.s16 q13, d31, d23 @// a0 + a5 (set4) + vext.16 d28, d23, d24, #1 @//extract a[1] (set4) + vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid + vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4) + vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4) + vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4) + vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4) + vext.16 d30, d24, d25, #2 @//extract a[2] (set5) + + vaddl.s16 q11, d31, d24 @// a0 + a5 (set5) + vext.16 d29, d24, d25, #3 @//extract a[3] (set5) + + vext.16 d31, d24, d25, #1 @//extract a[1] (set5) + vshrn.s32 d28, q3, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3) + + vld1.8 {d5, d6, d7}, [r0], r3 @// 
Load next Row data + vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5) + vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5) + vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5) + vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5) + vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4) + vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2 + + + @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4 + @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5) + + @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5 + + @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values + @// ////////////// ROW 3 /////////////////////// + +@// Process first vertical interpolated row +@// each column is + vaddl.u8 q10, d8, d5 @// a0 + a5 (column1,row0) + vmov.i8 d31, #5 + vmlal.u8 q10, d14, d1 @// a0 + a5 + 20a2 (column1,row0) + vmlal.u8 q10, d17, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0) + vmlsl.u8 q10, d11, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vmlsl.u8 q10, d2, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + + vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4 + vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5) + + vaddl.u8 q11, d9, d6 @// a0 + a5 (column2,row0) + vmlal.u8 q11, d15, d1 @// a0 + a5 + 20a2 (column2,row0) + vmlal.u8 q11, d18, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0) + vmlsl.u8 q11, d12, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + vmlsl.u8 q11, d3, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + + vqrshrun.s16 d28, q14, #2 @// half,half gird set5 + vext.16 d30, d20, d21, #2 @//extract a[2] (set1) + + vaddl.u8 q12, d10, d7 @// a0 + a5 (column3,row0) + vext.16 d29, d20, d21, #3 @//extract a[3] (set1) + vmlal.u8 q12, d16, d1 @// a0 + a5 + 20a2 (column3,row0) + vmlal.u8 q12, d19, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0) + vmlsl.u8 q12, d13, d31 @// a0 + 
a5 + 20a2 + 20a3 - 5a1 (column3,row0) + vmlsl.u8 q12, d4, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) + + vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values + + vqrshrun.s16 d8, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + vext.16 d31, d21, d22, #1 @//extract a[5] (set1) + vqrshrun.s16 d9, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + vext.16 d28, d20, d21, #1 @//extract a[1] (set1) + + vaddl.s16 q13, d31, d20 @// a0 + a5 (set1) + vext.16 d31, d22, d23, #1 @//extract a[5] (set2) + vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1) + vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1) + vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1) + vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1) + vext.16 d30, d21, d22, #2 @//extract a[2] (set2) + + vqrshrun.s16 d10, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + vext.16 d29, d21, d22, #3 @//extract a[3] (set2) + + vext.16 d28, d21, d22, #1 @//extract a[1] (set2) + vaddl.s16 q10, d31, d21 @// a0 + a5 (set2) + vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2) + vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2) + vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2) + vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2) + vext.16 d31, d23, d24, #1 @//extract a[5] (set3) + + vext.8 d8, d8, d9, #2 + vst1.8 {d9, d10}, [r11], r12 @// store row1 - 1,1/2 grid + vst1.8 {d8}, [r1], r12 @// store row1 - 1,1/2 grid + + vext.16 d30, d22, d23, #2 @//extract a[2] (set3) + vext.16 d29, d22, d23, #3 @//extract a[3] (set3) + + vaddl.s16 q4, d31, d22 @// a0 + a5 (set3) + vext.16 d28, d22, d23, #1 @//extract a[1] (set3) + vmlal.s16 q4, d30, d0[1] @// a0 + a5 + 20a2 (set3) + vmlal.s16 q4, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3) + vmlsl.s16 q4, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3) + vmlsl.s16 q4, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 
5a1 - 5a4 (set3) + vext.16 d31, d24, d25, #1 @//extract a[5] (set4) + + vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2) + vext.16 d30, d23, d24, #2 @//extract a[2] (set4) + vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1) + vext.16 d29, d23, d24, #3 @//extract a[3] (set4) + + vaddl.s16 q13, d31, d23 @// a0 + a5 (set4) + vext.16 d28, d23, d24, #1 @//extract a[1] (set4) + vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid + vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4) + vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4) + vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4) + vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4) + vext.16 d30, d24, d25, #2 @//extract a[2] (set5) + + vaddl.s16 q11, d31, d24 @// a0 + a5 (set5) + vext.16 d29, d24, d25, #3 @//extract a[3] (set5) + + vext.16 d31, d24, d25, #1 @//extract a[1] (set5) + vshrn.s32 d28, q4, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3) + + vld1.8 {d8, d9, d10}, [r0], r3 @// Load next Row data + vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5) + vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5) + vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5) + vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5) + vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4) + vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2 + + + @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4 + @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5) + + @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5 + + @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values + @// ////////////// ROW 4 /////////////////////// + +@// Process first vertical interpolated row +@// each column is + vaddl.u8 q10, d11, d8 @// a0 + 
a5 (column1,row0) + vmov.i8 d31, #5 + vmlal.u8 q10, d17, d1 @// a0 + a5 + 20a2 (column1,row0) + vmlal.u8 q10, d2, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0) + vmlsl.u8 q10, d14, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vmlsl.u8 q10, d5, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + + vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4 + vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5) + + vaddl.u8 q11, d12, d9 @// a0 + a5 (column2,row0) + vmlal.u8 q11, d18, d1 @// a0 + a5 + 20a2 (column2,row0) + vmlal.u8 q11, d3, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0) + vmlsl.u8 q11, d15, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + vmlsl.u8 q11, d6, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + + vqrshrun.s16 d28, q14, #2 @// half,half gird set5 + vext.16 d30, d20, d21, #2 @//extract a[2] (set1) + + vaddl.u8 q12, d13, d10 @// a0 + a5 (column3,row0) + vext.16 d29, d20, d21, #3 @//extract a[3] (set1) + vmlal.u8 q12, d19, d1 @// a0 + a5 + 20a2 (column3,row0) + vmlal.u8 q12, d4, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0) + vmlsl.u8 q12, d16, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) + vmlsl.u8 q12, d7, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) + + vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values + + vqrshrun.s16 d11, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + vext.16 d31, d21, d22, #1 @//extract a[5] (set1) + vqrshrun.s16 d12, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + vext.16 d28, d20, d21, #1 @//extract a[1] (set1) + + vaddl.s16 q13, d31, d20 @// a0 + a5 (set1) + vext.16 d31, d22, d23, #1 @//extract a[5] (set2) + vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1) + vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1) + vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1) + vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1) + vext.16 d30, d21, d22, 
#2 @//extract a[2] (set2) + + vqrshrun.s16 d13, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + vext.16 d29, d21, d22, #3 @//extract a[3] (set2) + + vext.16 d28, d21, d22, #1 @//extract a[1] (set2) + vaddl.s16 q10, d31, d21 @// a0 + a5 (set2) + vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2) + vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2) + vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2) + vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2) + vext.16 d31, d23, d24, #1 @//extract a[5] (set3) + + vext.8 d11, d11, d12, #2 + vst1.8 {d12, d13}, [r11], r12 @// store row1 - 1,1/2 grid + vst1.8 {d11}, [r1], r12 @// store row1 - 1,1/2 grid + + vext.16 d30, d22, d23, #2 @//extract a[2] (set3) + vext.16 d29, d22, d23, #3 @//extract a[3] (set3) + + vaddl.s16 q6, d31, d22 @// a0 + a5 (set3) + vext.16 d28, d22, d23, #1 @//extract a[1] (set3) + vmlal.s16 q6, d30, d0[1] @// a0 + a5 + 20a2 (set3) + vmlal.s16 q6, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3) + vmlsl.s16 q6, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3) + vmlsl.s16 q6, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3) + vext.16 d31, d24, d25, #1 @//extract a[5] (set4) + + vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2) + vext.16 d30, d23, d24, #2 @//extract a[2] (set4) + vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1) + vext.16 d29, d23, d24, #3 @//extract a[3] (set4) + + vaddl.s16 q13, d31, d23 @// a0 + a5 (set4) + vext.16 d28, d23, d24, #1 @//extract a[1] (set4) + vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid + vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4) + vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4) + vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4) + vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4) + vext.16 d30, d24, d25, #2 
@//extract a[2] (set5) + + vaddl.s16 q11, d31, d24 @// a0 + a5 (set5) + vext.16 d29, d24, d25, #3 @//extract a[3] (set5) + + vext.16 d31, d24, d25, #1 @//extract a[1] (set5) + vshrn.s32 d28, q6, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3) + + vld1.8 {d11, d12, d13}, [r0], r3 @// Load next Row data + vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5) + vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5) + vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5) + vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5) + vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4) + vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2 + + + @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4 + @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5) + + @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5 + + @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values + @// ////////////// ROW 5 /////////////////////// + +@// Process first vertical interpolated row +@// each column is + vaddl.u8 q10, d14, d11 @// a0 + a5 (column1,row0) + vmov.i8 d31, #5 + vmlal.u8 q10, d2, d1 @// a0 + a5 + 20a2 (column1,row0) + vmlal.u8 q10, d5, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0) + vmlsl.u8 q10, d17, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vmlsl.u8 q10, d8, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + + vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4 + vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5) + + vaddl.u8 q11, d15, d12 @// a0 + a5 (column2,row0) + vmlal.u8 q11, d3, d1 @// a0 + a5 + 20a2 (column2,row0) + vmlal.u8 q11, d6, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0) + vmlsl.u8 q11, d18, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + vmlsl.u8 q11, d9, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + + vqrshrun.s16 d28, q14, #2 @// half,half gird set5 + 
vext.16 d30, d20, d21, #2 @//extract a[2] (set1) + + vaddl.u8 q12, d16, d13 @// a0 + a5 (column3,row0) + vext.16 d29, d20, d21, #3 @//extract a[3] (set1) + vmlal.u8 q12, d4, d1 @// a0 + a5 + 20a2 (column3,row0) + vmlal.u8 q12, d7, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0) + vmlsl.u8 q12, d19, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) + vmlsl.u8 q12, d10, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) + + vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values + + vqrshrun.s16 d14, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + vext.16 d31, d21, d22, #1 @//extract a[5] (set1) + vqrshrun.s16 d15, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + vext.16 d28, d20, d21, #1 @//extract a[1] (set1) + + vaddl.s16 q13, d31, d20 @// a0 + a5 (set1) + vext.16 d31, d22, d23, #1 @//extract a[5] (set2) + vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1) + vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1) + vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1) + vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1) + vext.16 d30, d21, d22, #2 @//extract a[2] (set2) + + vqrshrun.s16 d16, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + vext.16 d29, d21, d22, #3 @//extract a[3] (set2) + + vext.16 d28, d21, d22, #1 @//extract a[1] (set2) + vaddl.s16 q10, d31, d21 @// a0 + a5 (set2) + vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2) + vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2) + vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2) + vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2) + vext.16 d31, d23, d24, #1 @//extract a[5] (set3) + + vext.8 d14, d14, d15, #2 + vst1.8 {d15, d16}, [r11], r12 @// store row1 - 1,1/2 grid + vst1.8 {d14}, [r1], r12 @// store row1 - 1,1/2 grid + + vext.16 d30, d22, d23, #2 @//extract a[2] (set3) + vext.16 d29, d22, d23, #3 @//extract a[3] (set3) + + vaddl.s16 
q7, d31, d22 @// a0 + a5 (set3) + vext.16 d28, d22, d23, #1 @//extract a[1] (set3) + vmlal.s16 q7, d30, d0[1] @// a0 + a5 + 20a2 (set3) + vmlal.s16 q7, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3) + vmlsl.s16 q7, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3) + vmlsl.s16 q7, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3) + vext.16 d31, d24, d25, #1 @//extract a[5] (set4) + + vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2) + vext.16 d30, d23, d24, #2 @//extract a[2] (set4) + vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1) + vext.16 d29, d23, d24, #3 @//extract a[3] (set4) + + vaddl.s16 q13, d31, d23 @// a0 + a5 (set4) + vext.16 d28, d23, d24, #1 @//extract a[1] (set4) + vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid + vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4) + vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4) + vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4) + vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4) + vext.16 d30, d24, d25, #2 @//extract a[2] (set5) + + vaddl.s16 q11, d31, d24 @// a0 + a5 (set5) + vext.16 d29, d24, d25, #3 @//extract a[3] (set5) + + vext.16 d31, d24, d25, #1 @//extract a[1] (set5) + vshrn.s32 d28, q7, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3) + + vld1.8 {d14, d15, d16}, [r0], r3 @// Load next Row data + vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5) + vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5) + vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5) + vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5) + vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4) + vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2 + + + @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4 + @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later 
we will shift by 2 more with rounding (set5) + + @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5 + + @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values + @// ////////////// ROW 6 /////////////////////// + +@// Process first vertical interpolated row +@// each column is + + cmp r10, #1 @// if it 17 rows are complete skip + beq filter_2dvh_skip_row + vaddl.u8 q10, d17, d14 @// a0 + a5 (column1,row0) + vmov.i8 d31, #5 + vmlal.u8 q10, d5, d1 @// a0 + a5 + 20a2 (column1,row0) + vmlal.u8 q10, d8, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0) + vmlsl.u8 q10, d2, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + vmlsl.u8 q10, d11, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + + vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4 + vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5) + + vaddl.u8 q11, d18, d15 @// a0 + a5 (column2,row0) + vmlal.u8 q11, d6, d1 @// a0 + a5 + 20a2 (column2,row0) + vmlal.u8 q11, d9, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0) + vmlsl.u8 q11, d3, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + vmlsl.u8 q11, d12, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + + vqrshrun.s16 d28, q14, #2 @// half,half gird set5 + vext.16 d30, d20, d21, #2 @//extract a[2] (set1) + + vaddl.u8 q12, d19, d16 @// a0 + a5 (column3,row0) + vext.16 d29, d20, d21, #3 @//extract a[3] (set1) + vmlal.u8 q12, d7, d1 @// a0 + a5 + 20a2 (column3,row0) + vmlal.u8 q12, d10, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0) + vmlsl.u8 q12, d4, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) + vmlsl.u8 q12, d13, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) + + vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values + + vqrshrun.s16 d17, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + vext.16 d31, d21, d22, #1 @//extract a[5] (set1) + vqrshrun.s16 d18, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + vext.16 d28, d20, d21, #1 
@//extract a[1] (set1) + + vaddl.s16 q13, d31, d20 @// a0 + a5 (set1) + vext.16 d31, d22, d23, #1 @//extract a[5] (set2) + vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1) + vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1) + vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1) + vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1) + vext.16 d30, d21, d22, #2 @//extract a[2] (set2) + + vqrshrun.s16 d19, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + vext.16 d29, d21, d22, #3 @//extract a[3] (set2) + + vext.16 d28, d21, d22, #1 @//extract a[1] (set2) + vaddl.s16 q10, d31, d21 @// a0 + a5 (set2) + vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2) + vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2) + vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2) + vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2) + vext.16 d31, d23, d24, #1 @//extract a[5] (set3) + + vext.8 d17, d17, d18, #2 + vst1.8 {d18, d19}, [r11], r12 @// store row1 - 1,1/2 grid + vst1.8 {d17}, [r1], r12 @// store row1 - 1,1/2 grid + + vext.16 d30, d22, d23, #2 @//extract a[2] (set3) + vext.16 d29, d22, d23, #3 @//extract a[3] (set3) + + vaddl.s16 q9, d31, d22 @// a0 + a5 (set3) + vext.16 d28, d22, d23, #1 @//extract a[1] (set3) + vmlal.s16 q9, d30, d0[1] @// a0 + a5 + 20a2 (set3) + vmlal.s16 q9, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3) + vmlsl.s16 q9, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3) + vmlsl.s16 q9, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3) + vext.16 d31, d24, d25, #1 @//extract a[5] (set4) + + vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2) + vext.16 d30, d23, d24, #2 @//extract a[2] (set4) + vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1) + vext.16 d29, d23, d24, #3 @//extract a[3] (set4) + + vaddl.s16 q13, d31, d23 @// a0 + a5 (set4) + vext.16 d28, d23, d24, #1 
@//extract a[1] (set4) + vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid + vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4) + vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4) + vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4) + vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4) + vext.16 d30, d24, d25, #2 @//extract a[2] (set5) + + vaddl.s16 q11, d31, d24 @// a0 + a5 (set5) + vext.16 d29, d24, d25, #3 @//extract a[3] (set5) + + vext.16 d31, d24, d25, #1 @//extract a[1] (set5) + vshrn.s32 d28, q9, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3) + + vld1.8 {d17, d18, d19}, [r0], r3 @// Load next Row data + vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5) + vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5) + vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5) + vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5) + vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4) + vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2 + + + vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4 + vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5) + + vqrshrun.s16 d28, q14, #2 @// half,half gird set5 + + vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values + + subs r10, r10, #1 @//decrement loop counter + + bne filter_2dvh_loop + + +@// Process first vertical interpolated row +@// each column is + @// ////////////// ROW 13 /////////////////////// + +@// Process first vertical interpolated row +@// each column is + vpop {d8-d15} + ldmfd sp!, {r10, r11, r12, pc} + +filter_2dvh_skip_row: + + vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4 + vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5) + + vqrshrun.s16 d28, q14, #2 @// half,half gird set5 + + vst1.8 {d26, d27, d28}, [r2], r14 @// store 
1/2,1,2 grif values + vpop {d8-d15} + ldmfd sp!, {r10, r11, r12, pc} + + + + diff --git a/encoder/arm/ih264e_platform_macros.h b/encoder/arm/ih264e_platform_macros.h new file mode 100755 index 0000000..39cac96 --- /dev/null +++ b/encoder/arm/ih264e_platform_macros.h @@ -0,0 +1,143 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ih264e_platform_macros.h
+*
+* @brief
+*  Contains platform specific routines used for codec context initialization
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+*  codec context
+*
+* @par Description: the current routine initializes the function pointers of
+*  codec context based on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns none
+*
+* @remarks presumably the ARM NEON (a9q) variant, going by the name - confirm
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+*  codec context
+*
+* @par Description: the current routine initializes the function pointers of
+*  codec context based on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns none
+*
+* @remarks presumably the ARMv8 NEON (av8) variant, going by the name - confirm
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+*  codec context
+*
+* @par Description: the current routine initializes the function pointers of
+*  codec context based on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns none
+*
+* @remarks presumably the generic (non-SIMD) variant, going by the name - confirm
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+*  codec context
+*
+* @par Description: the current routine initializes the function pointers of
+*  codec context based on the architecture in use
+*
+* @param[in] pv_codec
+*  Codec context pointer, passed as an untyped (void *) pointer
+*
+* @returns none
+*
+* @remarks unlike the variants above, this prototype takes void * not codec_t *
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the
+*  environment in which the current encoder is being tested
+*
+* @param none
+*
+* @returns IV_ARCH_T
+*  architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/arm/ime_distortion_metrics_a9q.s b/encoder/arm/ime_distortion_metrics_a9q.s
new file mode 100755
index 0000000..b58911e
--- /dev/null
+++ b/encoder/arm/ime_distortion_metrics_a9q.s
@@ -0,0 +1,1353 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+@******************************************************************************
+@*
+@*
+@* @brief
+@*  This file contains definitions of routines that compute distortion
+@*  between two macro/sub blocks of identical dimensions
+@*
+@* @author
+@*  Ittiam
+@*
+@* @par List of Functions:
+@*  - ime_compute_sad_16x16_a9q()
+@*  - ime_compute_sad_16x16_fast_a9q()
+@*  - ime_compute_sad_16x8_a9q()
+@*  - ime_compute_sad_16x16_ea8_a9q()
+@*  - ime_calculate_sad2_prog_a9q()
+@*  - ime_calculate_sad3_prog_a9q()
+@*  - ime_calculate_sad4_prog_a9q()
+@*  - ime_sub_pel_compute_sad_16x16_a9q()
+@*  - ime_compute_satqd_16x16_lumainter_a9q()
+@*  -
+@* @remarks
+@*  None
+@*
+@*******************************************************************************
+@
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+@*
+@* @par Description
+@*  This function approximates the SAD between two 16x16 blocks: both strides
+@*  are doubled so only 8 alternate rows are compared, and that partial sum is
+@*  doubled before it is stored. i4_max_sad is not read: there is no early exit.
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source
+@*
+@* @param[in] pu1_dst
+@*  UWORD8 pointer to the destination block (only read by this routine)
+@*
+@* @param[in] src_strd
+@*  integer source stride
+@*
+@* @param[in] dst_strd
+@*  integer destination stride
+@*
+@* @param[in] i4_max_sad
+@*  integer maximum allowed distortion (not read by this routine)
+@*
+@* @param[out] pi4_mb_distortion
+@*  pointer to the integer that receives the evaluated sad
+@*
+@* @remarks
+@*  NEON usage is limited to d0-d7 and d16-d19, so callee-saved d8-d15 are untouched
+@******************************************************************************
+@*/
+.text
+.p2align 2
+    .global ime_compute_sad_16x16_fast_a9q
+ime_compute_sad_16x16_fast_a9q:
+
+    stmfd         sp!, {r12, lr}            @ 8 bytes pushed: stack arguments now start at [sp, #8]
+    lsl           r2, r2, #1                @ src_strd * 2: step over alternate source rows
+    lsl           r3, r3, #1                @ dst_strd * 2: step over alternate destination rows
+
+    @for bringing buffer2 into cache..., dummy load instructions
+    @ LDR r12,[r1]
+
+    vld1.8        {d4, d5}, [r0], r2        @ 16 source bytes, first row pair
+    vld1.8        {d6, d7}, [r1], r3        @ 16 destination bytes, first row pair
+    mov           r12, #6                   @ loop counter: decremented by 2, so 3 iterations
+    vld1.8        {d16, d17}, [r0], r2      @ next source row (scratch q8 instead of callee-saved q4)
+    vabdl.u8      q0, d6, d4                @ q0 = |dst - src|, low 8 bytes (starts the accumulator)
+    vabdl.u8      q1, d7, d5                @ q1 = |dst - src|, high 8 bytes
+    vld1.8        {d18, d19}, [r1], r3      @ next destination row (scratch q9 instead of callee-saved q5)
+
+loop_sad_16x16_fast:
+
+    vld1.8        {d4, d5}, [r0], r2        @ prefetch following source row
+    vabal.u8      q0, d18, d16              @ accumulate pending row pair, low half
+    vabal.u8      q1, d19, d17              @ accumulate pending row pair, high half
+    vld1.8        {d6, d7}, [r1], r3        @ prefetch following destination row
+    subs          r12, #2                   @ two row pairs consumed per iteration
+    vld1.8        {d16, d17}, [r0], r2      @ source row for the next pending pair
+    vabal.u8      q0, d6, d4                @ accumulate second row pair, low half
+    vabal.u8      q1, d7, d5                @ accumulate second row pair, high half
+    vld1.8        {d18, d19}, [r1], r3      @ destination row for the next pending pair
+
+    bne           loop_sad_16x16_fast
+
+    vabal.u8      q0, d18, d16              @ last pending row pair, low half
+    vabal.u8      q1, d19, d17              @ last pending row pair, high half
+
+    vadd.i16      q0, q0, q1                @ fold the two 16-bit accumulators together
+    vadd.i16      d0, d1, d0                @ fold 8 lanes down to 4
+
+    ldr           r12, [sp, #12]            @ r12 = pi4_mb_distortion (second stack argument)
+    vpaddl.u16    d0, d0                    @ 4 x u16 -> 2 x u32
+    vpaddl.u32    d0, d0                    @ 2 x u32 -> 1 x u64
+    vshl.u32      d0, d0, #1                @ double the 8-row sum to estimate all 16 rows
+    vst1.32       {d0[0]}, [r12]            @ *pi4_mb_distortion = estimated SAD
+
+    ldmfd         sp!, {r12, pc}
+
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x8 blocks
+@*
+@*
+@* @par Description
+@*  This function computes the SAD between two 16x8 blocks over all 8 rows.
+@*  The maximum-SAD argument is not read by the routine, so there is no early
+@*  exit: the distortion of the entire block is always computed.
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source block (r0)
+@*
+@* @param[in] pu1_dst
+@*  UWORD8 pointer to the block the source is compared with (r1, read only)
+@*
+@* @param[in] src_strd
+@*  integer source stride (r2)
+@*
+@* @param[in] dst_strd
+@*  integer destination stride (r3)
+@*
+@* @param[in] u4_max_sad
+@*  integer maximum allowed distortion (first stack argument, not read here)
+@*
+@* @param[out] pi4_mb_distortion
+@*  pointer to the integer that receives the evaluated sad (second stack
+@*  argument)
+@*
+@* @remarks
+@*  8 rows of 16 pixels are compared.
+@*  d8-d15 are callee-saved under the AAPCS, so they are saved and restored
+@*  around the body, which uses q4 and q5.
+@*
+@******************************************************************************
+@*/
+@
+    .global ime_compute_sad_16x8_a9q
+ime_compute_sad_16x8_a9q:
+
+    stmfd         sp!, {r12, lr}
+    vpush         {d8-d15}                  @ preserve callee-saved NEON registers
+
+    @for bringing buffer2 into cache..., dummy load instructions
+    @LDR r12,[r1]
+
+    vld1.8        {d4, d5}, [r0], r2
+    vld1.8        {d6, d7}, [r1], r3
+    mov           r12, #6                   @ 2 rows are loaded here, 6 more in the loop
+    vld1.8        {d8, d9}, [r0], r2
+    vabdl.u8      q0, d6, d4
+    vabdl.u8      q1, d7, d5
+    vld1.8        {d10, d11}, [r1], r3
+
+loop_sad_16x8:
+
+    vld1.8        {d4, d5}, [r0], r2
+    vabal.u8      q0, d10, d8
+    vabal.u8      q1, d11, d9
+    vld1.8        {d6, d7}, [r1], r3
+    subs          r12, #2
+    vld1.8        {d8, d9}, [r0], r2
+    vabal.u8      q0, d6, d4
+    vabal.u8      q1, d7, d5
+    vld1.8        {d10, d11}, [r1], r3
+
+    bne           loop_sad_16x8
+
+    vabal.u8      q0, d10, d8
+    vabal.u8      q1, d11, d9
+
+    vadd.i16      q0, q0, q1
+    vadd.i16      d0, d1, d0
+
+    @ pi4_mb_distortion is the second stack argument:
+    @ 8 bytes of core registers + 64 bytes of d8-d15 + 4 bytes for u4_max_sad
+    ldr           r12, [sp, #76]
+    vpaddl.u16    d0, d0
+    vpaddl.u32    d0, d0
+    vst1.32       {d0[0]}, [r12]
+
+    vpop          {d8-d15}
+    ldmfd         sp!, {r12, pc}
+
+
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x16 blocks with early exit
+@*
+@* @par Description
+@*   This function computes SAD between 2 16x16 blocks. The SAD of the 8 even
+@*   rows is computed and stored first; if it is greater than i4_max_sad the
+@*   function returns with that partial value. Otherwise the 8 odd rows are
+@*   added and the SAD of the entire block is stored.
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source block (r0)
+@*
+@* @param[in] pu1_dst
+@*  UWORD8 pointer to the block the source is compared with (r1, read only)
+@*
+@* @param[in] src_strd
+@*  integer source stride (r2)
+@*
+@* @param[in] dst_strd
+@*  integer destination stride (r3)
+@*
+@* @param[in] i4_max_sad
+@*  integer maximum allowed distortion (first stack argument)
+@*
+@* @param[out] pi4_mb_distortion
+@*  pointer to the integer that receives the evaluated sad (second stack
+@*  argument)
+@*
+@* @remarks
+@*  d8-d15 are callee-saved under the AAPCS, so they are saved and restored
+@*  around the body, which uses q4, q5 and q6.
+@*
+@******************************************************************************
+@*/
+    .global ime_compute_sad_16x16_ea8_a9q
+
+ime_compute_sad_16x16_ea8_a9q:
+
+    stmfd         sp!, {r5-r7, lr}
+    vpush         {d8-d15}                  @ preserve callee-saved NEON registers
+    lsl           r2, r2, #1                @ src stride * 2: walk the even rows first
+    lsl           r3, r3, #1                @ dst stride * 2
+
+    @for bringing buffer2 into cache..., dummy load instructions
+    @LDR r12,[r1]
+
+    vld1.8        {d4, d5}, [r0], r2
+    vld1.8        {d6, d7}, [r1], r3
+    mov           r5, #6
+    vld1.8        {d8, d9}, [r0], r2
+    vabdl.u8      q0, d6, d4
+    vabdl.u8      q1, d7, d5
+    vld1.8        {d10, d11}, [r1], r3
+    @ stack arguments sit above 16 bytes of core registers + 64 bytes of d8-d15
+    ldrd          r6, r7, [sp, #80]
+    @r6 = i4_max_sad, r7 = pi4_mb_distortion
+
+loop_sad_16x16_ea8_1:
+
+    vld1.8        {d4, d5}, [r0], r2
+    vabal.u8      q0, d10, d8
+    vabal.u8      q1, d11, d9
+    vld1.8        {d6, d7}, [r1], r3
+    subs          r5, #2
+    vld1.8        {d8, d9}, [r0], r2
+    vabal.u8      q0, d6, d4
+    vabal.u8      q1, d7, d5
+    vld1.8        {d10, d11}, [r1], r3
+
+    bne           loop_sad_16x16_ea8_1
+
+    vabal.u8      q0, d10, d8
+    sub           r0, r0, r2, lsl #3        @ rewind src by the 8 doubled strides consumed
+    vabal.u8      q1, d11, d9
+    sub           r1, r1, r3, lsl #3        @ rewind dst likewise
+
+    vadd.i16      q6, q0, q1                @ even-row SAD is reduced in q6; q0/q1 keep accumulating
+    add           r0, r0, r2, asr #1        @ advance one original stride: first odd row
+    vadd.i16      d12, d12, d13
+    add           r1, r1, r3, asr #1
+
+    vpaddl.u16    d12, d12
+    vld1.8        {d4, d5}, [r0], r2
+    vld1.8        {d6, d7}, [r1], r3
+    vpaddl.u32    d12, d12
+    vld1.8        {d8, d9}, [r0], r2
+    vabal.u8      q0, d6, d4
+    vabal.u8      q1, d7, d5
+
+    vst1.32       {d12[0]}, [r7]            @ store the even-row SAD
+    ldr           r5, [r7]
+    cmp           r5, r6
+    bgt           end_func_16x16_ea8        @ early exit: partial SAD already exceeds i4_max_sad
+
+    vld1.8        {d10, d11}, [r1], r3
+    mov           r5, #6
+
+loop_sad_16x16_ea8_2:
+
+    vld1.8        {d4, d5}, [r0], r2
+    vabal.u8      q0, d10, d8
+    vabal.u8      q1, d11, d9
+    vld1.8        {d6, d7}, [r1], r3
+    subs          r5, #2
+    vld1.8        {d8, d9}, [r0], r2
+    vabal.u8      q0, d6, d4
+    vabal.u8      q1, d7, d5
+    vld1.8        {d10, d11}, [r1], r3
+
+    bne           loop_sad_16x16_ea8_2
+
+    vabal.u8      q0, d10, d8
+    vabal.u8      q1, d11, d9
+
+    vadd.i16      q0, q0, q1
+    vadd.i16      d0, d1, d0
+
+    vpaddl.u16    d0, d0
+    vpaddl.u32    d0, d0
+
+    vst1.32       {d0[0]}, [r7]             @ store the SAD of all 16 rows
+
+end_func_16x16_ea8:
+
+    vpop          {d8-d15}
+    ldmfd         sp!, {r5-r7, pc}
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      : Calculate_Mad2_prog()
+@//
+@// Detail Description : This function finds the SAD of one 16x16 source block
+@//                      against 2 reference blocks in one pass and stores the
+@//                      two 32-bit results at psad[0] (ref1) and psad[1] (ref2)
+@//
+@// Platform           : CortexA8/NEON .
+@//
+@//-----------------------------------------------------------------------------
+@*/
+
+    .global ime_calculate_sad2_prog_a9q
+
+ime_calculate_sad2_prog_a9q:
+
+    @ r0    = ref1             <UWORD8 *>
+    @ r1    = ref2             <UWORD8 *>
+    @ r2    = src              <UWORD8 *>
+    @ r3    = RefBufferWidth   <UWORD32>
+    @ stack = CurBufferWidth   <UWORD32>, psad <UWORD32 *>
+
+    stmfd         sp!, {r4-r5, lr}
+    vpush         {d8-d15}                  @ preserve callee-saved NEON registers
+
+    @ First stack argument: 12 bytes of core registers + 64 bytes of d8-d15.
+    @ (The slot 4 bytes lower holds the saved lr, not the stride.)
+    ldr           r4, [sp, #76]             @ load src stride to r4
+    mov           r5, #14
+
+    @Row 1
+    vld1.8        {d0, d1}, [r2], r4        @ load src Row 1
+    vld1.8        {d2, d3}, [r0], r3        @ load ref1 Row 1
+    vld1.8        {d4, d5}, [r1], r3        @ load ref2 Row 1
+
+    @Row 2
+    vld1.8        {d6, d7}, [r2], r4        @ load src Row 2
+    vabdl.u8      q6, d2, d0
+    vabdl.u8      q7, d3, d1
+    vld1.8        {d8, d9}, [r0], r3        @ load ref1 Row 2
+    vabdl.u8      q8, d4, d0
+    vabdl.u8      q9, d5, d1
+    vld1.8        {d10, d11}, [r1], r3      @ load ref2 Row 2
+
+loop_sad2_prog:
+
+    subs          r5, #2
+    @Row 1
+    vld1.8        {d0, d1}, [r2], r4        @ load src Row 1
+    vabal.u8      q6, d8, d6
+    vabal.u8      q7, d9, d7
+    vld1.8        {d2, d3}, [r0], r3        @ load ref1 Row 1
+    vabal.u8      q8, d10, d6
+    vabal.u8      q9, d11, d7
+    vld1.8        {d4, d5}, [r1], r3        @ load ref2 Row 1
+
+    @Row 2
+    vld1.8        {d6, d7}, [r2], r4        @ load src Row 2
+    vabal.u8      q6, d2, d0
+    vabal.u8      q7, d3, d1
+    vld1.8        {d8, d9}, [r0], r3        @ load ref1 Row 2
+    vabal.u8      q8, d4, d0
+    vabal.u8      q9, d5, d1
+    vld1.8        {d10, d11}, [r1], r3      @ load ref2 Row 2
+
+    bne           loop_sad2_prog
+
+    vabal.u8      q6, d8, d6
+    vabal.u8      q7, d9, d7
+    vabal.u8      q8, d10, d6
+    vabal.u8      q9, d11, d7
+
+    @ Compute SAD
+
+    vadd.u16      q6, q6, q7                @ Q6 : sad_ref1
+    vadd.u16      q8, q8, q9                @ Q8 : sad_ref2
+
+    vadd.u16      d12, d12, d13
+    ldr           r5, [sp, #80]             @ loading pi4_sad to r5 (second stack argument)
+    vadd.u16      d16, d16, d17
+
+    vpadd.u16     d12, d12, d16
+    vpaddl.u16    d12, d12                  @ d12 = {sad_ref1, sad_ref2} as 32-bit lanes
+
+    vst1.64       {d12}, [r5]!
+
+    vpop          {d8-d15}
+    ldmfd         sp!, {r4-r5, pc}
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      : Calculate_Mad3_prog()
+@//
+@// Detail Description : This function finds the SAD of one 16x16 source block
+@//                      against 3 reference blocks in one pass and stores the
+@//                      three 32-bit results at psad[0..2] (ref1, ref2, ref3)
+@//
+@// Platform           : CortexA8/NEON .
+@//
+@//-----------------------------------------------------------------------------
+@*/
+
+    .global ime_calculate_sad3_prog_a9q
+
+ime_calculate_sad3_prog_a9q:
+
+    @ r0    = ref1             <UWORD8 *>
+    @ r1    = ref2             <UWORD8 *>
+    @ r2    = ref3             <UWORD8 *>
+    @ r3    = src              <UWORD8 *>
+    @ stack = RefBufferWidth <UWORD32>, CurBufferWidth <UWORD32>, psad <UWORD32 *>
+
+
+    stmfd         sp!, {r4-r6, lr}
+    vpush         {d8-d15}                  @ preserve callee-saved NEON registers
+
+    @ stack arguments sit above 16 bytes of core registers + 64 bytes of d8-d15
+    ldrd          r4, r5, [sp, #80]         @ load ref stride to r4, src stride to r5
+    mov           r6, #14
+
+    @ Row 1
+    vld1.8        {d0, d1}, [r3], r5        @ load src Row 1
+    vld1.8        {d2, d3}, [r0], r4        @ load ref1 Row 1
+    vld1.8        {d4, d5}, [r1], r4        @ load ref2 Row 1
+    vabdl.u8      q8, d2, d0
+    vabdl.u8      q9, d3, d1
+    vld1.8        {d6, d7}, [r2], r4        @ load ref3 Row 1
+    vabdl.u8      q10, d4, d0
+    vabdl.u8      q11, d5, d1
+
+    @ Row 2
+    vld1.8        {d8, d9}, [r3], r5        @ load src Row 2
+    vabdl.u8      q12, d6, d0
+    vabdl.u8      q13, d7, d1
+    vld1.8        {d10, d11}, [r0], r4      @ load ref1 Row 2
+    vld1.8        {d12, d13}, [r1], r4      @ load ref2 Row 2
+    vabal.u8      q8, d10, d8
+    vabal.u8      q9, d11, d9
+    vld1.8        {d14, d15}, [r2], r4      @ load ref3 Row 2
+    vabal.u8      q10, d12, d8
+    vabal.u8      q11, d13, d9
+
+loop_sad3_prog:
+
+    @Row 1
+    vld1.8        {d0, d1}, [r3], r5        @ load src Row 1
+    vabal.u8      q12, d14, d8
+    vabal.u8      q13, d15, d9
+    vld1.8        {d2, d3}, [r0], r4        @ load ref1 Row 1
+    vld1.8        {d4, d5}, [r1], r4        @ load ref2 Row 1
+    vabal.u8      q8, d2, d0
+    vabal.u8      q9, d3, d1
+    vld1.8        {d6, d7}, [r2], r4        @ load ref3 Row 1
+    vabal.u8      q10, d4, d0
+    vabal.u8      q11, d5, d1
+
+    @Row 2
+    vld1.8        {d8, d9}, [r3], r5        @ load src Row 2
+    vabal.u8      q12, d6, d0
+    vabal.u8      q13, d7, d1
+    vld1.8        {d10, d11}, [r0], r4      @ load ref1 Row 2
+    subs          r6, #2
+    vld1.8        {d12, d13}, [r1], r4      @ load ref2 Row 2
+    vabal.u8      q8, d10, d8
+    vabal.u8      q9, d11, d9
+    vld1.8        {d14, d15}, [r2], r4      @ load ref3 Row 2
+    vabal.u8      q10, d12, d8
+    vabal.u8      q11, d13, d9
+
+    bne           loop_sad3_prog
+
+    vabal.u8      q12, d14, d8
+    vabal.u8      q13, d15, d9
+
+    @ Compute SAD
+
+    vadd.u16      q8, q8, q9                @ Q8  : sad_ref1
+    vadd.u16      q10, q10, q11             @ Q10 : sad_ref2
+    vadd.u16      q12, q12, q13             @ Q12 : sad_ref3
+
+    vadd.u16      d16, d16, d17
+    vadd.u16      d20, d20, d21
+    vadd.u16      d24, d24, d25
+
+    vpadd.u16     d16, d16, d20
+    vpadd.u16     d24, d24, d24
+
+    ldr           r6, [sp, #88]             @ loading pi4_sad to r6 (third stack argument)
+    vpaddl.u16    d16, d16                  @ d16 = {sad_ref1, sad_ref2}
+    vpaddl.u16    d24, d24                  @ d24[0] = sad_ref3
+
+    vst1.64       {d16}, [r6]!
+    vst1.32       {d24[0]}, [r6]
+
+    vpop          {d8-d15}
+    ldmfd         sp!, {r4-r6, pc}
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) for sub-pel motion estimation
+@*
+@* @par Description
+@*   This function computes SAD for all the 8 half pel points
+@*
+@* @param[out] pi4_sad
+@*  integer evaluated sad
+@*  pi4_sad[0] - half x
+@*  pi4_sad[1] - half x - 1
+@*  pi4_sad[2] - half y
+@*  pi4_sad[3] - half y - strd
+@*  pi4_sad[4] - half xy
+@*  pi4_sad[5] - half xy - 1
+@*  pi4_sad[6] - half xy - strd
+@*  pi4_sad[7] - half xy - 1 - strd
+@*
+@* @remarks
+@*  r0 = source, r1 = half x plane, r2 = half y plane, r3 = half xy plane.
+@*  Stack arguments, in order: source stride, reference stride, pi4_sad.
+@*  d8-d15 are callee-saved under the AAPCS, so they are saved and restored
+@*  around the body.
+@*
+@******************************************************************************
+@*/
+
+.text
+.p2align 2
+
+    .global ime_sub_pel_compute_sad_16x16_a9q
+
+ime_sub_pel_compute_sad_16x16_a9q:
+
+    stmfd         sp!, {r4-r11, lr}         @store register values to stack
+    vpush         {d8-d15}                  @ preserve callee-saved NEON registers
+
+    @ stack arguments sit above 36 bytes of core registers + 64 bytes of d8-d15
+    ldr           r9, [sp, #100]            @ source stride
+    ldr           r10, [sp, #104]           @ reference stride
+
+    sub           r4, r1, #1                @ x left
+    sub           r5, r2, r10               @ y top
+
+    sub           r6, r3, #1                @ xy left
+    sub           r7, r3, r10               @ xy top
+
+    sub           r8, r7, #1                @ xy top-left
+    mov           r11, #15                  @ row 0 is handled here, 15 more in the loop
+
+    @for bringing buffer2 into cache..., dummy load instructions
+    @ LDR r12,[r1]
+    @ LDR r12,[sp,#12]
+
+    vld1.8        {d0, d1}, [r0], r9        @ src
+    vld1.8        {d2, d3}, [r5], r10       @ y top LOAD
+    vld1.8        {d4, d5}, [r7], r10       @ xy top LOAD
+    vld1.8        {d6, d7}, [r8], r10       @ xy top-left LOAD
+
+    vabdl.u8      q6, d2, d0                @ y top ABS1
+    vabdl.u8      q7, d4, d0                @ xy top ABS1
+    vld1.8        {d8, d9}, [r1], r10       @ x LOAD
+    vabdl.u8      q8, d6, d0                @ xy top-left ABS1
+    vabdl.u8      q9, d8, d0                @ x ABS1
+    vld1.8        {d10, d11}, [r4], r10     @ x left LOAD
+
+    vabal.u8      q6, d3, d1                @ y top ABS2
+    vabal.u8      q7, d5, d1                @ xy top ABS2
+    vld1.8        {d2, d3}, [r2], r10       @ y LOAD
+    vabal.u8      q8, d7, d1                @ xy top-left ABS2
+    vabal.u8      q9, d9, d1                @ x ABS2
+    vld1.8        {d4, d5}, [r3], r10       @ xy LOAD
+
+    vabdl.u8      q10, d10, d0              @ x left ABS1
+    vabdl.u8      q11, d2, d0               @ y ABS1
+    vld1.8        {d6, d7}, [r6], r10       @ xy left LOAD
+    vabdl.u8      q12, d4, d0               @ xy ABS1
+    vabdl.u8      q13, d6, d0               @ xy left ABS1
+
+loop_sub_pel_16x16:
+
+    vabal.u8      q10, d11, d1              @ x left ABS2
+    vabal.u8      q11, d3, d1               @ y ABS2
+    subs          r11, #1
+    vabal.u8      q12, d5, d1               @ xy ABS2
+    vabal.u8      q13, d7, d1               @ xy left ABS2
+
+    vld1.8        {d0, d1}, [r0], r9        @ src
+    vabal.u8      q6, d2, d0                @ y top ABS1
+    vabal.u8      q7, d4, d0                @ xy top ABS1
+    vld1.8        {d8, d9}, [r1], r10       @ x LOAD
+    vabal.u8      q8, d6, d0                @ xy top-left ABS1
+    vabal.u8      q9, d8, d0                @ x ABS1
+    vld1.8        {d10, d11}, [r4], r10     @ x left LOAD
+
+    vabal.u8      q6, d3, d1                @ y top ABS2
+    vabal.u8      q7, d5, d1                @ xy top ABS2
+    vld1.8        {d2, d3}, [r2], r10       @ y LOAD
+    vabal.u8      q8, d7, d1                @ xy top-left ABS2
+    vabal.u8      q9, d9, d1                @ x ABS2
+    vld1.8        {d4, d5}, [r3], r10       @ xy LOAD
+
+    vabal.u8      q10, d10, d0              @ x left ABS1
+    vabal.u8      q11, d2, d0               @ y ABS1
+    vld1.8        {d6, d7}, [r6], r10       @ xy left LOAD
+    vabal.u8      q12, d4, d0               @ xy ABS1
+    vabal.u8      q13, d6, d0               @ xy left ABS1
+
+    bne           loop_sub_pel_16x16
+
+    vabal.u8      q10, d11, d1              @ x left ABS2
+    vabal.u8      q11, d3, d1               @ y ABS2
+    vabal.u8      q12, d5, d1               @ xy ABS2
+    vabal.u8      q13, d7, d1               @ xy left ABS2
+
+    vadd.i16      d0, d18, d19              @ x
+    vadd.i16      d3, d12, d13              @ y top
+    vadd.i16      d6, d14, d15              @ xy top
+    vadd.i16      d5, d26, d27              @ xy left
+    vadd.i16      d1, d20, d21              @ x left
+    vadd.i16      d2, d22, d23              @ y
+    vadd.i16      d4, d24, d25              @ xy
+    vadd.i16      d7, d16, d17              @ xy top left
+
+    vpadd.i16     d0, d0, d1
+    vpadd.i16     d2, d2, d3
+    vpadd.i16     d4, d4, d5
+    vpadd.i16     d6, d6, d7
+
+    vpaddl.u16    d0, d0                    @ {x, x left}
+    vpaddl.u16    d2, d2                    @ {y, y top}
+    ldr           r11, [sp, #108]           @ pi4_sad (third stack argument)
+    vpaddl.u16    d4, d4                    @ {xy, xy left}
+    vpaddl.u16    d6, d6                    @ {xy top, xy top-left}
+
+    vst1.32       {d0}, [r11]!
+    vst1.32       {d2}, [r11]!
+    vst1.32       {d4}, [r11]!
+    vst1.32       {d6}, [r11]!
+
+    vpop          {d8-d15}
+    ldmfd         sp!, {r4-r11, pc}         @Restoring registers from stack
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x16 blocks
+@*
+@* @par Description
+@*   This function computes SAD between 2 16x16 blocks. All 16 rows are always
+@*   evaluated: the code has no early exit and does not read i4_max_sad.
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source block (r0)
+@*
+@* @param[in] pu1_dst
+@*  UWORD8 pointer to the block the source is compared with (r1, read only)
+@*
+@* @param[in] src_strd
+@*  integer source stride (r2)
+@*
+@* @param[in] dst_strd
+@*  integer destination stride (r3)
+@*
+@* @param[in] i4_max_sad
+@*  integer maximum allowed distortion (first stack argument, not read here)
+@*
+@* @param[out] pi4_mb_distortion
+@*  pointer to the integer that receives the evaluated sad (second stack
+@*  argument)
+@*
+@* @remarks
+@*  d8-d15 are callee-saved under the AAPCS, so they are saved and restored
+@*  around the body, which uses q4 and q5.
+@*
+@******************************************************************************
+@*/
+
+.text
+.p2align 2
+
+    .global ime_compute_sad_16x16_a9q
+
+ime_compute_sad_16x16_a9q:
+
+
+    @STMFD sp!,{r12,lr}
+    stmfd         sp!, {r12, r14}           @store register values to stack
+    vpush         {d8-d15}                  @ preserve callee-saved NEON registers
+
+    @for bringing buffer2 into cache..., dummy load instructions
+    @ LDR r12,[r1]
+    @ LDR r12,[sp,#12]
+
+    vld1.8        {d4, d5}, [r0], r2
+    vld1.8        {d6, d7}, [r1], r3
+
+    mov           r12, #14                  @ 2 rows are loaded here, 14 more in the loop
+    vld1.8        {d8, d9}, [r0], r2
+    vabdl.u8      q0, d4, d6
+    vld1.8        {d10, d11}, [r1], r3
+    vabdl.u8      q1, d5, d7
+
+loop_sad_16x16:
+
+    vld1.8        {d4, d5}, [r0], r2
+    vabal.u8      q0, d8, d10
+    vld1.8        {d6, d7}, [r1], r3
+    vabal.u8      q1, d9, d11
+
+    vld1.8        {d8, d9}, [r0], r2
+    vabal.u8      q0, d4, d6
+    subs          r12, #2
+    vld1.8        {d10, d11}, [r1], r3
+    vabal.u8      q1, d5, d7
+
+    bne           loop_sad_16x16
+
+    vabal.u8      q0, d8, d10
+    vabal.u8      q1, d9, d11
+
+    vadd.i16      q0, q0, q1
+    vadd.i16      d0, d1, d0
+    @ pi4_mb_distortion is the second stack argument:
+    @ 8 bytes of core registers + 64 bytes of d8-d15 + 4 bytes for i4_max_sad
+    ldr           r12, [sp, #76]
+
+    vpaddl.u16    d0, d0
+    vpaddl.u32    d0, d0
+    vst1.32       {d0[0]}, [r12]
+
+    vpop          {d8-d15}
+    ldmfd         sp!, {r12, pc}            @Restoring registers from stack
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      : Calculate_Mad4_prog()
+@//
+@// Detail Description : This function finds the SAD of one 16x16 source block
+@//                      against the 4 reference blocks one pixel to the left,
+@//                      right, top and bottom of temp_frame, in one pass. The
+@//                      32-bit results are stored at psad in the order
+@//                      left, right, top, bottom.
+@//
+@// Platform           : CortexA8/NEON .
+@//
+@//-----------------------------------------------------------------------------
+@*/
+
+    .global ime_calculate_sad4_prog_a9q
+
+ime_calculate_sad4_prog_a9q:
+    @ r0    = temp_frame       <UWORD8 *>
+    @ r1    = buffer_ptr       <UWORD8 *>
+    @ r2    = RefBufferWidth   <UWORD32>
+    @ r3    = CurBufferWidth   <UWORD32>
+    @ stack = psad             <UWORD32 *>
+
+    stmfd         sp!, {r4-r7, lr}
+    vpush         {d8-d15}                  @ preserve callee-saved NEON registers
+
+    @UWORD8 *left_ptr  = temp_frame - 1;
+    @UWORD8 *right_ptr = temp_frame + 1;
+    @UWORD8 *top_ptr   = temp_frame - RefBufferWidth;
+    @UWORD8 *bot_ptr   = temp_frame + RefBufferWidth;
+
+    mov           r7, #14
+    sub           r4, r0, #0x01             @r4 = left_ptr
+    add           r5, r0, #0x1              @r5 = right_ptr
+    sub           r6, r0, r2                @r6 = top_ptr
+    add           r0, r0, r2                @r0 = bot_ptr
+    @r1 = buffer_ptr
+
+    @D0:D1 : buffer
+    @D2:D3 : top
+    @D4:D5 : left
+    @D6:D7 : right
+    @D8:D9 : bottom
+
+    @Row 1
+    vld1.8        {d0, d1}, [r1], r3        @ load src Row 1
+    vld1.8        {d2, d3}, [r6], r2        @ load top Row 1
+    vld1.8        {d4, d5}, [r4], r2        @ load left Row 1
+
+    vabdl.u8      q5, d2, d0
+    vld1.8        {d6, d7}, [r5], r2        @ load right Row 1
+    vabdl.u8      q6, d3, d1
+
+    vabdl.u8      q7, d0, d4
+    vld1.8        {d8, d9}, [r0], r2        @ load bottom Row 1
+    vabdl.u8      q8, d1, d5
+
+    @Row 2
+    vabdl.u8      q9, d0, d6
+    vld1.8        {d26, d27}, [r1], r3      @ load src Row 2
+    vabdl.u8      q10, d1, d7
+
+    vabdl.u8      q11, d0, d8
+    vld1.8        {d2, d3}, [r6], r2        @ load top Row 2
+    vabdl.u8      q12, d1, d9
+
+loop_sad4_prog:
+
+    vabal.u8      q5, d26, d2
+    vld1.8        {d4, d5}, [r4], r2        @ load left Row 2
+    vabal.u8      q6, d27, d3
+
+    vabal.u8      q7, d26, d4
+    vld1.8        {d6, d7}, [r5], r2        @ load right Row 2
+    vabal.u8      q8, d27, d5
+
+    vabal.u8      q9, d26, d6
+    vld1.8        {d8, d9}, [r0], r2        @ load bottom Row 2
+    vabal.u8      q10, d27, d7
+
+    @Row 1
+    vabal.u8      q11, d26, d8
+    vld1.8        {d0, d1}, [r1], r3        @ load src Row 1
+    vabal.u8      q12, d27, d9
+
+    vld1.8        {d2, d3}, [r6], r2        @ load top Row 1
+    subs          r7, #2
+    vld1.8        {d4, d5}, [r4], r2        @ load left Row 1
+
+    vabal.u8      q5, d0, d2
+
+    vld1.8        {d6, d7}, [r5], r2        @ load right Row 1
+    vabal.u8      q6, d1, d3
+
+    vabal.u8      q7, d0, d4
+    vld1.8        {d8, d9}, [r0], r2        @ load bottom Row 1
+    vabal.u8      q8, d1, d5
+
+    @Row 2
+    vabal.u8      q9, d0, d6
+    vld1.8        {d26, d27}, [r1], r3      @ load src Row 2
+    vabal.u8      q10, d1, d7
+
+    vabal.u8      q11, d0, d8
+    vld1.8        {d2, d3}, [r6], r2        @ load top Row 2
+    vabal.u8      q12, d1, d9
+
+    bne           loop_sad4_prog
+
+    vabal.u8      q5, d26, d2
+    vld1.8        {d4, d5}, [r4], r2        @ load left Row 2
+    vabal.u8      q6, d27, d3
+
+    vabal.u8      q7, d26, d4
+    vld1.8        {d6, d7}, [r5], r2        @ load right Row 2
+    vabal.u8      q8, d27, d5
+
+    vabal.u8      q9, d26, d6
+    vld1.8        {d8, d9}, [r0], r2        @ load bottom Row 2
+    vabal.u8      q10, d27, d7
+
+    vabal.u8      q11, d26, d8
+    vabal.u8      q12, d27, d9
+
+    @;Q5:Q6   : sad_top
+    @;Q7:Q8   : sad_left
+    @;Q9:Q10  : sad_right
+    @;Q11:Q12 : sad_bot
+
+    vadd.u16      q5, q5, q6
+    vadd.u16      q7, q7, q8
+    vadd.u16      q9, q9, q10
+    vadd.u16      q11, q11, q12
+
+    @; Free :-
+    @; Q6,Q8,Q10,Q12
+
+    @;Q5  -> D10:D11
+    @;Q7  -> D14:D15
+    @;Q9  -> D18:D19
+    @;Q11 -> D22:D23
+
+    vadd.u16      d10, d10, d11
+    vadd.u16      d14, d14, d15
+    vadd.u16      d18, d18, d19
+    vadd.u16      d22, d22, d23
+
+    @;D10 : sad_top
+    @;D14 : sad_left
+    @;D18 : sad_right
+    @;D22 : sad_bot
+
+
+    vpaddl.u16    d11, d10
+    vpaddl.u16    d15, d14
+    vpaddl.u16    d19, d18
+    vpaddl.u16    d23, d22
+
+    @;D11 : sad_top
+    @;D15 : sad_left
+    @;D19 : sad_right
+    @;D23 : sad_bot
+
+    vpaddl.u32    d10, d11
+    vpaddl.u32    d22, d23
+    vpaddl.u32    d14, d15
+    vpaddl.u32    d18, d19
+
+    @;D10 : sad_top
+    @;D14 : sad_left
+    @;D18 : sad_right
+    @;D22 : sad_bot
+
+    @ psad is the first stack argument:
+    @ 20 bytes of core registers + 64 bytes of d8-d15
+    ldr           r4, [sp, #84]             @;Can be rearranged
+
+    vsli.64       d10, d22, #32             @ d10 = {sad_top, sad_bot}
+    vsli.64       d14, d18, #32             @ d14 = {sad_left, sad_right}
+
+    vst1.64       {d14}, [r4]!
+    vst1.64       {d10}, [r4]!
+
+    vpop          {d8-d15}
+    ldmfd         sp!, {r4-r7, pc}
+
+
+
+
+@*****************************************************************************
+@*
+@* Function Name    : ime_compute_satqd_16x16_lumainter_a9
+@* Description      : This function computes SAD for a 16x16 block.
+@                   : It also computes if any 4x4 block will have a nonzero coefficient after transform and quant
+@
+@ Arguments         : R0 :pointer to src buffer
+@                     R1 :pointer to est buffer
+@                     R2 :source stride
+@                     R3 :est stride
+@                     STACK :Threshold,distortion,is_nonzero
+@*
+@* Values Returned  : NONE
+@*
+@* Register Usage   : R0-R11; d8-d15 are saved and restored (callee-saved under the AAPCS)
+@* Stack Usage      : 40 bytes of core registers + 64 bytes of NEON registers
+@* Cycles           : Around
+@* Interruptibility : Interruptible
+@*
+@* Known Limitations
+@* \Assumptions     :
+@*
+@* Revision History :
+@* DD MM YYYY Author(s) Changes
+@* 14 04 2014 Harinarayanan K K First version
+@*
+@*****************************************************************************
+    .global ime_compute_satqd_16x16_lumainter_a9q
+ime_compute_satqd_16x16_lumainter_a9q:
+    @R0 :pointer to src buffer
+    @R1 :pointer to est buffer
+    @R2 :Source stride
+    @R3 :Pred stride
+    @R4 :Threshold pointer
+    @R5 :Distortion,ie SAD
+    @R6 :is nonzero
+
+    push          {r4-r12, lr}              @push all the variables first
+    vpush         {d8-d15}                  @ preserve callee-saved NEON registers (q4-q7 are used below)
+    @ADD SP,SP,#40 ;decrement stack pointer,to accomodate two variables
+    @ stack arguments sit above 40 bytes of core registers + 64 bytes of d8-d15
+    ldr           r4, [sp, #104]            @load the threshold address
+
+    mov           r8, #8                    @Number of 4x8 blocks to be processed
+    mov           r10, #0                   @Sad
+    mov           r7, #0                    @Nonzero info
+    @----------------------------------------------------
+
+    vld1.u8       d30, [r0], r2             @I load 8 pix src row 1
+
+    vld1.u8       d31, [r1], r3             @I load 8 pix pred row 1
+
+    vld1.u8       d28, [r0], r2             @I load 8 pix src row 2
+
+    vld1.u8       d29, [r1], r3             @I load 8 pix pred row 2
+
+    vld1.u8       d26, [r0], r2             @I load 8 pix src row 3
+    vabdl.u8      q0, d30, d31              @I Abs diff r1 blk 12
+
+    vld1.u8       d27, [r1], r3             @I load 8 pix pred row 3
+
+    vld1.u8       d24, [r0], r2             @I load 8 pix src row 4
+
+    vld1.u8       d25, [r1], r3             @I load 8 pix pred row 4
+    vabdl.u8      q1, d28, d29              @I Abs diff r1 blk 12
+
+    vld1.u16      {q11}, [r4]               @I load the threshold
+    vabdl.u8      q2, d26, d27              @I Abs diff r1 blk 12
+
+    vabdl.u8      q3, d24, d25              @I Abs diff r1 blk 12
+
+
+
+core_loop:
+    @S1 S2 S3 S4 A1 A2 A3 A4
+    @S5 S6 S7 S8 A5 A6 A7 A8
+    @S9 S10 S11 S12 A9 A10 A11 A12
+    @S13 S14 S15 S16 A13 A14 A15 A16
+    ands          r11, r8, #1               @II See if we are at even or odd block
+    vadd.u16      q4, q0, q3                @I Add r1 r4
+    lsl           r11, r2, #2               @II Move back src 4 rows
+
+    subeq         r0, r0, r11               @II Move back src 4 rows if we are at even block
+    vadd.u16      q5, q1, q2                @I Add r2 r3
+    addeq         r0, r0, #8                @II Move src 8 cols forward if we are at even block
+
+    lsl           r11, r3, #2               @II Move back pred 4 rows
+    vtrn.16       d8, d10                   @I trnspse 1
+    subeq         r1, r1, r11               @II Move back pred 4 rows if we are at even block
+
+    addeq         r1, r1, #8                @II Move pred 8 cols forward if we are at even block
+    vtrn.16       d9, d11                   @I trnspse 2
+    subne         r0, r0, #8                @II Src 8 cols back for odd rows
+
+    subne         r1, r1, #8                @II Pred 8 cols back for odd rows
+    vtrn.32       d10, d11                  @I trnspse 4
+
+
+    vtrn.32       d8, d9                    @I trnspse 3
+    vswp          d10, d11                  @I rearrange so that the q4 and q5 add properly
+    @D8 S1 S4 A1 A4
+    @D9 S2 S3 A2 A3
+    @D11 S1 S4 A1 A4
+    @D10 S2 S3 A2 A3
+
+    vadd.s16      q6, q4, q5                @I Get s1 s4
+    vld1.u8       d30, [r0], r2             @II load first 8 pix src row 1
+
+    vtrn.s16      d12, d13                  @I Get s2 s3
+    @D12 S1 S4 A1 A4
+    @D13 S2 S3 A2 A3
+
+    vshl.s16      q7, q6, #1                @I si = si<<1
+    vld1.u8       d31, [r1], r3             @II load first 8 pix pred row 1
+
+    vpadd.s16     d16, d12, d13             @I (s1 + s4) (s2 + s3)
+    vld1.u8       d28, [r0], r2             @II load first 8 pix src row 2
+    @ D16 S14 A14 S23 A23
+    vrev32.16     d0, d16                   @I
+    vuzp.s16      d16, d0                   @I
+    @D16 S14 S23 A14 A23
+    vadd.s16      d17, d12, d13             @I (s1 + s2) (s3 + s4)
+    vld1.u8       d29, [r1], r3             @II load first 8 pix pred row 2
+    @D17 S12 S34 A12 A34
+
+    vrev32.16     q9, q7                    @I Rearrange si's
+    @Q9 Z4,Z1,Y4,Y1,Z3,Z2,Y3,Y2
+
+    @D12 S1 S4 A1 A4
+    @D19 Z3 Z2 Y3 Y2
+    vsub.s16      d8, d12, d19              @I (s1 - (s3<<1)) (s4 - (s2<<1))
+    vld1.u8       d26, [r0], r2             @II load first 8 pix src row 3
+    @D13 S2 S3 A2 A3
+    @D18 Z4 Z1 Y4 Y1
+    vsub.s16      d9, d13, d18              @I (s2 - (s4<<1)) (s3 - (s1<<1))
+    vld1.u8       d27, [r1], r3             @II load first 8 pix pred row 3
+    @Q10 S8 S5 A8 A5 S7 S4 A7 A4
+
+    @D16 S14 S23 A14 A23
+    vpadd.s16     d10, d16, d17             @I Get sad by adding s1 s2 s3 s4
+    vld1.u8       d24, [r0], r2             @II load first 8 pix src row 4
+    @D22 SAD1 SAD2 junk junk
+
+
+    @Q8 S2 S1 A2 A1 S6 S3 A6 A3
+    @Q10 S8 S5 A8 A5 S7 S4 A7 A4
+    vtrn.32       q8, q4                    @I Rearrange to make ls of each block together
+    @Q8 S2 S1 S8 S5 S6 S3 S7 S4
+    @Q10 A2 A1 A8 A5 A6 A3 A7 A4
+
+
+    ldrh          r11, [r4, #16]            @I Load the threshold for DC val blk 1
+    vdup.s16      q6, d10[0]                @I Get the sad blk 1
+    vabdl.u8      q0, d30, d31              @II Abs diff r1 blk 12
+
+    vshl.s16      q7, q6, #1                @I sad_2 = sad_1<<1
+    vmov.s16      r9, d10[0]                @I Get the sad for block 1
+
+    vsub.s16      q9, q7, q8                @I Add to the lss
+    vmov.s16      r5, d10[1]                @I Get the sad for block 2
+
+    vcle.s16      q7, q11, q9               @I Add to the lss
+    vld1.u8       d25, [r1], r3             @II load first 8 pix pred row 4
+
+    vdup.s16      q15, d10[1]               @I Get the sad blk 1
+    vabdl.u8      q1, d28, d29              @II Abs diff r1 blk 12
+
+
+    vshl.s16      q14, q15, #1              @I sad_2 = sad_1<<1
+    vsub.s16      q3, q14, q4               @I Add to the lss
+    vcle.s16      q15, q11, q3              @I Add to the lss
+
+    ADD           R10, R10, R9              @I Add to the global sad blk 1
+    vtrn.u8       q15, q7                   @I get all comparison bits to one reg
+    vabdl.u8      q2, d26, d27              @II Abs diff r1 blk 12
+
+    ADD           R10, R10, R5              @I Add to the global sad blk 2
+    vshr.u8       q14, q15, #7              @I Shift the bits so that no overflow occurs
+    cmp           r11, r9                   @I Compare with threshold blk 1
+
+    movle         r7, #0xf                  @I If not met mark it by moving non zero val to R7 blk 1
+    vadd.u8       d28, d28, d29             @I Add the bits
+    cmp           r11, r5                   @I Compare with threshold blk 2
+
+    movle         r7, #0xf                  @I If not met mark it by moving non zero val to R7 blk 2
+    vpadd.u8      d28, d28, d29             @I Add the bits
+
+    vmov.u32      r11, d28[0]               @I Since a set bit now represents an unsatisfied condition store it in r11
+    vabdl.u8      q3, d24, d25              @II Abs diff r1 blk 12
+
+    orr           r7, r7, r11               @I merge r11 into the nonzero flag in r7
+
+
+    sub           r8, r8, #1                @I Decrement block count
+
+    cmp           r7, #0                    @I If we have at least one non zero block
+    bne           compute_sad_only          @I if a non zero block is there, from now on compute sad only
+
+    cmp           r8, #1                    @I See if we are at the last block
+    bne           core_loop                 @I If the blocks are zero, lets continue the satdq
+
+
+    @EPILOGUE for core loop
+    @S1 S2 S3 S4 A1 A2 A3 A4
+    @S5 S6 S7 S8 A5 A6 A7 A8
+    @S9 S10 S11 S12 A9 A10 A11 A12
+    @S13 S14 S15 S16 A13 A14 A15 A16
+    vadd.u16      q4, q0, q3                @Add r1 r4
+    vadd.u16      q5, q1, q2                @Add r2 r3
+    @D8 S1 S2 S2 S1
+    @D10 S4 S3 S3 S4
+    @D9 A1 A2 A2 A1
+    @D11 A4 A3 A3 A4
+    vtrn.16       d8, d10                   @I trnspse 1
+    vtrn.16       d9, d11                   @I trnspse 2
+    vtrn.32       d8, d9                    @I trnspse 3
+    vtrn.32       d10, d11                  @I trnspse 4
+
+    vswp          d10, d11                  @I rearrange so that the q4 and q5 add properly
+    @D8 S1 S4 A1 A4
+    @D9 S2 S3 A2 A3
+    @D11 S1 S4 A1 A4
+    @D10 S2 S3 A2 A3
+    vadd.s16      q6, q4, q5                @Get s1 s4
+    vtrn.s16      d12, d13                  @Get s2 s3
+    @D12 S1 S4 A1 A4
+    @D13 S2 S3 A2 A3
+
+    vshl.s16      q7, q6, #1                @si = si<<1
+    vmov.s16      r9, d10[0]                @Get the sad for block 1
+
+    vpadd.s16     d16, d12, d13             @(s1 + s4) (s2 + s3)
+    vmov.s16      r5, d10[1]                @Get the sad for block 2
+    @D16 S14 A14 S23 A23
+    vrev32.16     d30, d16                  @
+    vuzp.s16      d16, d30                  @
+    @D16 S14 S23 A14 A23
+    vadd.s16      d17, d12, d13             @(s1 + s2) (s3 + s4)
+    @D17 S12 S34 A12 A34
+
+    vrev32.16     q9, q7                    @Rearrange si's
+    @Q9 Z4,Z1,Y4,Y1,Z3,Z2,Y3,Y2
+
+    @D12 S1 S4 A1 A4
+    @D19 Z3 Z2 Y3 Y2
+    vsub.s16      d8, d12, d19              @(s1 - (s3<<1)) (s4 - (s2<<1))
+    @D13 S2 S3 A2 A3
+    @D18 Z4 Z1 Y4 Y1
+    vsub.s16      d9, d13, d18              @(s2 - (s4<<1)) (s3 - (s1<<1))
+    @Q10 S8 S5 A8 A5 S7 S4 A7 A4
+
+    @D16 S14 S23 A14 A23
+    vpadd.s16     d10, d16, d17             @I Get sad by adding s1 s2 s3 s4
+    @D22 SAD1 SAD2 junk junk
+    vmov.u16      r9, d10[0]                @Get the sad for block 1
+    vmov.u16      r5, d10[1]                @Get the sad for block 2
+
+    @Q8 S2 S1 A2 A1 S6 S3 A6 A3
+    @Q10 S8 S5 A8 A5 S7 S4 A7 A4
+    ldrh          r11, [r4, #16]            @Load the threshold for DC val blk 1
+    vtrn.32       q8, q4                    @Rearrange to make ls of each block together
+    ADD           R10, R10, R9              @Add to the global sad blk 1
+
+    @Q8 S2 S1 S8 S5 S6 S3 S7 S4
+    @Q10 A2 A1 A8 A5 A6 A3 A7 A4
+
+    vld1.u16      {q11}, [r4]               @load the threshold
+    ADD           R10, R10, R5              @Add to the global sad blk 2
+
+    vdup.u16      q6, d10[0]                @Get the sad blk 1
+
+    cmp           r11, r9                   @Compare with threshold blk 1
+    vshl.u16      q7, q6, #1                @sad_2 = sad_1<<1
+
+    vsub.s16      q9, q7, q8                @Add to the lss
+
+    vcle.s16      q15, q11, q9              @Add to the lss
+    movle         r7, #0xf                  @If not met mark it by moving non zero val to R7 blk 1
+
+    cmp           r11, r5                   @Compare with threshold blk 2
+    vdup.u16      q14, d10[1]               @Get the sad blk 1
+
+    vshl.u16      q13, q14, #1              @sad_2 = sad_1<<1
+    vsub.s16      q12, q13, q4              @Add to the lss
+    vcle.s16      q14, q11, q12             @Add to the lss
+    movle         r7, #0xf                  @If not met mark it by moving non zero val to R7 blk 2
+
+    vtrn.u8       q14, q15                  @get all comparison bits to one reg
+    vshr.u8       q14, q14, #7              @Shift the bits so that no overflow occurs
+    vadd.u8       d28, d28, d29             @Add the bits
+    vpadd.u8      d28, d28, d29             @Add the bits
+    vmov.u32      r11, d28[0]               @Since a set bit now represents an unsatisfied condition store it in r11
+    orr           r7, r7, r11               @merge r11 into the nonzero flag in r7
+
+    b             funcend_sad_16x16         @Since all blocks are processed now, go to end
+
+compute_sad_only:                           @This block computes SAD only, so will be lighter
+                                            @It will start processing at an odd block
+                                            @It will compute sad for odd block,
+                                            @and then for two blocks at a time
+                                            @The counter is r8, hence r8 blocks will be processed
+
+    and           r11, r8, #1               @Get the last bit of counter
+    cmp           r11, #0                   @See if we are at even or odd block
+    @if the blk is even we just have to set the pointer to the
+    @start of current row
+
+    lsleq         r11, r2, #2               @I Move back src 4 rows
+    subeq         r0, r0, r11               @I Move back src 4 rows if we are at even block
+
+    lsleq         r11, r3, #2               @I Move back pred 4 rows
+    subeq         r1, r1, r11               @I Move back pred 4 rows if we are at even block
+    @ADDEQ R8,R8,#2 ;Inc counter
+    beq           skip_odd_blk              @If the blk is odd we have to compute sad
+
+
+    vadd.u16      q4, q0, q1                @Add SAD of row1 and row2
+    vadd.u16      q5, q2, q3                @Add SAD of row3 and row4
+    vadd.u16      q6, q4, q5                @Add SAD of row 1-4
+    vadd.u16      d14, d12, d13             @Add Blk1 and blk2
+    vpadd.u16     d16, d14, d15             @Add col 1-2 and 3-4
+    vpadd.u16     d18, d16, d17             @Add col 12-34
+
+    vmov.u16      r9, d18[0]                @Move sad to arm
+    ADD           R10, R10, R9              @Add to the global sad
+
+    sub           r8, r8, #1                @Dec counter
+    cmp           r8, #0                    @See if we processed last block
+    beq           funcend_sad_16x16         @if processed last block goto end of func
+
+    sub           r0, r0, #8                @Since we processed odd block move back src by 8 cols
+    sub           r1, r1, #8                @Since we processed odd block move back pred by 8 cols
+
+skip_odd_blk:
+
+    vmov.s16      q0, #0                    @Initialize the accumulator
+    vmov.s16      q1, #0                    @Initialize the accumulator
+
+    vld1.u8       {q15}, [r0], r2           @load src r1
+    vld1.u8       {q14}, [r1], r3           @load pred r1
+
+    vld1.u8       {q13}, [r0], r2           @load src r2
+    vld1.u8       {q12}, [r1], r3           @load pred r2
+
+    vld1.u8       {q11}, [r0], r2           @load src r3
+    vld1.u8       {q10}, [r1], r3           @load pred r3
+
+    vld1.u8       {q9}, [r0], r2            @load src r4
+    vld1.u8       {q8}, [r1], r3            @load pred r4
+
+    cmp           r8, #2
+    beq           sad_epilouge
+
+sad_loop:
+
+    vabal.u8      q0, d30, d28              @I accumulate Abs diff R1
+    vabal.u8      q1, d31, d29              @I accumulate Abs diff R1
+
+    vld1.u8       {q15}, [r0], r2           @II load r1 src
+    vabal.u8      q0, d26, d24              @I accumulate Abs diff R2
+
+    vld1.u8       {q14}, [r1], r3           @II load r1 pred
+    vabal.u8      q1, d27, d25              @I accumulate Abs diff R2
+
+    vld1.u8       {q13}, [r0], r2           @II load r2 src
+    vabal.u8      q0, d22, d20              @I accumulate Abs diff R3
+
+    vld1.u8       {q12}, [r1], r3           @II load r2 pred
+    vabal.u8      q1, d23, d21              @I accumulate Abs diff R3
+
+    vld1.u8       {q11}, [r0], r2           @II load r3 src
+    vabal.u8      q0, d18, d16              @I accumulate Abs diff R4
+
+
+    sub           r8, r8, #2                @Since we process 16 pix at a time, dec by 2
+    vld1.u8       {q10}, [r1], r3           @II load r3 pred
+    vabal.u8      q1, d19, d17              @I accumulate Abs diff R4
+
+    cmp           r8, #2                    @Check if last loop
+    vld1.u8       {q9}, [r0], r2            @II load r4 src
+    vld1.u8       {q8}, [r1], r3            @II load r4 pred
+
+    bne           sad_loop                  @Go back to SAD computation
+
+sad_epilouge:
+    vabal.u8      q0, d30, d28              @Accumulate Abs diff R1
+    vabal.u8      q1, d31, d29              @Accumulate Abs diff R1
+
+    vabal.u8      q0, d26, d24              @Accumulate Abs diff R2
+    vabal.u8      q1, d27, d25              @Accumulate Abs diff R2
+
+    vabal.u8      q0, d22, d20              @Accumulate Abs diff R3
+    vabal.u8      q1, d23, d21              @Accumulate Abs diff R3
+
+    vabal.u8      q0, d18, d16              @Accumulate Abs diff R4
+    vabal.u8      q1, d19, d17              @Accumulate Abs diff R4
+
+    vadd.u16      q2, q0, q1                @ADD two accumulators
+    vadd.u16      d6, d4, d5                @Add two blk sad
+    vpadd.u16     d8, d6, d7                @Add col 1-2 and 3-4 sad
+    vpadd.u16     d10, d8, d9               @Add col 12-34 sad
+
+    vmov.u16      r9, d10[0]                @move SAD to ARM
+    ADD           R10, R10, R9              @Add to the global sad
+
+funcend_sad_16x16:                          @End of function process
+    @ second and third stack arguments, above 40 + 64 bytes of saved registers
+    ldr           r5, [sp, #108]            @ distortion pointer
+    ldr           r6, [sp, #112]            @ is_nonzero pointer
+
+    str           r7, [r6]                  @Store the is zero reg
+    str           r10, [r5]                 @Store sad
+
+    @SUB SP,SP,#40
+    vpop          {d8-d15}
+    pop           {r4-r12, pc}
+
+
diff --git a/encoder/arm/ime_platform_macros.h b/encoder/arm/ime_platform_macros.h
new file mode 100755
index 0000000..0f5b2f2
--- /dev/null
+++ b/encoder/arm/ime_platform_macros.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ime_platform_macros.h +* +* @brief +* Platform specific Macro definitions used in the codec +* +* @author +* Ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _IME_PLATFORM_MACROS_H_ +#define _IME_PLATFORM_MACROS_H_ + +/*****************************************************************************/ +/* Function macro definitions */ +/*****************************************************************************/ + +#define USADA8(src,est,sad) \ + sad += ABS(src[0]-est[0]) + \ + ABS(src[1]-est[1]) + \ + ABS(src[2]-est[2]) + \ + ABS(src[3]-est[3]) + + +#endif /* _IH264_PLATFORM_MACROS_H_ */ diff --git a/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s new file mode 100755 index 0000000..c442077 --- /dev/null +++ b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s @@ -0,0 +1,592 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+//*/
+///**
+
+///**
+//******************************************************************************
+//*
+//* @brief :Evaluate best intra 16x16 mode (among VERT, HORZ and DC )
+//* and do the prediction.
+//*
+//* @par Description
+//* This function evaluates the first three 16x16 modes, computes the SAD of each
+//* and writes the block predicted with the best mode to pu1_dst.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] pu1_ngbr_pels_i16
+//* UWORD8 pointer to neighbouring pels (left pels at [0..15], row 0 at index 15; top pels from offset 17)
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] u4_n_avblty
+//* availability of neighbouring pixels (bit 0 = left, bit 2 = top)
+//*
+//* @param[out] u4_intra_mode
+//* Pointer to the 32-bit variable in which best mode is returned (0 = VERT, 1 = HORZ, 2 = DC)
+//*
+//* @param[out] pu4_sadmin
+//* Pointer to the 32-bit variable in which minimum sad is returned
+//*
+//* @param[in] u4_valid_intra_modes
+//* Bit mask of the modes to evaluate (bit 0 = VERT, bit 1 = HORZ, bit 2 = DC)
+//*
+//*
+//* @return none
+//*
+//******************************************************************************
+//*/
+//
+//void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
+// UWORD8 *pu1_ngbr_pels_i16,
+// UWORD8 *pu1_dst,
+// UWORD32 src_strd,
+// UWORD32 dst_strd,
+// WORD32 u4_n_avblty,
+// UWORD32 *u4_intra_mode,
+// WORD32 *pu4_sadmin,
+// UWORD32 u4_valid_intra_modes)
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+.globl ih264e_evaluate_intra16x16_modes_av8
+
+ih264e_evaluate_intra16x16_modes_av8:
+
+//x0 = pu1_src,
+//x1 = pu1_ngbr_pels_i16,
+//x2 = pu1_dst,
+//x3 = src_strd,
+//x4 = dst_strd,
+//x5 = u4_n_avblty,
+//x6 = u4_intra_mode,
+//x7 = pu4_sadmin
+//stack = u4_valid_intra_modes (9th argument)
+
+
+    // save callee-saved registers (push_v_regs comes from ih264_neon_macros.s)
+    push_v_regs
+    stp x19, x20, [sp, #-16]!
+
+    ldr x16, [sp, #80] //x16 = u4_valid_intra_modes; NOTE(review): #80 presumably = bytes pushed by push_v_regs + 16, confirm against the macro
+    mov x17, x4 //x17 = dst_strd
+    // (removed "mov x18, x5") x18 is the platform register and must not be written; u4_n_avblty is read from x5 directly
+    mov x14, x6 //x14 = u4_intra_mode
+    mov x15, x7 //x15 = pu4_sadmin
+
+
+    sub v0.16b, v0.16b, v0.16b
+    sub v1.16b, v1.16b, v1.16b
+    mov w10, #0
+    mov w11 , #3
+
+    ands x6, x5, #0x01
+    beq top_available //LEFT NOT AVAILABLE
+    ld1 {v0.16b}, [x1]
+    add w10, w10, #8
+    add w11, w11, #1
+top_available:
+    ands x6, x5, #0x04
+    beq none_available
+    add x6, x1, #17
+    ld1 {v1.16b}, [x6]
+    add w10, w10, #8
+    add w11, w11, #1
+    b summation
+none_available:
+    cmp x5, #0
+    bne summation
+    mov w6, #128
+    dup v30.16b, w6
+    dup v31.16b, w6
+    b sad_comp
+summation:
+    uaddl v2.8h, v0.8b, v1.8b
+    uaddl2 v3.8h, v0.16b, v1.16b
+    dup v10.8h, w10
+    neg w11, w11
+    dup v20.8h, w11
+    add v0.8h, v2.8h, v3.8h
+    mov v1.d[0], v0.d[1]
+    add v0.4h, v0.4h, v1.4h
+    addp v0.4h, v0.4h , v0.4h
+    addp v0.4h, v0.4h , v0.4h
+    add v0.4h, v0.4h, v10.4h
+    uqshl v0.8h, v0.8h, v20.8h
+    sqxtun v0.8b, v0.8h
+
+    dup v30.16b, v0.b[0]
+    dup v31.16b, v0.b[0]
+
+
+sad_comp:
+    ld1 { v0.2s, v1.2s }, [x0], x3 // source row 0
+
+    ld1 { v2.2s, v3.2s}, [x0], x3 //row 1
+
+    ld1 { v4.2s, v5.2s}, [x0], x3 //row 2
+
+    ld1 { v6.2s, v7.2s}, [x0], x3 //row 3
+
+    //---------------------
+
+    //values for vertical prediction
+    add x6, x1, #17
+    ld1 {v10.8b}, [x6], #8
+    ld1 {v11.8b}, [x6], #8
+    ld1 {v9.16b}, [x1]
+
+
+
+    dup v20.8b, v9.b[15] ///HORIZONTAL VALUE ROW=0//
+    dup v21.8b, v9.b[15] ///HORIZONTAL VALUE ROW=0//
+
+
+///* computing SADs for all three modes*/
+    ///vertical row 0@
+    uabdl v16.8h, v0.8b, v10.8b
+    uabdl v18.8h, v1.8b, v11.8b
+
+    ///HORZ row 0@
+    uabdl v26.8h, v0.8b, v20.8b
+    uabdl v28.8h, v1.8b, v21.8b
+
+    ///dc row 0@
+    uabdl v22.8h, v0.8b, v30.8b
+    uabdl v24.8h, v1.8b, v31.8b
+
+
+
+
+
+    dup v20.8b, v9.b[14] ///HORIZONTAL VALUE ROW=1//
+    dup v21.8b, v9.b[14]
+
+
+    ///vertical row 1@
+    uabal v16.8h, v2.8b, v10.8b
+    uabal v18.8h, v3.8b, v11.8b
+
+    ld1 { v0.2s, v1.2s }, [x0], x3 //row 4
+    ///HORZ row 1@
+    uabal v26.8h, v2.8b, v20.8b
+    uabal v28.8h, v3.8b, v21.8b
+
+    ///dc row 1@
+    uabal v22.8h, v2.8b, v30.8b
+    uabal v24.8h, v3.8b, v31.8b
+
+    dup v20.8b, v9.b[13] ///HORIZONTAL VALUE ROW=2//
+    dup v21.8b, v9.b[13]
+
+    ///vertical row 2@
+    uabal v16.8h, v4.8b, v10.8b
+    uabal v18.8h, v5.8b, v11.8b
+
+    ld1 { v2.2s, v3.2s}, [x0], x3 //row 5
+    ///HORZ row 2@
+    uabal v26.8h, v4.8b, v20.8b
+    uabal v28.8h, v5.8b, v21.8b
+
+    ///dc row 2@
+    uabal v22.8h, v4.8b, v30.8b
+    uabal v24.8h, v5.8b, v31.8b
+
+    dup v20.8b, v9.b[12] ///HORIZONTAL VALUE ROW=3//
+    dup v21.8b, v9.b[12]
+
+    ///vertical row 3@
+    uabal v16.8h, v6.8b, v10.8b
+    uabal v18.8h, v7.8b, v11.8b
+
+    ld1 { v4.2s, v5.2s}, [x0], x3 //row 6
+    ///HORZ row 3@
+    uabal v26.8h, v6.8b, v20.8b
+    uabal v28.8h, v7.8b, v21.8b
+
+    ///dc row 3@
+    uabal v22.8h, v6.8b, v30.8b
+    uabal v24.8h, v7.8b, v31.8b
+//----------------------------------------------------------------------------------------------
+
+    dup v20.8b, v9.b[11] ///HORIZONTAL VALUE ROW=0//
+    dup v21.8b, v9.b[11]
+
+    ///vertical row 0@
+    uabal v16.8h, v0.8b, v10.8b
+    uabal v18.8h, v1.8b, v11.8b
+
+    ld1 { v6.2s, v7.2s}, [x0], x3 //row 7
+    ///HORZ row 0@
+    uabal v26.8h, v0.8b, v20.8b
+    uabal v28.8h, v1.8b, v21.8b
+
+    ///dc row 0@
+    uabal v22.8h, v0.8b, v30.8b
+    uabal v24.8h, v1.8b, v31.8b
+
+    dup v20.8b, v9.b[10] ///HORIZONTAL VALUE ROW=1//
+    dup v21.8b, v9.b[10]
+
+    ///vertical row 1@
+    uabal v16.8h, v2.8b, v10.8b
+    uabal v18.8h, v3.8b, v11.8b
+
+    ld1 { v0.2s, v1.2s }, [x0], x3 //row 8
+    ///HORZ row 1@
+    uabal v26.8h, v2.8b, v20.8b
+    uabal v28.8h, v3.8b, v21.8b
+
+    ///dc row 1@
+    uabal v22.8h, v2.8b, v30.8b
+    uabal v24.8h, v3.8b, v31.8b
+
+    dup v20.8b, v9.b[9] ///HORIZONTAL VALUE ROW=2//
+    dup v21.8b, v9.b[9]
+
+    ///vertical row 2@
+    uabal v16.8h, v4.8b, v10.8b
+    uabal v18.8h, v5.8b, v11.8b
+
+    ld1 { v2.2s, v3.2s}, [x0], x3 //row 9
+
+    ///HORZ row 2@
+    uabal v26.8h, v4.8b, v20.8b
+    uabal v28.8h, v5.8b, v21.8b
+
+    ///dc row 2@
+    uabal v22.8h, v4.8b, v30.8b
+    uabal v24.8h, v5.8b, v31.8b
+
+    dup v20.8b, v9.b[8] ///HORIZONTAL VALUE ROW=3//
+    dup v21.8b, v9.b[8]
+
+    ///vertical row 3@
+    uabal v16.8h, v6.8b, v10.8b
+    uabal v18.8h, v7.8b, v11.8b
+
+    ld1 { v4.2s, v5.2s}, [x0], x3 //row 10
+
+    ///HORZ row 3@
+    uabal v26.8h, v6.8b, v20.8b
+    uabal v28.8h, v7.8b, v21.8b
+
+    ///dc row 3@
+    uabal v22.8h, v6.8b, v30.8b
+    uabal v24.8h, v7.8b, v31.8b
+
+
+//-------------------------------------------
+
+    dup v20.8b, v9.b[7] ///HORIZONTAL VALUE ROW=0//
+    dup v21.8b, v9.b[7]
+
+    ///vertical row 0@
+    uabal v16.8h, v0.8b, v10.8b
+    uabal v18.8h, v1.8b, v11.8b
+
+    ld1 { v6.2s, v7.2s}, [x0], x3 //row11
+
+    ///HORZ row 0@
+    uabal v26.8h, v0.8b, v20.8b
+    uabal v28.8h, v1.8b, v21.8b
+
+    ///dc row 0@
+    uabal v22.8h, v0.8b, v30.8b
+    uabal v24.8h, v1.8b, v31.8b
+
+    dup v20.8b, v9.b[6] ///HORIZONTAL VALUE ROW=1//
+    dup v21.8b, v9.b[6]
+
+    ///vertical row 1@
+    uabal v16.8h, v2.8b, v10.8b
+    uabal v18.8h, v3.8b, v11.8b
+
+    ld1 { v0.2s, v1.2s }, [x0], x3 //row12
+
+    ///HORZ row 1@
+    uabal v26.8h, v2.8b, v20.8b
+    uabal v28.8h, v3.8b, v21.8b
+
+    ///dc row 1@
+    uabal v22.8h, v2.8b, v30.8b
+    uabal v24.8h, v3.8b, v31.8b
+
+    dup v20.8b, v9.b[5] ///HORIZONTAL VALUE ROW=2//
+    dup v21.8b, v9.b[5]
+
+    ///vertical row 2@
+    uabal v16.8h, v4.8b, v10.8b
+    uabal v18.8h, v5.8b, v11.8b
+
+    ld1 { v2.2s, v3.2s}, [x0], x3 //row13
+
+    ///HORZ row 2@
+    uabal v26.8h, v4.8b, v20.8b
+    uabal v28.8h, v5.8b, v21.8b
+
+    ///dc row 2@
+    uabal v22.8h, v4.8b, v30.8b
+    uabal v24.8h, v5.8b, v31.8b
+
+    dup v20.8b, v9.b[4] ///HORIZONTAL VALUE ROW=3//
+    dup v21.8b, v9.b[4]
+
+    ///vertical row 3@
+    uabal v16.8h, v6.8b, v10.8b
+    uabal v18.8h, v7.8b, v11.8b
+
+    ld1 { v4.2s, v5.2s}, [x0], x3 //row14
+
+    ///HORZ row 3@
+    uabal v26.8h, v6.8b, v20.8b
+    uabal v28.8h, v7.8b, v21.8b
+
+    ///dc row 3@
+    uabal v22.8h, v6.8b, v30.8b
+    uabal v24.8h, v7.8b, v31.8b
+    //-----------------------------------------------------------------
+
+    dup v20.8b, v9.b[3] ///HORIZONTAL VALUE ROW=0//
+    dup v21.8b, v9.b[3]
+
+    ///vertical row 0@
+    uabal v16.8h, v0.8b, v10.8b
+    uabal v18.8h, v1.8b, v11.8b
+
+    ld1 { v6.2s, v7.2s}, [x0], x3 //row15
+
+    ///HORZ row 0@
+    uabal v26.8h, v0.8b, v20.8b
+    uabal v28.8h, v1.8b, v21.8b
+
+    ///dc row 0@
+    uabal v22.8h, v0.8b, v30.8b
+    uabal v24.8h, v1.8b, v31.8b
+
+    dup v20.8b, v9.b[2] ///HORIZONTAL VALUE ROW=1//
+    dup v21.8b, v9.b[2]
+
+    ///vertical row 1@
+    uabal v16.8h, v2.8b, v10.8b
+    uabal v18.8h, v3.8b, v11.8b
+
+    ///HORZ row 1@
+    uabal v26.8h, v2.8b, v20.8b
+    uabal v28.8h, v3.8b, v21.8b
+
+    ///dc row 1@
+    uabal v22.8h, v2.8b, v30.8b
+    uabal v24.8h, v3.8b, v31.8b
+
+    dup v20.8b, v9.b[1] ///HORIZONTAL VALUE ROW=2//
+    dup v21.8b, v9.b[1]
+
+    ///vertical row 2@
+    uabal v16.8h, v4.8b, v10.8b
+    uabal v18.8h, v5.8b, v11.8b
+
+    ///HORZ row 2@
+    uabal v26.8h, v4.8b, v20.8b
+    uabal v28.8h, v5.8b, v21.8b
+
+    ///dc row 2@
+    uabal v22.8h, v4.8b, v30.8b
+    uabal v24.8h, v5.8b, v31.8b
+
+    dup v20.8b, v9.b[0] ///HORIZONTAL VALUE ROW=3//
+    dup v21.8b, v9.b[0]
+
+    ///vertical row 3@
+    uabal v16.8h, v6.8b, v10.8b
+    uabal v18.8h, v7.8b, v11.8b
+
+    ///HORZ row 3@
+    uabal v26.8h, v6.8b, v20.8b
+    uabal v28.8h, v7.8b, v21.8b
+
+    ///dc row 3@
+    uabal v22.8h, v6.8b, v30.8b
+    uabal v24.8h, v7.8b, v31.8b
+    //------------------------------------------------------------------------------
+
+
+    //vert sum
+
+    add v16.8h, v16.8h , v18.8h
+    mov v18.d[0], v16.d[1]
+    add v16.4h, v16.4h , v18.4h
+    uaddlp v16.2s, v16.4h
+    addp v16.2s, v16.2s, v16.2s
+    smov x8, v16.s[0] //x8 = vertical SAD
+
+
+    //horz sum
+
+    add v26.8h, v26.8h , v28.8h
+    mov v28.d[0], v26.d[1]
+    add v26.4h, v26.4h , v28.4h
+    uaddlp v26.2s, v26.4h
+    addp v26.2s, v26.2s, v26.2s
+    smov x9, v26.s[0] //x9 = horizontal SAD
+
+    //dc sum
+
+    add v24.8h, v22.8h , v24.8h ///DC
+    mov v25.d[0], v24.d[1]
+    add v24.4h, v24.4h , v25.4h ///DC
+    uaddlp v24.2s, v24.4h ///DC
+    addp v24.2s, v24.2s, v24.2s ///DC
+    smov x10, v24.s[0] //x10 = DC SAD
+
+
+    //----------------------- x11 = 1 << 30, substituted for the SAD of any mode that is not enabled
+    mov x11, #1
+    lsl x11, x11, #30
+
+    mov x0, x16
+    //--------------------------------------------
+    ands x7, x0, #01 // vert mode valid?
+    csel x8, x11, x8, eq
+
+
+    ands x6, x0, #02 // horz mode valid?
+    csel x9, x11, x9, eq
+
+    ands x6, x0, #04 // dc mode valid?
+    csel x10, x11, x10, eq
+
+
+
+
+//-------------------------------- restore the arguments that were overwritten above
+
+    mov x4, x17
+    mov x7, x15
+    mov x6, x14
+
+    //---------------------------
+
+    //-------------------------- pick the smallest SAD; ties go VERT, then HORZ, then DC
+
+    cmp x8, x9
+    bgt not_vert
+    cmp x8, x10
+    bgt do_dc
+
+    ///----------------------
+    //DO VERTICAL PREDICTION
+    str w8 , [x7] //MIN SAD (32-bit store: *pu4_sadmin is a WORD32)
+    mov w8, #0
+    str w8 , [x6] // MODE (32-bit store: *u4_intra_mode is a UWORD32)
+    add x6, x1, #17
+    ld1 {v30.16b}, [x6]
+    b do_dc_vert
+    //-----------------------------
+not_vert: cmp x9, x10
+    bgt do_dc
+
+    ///----------------------
+    //DO HORIZONTAL
+    str w9 , [x7] //MIN SAD (32-bit store)
+    mov w9, #1
+    str w9 , [x6] // MODE (32-bit store)
+
+    ld1 {v0.16b}, [x1]
+    dup v10.16b, v0.b[15]
+    dup v11.16b, v0.b[14]
+    dup v12.16b, v0.b[13]
+    dup v13.16b, v0.b[12]
+    st1 {v10.16b}, [x2], x4
+    dup v14.16b, v0.b[11]
+    st1 {v11.16b}, [x2], x4
+    dup v15.16b, v0.b[10]
+    st1 {v12.16b}, [x2], x4
+    dup v16.16b, v0.b[9]
+    st1 {v13.16b}, [x2], x4
+    dup v17.16b, v0.b[8]
+    st1 {v14.16b}, [x2], x4
+    dup v18.16b, v0.b[7]
+    st1 {v15.16b}, [x2], x4
+    dup v19.16b, v0.b[6]
+    st1 {v16.16b}, [x2], x4
+    dup v20.16b, v0.b[5]
+    st1 {v17.16b}, [x2], x4
+    dup v21.16b, v0.b[4]
+    st1 {v18.16b}, [x2], x4
+    dup v22.16b, v0.b[3]
+    st1 {v19.16b}, [x2], x4
+    dup v23.16b, v0.b[2]
+    st1 {v20.16b}, [x2], x4
+    dup v24.16b, v0.b[1]
+    st1 {v21.16b}, [x2], x4
+    dup v25.16b, v0.b[0]
+    st1 {v22.16b}, [x2], x4
+    st1 {v23.16b}, [x2], x4
+    st1 {v24.16b}, [x2], x4
+    st1 {v25.16b}, [x2], x4
+
+
+
+    b end_func
+
+
+    ///-----------------------------
+
+do_dc: ///---------------------------------
+    //DO DC
+    str w10 , [x7] //MIN SAD (32-bit store)
+    mov w10, #2
+    str w10 , [x6] // MODE (32-bit store)
+do_dc_vert:
+    st1 {v30.4s}, [x2], x4 //0
+    st1 {v30.4s}, [x2], x4 //1
+    st1 {v30.4s}, [x2], x4 //2
+    st1 {v30.4s}, [x2], x4 //3
+    st1 {v30.4s}, [x2], x4 //4
+    st1 {v30.4s}, [x2], x4 //5
+    st1 {v30.4s}, [x2], x4 //6
+    st1 {v30.4s}, [x2], x4 //7
+    st1 {v30.4s}, [x2], x4 //8
+    st1 {v30.4s}, [x2], x4 //9
+    st1 {v30.4s}, [x2], x4 //10
+    st1 {v30.4s}, [x2], x4 //11
+    st1 {v30.4s}, [x2], x4 //12
+    st1 {v30.4s}, [x2], x4 //13
+    st1 {v30.4s}, [x2], x4 //14
+    st1 {v30.4s}, [x2], x4 //15
+    ///------------------
+end_func:
+    // restore callee-saved registers and return
+    ldp x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
diff --git a/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
new file mode 100755
index 0000000..b02afd1
--- /dev/null
+++ b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
@@ -0,0 +1,467 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+
+///**
+//******************************************************************************
+//*
+//* @brief :Evaluate best intr chroma mode (among VERT, HORZ and DC )
+//* and do the prediction.
+//*
+//* @par Description
+//* This function evaluates first three intra chroma modes and compute corresponding sad
+//* and return the buffer predicted with best mode.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] pu1_ngbr_pels
+//* UWORD8 pointer to neighbouring pels (left pels from offset 0, row 0 last; top pels from offset 18)
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] u4_n_avblty
+//* availability of neighbouring pixels (bit 0 = left, bit 2 = top)
+//*
+//* @param[out] u4_intra_mode
+//* Pointer to the 32-bit variable in which best mode is returned (0 = DC, 1 = HORZ, 2 = VERT)
+//*
+//* @param[out] pu4_sadmin
+//* Pointer to the 32-bit variable in which minimum sad is returned
+//*
+//* @param[in] u4_valid_intra_modes
+//* Bit mask of the modes to evaluate (bit 0 = DC, bit 1 = HORZ, bit 2 = VERT)
+//*
+//*
+//* @return none
+//*
+//******************************************************************************
+//*/
+//
+//void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
+// UWORD8 *pu1_ngbr_pels_i16,
+// UWORD8 *pu1_dst,
+// UWORD32 src_strd,
+// UWORD32 dst_strd,
+// WORD32 u4_n_avblty,
+// UWORD32 *u4_intra_mode,
+// WORD32 *pu4_sadmin,
+// UWORD32 u4_valid_intra_modes)
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+.global ih264e_evaluate_intra_chroma_modes_av8
+
+ih264e_evaluate_intra_chroma_modes_av8:
+
+//x0 = pu1_src,
+//x1 = pu1_ngbr_pels_i16,
+//x2 = pu1_dst,
+//x3 = src_strd,
+//x4 = dst_strd,
+//x5 = u4_n_avblty,
+//x6 = u4_intra_mode,
+//x7 = pu4_sadmin
+//stack = u4_valid_intra_modes (9th argument)
+
+
+    // save callee-saved registers (push_v_regs comes from ih264_neon_macros.s)
+    push_v_regs
+    stp x19, x20, [sp, #-16]!
+    //-----------------------
+    ldr x16, [sp, #80] //x16 = u4_valid_intra_modes; NOTE(review): #80 presumably = bytes pushed by push_v_regs + 16, confirm against the macro
+    mov x17, x4 //x17 = dst_strd
+    // (removed "mov x18, x5") x18 is the platform register and must not be written; u4_n_avblty is read from x5 directly
+    mov x14, x6 //x14 = u4_intra_mode
+    mov x15, x7 //x15 = pu4_sadmin
+
+    mov x19, #5
+    ands x6, x5, x19
+    beq none_available
+    cmp x6, #1
+    beq left_only_available
+    cmp x6, #4
+    beq top_only_available
+
+all_available:
+    ld1 {v0.8b, v1.8b}, [x1]
+    add x6, x1, #18
+    ld1 {v2.8b, v3.8b}, [x6]
+    uxtl v0.8h, v0.8b
+    uxtl v1.8h, v1.8b
+    addp v0.4s, v0.4s , v0.4s
+    addp v1.4s, v1.4s , v1.4s
+    addp v0.4s, v0.4s , v0.4s
+    addp v1.4s, v1.4s , v1.4s
+    uxtl v2.8h, v2.8b
+    uxtl v3.8h, v3.8b
+    addp v2.4s, v2.4s , v2.4s
+    addp v3.4s, v3.4s , v3.4s
+    addp v2.4s, v2.4s , v2.4s
+    addp v3.4s, v3.4s , v3.4s
+    rshrn v5.8b, v0.8h, #2
+    dup v21.8h, v5.h[0]
+    rshrn v6.8b, v3.8h, #2
+    dup v20.8h, v6.h[0]
+    add v1.8h, v1.8h, v2.8h
+    rshrn v1.8b, v1.8h, #3
+    dup v23.8h, v1.h[0]
+    mov v20.d[0], v23.d[0]
+    add v0.8h, v0.8h, v3.8h
+    rshrn v0.8b, v0.8h, #3
+    dup v23.8h, v0.h[0]
+    mov v31.d[0], v23.d[0]
+    mov v28.d[0], v20.d[0]
+    mov v29.d[0], v20.d[1]
+    mov v30.d[0], v21.d[0]
+    b sad_comp
+
+left_only_available:
+    ld1 {v0.8b, v1.8b}, [x1]
+    uxtl v0.8h, v0.8b
+    uxtl v1.8h, v1.8b
+    addp v0.4s, v0.4s , v0.4s
+    addp v1.4s, v1.4s , v1.4s
+    addp v0.4s, v0.4s , v0.4s
+    addp v1.4s, v1.4s , v1.4s
+    rshrn v0.8b, v0.8h, #2
+    rshrn v1.8b, v1.8h, #2
+
+    dup v28.8h , v1.h[0]
+    dup v29.8h , v1.h[0]
+    dup v30.8h, v0.h[0]
+    dup v31.8h, v0.h[0]
+    b sad_comp
+
+top_only_available:
+    add x6, x1, #18
+    ld1 {v0.8b, v1.8b}, [x6]
+    uxtl v0.8h, v0.8b
+    uxtl v1.8h, v1.8b
+    addp v0.4s, v0.4s , v0.4s
+    addp v1.4s, v1.4s , v1.4s
+    addp v0.4s, v0.4s , v0.4s
+    addp v1.4s, v1.4s , v1.4s
+    rshrn v0.8b, v0.8h, #2
+    rshrn v1.8b, v1.8h, #2
+    dup v28.8h , v0.h[0]
+    dup v30.8h, v1.h[0]
+    mov v29.d[0], v30.d[1]
+    mov v30.d[0], v28.d[0]
+    mov v31.d[0], v30.d[1]
+    b sad_comp
+none_available:
+    mov w20, #128
+    dup v28.16b, w20
+    dup v29.16b, w20
+    dup v30.16b, w20
+    dup v31.16b, w20
+
+
+
+sad_comp:
+    add x6, x1, #18
+    ld1 {v10.8b, v11.8b}, [x6] // vertical values
+
+    ld1 {v27.8h}, [x1]
+
+    dup v20.8h, v27.h[7] ///HORIZONTAL VALUE ROW=0//
+    dup v21.8h, v27.h[7]
+
+    ld1 { v0.8b, v1.8b}, [x0], x3
+
+
+    ///vertical row 0@
+    uabdl v16.8h, v0.8b, v10.8b
+    uabdl v18.8h, v1.8b, v11.8b
+
+    ///HORZ row 0@
+    uabdl v26.8h, v0.8b, v20.8b
+    uabdl v14.8h, v1.8b, v21.8b
+
+    ld1 {v2.8b, v3.8b}, [x0], x3
+
+
+
+    ///dc row 0@
+    uabdl v22.8h, v0.8b, v28.8b
+    uabdl v24.8h, v1.8b, v29.8b
+
+
+    dup v20.8h, v27.h[6]
+    dup v21.8h, v27.h[6] ///HORIZONTAL VALUE ROW=1//
+
+    ///vertical row 1@
+    uabal v16.8h, v2.8b, v10.8b
+    uabal v18.8h, v3.8b, v11.8b
+
+    ld1 { v4.8b, v5.8b}, [x0], x3
+
+    ///HORZ row 1@
+    uabal v26.8h, v2.8b, v20.8b
+    uabal v14.8h, v3.8b, v21.8b
+
+    ///dc row 1@
+    uabal v22.8h, v2.8b, v28.8b
+    uabal v24.8h, v3.8b, v29.8b
+
+    dup v20.8h, v27.h[5]
+    dup v21.8h, v27.h[5] ///HORIZONTAL VALUE ROW=2//
+
+    ///vertical row 2@
+    uabal v16.8h, v4.8b, v10.8b
+    uabal v18.8h, v5.8b, v11.8b
+
+    ld1 { v6.8b, v7.8b}, [x0], x3
+    ///HORZ row 2@
+    uabal v26.8h, v4.8b, v20.8b
+    uabal v14.8h, v5.8b, v21.8b
+
+    ///dc row 2@
+    uabal v22.8h, v4.8b, v28.8b
+    uabal v24.8h, v5.8b, v29.8b
+
+    dup v20.8h, v27.h[4]
+    dup v21.8h, v27.h[4] ///HORIZONTAL VALUE ROW=3//
+
+    ///vertical row 3@
+    uabal v16.8h, v6.8b, v10.8b
+    uabal v18.8h, v7.8b, v11.8b
+
+    ///HORZ row 3@
+    uabal v26.8h, v6.8b, v20.8b
+    uabal v14.8h, v7.8b, v21.8b
+
+    ///dc row 3@
+    uabal v22.8h, v6.8b, v28.8b
+    uabal v24.8h, v7.8b, v29.8b
+
+    //----------------------------------------------------------------------------------------------
+    ld1 { v0.8b, v1.8b}, [x0], x3
+
+
+    dup v20.8h, v27.h[3]
+    dup v21.8h, v27.h[3] ///HORIZONTAL VALUE ROW=0//
+
+    ///vertical row 0@
+    uabal v16.8h, v0.8b, v10.8b
+    uabal v18.8h, v1.8b, v11.8b
+
+    ///HORZ row 0@
+    uabal v26.8h, v0.8b, v20.8b
+    uabal v14.8h, v1.8b, v21.8b
+
+    ld1 { v2.8b, v3.8b}, [x0], x3
+
+    ///dc row 0@
+    uabal v22.8h, v0.8b, v30.8b
+    uabal v24.8h, v1.8b, v31.8b
+
+    dup v20.8h, v27.h[2]
+    dup v21.8h, v27.h[2] ///HORIZONTAL VALUE ROW=1//
+
+    ///vertical row 1@
+    uabal v16.8h, v2.8b, v10.8b
+    uabal v18.8h, v3.8b, v11.8b
+
+    ///HORZ row 1@
+    uabal v26.8h, v2.8b, v20.8b
+    uabal v14.8h, v3.8b, v21.8b
+
+    ld1 { v4.8b, v5.8b}, [x0], x3
+
+    ///dc row 1@
+    uabal v22.8h, v2.8b, v30.8b
+    uabal v24.8h, v3.8b, v31.8b
+
+    dup v20.8h, v27.h[1]
+    dup v21.8h, v27.h[1] ///HORIZONTAL VALUE ROW=2//
+
+    ///vertical row 2@
+    uabal v16.8h, v4.8b, v10.8b
+    uabal v18.8h, v5.8b, v11.8b
+
+    ///HORZ row 2@
+    uabal v26.8h, v4.8b, v20.8b
+    uabal v14.8h, v5.8b, v21.8b
+
+    ld1 {v6.8b, v7.8b}, [x0], x3
+
+    ///dc row 2@
+    uabal v22.8h, v4.8b, v30.8b
+    uabal v24.8h, v5.8b, v31.8b
+
+    dup v20.8h, v27.h[0]
+    dup v21.8h, v27.h[0] ///HORIZONTAL VALUE ROW=3//
+
+    ///vertical row 3@
+    uabal v16.8h, v6.8b, v10.8b
+    uabal v18.8h, v7.8b, v11.8b
+
+    ///HORZ row 3@
+    uabal v26.8h, v6.8b, v20.8b
+    uabal v14.8h, v7.8b, v21.8b
+
+    ///dc row 3@
+    uabal v22.8h, v6.8b, v30.8b
+    uabal v24.8h, v7.8b, v31.8b
+
+
+//-------------------------------------------
+
+
+//vert sum
+
+    add v16.8h, v16.8h , v18.8h
+    mov v18.d[0], v16.d[1]
+    add v16.4h, v16.4h , v18.4h
+    uaddlp v16.2s, v16.4h
+    addp v16.2s, v16.2s, v16.2s
+    smov x8, v16.s[0] //x8 = vertical SAD
+
+
+    //horz sum
+
+    add v26.8h, v26.8h , v14.8h
+    mov v14.d[0], v26.d[1]
+    add v26.4h, v26.4h , v14.4h
+    uaddlp v26.2s, v26.4h
+    addp v26.2s, v26.2s, v26.2s
+    smov x9, v26.s[0] //x9 = horizontal SAD
+
+    //dc sum
+
+    add v24.8h, v22.8h , v24.8h ///DC
+    mov v25.d[0], v24.d[1]
+    add v24.4h, v24.4h , v25.4h ///DC
+    uaddlp v24.2s, v24.4h ///DC
+    addp v24.2s, v24.2s, v24.2s ///DC
+    smov x10, v24.s[0] //x10 = DC SAD
+
+
+
+
+    mov x11, #1
+//-----------------------
+    mov x0, x16 // u4_valid_intra_modes
+
+//-------------------------------------------- x11 = 1 << 30, substituted for the SAD of any mode that is not enabled
+
+
+    lsl x11, x11, #30
+
+    ands x7, x0, #04 // vert mode valid?
+    csel x8, x11, x8, eq
+
+    ands x6, x0, #02 // horz mode valid?
+    csel x9, x11, x9, eq
+
+    ands x6, x0, #01 // dc mode valid?
+    csel x10, x11, x10, eq
+
+
+    //--------------------------- restore the arguments that were overwritten above
+
+    mov x4, x17
+    mov x6, x14
+    mov x7, x15
+
+    //-------------------------- pick the smallest SAD; ties go DC, then HORZ, then VERT
+
+    cmp x10, x9
+    bgt not_dc
+    cmp x10, x8
+    bgt do_vert
+
+    ///----------------------
+    //DO DC PREDICTION
+    str w10 , [x7] //MIN SAD (32-bit store: *pu4_sadmin is a WORD32)
+
+    mov w10, #0
+    str w10 , [x6] // MODE (32-bit store: *u4_intra_mode is a UWORD32)
+
+    b do_dc_vert
+    //-----------------------------
+
+not_dc:
+    cmp x9, x8
+    bgt do_vert
+    ///----------------------
+    //DO HORIZONTAL
+    str w9 , [x7] //MIN SAD (32-bit store)
+
+    mov w10, #1
+    str w10 , [x6] // MODE (32-bit store)
+    ld1 {v0.8h}, [x1]
+
+    dup v10.8h, v0.h[7]
+    dup v11.8h, v0.h[6]
+    dup v12.8h, v0.h[5]
+    dup v13.8h, v0.h[4]
+    st1 {v10.8h}, [x2], x4
+    dup v14.8h, v0.h[3]
+    st1 {v11.8h}, [x2], x4
+    dup v15.8h, v0.h[2]
+    st1 {v12.8h}, [x2], x4
+    dup v16.8h, v0.h[1]
+    st1 {v13.8h}, [x2], x4
+    dup v17.8h, v0.h[0]
+    st1 {v14.8h}, [x2], x4
+    st1 {v15.8h}, [x2], x4
+    st1 {v16.8h}, [x2], x4
+    st1 {v17.8h}, [x2], x4
+
+    b end_func
+
+do_vert:
+    //DO VERTICAL PREDICTION
+    str w8 , [x7] //MIN SAD (32-bit store)
+    mov w8, #2
+    str w8 , [x6] // MODE (32-bit store)
+    add x6, x1, #18
+    ld1 {v28.8b, v29.8b}, [x6] // vertical values
+    ld1 {v30.8b, v31.8b}, [x6] // vertical values
+
+do_dc_vert:
+    st1 {v28.2s, v29.2s} , [x2], x4 //0
+    st1 {v28.2s, v29.2s} , [x2], x4 //1
+    st1 {v28.2s, v29.2s} , [x2], x4 //2
+    st1 {v28.2s, v29.2s} , [x2], x4 //3
+    st1 {v30.2s, v31.2s} , [x2], x4 //4
+    st1 {v30.2s, v31.2s} , [x2], x4 //5
+    st1 {v30.2s, v31.2s} , [x2], x4 //6
+    st1 {v30.2s, v31.2s} , [x2], x4 //7
+
+end_func:
+    // restore callee-saved registers and return
+    ldp x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
diff --git a/encoder/armv8/ih264e_half_pel_av8.s b/encoder/armv8/ih264e_half_pel_av8.s
new file mode 100755
index 0000000..6dbd8f8
--- /dev/null
+++ b/encoder/armv8/ih264e_half_pel_av8.s
@@ -0,0 +1,1024 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+// *******************************************************************************
+// * @file
+// * ih264e_half_pel.s
+// *
+// * @brief
+// *
+// *
+// * @author
+// * Ittiam
+// *
+// * @par List of Functions:
+// * ih264e_sixtapfilter_horz
+// * ih264e_sixtap_filter_2dvh_vert
+//
+// *
+// * @remarks
+// * None
+// *
+// *******************************************************************************
+// */
+
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+// /**
+///*******************************************************************************
+//*
+//* @brief
+//* Interprediction luma filter for horizontal input(Filter run for width = 17 and height =16)
+//*
+//* @par Description:
+//* Applies a 6 tap horizontal filter .The output is clipped to 8 bits
+//* sec 8.4.2.2.1 titled "Luma sample interpolation process"
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
+// UWORD8
*pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd);
+
+
+.equ halfpel_width , 17 + 1 //( make it even, two rows are processed at a time)
+
+
+    .global ih264e_sixtapfilter_horz_av8
+ih264e_sixtapfilter_horz_av8:
+    // x0 = pu1_src, x1 = pu1_dst, x2 = src_strd, x3 = dst_strd; save callee-saved registers first
+    push_v_regs
+    stp x19, x20, [sp, #-16]!
+
+    movi v0.8b, #5 // v0 = tap weight 5 (applied to a1 and a4)
+    sub x0, x0, #2 // the 6-tap window starts 2 pels to the left of the output position
+    sub x3, x3, #16 // the first 16 bytes of each row are stored with a #16 post-increment
+    movi v1.8b, #20 // v1 = tap weight 20 (applied to a2 and a3)
+    mov x14, #16 // x14 = rows remaining; two rows are filtered per iteration
+
+filter_horz_loop:
+
+
+    ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row0
+    ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row1
+
+    //// Processing row0 and row1
+
+    ext v31.8b, v2.8b , v3.8b , #5
+    ext v30.8b, v3.8b , v4.8b , #5
+
+    uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row0)
+    ext v29.8b, v4.8b , v4.8b , #5
+    uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row0)
+    ext v28.8b, v5.8b , v6.8b , #5
+    uaddl v12.8h, v29.8b, v4.8b //// a0 + a5 (column3,row0)
+    ext v27.8b, v6.8b , v7.8b , #5
+    uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row1)
+    ext v26.8b, v7.8b , v7.8b , #5
+
+    uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row1)
+    ext v31.8b, v2.8b , v3.8b , #2
+    uaddl v18.8h, v26.8b, v7.8b //// a0 + a5 (column3,row1)
+    ext v30.8b, v3.8b , v4.8b , #2
+    umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+    ext v29.8b, v4.8b , v4.8b , #2
+    umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+    ext v28.8b, v5.8b , v6.8b , #2
+    umlal v12.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+    ext v27.8b, v6.8b , v7.8b , #2
+    umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row1)
+    ext v26.8b, v7.8b , v7.8b , #2
+
+    umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row1)
+    ext v31.8b, v2.8b , v3.8b , #3
+    umlal v18.8h, v26.8b, v1.8b //// a0 + a5 + 20a2 (column3,row1)
+    ext v30.8b, v3.8b , v4.8b , #3
+    umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+    ext v29.8b, v4.8b , v4.8b , #3
+    umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+    ext v28.8b, v5.8b , v6.8b , #3
+    umlal v12.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+    ext v27.8b, v6.8b , v7.8b , #3
+    umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1)
+    ext v26.8b, v7.8b , v7.8b , #3
+
+    umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row1)
+    ext v31.8b, v2.8b , v3.8b , #1
+    umlal v18.8h, v26.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row1)
+    ext v30.8b, v3.8b , v4.8b , #1
+    umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+    ext v29.8b, v4.8b , v4.8b , #1
+    umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+    ext v28.8b, v5.8b , v6.8b , #1
+    umlsl v12.8h, v29.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+    ext v27.8b, v6.8b , v7.8b , #1
+    umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+    ext v26.8b, v7.8b , v7.8b , #1
+
+    umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1)
+    ext v31.8b, v2.8b , v3.8b , #4
+    umlsl v18.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row1)
+    ext v30.8b, v3.8b , v4.8b , #4
+    umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+    ext v29.8b, v4.8b , v4.8b , #4
+    umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+    ext v28.8b, v5.8b , v6.8b , #4
+    umlsl v12.8h, v29.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+    ext v27.8b, v6.8b , v7.8b , #4
+    umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+    ext v26.8b, v7.8b , v7.8b , #4
+
+    umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1)
+    umlsl v18.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row1)
+
+    sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+    sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+    sqrshrun v22.8b, v12.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+    sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+    sqrshrun v24.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1)
+    sqrshrun v25.8b, v18.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row1)
+
+    st1 {v20.8b, v21.8b}, [x1], #16 ////Store dest row0
+    st1 {v22.h}[0], [x1], x3 //// last 2 bytes of row0, then step to the next dest row (x3 = dst_strd - 16)
+    st1 {v23.8b, v24.8b}, [x1], #16 ////Store dest row1
+    st1 {v25.h}[0], [x1], x3 //// last 2 bytes of row1, then step to the next dest row
+
+    subs x14, x14, #2 // decrement counter
+
+    bne filter_horz_loop
+
+
+    // restore callee-saved registers and return
+    ldp x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+
+
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* This function implements a two stage cascaded six tap filter. It
+//* applies the six tap filter in the vertical direction on the
+//* predictor values, followed by applying the same filter in the
+//* horizontal direction on the output of the first stage. The six tap
+//* filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+//* interpolation process"
+//* (Filter run for width = 17 and height =17)
+//* @par Description:
+//* The function interpolates
+//* the predictors first in the vertical direction and then in the
+//* horizontal direction to output the (1/2,1/2). The output of the first
+//* stage of the filter is stored in the buffer pointed to by pi16_pred1(only in C)
+//* in 16 bit precision.
+//*
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst1
+//* UWORD8 pointer to the destination(vertical filtered output)
+//*
+//* @param[out] pu1_dst2
+//* UWORD8 pointer to the destination(out put after applying horizontal filter to the intermediate vertical output)
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride of pu1_dst
+//*
+//* @param[in]pi16_pred1
+//* Pointer to 16bit intermediate buffer(used only in c)
+//*
+//* @param[in] pi16_pred1_strd
+//* integer destination stride of pi16_pred1
+//*
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst1,
+// UWORD8 *pu1_dst2,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 *pi16_pred1,/* Pointer to 16bit intermmediate buffer (used only in c)*/
+// WORD32 pi16_pred1_strd)
+
+
+
+
+    .global ih264e_sixtap_filter_2dvh_vert_av8
+
+ih264e_sixtap_filter_2dvh_vert_av8:
+    // STMFD sp!,{x10,x11,x12,x14}
+    push_v_regs
+    stp x19, x20, [sp, #-16]!
+ +////x0 - pu1_ref +////x3 - u4_ref_width + + //// Load six rows for vertical interpolation + lsl x12, x3, #1 + sub x0, x0, x12 + sub x0, x0, #2 + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x3 + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x3 + ld1 {v8.8b, v9.8b, v10.8b}, [x0], x3 + mov x12, #5 + ld1 {v11.8b, v12.8b, v13.8b}, [x0], x3 + mov x14, #20 + ld1 {v14.8b, v15.8b, v16.8b}, [x0], x3 + mov v0.4h[0], w12 + mov v0.4h[1], w14 + ld1 {v17.8b, v18.8b, v19.8b}, [x0], x3 + movi v1.8b, #20 + +//// x12 - u2_buff1_width +//// x14 - u2_buff2_width + mov x12, x4 + add x11, x1, #16 + + mov x14, x12 + + mov x10, #3 //loop counter + sub x16 , x12, #8 + sub x19, x14, #16 +filter_2dvh_loop: + + //// ////////////// ROW 1 /////////////////////// + +//// Process first vertical interpolated row +//// each column is + uaddl v20.8h, v2.8b, v17.8b //// a0 + a5 (column1,row0) + movi v31.8b, #5 + umlal v20.8h, v8.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0) + umlal v20.8h, v11.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0) + umlsl v20.8h, v5.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + umlsl v20.8h, v14.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + mov v21.d[0], v20.d[1] + + uaddl v22.8h, v3.8b, v18.8b //// a0 + a5 (column2,row0) + umlal v22.8h, v9.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0) + umlal v22.8h, v12.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0) + umlsl v22.8h, v6.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + umlsl v22.8h, v15.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + ext v30.8b, v20.8b , v21.8b , #4 + mov v23.d[0], v22.d[1] + + + uaddl v24.8h, v4.8b, v19.8b //// a0 + a5 (column3,row0) + ext v29.8b, v20.8b , v21.8b , #6 + umlal v24.8h, v10.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0) + umlal v24.8h, v13.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0) + umlsl v24.8h, v7.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) + umlsl v24.8h, v16.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 
(column3,row0) + mov v25.d[0], v24.d[1] + + sqrshrun v2.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + ext v31.8b, v21.8b , v22.8b , #2 + sqrshrun v3.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + ext v28.8b, v20.8b , v21.8b , #2 + + saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1) + ext v31.8b, v22.8b , v23.8b , #2 + smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1) + smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1) + smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1) + smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1) + ext v30.8b, v21.8b , v22.8b , #4 + + sqrshrun v4.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + ext v29.8b, v21.8b , v22.8b , #6 + + ext v28.8b, v21.8b , v22.8b , #2 + saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2) + smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2) + smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2) + smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2) + smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2) + ext v31.8b, v23.8b , v24.8b , #2 + mov v21.d[0], v20.d[1] + ext v2.8b, v2.8b , v3.8b , #2 + ext v3.8b, v3.8b , v4.8b , #2 + ext v4.8b, v4.8b , v4.8b , #2 + + st1 {v2.8b, v3.8b}, [x1], x12 //// store row1 - 1,1/2 grid + st1 {v4.h}[0], [x11], x12 //// store row1 - 1,1/2 grid + + ext v30.8b, v22.8b , v23.8b , #4 + ext v29.8b, v22.8b , v23.8b , #6 + + saddl v2.4s, v31.4h, v22.4h //// a0 + a5 (set3) + ext v28.8b, v22.8b , v23.8b , #2 + smlal v2.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3) + smlal v2.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3) + smlsl v2.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3) + smlsl v2.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3) + ext v31.8b, v24.8b , v25.8b , #2 + + shrn v21.4h, v20.4s, #8 //// shift by 8 and 
later we will shift by 2 more with rounding (set2) + ext v30.8b, v23.8b , v24.8b , #4 + shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1) + ext v29.8b, v23.8b , v24.8b , #6 + + saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4) + ext v28.8b, v23.8b , v24.8b , #2 + ext v31.8b, v25.8b , v25.8b , #2 + smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4) + smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4) + smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4) + smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4) + ext v30.8b, v24.8b , v25.8b , #4 + + saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5) + ext v29.8b, v24.8b , v25.8b , #6 + + ext v31.8b, v24.8b , v25.8b , #2 + shrn v28.4h, v2.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3) + + ld1 {v2.8b, v3.8b, v4.8b}, [x0], x3 //// Load next Row data + smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5) + smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5) + smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5) + smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5) + shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4) + mov v20.d[1], v21.d[0] + sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2 + + + ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4 + ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5) + + ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5 + + ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values + //// ////////////// ROW 2 /////////////////////// + +//// Process first vertical interpolated row +//// each column is + uaddl v20.8h, v5.8b, v2.8b //// a0 + a5 (column1,row0) + movi v31.8b, #5 + umlal v20.8h, v11.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0) + umlal v20.8h, v14.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 
(column1,row0) + umlsl v20.8h, v8.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + umlsl v20.8h, v17.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + mov v21.d[0], v20.d[1] + + mov v28.d[1], v29.d[0] + sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4 + + shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5) + + uaddl v22.8h, v6.8b, v3.8b //// a0 + a5 (column2,row0) + umlal v22.8h, v12.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0) + umlal v22.8h, v15.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0) + umlsl v22.8h, v9.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + umlsl v22.8h, v18.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + mov v23.d[0], v22.d[1] + + sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5 + ext v30.8b, v20.8b , v21.8b , #4 + + uaddl v24.8h, v7.8b, v4.8b //// a0 + a5 (column3,row0) + ext v29.8b, v20.8b , v21.8b , #6 + umlal v24.8h, v13.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0) + umlal v24.8h, v16.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0) + umlsl v24.8h, v10.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) + umlsl v24.8h, v19.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) + mov v25.d[0], v24.d[1] + + st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values + st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values + + sqrshrun v5.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + ext v31.8b, v21.8b , v22.8b , #2 + sqrshrun v6.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + ext v28.8b, v20.8b , v21.8b , #2 + + saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1) + ext v31.8b, v22.8b , v23.8b , #2 + smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1) + smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1) + smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1) + smlsl v26.4s, v21.4h, v0.4h[0] //// a0 
+ a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1) + ext v30.8b, v21.8b , v22.8b , #4 + + sqrshrun v7.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + ext v29.8b, v21.8b , v22.8b , #6 + + ext v28.8b, v21.8b , v22.8b , #2 + saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2) + smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2) + smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2) + smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2) + smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2) + ext v31.8b, v23.8b , v24.8b , #2 + + ext v5.8b, v5.8b , v6.8b , #2 + ext v6.8b, v6.8b , v7.8b , #2 + ext v7.8b, v7.8b , v7.8b , #2 + + st1 {v5.8b, v6.8b}, [x1], x12 //// store row1 - 1,1/2 grid + st1 {v7.h}[0], [x11], x12 //// store row1 - 1,1/2 grid + + ext v30.8b, v22.8b , v23.8b , #4 + ext v29.8b, v22.8b , v23.8b , #6 + + saddl v6.4s, v31.4h, v22.4h //// a0 + a5 (set3) + ext v28.8b, v22.8b , v23.8b , #2 + smlal v6.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3) + smlal v6.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3) + smlsl v6.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3) + smlsl v6.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3) + ext v31.8b, v24.8b , v25.8b , #2 + + shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2) + ext v30.8b, v23.8b , v24.8b , #4 + shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1) + ext v29.8b, v23.8b , v24.8b , #6 + + saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4) + ext v28.8b, v23.8b , v24.8b , #2 + ext v31.8b, v25.8b , v25.8b , #2 + smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4) + smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4) + smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4) + smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4) + ext v30.8b, v24.8b , v25.8b , #4 + 
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5) + ext v29.8b, v24.8b , v25.8b , #6 + + ext v31.8b, v24.8b , v25.8b , #2 + shrn v28.4h, v6.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3) + + ld1 {v5.8b, v6.8b, v7.8b}, [x0], x3 //// Load next Row data + smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5) + smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5) + smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5) + smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5) + shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4) + mov v20.d[1], v21.d[0] + sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2 + + + ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4 + ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5) + + ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5 + + ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values + //// ////////////// ROW 3 /////////////////////// + +//// Process first vertical interpolated row +//// each column is + uaddl v20.8h, v8.8b, v5.8b //// a0 + a5 (column1,row0) + movi v31.8b, #5 + umlal v20.8h, v14.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0) + umlal v20.8h, v17.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0) + umlsl v20.8h, v11.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + umlsl v20.8h, v2.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + mov v21.d[0], v20.d[1] + + mov v28.d[1], v29.d[0] + sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4 + shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5) + + uaddl v22.8h, v9.8b, v6.8b //// a0 + a5 (column2,row0) + umlal v22.8h, v15.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0) + umlal v22.8h, v18.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0) + umlsl v22.8h, v12.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) 
+ umlsl v22.8h, v3.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + mov v23.d[0], v22.d[1] + + sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5 + ext v30.8b, v20.8b , v21.8b , #4 + + uaddl v24.8h, v10.8b, v7.8b //// a0 + a5 (column3,row0) + ext v29.8b, v20.8b , v21.8b , #6 + umlal v24.8h, v16.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0) + umlal v24.8h, v19.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0) + umlsl v24.8h, v13.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) + umlsl v24.8h, v4.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) + mov v25.d[0], v24.d[1] + + st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values + st1 { v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values + + sqrshrun v8.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + ext v31.8b, v21.8b , v22.8b , #2 + sqrshrun v9.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + ext v28.8b, v20.8b , v21.8b , #2 + + saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1) + ext v31.8b, v22.8b , v23.8b , #2 + smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1) + smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1) + smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1) + smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1) + ext v30.8b, v21.8b , v22.8b , #4 + + sqrshrun v10.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + ext v29.8b, v21.8b , v22.8b , #6 + + ext v28.8b, v21.8b , v22.8b , #2 + saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2) + smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2) + smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2) + smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2) + smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2) + ext v31.8b, v23.8b , v24.8b , #2 + + ext v8.8b, v8.8b , v9.8b , #2 + ext v9.8b, v9.8b 
, v10.8b , #2 + ext v10.8b, v10.8b , v10.8b , #2 + + st1 {v8.8b, v9.8b}, [x1], x12 //// store row1 - 1,1/2 grid + st1 {v10.h}[0], [x11], x12 //// store row1 - 1,1/2 grid + + ext v30.8b, v22.8b , v23.8b , #4 + ext v29.8b, v22.8b , v23.8b , #6 + + saddl v8.4s, v31.4h, v22.4h //// a0 + a5 (set3) + ext v28.8b, v22.8b , v23.8b , #2 + smlal v8.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3) + smlal v8.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3) + smlsl v8.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3) + smlsl v8.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3) + ext v31.8b, v24.8b , v25.8b , #2 + + shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2) + ext v30.8b, v23.8b , v24.8b , #4 + shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1) + ext v29.8b, v23.8b , v24.8b , #6 + + saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4) + ext v28.8b, v23.8b , v24.8b , #2 + ext v31.8b, v25.8b , v25.8b , #2 + smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4) + smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4) + smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4) + smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4) + ext v30.8b, v24.8b , v25.8b , #4 + + saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5) + ext v29.8b, v24.8b , v25.8b , #6 + + ext v31.8b, v24.8b , v25.8b , #2 + shrn v28.4h, v8.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3) + + ld1 {v8.8b, v9.8b, v10.8b}, [x0], x3 //// Load next Row data + smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5) + smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5) + smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5) + smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5) + shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding 
(set4) + mov v20.d[1], v21.d[0] + sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2 + + + ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4 + ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5) + + ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5 + + ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values + //// ////////////// ROW 4 /////////////////////// + +//// Process first vertical interpolated row +//// each column is + uaddl v20.8h, v11.8b, v8.8b //// a0 + a5 (column1,row0) + movi v31.8b, #5 + umlal v20.8h, v17.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0) + umlal v20.8h, v2.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0) + umlsl v20.8h, v14.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + umlsl v20.8h, v5.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + mov v21.d[0], v20.d[1] + mov v28.d[1], v29.d[0] + sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4 + shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5) + + uaddl v22.8h, v12.8b, v9.8b //// a0 + a5 (column2,row0) + umlal v22.8h, v18.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0) + umlal v22.8h, v3.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0) + umlsl v22.8h, v15.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + umlsl v22.8h, v6.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + mov v23.d[0], v22.d[1] + + sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5 + ext v30.8b, v20.8b , v21.8b , #4 + + uaddl v24.8h, v13.8b, v10.8b //// a0 + a5 (column3,row0) + ext v29.8b, v20.8b , v21.8b , #6 + umlal v24.8h, v19.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0) + umlal v24.8h, v4.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0) + umlsl v24.8h, v16.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) + umlsl v24.8h, v7.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) + mov v25.d[0], v24.d[1] + + st1 {v26.8b, v27.8b}, [x2], 
#16 //// store 1/2,1,2 grif values + st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values + + sqrshrun v11.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + ext v31.8b, v21.8b , v22.8b , #2 + sqrshrun v12.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + ext v28.8b, v20.8b , v21.8b , #2 + + saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1) + ext v31.8b, v22.8b , v23.8b , #2 + smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1) + smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1) + smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1) + smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1) + ext v30.8b, v21.8b , v22.8b , #4 + + sqrshrun v13.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + ext v29.8b, v21.8b , v22.8b , #6 + + ext v28.8b, v21.8b , v22.8b , #2 + saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2) + smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2) + smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2) + smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2) + smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2) + ext v31.8b, v23.8b , v24.8b , #2 + + ext v11.8b, v11.8b , v12.8b , #2 + ext v12.8b, v12.8b , v13.8b , #2 + ext v13.8b, v13.8b , v13.8b , #2 + + st1 {v11.8b, v12.8b}, [x1], x12 //// store row1 - 1,1/2 grid + st1 {v13.h}[0], [x11], x12 //// store row1 - 1,1/2 grid + + ext v30.8b, v22.8b , v23.8b , #4 + ext v29.8b, v22.8b , v23.8b , #6 + + saddl v12.4s, v31.4h, v22.4h //// a0 + a5 (set3) + ext v28.8b, v22.8b , v23.8b , #2 + smlal v12.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3) + smlal v12.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3) + smlsl v12.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3) + smlsl v12.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3) + ext v31.8b, v24.8b , v25.8b , #2 
+ + shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2) + ext v30.8b, v23.8b , v24.8b , #4 + shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1) + ext v29.8b, v23.8b , v24.8b , #6 + + saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4) + ext v28.8b, v23.8b , v24.8b , #2 + ext v31.8b, v25.8b , v25.8b , #2 + smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4) + smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4) + smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4) + smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4) + ext v30.8b, v24.8b , v25.8b , #4 + + saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5) + ext v29.8b, v24.8b , v25.8b , #6 + + ext v31.8b, v24.8b , v25.8b , #2 + shrn v28.4h, v12.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3) + + ld1 {v11.8b, v12.8b, v13.8b}, [x0], x3 //// Load next Row data + smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5) + smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5) + smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5) + smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5) + shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4) + mov v20.d[1], v21.d[0] + sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2 + + + ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4 + ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5) + + ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5 + + ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values + //// ////////////// ROW 5 /////////////////////// + +//// Process first vertical interpolated row +//// each column is + uaddl v20.8h, v14.8b, v11.8b //// a0 + a5 (column1,row0) + movi v31.8b, #5 + umlal v20.8h, v2.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0) + 
umlal v20.8h, v5.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0) + umlsl v20.8h, v17.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + umlsl v20.8h, v8.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + mov v21.d[0], v20.d[1] + mov v28.d[1], v29.d[0] + sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4 + shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5) + + uaddl v22.8h, v15.8b, v12.8b //// a0 + a5 (column2,row0) + umlal v22.8h, v3.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0) + umlal v22.8h, v6.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0) + umlsl v22.8h, v18.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + umlsl v22.8h, v9.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + mov v23.d[0], v22.d[1] + + sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5 + ext v30.8b, v20.8b , v21.8b , #4 + + uaddl v24.8h, v16.8b, v13.8b //// a0 + a5 (column3,row0) + ext v29.8b, v20.8b , v21.8b , #6 + umlal v24.8h, v4.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0) + umlal v24.8h, v7.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0) + umlsl v24.8h, v19.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) + umlsl v24.8h, v10.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) + mov v25.d[0], v24.d[1] + + st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values + st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values + + sqrshrun v14.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + ext v31.8b, v21.8b , v22.8b , #2 + sqrshrun v15.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + ext v28.8b, v20.8b , v21.8b , #2 + + saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1) + ext v31.8b, v22.8b , v23.8b , #2 + smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1) + smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1) + smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 
5a1 (set1) + smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1) + ext v30.8b, v21.8b , v22.8b , #4 + + sqrshrun v16.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + ext v29.8b, v21.8b , v22.8b , #6 + + ext v28.8b, v21.8b , v22.8b , #2 + saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2) + smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2) + smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2) + smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2) + smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2) + ext v31.8b, v23.8b , v24.8b , #2 + + ext v14.8b, v14.8b , v15.8b , #2 + ext v15.8b, v15.8b , v16.8b , #2 + ext v16.8b, v16.8b , v16.8b , #2 + + st1 {v14.8b, v15.8b}, [x1], x12 //// store row1 - 1,1/2 grid + st1 {v16.h}[0], [x11], x12 //// store row1 - 1,1/2 grid + + ext v30.8b, v22.8b , v23.8b , #4 + ext v29.8b, v22.8b , v23.8b , #6 + + saddl v14.4s, v31.4h, v22.4h //// a0 + a5 (set3) + ext v28.8b, v22.8b , v23.8b , #2 + smlal v14.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3) + smlal v14.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3) + smlsl v14.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3) + smlsl v14.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3) + ext v31.8b, v24.8b , v25.8b , #2 + + shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2) + ext v30.8b, v23.8b , v24.8b , #4 + shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1) + ext v29.8b, v23.8b , v24.8b , #6 + + saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4) + ext v28.8b, v23.8b , v24.8b , #2 + ext v31.8b, v25.8b , v25.8b , #2 + smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4) + smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4) + smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4) + smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 
+ 20a2 + 20a3 - 5a1 - 5a4 (set4) + ext v30.8b, v24.8b , v25.8b , #4 + + saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5) + ext v29.8b, v24.8b , v25.8b , #6 + + ext v31.8b, v24.8b , v25.8b , #2 + shrn v28.4h, v14.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3) + + ld1 {v14.8b, v15.8b, v16.8b}, [x0], x3 //// Load next Row data + smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5) + smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5) + smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5) + smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5) + shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4) + mov v20.d[1], v21.d[0] + sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2 + + + ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4 + ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5) + + ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5 + + ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values + //// ////////////// ROW 6 /////////////////////// + +//// Process first vertical interpolated row +//// each column is + + cmp x10, #1 //// if it 17 rows are complete skip + beq filter_2dvh_skip_row + uaddl v20.8h, v17.8b, v14.8b //// a0 + a5 (column1,row0) + movi v31.8b, #5 + umlal v20.8h, v5.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0) + umlal v20.8h, v8.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0) + umlsl v20.8h, v2.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0) + umlsl v20.8h, v11.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0) + mov v21.d[0], v20.d[1] + mov v28.d[1], v29.d[0] + sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4 + shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5) + + uaddl v22.8h, v18.8b, v15.8b //// a0 + a5 (column2,row0) + umlal v22.8h, v6.8b, v1.8b //// a0 + a5 + 20a2 
(column2,row0) + umlal v22.8h, v9.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0) + umlsl v22.8h, v3.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0) + umlsl v22.8h, v12.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0) + mov v23.d[0], v22.d[1] + + sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5 + ext v30.8b, v20.8b , v21.8b , #4 + + uaddl v24.8h, v19.8b, v16.8b //// a0 + a5 (column3,row0) + ext v29.8b, v20.8b , v21.8b , #6 + umlal v24.8h, v7.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0) + umlal v24.8h, v10.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0) + umlsl v24.8h, v4.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0) + umlsl v24.8h, v13.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0) + mov v25.d[0], v24.d[1] + + st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values + st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values + + sqrshrun v17.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0) + ext v31.8b, v21.8b , v22.8b , #2 + sqrshrun v18.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0) + ext v28.8b, v20.8b , v21.8b , #2 + + saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1) + ext v31.8b, v22.8b , v23.8b , #2 + smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1) + smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1) + smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1) + smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1) + ext v30.8b, v21.8b , v22.8b , #4 + + sqrshrun v19.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0) + ext v29.8b, v21.8b , v22.8b , #6 + + ext v28.8b, v21.8b , v22.8b , #2 + saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2) + smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2) + smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2) + smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2) 
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2) + ext v31.8b, v23.8b , v24.8b , #2 + + ext v17.8b, v17.8b , v18.8b , #2 + ext v18.8b, v18.8b , v19.8b , #2 + ext v19.8b, v19.8b , v19.8b , #2 + + st1 {v17.8b, v18.8b}, [x1], x12 //// store row1 - 1,1/2 grid + st1 {v19.h}[0], [x11], x12 //// store row1 - 1,1/2 grid + + ext v30.8b, v22.8b , v23.8b , #4 + ext v29.8b, v22.8b , v23.8b , #6 + + saddl v18.4s, v31.4h, v22.4h //// a0 + a5 (set3) + ext v28.8b, v22.8b , v23.8b , #2 + smlal v18.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3) + smlal v18.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3) + smlsl v18.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3) + smlsl v18.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3) + ext v31.8b, v24.8b , v25.8b , #2 + + shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2) + ext v30.8b, v23.8b , v24.8b , #4 + shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1) + ext v29.8b, v23.8b , v24.8b , #6 + + saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4) + ext v28.8b, v23.8b , v24.8b , #2 + ext v31.8b, v25.8b , v25.8b , #2 + smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4) + smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4) + smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4) + smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4) + ext v30.8b, v24.8b , v25.8b , #4 + + saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5) + ext v29.8b, v24.8b , v25.8b , #6 + + ext v31.8b, v24.8b , v25.8b , #2 + shrn v28.4h, v18.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3) + + ld1 {v17.8b, v18.8b, v19.8b}, [x0], x3 //// Load next Row data + smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5) + smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5) + smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 
5a1 (set5) + smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5) + shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4) + mov v20.d[1], v21.d[0] + sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2 + + mov v28.d[1], v29.d[0] + sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4 + shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5) + + sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5 + + st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values + st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values + + subs x10, x10, #1 ////decrement loop counter + + bne filter_2dvh_loop + + +//// Process first vertical interpolated row +//// each column is + //// ////////////// ROW 13 /////////////////////// + +//// Process first vertical interpolated row +//// each column is + + // LDMFD sp!,{x10,x11,x12,pc} + ldp x19, x20, [sp], #16 + pop_v_regs + ret + +filter_2dvh_skip_row: + mov v28.d[1], v29.d[0] + sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4 + shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5) + + sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5 + + st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values + st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values + // LDMFD sp!,{x10,x11,x12,pc} + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + +///***************************************** + + + + + + + .section .note.gnu-stack,"",%progbits diff --git a/encoder/armv8/ih264e_platform_macros.h b/encoder/armv8/ih264e_platform_macros.h new file mode 100755 index 0000000..39cac96 --- /dev/null +++ b/encoder/armv8/ih264e_platform_macros.h @@ -0,0 +1,143 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this 
file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264e_platform_macros.h +* +* @brief +* Contains platform specific routines used for codec context initialization +* +* @author +* ittiam +* +* @remarks +* none +* +******************************************************************************* +*/ + +#ifndef IH264E_PLATFORM_MACROS_H_ +#define IH264E_PLATFORM_MACROS_H_ + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context based on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* 
@par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr_generic(codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr(void *pv_codec); + +/** +******************************************************************************* +* +* @brief Determine the architecture of the encoder executing environment +* +* @par Description: This routine returns the architecture of the enviro- +* ment in which the current encoder is being tested +* +* @param[in] void +* +* @returns IV_ARCH_T +* architecture +* +* @remarks none +* +******************************************************************************* +*/ +IV_ARCH_T ih264e_default_arch(void); + +#endif /* 
IH264E_PLATFORM_MACROS_H_ */ diff --git a/encoder/armv8/ime_distortion_metrics_av8.s b/encoder/armv8/ime_distortion_metrics_av8.s new file mode 100755 index 0000000..99ebc8a --- /dev/null +++ b/encoder/armv8/ime_distortion_metrics_av8.s @@ -0,0 +1,978 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+//*/
+//**
+
+///**
+//******************************************************************************
+//*
+//*
+//* @brief
+//*  This file contains definitions of routines that compute distortion
+//*  between two macro/sub blocks of identical dimensions
+//*
+//* @author
+//*  Ittiam
+//*
+//* @par List of Functions:
+//*  - ime_compute_sad_16x16_fast_av8()
+//*  - ime_compute_sad_16x8_av8()
+//*  - ime_compute_sad_16x16_ea8_av8()
+//*  - ime_calculate_sad2_prog_av8()
+//*  - ime_calculate_sad3_prog_av8()
+//*  - ime_sub_pel_compute_sad_16x16_av8()
+//*  - ime_compute_sad_16x16_av8()
+//*  - ime_calculate_sad4_prog_av8()
+//*  - ime_compute_satqd_16x16_lumainter_av8()
+//*
+//* @remarks
+//*  None
+//*
+//*******************************************************************************
+//
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+//*
+//* @par   Description
+//*   This function estimates the SAD between 2 16x16 blocks. Both strides are
+//*   doubled so only every other row (8 of the 16) is read, and the resulting
+//*   sum is doubled before it is stored. There is no early exit in this
+//*   variant: i4_max_sad (x4) is never read.
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source (x0)
+//*
+//* @param[in] pu1_dst
+//*  UWORD8 pointer to the block compared against the source (x1, read only)
+//*
+//* @param[in] src_strd
+//*  integer source stride (x2)
+//*
+//* @param[in] dst_strd
+//*  integer destination stride (x3)
+//*
+//* @param[in] i4_max_sad
+//*  integer maximum allowed distortion (x4, unused here)
+//*
+//* @param[out] pi4_mb_distortion
+//*  integer evaluated sad, one 32-bit word written through x5
+//*
+//* @remarks
+//*  NOTE(review): x2/x3 are used as full 64-bit post-index offsets. If the
+//*  strides are passed as 32-bit integers their upper halves are unspecified
+//*  under AAPCS64 - confirm against the C prototype whether a sign/zero
+//*  extension is needed.
+//*
+//******************************************************************************
+//*/
+.text
+.p2align 2
+
+// Save d8-d15 (the low 64 bits of v8-v15, which AAPCS64 makes callee-saved)
+// on the stack; 64 bytes in total.
+.macro push_v_regs
+    stp       d8, d9, [sp, #-16]!
+    stp       d10, d11, [sp, #-16]!
+    stp       d12, d13, [sp, #-16]!
+    stp       d14, d15, [sp, #-16]!
+.endm
+// Restore d8-d15 in the reverse order of push_v_regs.
+.macro pop_v_regs
+    ldp       d14, d15, [sp], #16
+    ldp       d12, d13, [sp], #16
+    ldp       d10, d11, [sp], #16
+    ldp       d8, d9, [sp], #16
+.endm
+
+    .global ime_compute_sad_16x16_fast_av8
+ime_compute_sad_16x16_fast_av8:
+    push_v_regs
+    lsl       x2, x2, #1                  // step two rows at a time
+    lsl       x3, x3, #1
+
+    mov       x6, #2                      // 2 iterations x 4 loaded rows = 8 rows
+    movi      v30.8h, #0                  // 8 x 16-bit SAD accumulators
+
+core_loop_ime_compute_sad_16x16_fast_av8:
+
+    ld1       {v0.16b}, [x0], x2
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x0], x2
+    ld1       {v3.16b}, [x1], x3
+
+    // uabal/uabal2: accumulate |a - b| of the low/high 8 bytes of each row
+    uabal     v30.8h, v0.8b, v1.8b
+    uabal2    v30.8h, v0.16b, v1.16b
+
+    uabal     v30.8h, v2.8b, v3.8b
+    uabal2    v30.8h, v2.16b, v3.16b
+
+    ld1       {v4.16b}, [x0], x2
+    ld1       {v5.16b}, [x1], x3
+    ld1       {v6.16b}, [x0], x2
+    ld1       {v7.16b}, [x1], x3
+
+    uabal     v30.8h, v4.8b, v5.8b
+    uabal2    v30.8h, v4.16b, v5.16b
+
+    uabal     v30.8h, v6.8b, v7.8b
+    uabal2    v30.8h, v6.16b, v7.16b
+
+    subs      x6, x6, #1
+    bne       core_loop_ime_compute_sad_16x16_fast_av8
+
+
+    // horizontal reduction of the 8 accumulators into lane 0 (32-bit)
+    addp      v30.8h, v30.8h, v30.8h
+    uaddlp    v30.4s, v30.8h
+    addp      v30.2s, v30.2s, v30.2s
+    shl       v30.2s, v30.2s, #1          // double: only half of the rows were summed
+
+    st1       {v30.s}[0], [x5]            // *pi4_mb_distortion
+    pop_v_regs
+    ret
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x8 blocks
+//*
+//*
+//* @par   Description
+//*   This functions computes SAD between 2 16x8 blocks. There is a provision
+//*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+//*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//*  UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//*  integer source stride
+//*
+//* @param[in] dst_strd
+//*  integer destination stride
+//*
+//* @param[in] u4_max_sad
+//*  integer maximum allowed distortion
+//*
+//* @param[in] pi4_mb_distortion
+//*  integer evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+//
+    .global ime_compute_sad_16x8_av8
+ime_compute_sad_16x8_av8:
+
+    // Registers as used below: x0 = pu1_src, x1 = pu1_dst, x2 = src_strd,
+    // x3 = dst_strd, x5 = pi4_mb_distortion. x4 (max sad) is never read, so
+    // this routine always sums the whole block.
+    // The strides are used as passed (no doubling): all 8 rows are read.
+    push_v_regs
+    mov       x6, #2                      // 2 iterations x 4 rows = 8 rows
+    movi      v30.8h, #0                  // 8 x 16-bit SAD accumulators
+
+core_loop_ime_compute_sad_16x8_av8:
+
+    ld1       {v0.16b}, [x0], x2
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x0], x2
+    ld1       {v3.16b}, [x1], x3
+
+    // accumulate |src - dst| for the low (uabal) and high (uabal2) 8 bytes
+    uabal     v30.8h, v0.8b, v1.8b
+    uabal2    v30.8h, v0.16b, v1.16b
+
+    uabal     v30.8h, v2.8b, v3.8b
+    uabal2    v30.8h, v2.16b, v3.16b
+
+    ld1       {v4.16b}, [x0], x2
+    ld1       {v5.16b}, [x1], x3
+    ld1       {v6.16b}, [x0], x2
+    ld1       {v7.16b}, [x1], x3
+
+    uabal     v30.8h, v4.8b, v5.8b
+    uabal2    v30.8h, v4.16b, v5.16b
+
+    uabal     v30.8h, v6.8b, v7.8b
+    uabal2    v30.8h, v6.16b, v7.16b
+
+    subs      x6, x6, #1
+    bne       core_loop_ime_compute_sad_16x8_av8
+
+
+    // horizontal reduction of the 8 accumulators into lane 0 (32-bit)
+    addp      v30.8h, v30.8h, v30.8h
+    uaddlp    v30.4s, v30.8h
+    addp      v30.2s, v30.2s, v30.2s
+
+    st1       {v30.s}[0], [x5]            // *pi4_mb_distortion
+    pop_v_regs
+    ret
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x16 blocks with early exit
+//*
+//* @par   Description
+//*   This functions computes SAD between 2 16x16 blocks. There is a provision
+//*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+//*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//*  UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//*  integer source stride
+//*
+//* @param[in] dst_strd
+//*  integer destination stride
+//*
+//* @param[in] i4_max_sad
+//*  integer maximum allowed distortion
+//*
+//* @param[in] pi4_mb_distortion
+//*  integer evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+
+    .global ime_compute_sad_16x16_ea8_av8
+ime_compute_sad_16x16_ea8_av8:
+
+    // Registers as used below: x0 = pu1_src, x1 = pu1_dst, x2 = src_strd,
+    // x3 = dst_strd, w4 = i4_max_sad, x5 = pi4_mb_distortion.
+    // Pass 1 sums the even rows (0,2,..,14); if that partial SAD is greater
+    // than i4_max_sad (signed compare) it is stored and the routine returns.
+    // Otherwise pass 2 adds the odd rows and the full SAD is stored.
+    push_v_regs                           // v8-v15 are used as row buffers
+    movi      v30.8h, #0                  // 8 x 16-bit SAD accumulators
+
+    add       x7, x0, x2                  // x7 = src row 1 (start of odd rows)
+    add       x8, x1, x3                  // x8 = dst row 1
+
+    lsl       x2, x2, #1                  // step two rows at a time
+    lsl       x3, x3, #1
+
+    // pass 1: even rows
+    ld1       {v0.16b}, [x0], x2
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x0], x2
+    ld1       {v3.16b}, [x1], x3
+    ld1       {v8.16b}, [x0], x2
+    ld1       {v9.16b}, [x1], x3
+    ld1       {v10.16b}, [x0], x2
+    ld1       {v11.16b}, [x1], x3
+    ld1       {v12.16b}, [x0], x2
+    ld1       {v13.16b}, [x1], x3
+    ld1       {v14.16b}, [x0], x2
+    ld1       {v15.16b}, [x1], x3
+    ld1       {v16.16b}, [x0], x2
+    ld1       {v17.16b}, [x1], x3
+    ld1       {v18.16b}, [x0], x2
+    ld1       {v19.16b}, [x1], x3
+
+    uabal     v30.8h, v0.8b, v1.8b
+    uabal2    v30.8h, v0.16b, v1.16b
+
+    uabal     v30.8h, v2.8b, v3.8b
+    uabal2    v30.8h, v2.16b, v3.16b
+
+    uabal     v30.8h, v8.8b, v9.8b
+    uabal2    v30.8h, v8.16b, v9.16b
+
+    uabal     v30.8h, v10.8b, v11.8b
+    uabal2    v30.8h, v10.16b, v11.16b
+
+    uabal     v30.8h, v12.8b, v13.8b
+    uabal2    v30.8h, v12.16b, v13.16b
+
+    uabal     v30.8h, v14.8b, v15.8b
+    uabal2    v30.8h, v14.16b, v15.16b
+
+    uabal     v30.8h, v16.8b, v17.8b
+    uabal2    v30.8h, v16.16b, v17.16b
+
+    uabal     v30.8h, v18.8b, v19.8b
+    uabal2    v30.8h, v18.16b, v19.16b
+
+    // reduce into v31 so that v30 keeps accumulating for pass 2
+    addp      v31.8h, v30.8h, v30.8h
+    uaddlp    v31.4s, v31.8h
+    addp      v31.2s, v31.2s, v31.2s
+    mov       w6, v31.s[0]                // partial SAD of the even rows
+    cmp       w6, w4
+    bgt       end_func_16x16              // early exit: store the partial SAD
+
+    //do the stuff again
+    // pass 2: odd rows, through x7/x8
+    ld1       {v0.16b}, [x7], x2
+    ld1       {v1.16b}, [x8], x3
+    ld1       {v2.16b}, [x7], x2
+    ld1       {v3.16b}, [x8], x3
+    ld1       {v8.16b}, [x7], x2
+    ld1       {v9.16b}, [x8], x3
+    ld1       {v10.16b}, [x7], x2
+    ld1       {v11.16b}, [x8], x3
+    ld1       {v12.16b}, [x7], x2
+    ld1       {v13.16b}, [x8], x3
+    ld1       {v14.16b}, [x7], x2
+    ld1       {v15.16b}, [x8], x3
+    ld1       {v16.16b}, [x7], x2
+    ld1       {v17.16b}, [x8], x3
+    ld1       {v18.16b}, [x7], x2
+    ld1       {v19.16b}, [x8], x3
+
+    uabal     v30.8h, v0.8b, v1.8b
+    uabal2    v30.8h, v0.16b, v1.16b
+
+    uabal     v30.8h, v2.8b, v3.8b
+    uabal2    v30.8h, v2.16b, v3.16b
+
+    uabal     v30.8h, v8.8b, v9.8b
+    uabal2    v30.8h, v8.16b, v9.16b
+
+    uabal     v30.8h, v10.8b, v11.8b
+    uabal2    v30.8h, v10.16b, v11.16b
+
+    uabal     v30.8h, v12.8b, v13.8b
+    uabal2    v30.8h, v12.16b, v13.16b
+
+    uabal     v30.8h, v14.8b, v15.8b
+    uabal2    v30.8h, v14.16b, v15.16b
+
+    uabal     v30.8h, v16.8b, v17.8b
+    uabal2    v30.8h, v16.16b, v17.16b
+
+    uabal     v30.8h, v18.8b, v19.8b
+    uabal2    v30.8h, v18.16b, v19.16b
+
+    // v30 now holds both passes; reduce to the full 16x16 SAD
+    addp      v31.8h, v30.8h, v30.8h
+    uaddlp    v31.4s, v31.8h
+    addp      v31.2s, v31.2s, v31.2s
+
+end_func_16x16:
+    st1       {v31.s}[0], [x5]            // partial (early exit) or full SAD
+    pop_v_regs
+    ret
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      : ime_calculate_sad2_prog_av8()
+////
+//// Detail Description : This function finds the SAD values of the source
+////                      block against 2 reference blocks at one shot
+////
+//// Platform           : CortexAv8/NEON .
+////
+////-----------------------------------------------------------------------------
+//*/
+
+    .global ime_calculate_sad2_prog_av8
+ime_calculate_sad2_prog_av8:
+
+    // Computes the SAD of one 16-pixel wide, 16-row source block against two
+    // reference blocks and writes psad[0] (vs ref1) and psad[1] (vs ref2).
+    //
+    // x0    = ref1     <UWORD8 *>
+    // x1    = ref2     <UWORD8 *>
+    // x2    = src      <UWORD8 *>
+    // w3    = RefBufferWidth <UWORD32>
+    // w4    = CurBufferWidth <UWORD32>
+    // x5    = psad     <UWORD32 *>
+    // (on AArch64 the 5th and 6th arguments arrive in x4/x5, not on the stack)
+    push_v_regs                           // v8-v11 are used as row buffers
+    // The strides are 32-bit arguments, so the upper halves of x3/x4 are
+    // unspecified on entry; writing the W register zero-extends them before
+    // they are used as 64-bit post-index offsets.
+    mov       w3, w3
+    mov       w4, w4
+    mov       x6, #4                      // 4 iterations x 4 rows = 16 rows
+    movi      v30.8h, #0                  // SAD accumulators, src vs ref1
+    movi      v31.8h, #0                  // SAD accumulators, src vs ref2
+
+core_loop_ime_calculate_sad2_prog_av8:
+
+    // src rows are read through x2 (the previous code used x3, the stride
+    // value, as the address)
+    ld1       {v0.16b}, [x0], x3
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x2], x4
+
+    ld1       {v3.16b}, [x0], x3
+    ld1       {v4.16b}, [x1], x3
+    ld1       {v5.16b}, [x2], x4
+
+
+    uabal     v30.8h, v0.8b, v2.8b
+    uabal2    v30.8h, v0.16b, v2.16b
+    uabal     v31.8h, v1.8b, v2.8b
+    uabal2    v31.8h, v1.16b, v2.16b
+
+    uabal     v30.8h, v3.8b, v5.8b
+    uabal2    v30.8h, v3.16b, v5.16b
+    uabal     v31.8h, v4.8b, v5.8b
+    uabal2    v31.8h, v4.16b, v5.16b
+
+
+    ld1       {v6.16b}, [x0], x3
+    ld1       {v7.16b}, [x1], x3
+    ld1       {v8.16b}, [x2], x4
+
+    ld1       {v9.16b}, [x0], x3
+    ld1       {v10.16b}, [x1], x3
+    ld1       {v11.16b}, [x2], x4
+
+    uabal     v30.8h, v6.8b, v8.8b
+    uabal2    v30.8h, v6.16b, v8.16b
+    uabal     v31.8h, v7.8b, v8.8b
+    uabal2    v31.8h, v7.16b, v8.16b
+
+    uabal     v30.8h, v9.8b, v11.8b
+    uabal2    v30.8h, v9.16b, v11.16b
+    uabal     v31.8h, v10.8b, v11.8b
+    uabal2    v31.8h, v10.16b, v11.16b    // high half of the ref2 row (was v0)
+
+    subs      x6, x6, #1
+    bne       core_loop_ime_calculate_sad2_prog_av8
+
+    // v30.8h <- [pair sums of ref1 acc | pair sums of ref2 acc]
+    addp      v30.8h, v30.8h, v31.8h
+    // v30.4s <- [ref1_a, ref1_b, ref2_a, ref2_b]
+    uaddlp    v30.4s, v30.8h
+    // v30.4s <- [sad1, sad2, sad1, sad2]; a .2s pairwise add here would only
+    // see the ref1 lanes and store sad1 twice
+    addp      v30.4s, v30.4s, v30.4s
+    // no doubling: every row of the block has been summed
+
+    st1       {v30.2s}, [x5]              // psad[0] = sad1, psad[1] = sad2
+    pop_v_regs
+    ret
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      : ime_calculate_sad3_prog_av8()
+////
+//// Detail Description : This function finds the SAD values of the source
+////                      block against 3 reference blocks at one shot
+////
+//// Platform           : CortexA8/NEON .
+////
+////-----------------------------------------------------------------------------
+//*/
+
+    .global ime_calculate_sad3_prog_av8
+ime_calculate_sad3_prog_av8:
+
+    // Computes the SAD of one 16-pixel wide, 16-row source block against
+    // three reference blocks and writes psad[0..2] (vs ref1, ref2, ref3).
+    //
+    // x0    = ref1     <UWORD8 *>
+    // x1    = ref2     <UWORD8 *>
+    // x2    = ref3     <UWORD8 *>
+    // x3    = src      <UWORD8 *>
+    // w4    = RefBufferWidth <UWORD32>
+    // w5    = CurBufferWidth <UWORD32>
+    // x6    = psad     <UWORD32 *>
+    // (on AArch64 the 5th-7th arguments arrive in x4-x6, not on the stack)
+    push_v_regs
+    // The strides are 32-bit arguments, so the upper halves of x4/x5 are
+    // unspecified on entry; writing the W register zero-extends them.
+    mov       w4, w4
+    mov       w5, w5
+    // x7 is the loop counter: x6 carries psad and must not be clobbered.
+    mov       x7, #8                      // 8 iterations x 2 rows = 16 rows
+    movi      v29.8h, #0                  // SAD accumulators, src vs ref1
+    movi      v30.8h, #0                  // SAD accumulators, src vs ref2
+    movi      v31.8h, #0                  // SAD accumulators, src vs ref3
+
+core_loop_ime_calculate_sad3_prog_av8:
+
+    ld1       {v0.16b}, [x0], x4
+    ld1       {v1.16b}, [x1], x4
+    ld1       {v2.16b}, [x2], x4
+    ld1       {v3.16b}, [x3], x5
+
+    uabal     v29.8h, v0.8b, v3.8b
+    uabal2    v29.8h, v0.16b, v3.16b
+    uabal     v30.8h, v1.8b, v3.8b
+    uabal2    v30.8h, v1.16b, v3.16b
+    uabal     v31.8h, v2.8b, v3.8b
+    uabal2    v31.8h, v2.16b, v3.16b
+
+    ld1       {v4.16b}, [x0], x4
+    ld1       {v5.16b}, [x1], x4
+    ld1       {v6.16b}, [x2], x4
+    ld1       {v7.16b}, [x3], x5
+
+    uabal     v29.8h, v4.8b, v7.8b
+    uabal2    v29.8h, v4.16b, v7.16b
+    uabal     v30.8h, v5.8b, v7.8b
+    uabal2    v30.8h, v5.16b, v7.16b
+    uabal     v31.8h, v6.8b, v7.8b
+    uabal2    v31.8h, v6.16b, v7.16b
+
+    subs      x7, x7, #1
+    // loop back to this function's own label (the previous code branched
+    // into ime_calculate_sad2_prog_av8's loop)
+    bne       core_loop_ime_calculate_sad3_prog_av8
+
+    // v29.8h <- [pair sums of ref1 acc | pair sums of ref2 acc]
+    addp      v29.8h, v29.8h, v30.8h
+    // v31.8h <- [pair sums of ref3 acc | same again]
+    addp      v31.8h, v31.8h, v31.8h
+    uaddlp    v29.4s, v29.8h              // [ref1_a, ref1_b, ref2_a, ref2_b]
+    uaddlp    v31.4s, v31.8h              // [ref3_a, ref3_b, ref3_a, ref3_b]
+    addp      v29.4s, v29.4s, v31.4s      // [sad1, sad2, sad3, sad3]
+    // no doubling: every row of the block has been summed
+
+    // store exactly three words through psad (x6, not the src stride in x5)
+    st1       {v29.2s}, [x6], #8          // psad[0], psad[1]
+    st1       {v29.s}[2], [x6]            // psad[2]
+    pop_v_regs
+    ret
+
+
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) for sub-pel motion estimation
+//*
+//* @par   Description
+//*   This functions computes SAD for all the 8 half pel points
+//*
+//* @param[out] pi4_sad
+//*  integer evaluated sad
+//*  pi4_sad[0] - half x
+//*  pi4_sad[1] - half x - 1
+//*  pi4_sad[2] - half y
+//*  pi4_sad[3] - half y - 1
+//*  pi4_sad[4] - half xy
+//*  pi4_sad[5] - half xy - 1
+//*  pi4_sad[6] - half xy - strd
+//*  pi4_sad[7] - half xy - 1 - strd
+//*
+//* 
@remarks
+//*
+//******************************************************************************
+//*/
+
+.text
+.p2align 2
+
+    .global ime_sub_pel_compute_sad_16x16_av8
+ime_sub_pel_compute_sad_16x16_av8:
+    // Registers as used below:
+    //   x0 = source block, advanced by x4 per row
+    //   x1 = half-x plane, x2 = half-y plane, x3 = half-xy plane,
+    //        all advanced by x5 per row
+    //   x6 = pi4_sad, receives 8 x 32-bit SADs
+    // Eight candidate positions are derived from the three planes and their
+    // SADs against the source are accumulated over 16 rows.
+    push_v_regs                           // v9-v13 are used as row buffers
+    sub       x7, x1, #1                  //x left
+    sub       x8, x2, x5                  //y top
+    sub       x9, x3, #1                  //xy left
+    sub       x10, x3, x5                 //xy top
+    sub       x11, x10, #1                //xy top left
+
+    // one 8 x 16-bit accumulator per candidate position
+    movi      v24.8h, #0
+    movi      v25.8h, #0
+    movi      v26.8h, #0
+    movi      v27.8h, #0
+    movi      v28.8h, #0
+    movi      v29.8h, #0
+    movi      v30.8h, #0
+    movi      v31.8h, #0
+
+    mov       x12, #16                    // one row per iteration
+core_loop_ime_sub_pel_compute_sad_16x16_av8:
+
+    ld1       {v0.16b}, [x0], x4          //src
+    ld1       {v1.16b}, [x1], x5          //x
+    ld1       {v2.16b}, [x7], x5          //x left
+    ld1       {v3.16b}, [x2], x5          //y
+    ld1       {v9.16b}, [x8], x5          //y top
+    ld1       {v10.16b}, [x3], x5         //xy
+    ld1       {v11.16b}, [x9], x5         //xy left
+    ld1       {v12.16b}, [x10], x5        //xy top
+    ld1       {v13.16b}, [x11], x5        //xy top left
+
+    uabal     v24.8h, v0.8b, v1.8b
+    uabal2    v24.8h, v0.16b, v1.16b
+    uabal     v25.8h, v0.8b, v2.8b
+    uabal2    v25.8h, v0.16b, v2.16b
+    uabal     v26.8h, v0.8b, v3.8b
+    uabal2    v26.8h, v0.16b, v3.16b
+    uabal     v27.8h, v0.8b, v9.8b
+    uabal2    v27.8h, v0.16b, v9.16b
+    uabal     v28.8h, v0.8b, v10.8b
+    uabal2    v28.8h, v0.16b, v10.16b
+    uabal     v29.8h, v0.8b, v11.8b
+    uabal2    v29.8h, v0.16b, v11.16b
+    uabal     v30.8h, v0.8b, v12.8b
+    uabal2    v30.8h, v0.16b, v12.16b
+    uabal     v31.8h, v0.8b, v13.8b
+    uabal2    v31.8h, v0.16b, v13.16b
+
+    subs      x12, x12, #1
+    bne       core_loop_ime_sub_pel_compute_sad_16x16_av8
+
+    // pairwise reduction, two accumulators per register
+    addp      v24.8h, v24.8h, v25.8h
+    addp      v26.8h, v26.8h, v27.8h
+    addp      v28.8h, v28.8h, v29.8h
+    addp      v30.8h, v30.8h, v31.8h
+
+    uaddlp    v24.4s, v24.8h
+    uaddlp    v26.4s, v26.8h
+    uaddlp    v28.4s, v28.8h
+    uaddlp    v30.4s, v30.8h
+
+    // v24 = [x, x left, y, y top], v25 = [xy, xy left, xy top, xy top left]
+    addp      v24.4s, v24.4s, v26.4s
+    addp      v25.4s, v28.4s, v30.4s
+
+    st1       {v24.4s-v25.4s}, [x6]       // pi4_sad[0..7]
+
+
+    pop_v_regs
+    ret
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x16 blocks
+//*
+//* @par   Description
+//*   This functions computes SAD between 
2 16x16 blocks. There is a provision
+//*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+//*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//*  UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//*  integer source stride
+//*
+//* @param[in] dst_strd
+//*  integer destination stride
+//*
+//* @param[in] i4_max_sad
+//*  integer maximum allowed distortion
+//*
+//* @param[in] pi4_mb_distortion
+//*  integer evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+    .global ime_compute_sad_16x16_av8
+ime_compute_sad_16x16_av8:
+    // Registers as used below: x0 = pu1_src, x1 = pu1_dst, x2 = src_strd,
+    // x3 = dst_strd, x5 = pi4_mb_distortion. x4 (i4_max_sad) is never read:
+    // this implementation always sums all 16 rows, without early exit.
+    push_v_regs
+    mov       x6, #4                      // 4 iterations x 4 rows = 16 rows
+    movi      v30.8h, #0                  // 8 x 16-bit SAD accumulators
+
+core_loop_ime_compute_sad_16x16_av8:
+
+    ld1       {v0.16b}, [x0], x2
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x0], x2
+    ld1       {v3.16b}, [x1], x3
+
+    // accumulate |src - dst| for the low (uabal) and high (uabal2) 8 bytes
+    uabal     v30.8h, v0.8b, v1.8b
+    uabal2    v30.8h, v0.16b, v1.16b
+
+    uabal     v30.8h, v2.8b, v3.8b
+    uabal2    v30.8h, v2.16b, v3.16b
+
+    ld1       {v4.16b}, [x0], x2
+    ld1       {v5.16b}, [x1], x3
+    ld1       {v6.16b}, [x0], x2
+    ld1       {v7.16b}, [x1], x3
+
+    uabal     v30.8h, v4.8b, v5.8b
+    uabal2    v30.8h, v4.16b, v5.16b
+
+    uabal     v30.8h, v6.8b, v7.8b
+    uabal2    v30.8h, v6.16b, v7.16b
+
+    subs      x6, x6, #1
+    bne       core_loop_ime_compute_sad_16x16_av8
+
+
+    // horizontal reduction of the 8 accumulators into lane 0 (32-bit)
+    addp      v30.8h, v30.8h, v30.8h
+    uaddlp    v30.4s, v30.8h
+    addp      v30.2s, v30.2s, v30.2s
+
+    st1       {v30.s}[0], [x5]            // *pi4_mb_distortion
+    pop_v_regs
+    ret
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      : ime_calculate_sad4_prog_av8()
+////
+//// Detail Description : This function find the sad values of 4 Progressive MBs
+////                      at one shot
+////
+//// Platform           : CortexA8/NEON .
+////
+////-----------------------------------------------------------------------------
+//*/
+
+    .global ime_calculate_sad4_prog_av8
+ime_calculate_sad4_prog_av8:
+    // Registers as used below:
+    //   x0 = reference block (centre position), x2 = its stride
+    //   x1 = source block, x3 = its stride
+    //   x4 = output, receives 4 x 32-bit SADs in the order
+    //        [left, right, top, bottom]
+    // The source is compared against the reference shifted by one pixel
+    // left/right and by one row up/down, over 16 rows.
+    push_v_regs                           // v9 is used as a row buffer
+    sub       x5, x0, #1                  //left
+    add       x6, x0, #1                  //right
+    sub       x7, x0, x2                  //top
+    add       x8, x0, x2                  //bottom
+
+    // one 8 x 16-bit accumulator per neighbour
+    movi      v28.8h, #0
+    movi      v29.8h, #0
+    movi      v30.8h, #0
+    movi      v31.8h, #0
+
+    mov       x9, #16                     // one row per iteration
+core_loop_ime_calculate_sad4_prog_av8:
+
+    ld1       {v0.16b}, [x1], x3          // source row
+    ld1       {v1.16b}, [x5], x2          // left
+    ld1       {v2.16b}, [x6], x2          // right
+    ld1       {v3.16b}, [x7], x2          // top
+    ld1       {v9.16b}, [x8], x2          // bottom
+
+    uabal     v28.8h, v0.8b, v1.8b
+    uabal2    v28.8h, v0.16b, v1.16b
+    uabal     v29.8h, v0.8b, v2.8b
+    uabal2    v29.8h, v0.16b, v2.16b
+    uabal     v30.8h, v0.8b, v3.8b
+    uabal2    v30.8h, v0.16b, v3.16b
+    uabal     v31.8h, v0.8b, v9.8b
+    uabal2    v31.8h, v0.16b, v9.16b
+
+    subs      x9, x9, #1
+    bne       core_loop_ime_calculate_sad4_prog_av8
+
+    // pairwise reduction, two accumulators per register
+    addp      v28.8h, v28.8h, v29.8h
+    addp      v30.8h, v30.8h, v31.8h
+
+    uaddlp    v28.4s, v28.8h
+    uaddlp    v30.4s, v30.8h
+
+    addp      v28.4s, v28.4s, v30.4s      // [left, right, top, bottom]
+    st1       {v28.4s}, [x4]
+    pop_v_regs
+    ret
+
+
+
+//*****************************************************************************
+//*
+//* Function Name         : ime_compute_satqd_16x16_lumainter_av8
+//* Description           : This function computes SAD for a 16x16 block.
+//                        : It also computes if any 4x4 block will have a nonzero coefficient after transform and quant
+//
+//  Arguments             :   x0 :pointer to src buffer
+//                            x1 :pointer to est buffer
+//                            x2 :source stride
+//                            x3 :est stride
+//                            x4 :Threshold pointer
+//                            x5 :pointer to the distortion (SAD) output
+//                            x6 :pointer to the is_nonzero output
+//*
+//* Values Returned   : NONE
+//*
+//* Register Usage    : x0-x11
+//* Stack Usage       : 64 bytes (d8-d15, saved by push_v_regs)
+//* Cycles            : Around
+//* Interruptibility  : Interruptible
+//*
+//* Known Limitations
+//*   \Assumptions    :
+//*
+//* Revision History  :
+//*         DD MM YYYY    Author(s)           Changes
+//*         14 04 2014    Harinarayanan K K  First version
+//*
+//*****************************************************************************
+    .global ime_compute_satqd_16x16_lumainter_av8
+ime_compute_satqd_16x16_lumainter_av8:
+    //x0 :pointer to src buffer
+    //x1 :pointer to est buffer
+    //x2 :Source stride
+    //x3 :Pred stride
+    //x4 :Threshold pointer
+    //x5 :Distortion,ie SAD
+    //x6 :is nonzero
+    //x7 :loop counter
+    // push_v_regs already saves d8-d15; the second, identical stp sequence
+    // that used to follow it (and its ldp counterpart before pop_v_regs) was
+    // redundant and has been dropped.
+    push_v_regs
+
+    // eight 16-bit thresholds, each broadcast and then packed two per register
+    ld1       {v30.8h}, [x4]
+
+    dup       v20.4h, v30.h[1]            //ls1
+    dup       v24.4h, v30.h[0]            //ls2
+    dup       v21.4h, v30.h[5]            //ls3
+    dup       v25.4h, v30.h[7]            //ls4
+    dup       v22.4h, v30.h[3]            //ls5
+    dup       v26.4h, v30.h[4]            //ls6
+    dup       v23.4h, v30.h[6]            //ls7
+    dup       v27.4h, v30.h[2]            //ls8
+
+    mov       v20.d[1], v24.d[0]
+    mov       v21.d[1], v25.d[0]
+    mov       v22.d[1], v26.d[0]
+    mov       v23.d[1], v27.d[0]
+
+    // ninth threshold: compared directly against the per-4x4 SAD
+    add       x4, x4, #16
+    ld1       {v29.h}[0], [x4]
+    dup       v29.4h, v29.h[0]
+
+    movi      v31.8h, #0                  // running SAD
+
+    mov       x7, #4                      // 4 iterations x 4 rows = 16 rows
+core_loop_satqd_ime_compute_satqd_16x16_lumainter:
+    ld1       {v0.16b}, [x0], x2
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x0], x2
+    ld1       {v3.16b}, [x1], x3
+    ld1       {v4.16b}, [x0], x2
+    ld1       {v5.16b}, [x1], x3
+    ld1       {v6.16b}, [x0], x2
+    ld1       {v7.16b}, [x1], x3
+
+    // absolute differences of the four rows, widened to 16 bit
+    uabdl     v10.8h, v0.8b, v1.8b
+    uabdl2    v15.8h, v0.16b, v1.16b
+    uabdl     v11.8h, v2.8b, v3.8b
+    uabdl2    v16.8h, v2.16b, v3.16b
+    uabdl     v12.8h, v4.8b, v5.8b
+    uabdl2    v17.8h, v4.16b, v5.16b
+    uabdl     v13.8h, v6.8b, v7.8b
+    uabdl2    v18.8h, v6.16b, v7.16b
+
+    add       v0.8h, v10.8h, v13.8h
+    add       v1.8h, v11.8h, v12.8h
+    add       v2.8h, v15.8h, v18.8h
+    add       v3.8h, v16.8h, v17.8h
+
+    //v0 : S1 S4 S4 S1 A1 A4 A4 A1
+    //v1 : S2 S3 S3 S2 A2 A3 A3 A2
+    //v2 : B1 B4 B4 B1 X1 X4 X4 X1
+    //v3 : B3 B2 B2 B3 X3 X2 X2 X3
+
+    trn1      v4.8h, v0.8h, v1.8h
+    trn2      v5.8h, v0.8h, v1.8h
+    trn1      v6.8h, v2.8h, v3.8h
+    trn2      v7.8h, v2.8h, v3.8h
+
+    trn1      v0.4s, v4.4s, v6.4s
+    trn2      v2.4s, v4.4s, v6.4s
+    trn1      v1.4s, v5.4s, v7.4s
+    trn2      v3.4s, v5.4s, v7.4s
+
+    add       v4.8h, v0.8h, v3.8h
+    add       v5.8h, v1.8h, v2.8h
+    //v4 : S1 S2 B1 B2 A1 A2 X1 X2
+    //v5 : S4 S3 B4 B3 A4 A3 X4 X3
+
+    //compute sad for each 4x4 block
+    add       v6.8h, v4.8h, v5.8h
+    addp      v19.8h, v6.8h, v6.8h
+    //duplicate the sad into 128 bit so that we can compare using 128bit
+    add       v31.4h, v31.4h, v19.4h
+
+    //sad_2 = sad_1<<1;
+    shl       v28.8h, v19.8h, #1
+
+    //sad_2 - pu2_thrsh
+    sub       v24.8h, v28.8h, v20.8h
+    sub       v25.8h, v28.8h, v21.8h
+    sub       v26.8h, v28.8h, v22.8h
+    sub       v27.8h, v28.8h, v23.8h
+
+    trn1      v0.4s, v4.4s, v5.4s
+    trn2      v1.4s, v4.4s, v5.4s
+    //v0 : S1 S2 S4 S3 A1 A2 A4 A3
+    //v1 : B1 B2 B4 B3 X1 X2 X4 X3
+
+    trn1      v4.8h, v0.8h, v1.8h
+    trn2      v5.8h, v0.8h, v1.8h
+    //v4 : S1 B1 S4 B4 A1 X1 A4 X4
+    //v5 : S2 B2 S3 B3 A2 X2 A3 X3
+
+    mov       v7.s[0], v4.s[1]
+    mov       v7.s[1], v4.s[3]
+    mov       v6.s[0], v5.s[1]            // V4 //S1 B1 A1 X1
+    mov       v6.s[1], v5.s[3]            // V5 //S2 B2 A2 X2
+    mov       v4.s[1], v4.s[2]            // V6 //S3 B3 A3 X3
+    mov       v5.s[1], v5.s[2]            // V7 //S4 B4 A4 X4
+
+    shl       v0.4h, v4.4h, #1            //S1<<1
+    shl       v1.4h, v5.4h, #1            //S2<<1
+    shl       v2.4h, v6.4h, #1            //S3<<1
+    shl       v3.4h, v7.4h, #1            //S4<<1
+
+    add       v8.4h, v5.4h, v6.4h         //(s2[j] + s3[j]))
+    add       v9.4h, v4.4h, v7.4h         //(s1[j] + s4[j]))
+    add       v10.4h, v6.4h, v7.4h        //(s3[j] + s4[j]))
+    sub       v11.4h, v6.4h, v0.4h        //(s3[j] - (s1[j]<<1))
+    sub       v12.4h, v7.4h, v1.4h        //(s4[j] - (s2[j]<<1))
+    add       v13.4h, v4.4h, v5.4h        //(s1[j] + s2[j]))
+    sub       v14.4h, v5.4h, v3.4h        //(s2[j] - (s4[j]<<1)))
+    sub       v15.4h, v4.4h, v2.4h        //(s1[j] - (s3[j]<<1)))
+
+    mov       v8.d[1], v9.d[0]
+    mov       v10.d[1], v11.d[0]
+    mov       v12.d[1], v13.d[0]
+    mov       v14.d[1], v15.d[0]
+
+    cmge      v0.8h, v24.8h, v8.8h        //ls1 ls2
+    cmge      v1.8h, v25.8h, v10.8h       //ls3 ls4
+    cmge      v2.8h, v26.8h, v12.8h       //ls5 ls6
+    cmge      v3.8h, v27.8h, v14.8h       //ls7 ls8
+    cmge      v4.4h, v19.4h, v29.4h       //sad
+
+    // fold all comparison masks into the low 64 bits of v2
+    orr       v0.16b, v0.16b, v1.16b
+    orr       v2.16b, v2.16b, v3.16b
+    orr       v2.16b, v0.16b, v2.16b
+    xtn       v2.8b, v2.8h
+    orr       v2.8b, v2.8b, v4.8b
+
+    //if the comparison is non zero, out
+    mov       x4, v2.d[0]
+    cmp       x4, #0
+    bne       core_loop_compute_sad_pre   // some test passed: only the SAD is still needed
+
+    subs      x7, x7, #1
+    bne       core_loop_satqd_ime_compute_satqd_16x16_lumainter
+    b         satdq_end_func
+
+
+    // plain SAD accumulation for the remaining groups of four rows
+core_loop_compute_sad:
+    ld1       {v0.16b}, [x0], x2
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x0], x2
+    ld1       {v3.16b}, [x1], x3
+
+    uabal     v31.8h, v0.8b, v1.8b
+    uabal2    v31.8h, v0.16b, v1.16b
+
+    uabal     v31.8h, v2.8b, v3.8b
+    uabal2    v31.8h, v2.16b, v3.16b
+
+    ld1       {v4.16b}, [x0], x2
+    ld1       {v5.16b}, [x1], x3
+    ld1       {v6.16b}, [x0], x2
+    ld1       {v7.16b}, [x1], x3
+
+    uabal     v31.8h, v4.8b, v5.8b
+    uabal2    v31.8h, v4.16b, v5.16b
+
+    uabal     v31.8h, v6.8b, v7.8b
+    uabal2    v31.8h, v6.16b, v7.16b
+
+core_loop_compute_sad_pre:
+    subs      x7, x7, #1                  // account for the group just processed
+    bne       core_loop_compute_sad
+
+satdq_end_func:
+
+    // *is_nonzero = (x4 != 0) ? 1 : 0
+    mov       x7, #1
+    cmp       x4, #0
+    csel      x7, x4, x7, eq
+    str       w7, [x6]
+
+    // horizontal reduction of the SAD accumulators into lane 0 (32-bit)
+    addp      v31.8h, v31.8h, v31.8h
+    uaddlp    v31.4s, v31.8h
+    addp      v31.2s, v31.2s, v31.2s
+    st1       {v31.s}[0], [x5]
+
+
+    pop_v_regs
+    ret
+    .section .note.gnu-stack,"",%progbits
diff --git a/encoder/armv8/ime_platform_macros.h b/encoder/armv8/ime_platform_macros.h
new file mode 100755
index 0000000..0f5b2f2
--- /dev/null
+++ b/encoder/armv8/ime_platform_macros.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ime_platform_macros.h +* +* @brief +* Platform specific Macro definitions used in the codec +* +* @author +* Ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _IME_PLATFORM_MACROS_H_ +#define _IME_PLATFORM_MACROS_H_ + +/*****************************************************************************/ +/* Function macro definitions */ +/*****************************************************************************/ + +#define USADA8(src,est,sad) \ + sad += ABS(src[0]-est[0]) + \ + ABS(src[1]-est[1]) + \ + ABS(src[2]-est[2]) + \ + ABS(src[3]-est[3]) + + +#endif /* _IH264_PLATFORM_MACROS_H_ */ diff --git a/encoder/ih264e.h b/encoder/ih264e.h new file mode 100755 index 0000000..15a9d8f --- /dev/null +++ b/encoder/ih264e.h @@ -0,0 +1,620 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ih264e.h */ +/* */ +/* Description : This file contains all the necessary structure and */ +/* enumeration definitions needed for the Application */ +/* Program Interface(API) of the Ittiam MPEG4 */ +/* Encoder on Cortex A8 - Neon platform */ +/* */ +/* List of Functions : ih264e_api_function */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 26 08 2010 100239(RCY) Draft */ +/* */ +/*****************************************************************************/ + +#ifndef _IH264E_H_ +#define _IH264E_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "iv2.h" +#include "ive2.h" +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + + +/*****************************************************************************/ +/* API Function Prototype */ +/*****************************************************************************/ +IV_STATUS_T ih264e_api_function(iv_obj_t *ps_handle, void *pv_api_ip,void *pv_api_op); + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ +typedef enum +{ + IH264E_CMD_CTL_SET_ME_INFO_ENABLE, +}IH264E_CMD_CTL_SUB_CMDS; + + +/*****************************************************************************/ +/* Extended Structures */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Get Number of Memory Records */ +/*****************************************************************************/ + + +typedef struct +{ + iv_num_mem_rec_ip_t s_ive_ip; 
+}ih264e_num_mem_rec_ip_t; + + +typedef struct +{ + iv_num_mem_rec_op_t s_ive_op; +}ih264e_num_mem_rec_op_t; + + +/*****************************************************************************/ +/* Fill Memory Records */ +/*****************************************************************************/ + + +typedef struct +{ + iv_fill_mem_rec_ip_t s_ive_ip; +}ih264e_fill_mem_rec_ip_t; + + +typedef struct +{ + iv_fill_mem_rec_op_t s_ive_op; +}ih264e_fill_mem_rec_op_t; + +/*****************************************************************************/ +/* Retrieve Memory Records */ +/*****************************************************************************/ + + +typedef struct +{ + iv_retrieve_mem_rec_ip_t s_ive_ip; +}ih264e_retrieve_mem_rec_ip_t; + + +typedef struct +{ + iv_retrieve_mem_rec_op_t s_ive_op; +}ih264e_retrieve_mem_rec_op_t; + + +/*****************************************************************************/ +/* Initialize encoder */ +/*****************************************************************************/ + +typedef struct +{ + ive_init_ip_t s_ive_ip; +}ih264e_init_ip_t; + + +typedef struct +{ + ive_init_op_t s_ive_op; +}ih264e_init_op_t; + + +/*****************************************************************************/ +/* Queue Input raw buffer - Send the YUV buffer to be encoded */ +/*****************************************************************************/ +typedef struct +{ + ive_queue_inp_ip_t s_ive_ip; +}ih264e_queue_inp_ip_t; + +typedef struct +{ + ive_queue_inp_op_t s_ive_op; +}ih264e_queue_inp_op_t; + +/*****************************************************************************/ +/* Dequeue Input raw buffer - Get free YUV buffer from the encoder */ +/*****************************************************************************/ +typedef struct +{ + ive_dequeue_inp_ip_t s_ive_ip; +}ih264e_dequeue_inp_ip_t; + +typedef struct +{ + ive_dequeue_inp_op_t s_ive_op; +}ih264e_dequeue_inp_op_t; + + 
+/*****************************************************************************/ +/* Queue Output bitstream buffer - Send the bitstream buffer to be filled */ +/*****************************************************************************/ +typedef struct +{ + ive_queue_out_ip_t s_ive_ip; +}ih264e_queue_out_ip_t; + +typedef struct +{ + ive_queue_out_op_t s_ive_op; +}ih264e_queue_out_op_t; + +/*****************************************************************************/ +/* Dequeue Output bitstream buffer - Get the bitstream buffer filled */ +/*****************************************************************************/ +typedef struct +{ + ive_dequeue_out_ip_t s_ive_ip; +}ih264e_dequeue_out_ip_t; + +typedef struct +{ + ive_dequeue_out_op_t s_ive_op; +}ih264e_dequeue_out_op_t; + + +/*****************************************************************************/ +/* Get Recon data - Get the reconstructed data from encoder */ +/*****************************************************************************/ +typedef struct +{ + ive_get_recon_ip_t s_ive_ip; +}ih264e_get_recon_ip_t; + +typedef struct +{ + ive_get_recon_op_t s_ive_op; +}ih264e_get_recon_op_t; +/*****************************************************************************/ +/* Video control Flush */ +/*****************************************************************************/ + + +typedef struct +{ + ive_ctl_flush_ip_t s_ive_ip; +}ih264e_ctl_flush_ip_t; + + +typedef struct +{ + ive_ctl_flush_op_t s_ive_op; +}ih264e_ctl_flush_op_t; + +/*****************************************************************************/ +/* Video control reset */ +/*****************************************************************************/ + + +typedef struct +{ + ive_ctl_reset_ip_t s_ive_ip; +}ih264e_ctl_reset_ip_t; + + +typedef struct +{ + ive_ctl_reset_op_t s_ive_op; +}ih264e_ctl_reset_op_t; + + +/*****************************************************************************/ +/* Video control:Get Buf Info */ 
+/*****************************************************************************/ + + +typedef struct +{ + ive_ctl_getbufinfo_ip_t s_ive_ip; +}ih264e_ctl_getbufinfo_ip_t; + + + +typedef struct +{ + ive_ctl_getbufinfo_op_t s_ive_op; +}ih264e_ctl_getbufinfo_op_t; + + + +/*****************************************************************************/ +/* Video control:Get Version Info */ +/*****************************************************************************/ + + +typedef struct +{ + ive_ctl_getversioninfo_ip_t s_ive_ip; +}ih264e_ctl_getversioninfo_ip_t; + + + +typedef struct +{ + ive_ctl_getversioninfo_op_t s_ive_op; +}ih264e_ctl_getversioninfo_op_t; + +/*****************************************************************************/ +/* Video control:Set default params */ +/*****************************************************************************/ + + +typedef struct +{ + ive_ctl_setdefault_ip_t s_ive_ip; +}ih264e_ctl_setdefault_ip_t; + + + +typedef struct +{ + ive_ctl_setdefault_op_t s_ive_op; +}ih264e_ctl_setdefault_op_t; + +/*****************************************************************************/ +/* Video control Set IPE params */ +/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_ipe_params_ip_t s_ive_ip; +}ih264e_ctl_set_ipe_params_ip_t; + +typedef struct +{ + ive_ctl_set_ipe_params_op_t s_ive_op; +}ih264e_ctl_set_ipe_params_op_t; + +/*****************************************************************************/ +/* Video control Set Frame dimensions */ +/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_dimensions_ip_t s_ive_ip; +}ih264e_ctl_set_dimensions_ip_t; + +typedef struct +{ + ive_ctl_set_dimensions_op_t s_ive_op; +}ih264e_ctl_set_dimensions_op_t; + +/*****************************************************************************/ +/* Video control Set Frame rates */ 
+/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_frame_rate_ip_t s_ive_ip; +}ih264e_ctl_set_frame_rate_ip_t; +typedef struct +{ + ive_ctl_set_frame_rate_op_t s_ive_op; +}ih264e_ctl_set_frame_rate_op_t; + + +/*****************************************************************************/ +/* Video control Set Bitrate */ +/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_bitrate_ip_t s_ive_ip; +}ih264e_ctl_set_bitrate_ip_t; + +typedef struct +{ + ive_ctl_set_bitrate_op_t s_ive_op; +}ih264e_ctl_set_bitrate_op_t; + + +/*****************************************************************************/ +/* Video control Set Frame type */ +/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_frame_type_ip_t s_ive_ip; +}ih264e_ctl_set_frame_type_ip_t; + +typedef struct +{ + ive_ctl_set_frame_type_op_t s_ive_op; +}ih264e_ctl_set_frame_type_op_t; + +/*****************************************************************************/ +/* Video control Set Encode mode */ +/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_enc_mode_ip_t s_ive_ip; +}ih264e_ctl_set_enc_mode_ip_t; + +typedef struct +{ + ive_ctl_set_enc_mode_op_t s_ive_op; +}ih264e_ctl_set_enc_mode_op_t; + +/*****************************************************************************/ +/* Video control Set QP */ +/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_qp_ip_t s_ive_ip; +}ih264e_ctl_set_qp_ip_t; + +typedef struct +{ + ive_ctl_set_qp_op_t s_ive_op; +}ih264e_ctl_set_qp_op_t; + +/*****************************************************************************/ +/* Video control Set AIR params */ +/*****************************************************************************/ +typedef struct +{ + 
ive_ctl_set_air_params_ip_t s_ive_ip; +}ih264e_ctl_set_air_params_ip_t; + +typedef struct +{ + ive_ctl_set_air_params_op_t s_ive_op; +}ih264e_ctl_set_air_params_op_t; + +/*****************************************************************************/ +/* Video control Set VBV params */ +/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_vbv_params_ip_t s_ive_ip; +}ih264e_ctl_set_vbv_params_ip_t; + +typedef struct +{ + ive_ctl_set_vbv_params_op_t s_ive_op; +}ih264e_ctl_set_vbv_params_op_t; + +/*****************************************************************************/ +/* Video control Set Processor Details */ +/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_num_cores_ip_t s_ive_ip; +}ih264e_ctl_set_num_cores_ip_t; + +typedef struct +{ + ive_ctl_set_num_cores_op_t s_ive_op; +}ih264e_ctl_set_num_cores_op_t; + +/*****************************************************************************/ +/* Video control Set Motion estimation params */ +/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_me_params_ip_t s_ive_ip; +}ih264e_ctl_set_me_params_ip_t; + +typedef struct +{ + ive_ctl_set_me_params_op_t s_ive_op; +}ih264e_ctl_set_me_params_op_t; + +/*****************************************************************************/ +/* Video control Set GOP params */ +/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_gop_params_ip_t s_ive_ip; +}ih264e_ctl_set_gop_params_ip_t; + +typedef struct +{ + ive_ctl_set_gop_params_op_t s_ive_op; +}ih264e_ctl_set_gop_params_op_t; + +/*****************************************************************************/ +/* Video control Set Deblock params */ +/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_deblock_params_ip_t s_ive_ip; 
+}ih264e_ctl_set_deblock_params_ip_t; + +typedef struct +{ + ive_ctl_set_deblock_params_op_t s_ive_op; +}ih264e_ctl_set_deblock_params_op_t; + +/*****************************************************************************/ +/* Video control Set Profile params */ +/*****************************************************************************/ +typedef struct +{ + ive_ctl_set_profile_params_ip_t s_ive_ip; +}ih264e_ctl_set_profile_params_ip_t; + +typedef struct +{ + ive_ctl_set_profile_params_op_t s_ive_op; +}ih264e_ctl_set_profile_params_op_t; + +/*****************************************************************************/ +/* Synchronous video encode call */ +/*****************************************************************************/ +typedef struct +{ + ive_video_encode_ip_t s_ive_ip; +}ih264e_video_encode_ip_t; + +typedef struct +{ + ive_video_encode_op_t s_ive_op; +}ih264e_video_encode_op_t; + + +/* The enum values must fit in 8 bits, as they are assigned to a WORD8 */ +typedef enum +{ + INTRA16x16 = 0, + INTRA4x4, + INTER16x16 +}IV_MB_TYPE_T; + +/*****************************************************************************/ +/* Pic info structures */ +/*****************************************************************************/ +typedef struct +{ + /** Qp */ + UWORD32 u4_qp; + + /** Pic Type */ + IV_PICTURE_CODING_TYPE_T e_frame_type; + +}ih264e_pic_info1_t; + +typedef struct +{ + /** Qp */ + UWORD32 u4_qp; + + /** Pic Type */ + IV_PICTURE_CODING_TYPE_T e_frame_type; + + /** Disable deblock level (0: Enable completely, 3: Disable completely) */ + UWORD32 u4_disable_deblock_level; + +}ih264e_pic_info2_t; + + +/*****************************************************************************/ +/* MB info structures */ +/*****************************************************************************/ +typedef struct +{ + /** MV X */ + WORD16 i2_mv_x; + + /** MV Y */ + WORD16 i2_mv_y; +}ih264e_mv_t; + +typedef struct +{ + /** Intra / Inter */ + WORD8 
i1_mb_type; + union + { + ih264e_mv_t as_mv[1]; + + /** Intra mode */ + WORD8 ai1_intra_mode[1]; + }; +}ih264e_mb_info1_t; + +typedef struct +{ + /** Intra / Inter */ + WORD8 i1_mb_type; + + + /** SAD */ + UWORD16 u2_sad; + + union + { + ih264e_mv_t as_mv[1]; + + /** Intra mode */ + WORD8 ai1_intra_mode[1]; + }; + + +}ih264e_mb_info2_t; + +typedef struct +{ + /** Intra / Inter */ + WORD8 i1_mb_type; + + union + { + ih264e_mv_t as_mv[4]; + + /** Intra mode */ + WORD8 ai1_intra_mode[16]; + }; + +}ih264e_mb_info3_t; + +typedef struct +{ + /** Intra / Inter */ + WORD8 i1_mb_type; + + /** Intra Mode */ + WORD8 i1_intra_mode; + + /** SAD */ + UWORD16 u2_sad; + + union + { + ih264e_mv_t as_mv[16]; + + /** Intra mode */ + WORD8 ai1_intra_mode[16]; + }; + + + +}ih264e_mb_info4_t; + +/* Add any new structures to the following union. It is used to calculate the max size needed for allocation of memory */ +typedef struct +{ + union + { + ih264e_mb_info1_t s_mb_info1; + ih264e_mb_info2_t s_mb_info2; + ih264e_mb_info3_t s_mb_info3; + ih264e_mb_info4_t s_mb_info4; + }; +}ih264e_mb_info_t; + +#ifdef __cplusplus +} /* closing brace for extern "C" */ +#endif +#endif /* _IH264E_H_ */ diff --git a/encoder/ih264e_api.c b/encoder/ih264e_api.c new file mode 100755 index 0000000..e5c66ea --- /dev/null +++ b/encoder/ih264e_api.c @@ -0,0 +1,5559 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_api.c +* +* @brief +* Contains api function definitions for H264 encoder +* +* @author +* ittiam +* +* @par List of Functions: +* - api_check_struct_sanity() +* - ih264e_codec_update_config() +* - ih264e_set_default_params() +* - ih264e_init() +* - ih264e_get_num_rec() +* - ih264e_fill_num_mem_rec() +* - ih264e_init_mem_rec() +* - ih264e_retrieve_memrec() +* - ih264e_set_flush_mode() +* - ih264e_get_buf_info() +* - ih264e_set_dimensions() +* - ih264e_set_frame_rate() +* - ih264e_set_bit_rate() +* - ih264e_set_frame_type() +* - ih264e_set_qp() +* - ih264e_set_enc_mode() +* - ih264e_set_vbv_params() +* - ih264_set_air_params() +* - ih264_set_me_params() +* - ih264_set_ipe_params() +* - ih264_set_gop_params() +* - ih264_set_profile_params() +* - ih264_set_deblock_params() +* - ih264e_set_num_cores() +* - ih264e_reset() +* - ih264e_ctl() +* - ih264e_api_function() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include Files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +/* User Include Files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "ih264_size_defs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264e.h" +#include "ithread.h" +#include "ih264_debug.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include 
"ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_macros.h" +#include "ih264e_defs.h" +#include "ih264e_globals.h" +#include "ih264_buf_mgr.h" +#include "irc_mem_req_and_acq.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "irc_rate_control_api.h" +#include "ih264e_time_stamp.h" +#include "ih264e_modify_frm_rate.h" +#include "ih264e_rate_control.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_defs.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264e_structs.h" +#include "ih264e_utils.h" +#include "ih264e_core_coding.h" +#include "ih264_buf_mgr.h" +#include "ih264_platform_macros.h" +#include "ih264e_platform_macros.h" +#include "ih264_list.h" +#include "ih264_dpb_mgr.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_cavlc.h" +#include "ih264_common_tables.h" +#include "ih264e_master.h" +#include "ih264e_fmt_conv.h" +#include "ih264e_version.h" + + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ +WORD32 ih264e_get_rate_control_mem_tab(void *pv_rate_control, + iv_mem_rec_t *ps_mem, + ITT_FUNC_TYPE_E e_func_type); + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* Used to test arguments for corresponding API call +* +* @par Description: +* For each command the arguments are validated +* +* @param[in] ps_handle +* Codec handle at API level +* +* @param[in] pv_api_ip +* Pointer to input structure +* +* @param[out] 
pv_api_op +* Pointer to output structure +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle, + void *pv_api_ip, + void *pv_api_op) +{ + /* api call */ + WORD32 command = IV_CMD_NA; + + /* input structure expected by the api call */ + UWORD32 *pu4_api_ip = pv_api_ip; + + /* output structure expected by the api call */ + UWORD32 *pu4_api_op = pv_api_op; + + /* temp var */ + WORD32 i, j; + + if (NULL == pv_api_op || NULL == pv_api_ip) + { + return (IV_FAIL); + } + + /* get command */ + command = pu4_api_ip[1]; + + /* set error code */ + pu4_api_op[1] = 0; + + /* error checks on handle */ + switch (command) + { + case IV_CMD_GET_NUM_MEM_REC: + case IV_CMD_FILL_NUM_MEM_REC: + break; + + case IV_CMD_INIT: + if (ps_handle == NULL) + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_HANDLE_NULL; + return IV_FAIL; + } + + if (ps_handle->u4_size != sizeof(iv_obj_t)) + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + break; + + case IVE_CMD_QUEUE_INPUT: + case IVE_CMD_QUEUE_OUTPUT: + case IVE_CMD_DEQUEUE_OUTPUT: + case IVE_CMD_GET_RECON: + case IV_CMD_RETRIEVE_MEMREC: + case IVE_CMD_VIDEO_CTL: + case IVE_CMD_VIDEO_ENCODE: + + if (ps_handle == NULL) + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_HANDLE_NULL; + return IV_FAIL; + } + + if (ps_handle->u4_size != sizeof(iv_obj_t)) + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_handle->pv_fxns != ih264e_api_function) + { + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_API_FUNCTION_PTR_NULL; + return IV_FAIL; + } + + if (ps_handle->pv_codec_handle == NULL) + { + *(pu4_api_op + 1) |= 1 << 
IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_INVALID_CODEC_HANDLE; + return IV_FAIL; + } + break; + + default: + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_CMD; + return IV_FAIL; + } + + /* error checks on input output structures */ + switch (command) + { + case IV_CMD_GET_NUM_MEM_REC: + { + ih264e_num_mem_rec_ip_t *ps_ip = pv_api_ip; + ih264e_num_mem_rec_op_t *ps_op = pv_api_op; + + ps_op->s_ive_op.u4_error_code = 0; + + if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_num_mem_rec_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if (ps_op->s_ive_op.u4_size != sizeof(ih264e_num_mem_rec_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + break; + } + + case IV_CMD_FILL_NUM_MEM_REC: + { + ih264e_fill_mem_rec_ip_t *ps_ip = pv_api_ip; + ih264e_fill_mem_rec_op_t *ps_op = pv_api_op; + + iv_mem_rec_t *ps_mem_rec = NULL; + + WORD32 max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd); + WORD32 max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht); + + ps_op->s_ive_op.u4_error_code = 0; + + if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_fill_mem_rec_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if (ps_op->s_ive_op.u4_size != sizeof(ih264e_fill_mem_rec_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if (max_wd < MIN_WD || max_wd > MAX_WD) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED; + return (IV_FAIL); + } + + if 
(max_ht < MIN_HT || max_ht > MAX_HT) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED; + return (IV_FAIL); + } + + /* verify number of mem rec ptr */ + if (NULL == ps_ip->s_ive_ip.ps_mem_rec) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL; + return (IV_FAIL); + } + + /* verify number of mem records */ + if (ps_ip->s_ive_ip.u4_num_mem_rec != MEM_REC_CNT) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT; + return IV_FAIL; + } + + /* check mem records sizes are correct */ + ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec; + for (i = 0; i < MEM_REC_CNT; i++) + { + if (ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + } + + case IV_CMD_INIT: + { + ih264e_init_ip_t *ps_ip = pv_api_ip; + ih264e_init_op_t *ps_op = pv_api_op; + + iv_mem_rec_t *ps_mem_rec = NULL; + + WORD32 max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd); + WORD32 max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht); + + ps_op->s_ive_op.u4_error_code = 0; + + if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_init_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_INIT_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if (ps_op->s_ive_op.u4_size != sizeof(ih264e_init_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_INIT_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if (max_wd < MIN_WD || max_wd > MAX_WD) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED; + return (IV_FAIL); 
+ } + + if (max_ht < MIN_HT || max_ht > MAX_HT) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_max_ref_cnt != 1) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_max_reorder_cnt != 0) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED; + return (IV_FAIL); + } + + if ((ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_10) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_1B) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_11) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_12) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_13) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_20) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_21) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_22) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_30) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_31) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_32) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_40) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_41) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_42) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_50) + && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_51)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_CODEC_LEVEL_NOT_SUPPORTED; + return (IV_FAIL); + } + + if ((ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420P) + && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_422ILE) + && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_UV) + && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_VU)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + 
IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED; + return (IV_FAIL); + } + + if ((ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420P) + && (ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420SP_UV) + && (ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420SP_VU)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_RECON_CHROMA_FORMAT_NOT_SUPPORTED; + return (IV_FAIL); + } + + if ((ps_ip->s_ive_ip.e_rc_mode != IVE_RC_NONE) + && (ps_ip->s_ive_ip.e_rc_mode != IVE_RC_STORAGE) + && (ps_ip->s_ive_ip.e_rc_mode != IVE_RC_CBR_NON_LOW_DELAY)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_RATE_CONTROL_MODE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_max_framerate > DEFAULT_MAX_FRAMERATE) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_FRAME_RATE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_max_bitrate > DEFAULT_MAX_BITRATE) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_BITRATE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_max_num_bframes != 0) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_BFRAMES_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.e_content_type != IV_PROGRESSIVE) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_CONTENT_TYPE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_max_srch_rng_x > DEFAULT_MAX_SRCH_RANGE_X) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_max_srch_rng_y > DEFAULT_MAX_SRCH_RANGE_Y) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + 
ps_op->s_ive_op.u4_error_code |= + IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if ((ps_ip->s_ive_ip.e_slice_mode != IVE_SLICE_MODE_NONE) + && (ps_ip->s_ive_ip.e_slice_mode != IVE_SLICE_MODE_BLOCKS)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_SLICE_TYPE_INPUT_INVALID; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.e_slice_mode == IVE_SLICE_MODE_BLOCKS) + { + if (ps_ip->s_ive_ip.u4_slice_param == 0 + || ps_ip->s_ive_ip.u4_slice_param > ((UWORD32)max_ht >> 4)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_SLICE_PARAM_INPUT_INVALID; + return (IV_FAIL); + } + } + + if (NULL == ps_ip->s_ive_ip.ps_mem_rec) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL; + return (IV_FAIL); + } + + /* verify number of mem records */ + if (ps_ip->s_ive_ip.u4_num_mem_rec != MEM_REC_CNT) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT; + return (IV_FAIL); + } + + ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec; + + /* check memrecords sizes are correct */ + for (i = 0; i <((WORD32)ps_ip->s_ive_ip.u4_num_mem_rec); i++) + { + if (ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + /* check memrecords pointers are not NULL */ + if (ps_mem_rec[i].pv_base == NULL) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_MEM_REC_BASE_POINTER_NULL; + return IV_FAIL; + } + } + + /* verify memtabs for overlapping regions */ + { + void *start[MEM_REC_CNT]; + void *end[MEM_REC_CNT]; + + start[0] = (ps_mem_rec[0].pv_base); + end[0] = ((UWORD8 *) 
ps_mem_rec[0].pv_base) + + ps_mem_rec[0].u4_mem_size - 1; + + for (i = 1; i < MEM_REC_CNT; i++) + { + /* This array is populated to check memtab overlap */ + start[i] = (ps_mem_rec[i].pv_base); + end[i] = ((UWORD8 *) ps_mem_rec[i].pv_base) + + ps_mem_rec[i].u4_mem_size - 1; + + for (j = 0; j < i; j++) + { + if ((start[i] >= start[j]) && (start[i] <= end[j])) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_MEM_REC_OVERLAP_ERR; + return IV_FAIL; + } + + if ((end[i] >= start[j]) && (end[i] <= end[j])) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_MEM_REC_OVERLAP_ERR; + return IV_FAIL; + } + + if ((start[i] < start[j]) && (end[i] > end[j])) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_MEM_REC_OVERLAP_ERR; + return IV_FAIL; + } + } + } + } + + /* re-validate mem records with init config */ + { + /* mem records */ + iv_mem_rec_t s_mem_rec_ittiam_api[MEM_REC_CNT]; + + /* api interface structs */ + ih264e_fill_mem_rec_ip_t s_ip; + ih264e_fill_mem_rec_op_t s_op; + + /* error status */ + IV_STATUS_T e_status; + + /* temp var */ + WORD32 i; + + s_ip.s_ive_ip.u4_size = sizeof(ih264e_fill_mem_rec_ip_t); + s_op.s_ive_op.u4_size = sizeof(ih264e_fill_mem_rec_op_t); + + s_ip.s_ive_ip.e_cmd = IV_CMD_FILL_NUM_MEM_REC; + s_ip.s_ive_ip.ps_mem_rec = s_mem_rec_ittiam_api; + s_ip.s_ive_ip.u4_max_wd = max_wd; + s_ip.s_ive_ip.u4_max_ht = max_ht; + s_ip.s_ive_ip.u4_num_mem_rec = ps_ip->s_ive_ip.u4_num_mem_rec; + s_ip.s_ive_ip.u4_max_level = ps_ip->s_ive_ip.u4_max_level; + s_ip.s_ive_ip.u4_max_ref_cnt = ps_ip->s_ive_ip.u4_max_ref_cnt; + s_ip.s_ive_ip.u4_max_reorder_cnt = + ps_ip->s_ive_ip.u4_max_reorder_cnt; + s_ip.s_ive_ip.e_color_format = ps_ip->s_ive_ip.e_inp_color_fmt; + s_ip.s_ive_ip.u4_max_srch_rng_x = + ps_ip->s_ive_ip.u4_max_srch_rng_x; + s_ip.s_ive_ip.u4_max_srch_rng_y = + 
ps_ip->s_ive_ip.u4_max_srch_rng_y; + + for (i = 0; i < MEM_REC_CNT; i++) + { + s_mem_rec_ittiam_api[i].u4_size = sizeof(iv_mem_rec_t); + } + + /* fill mem records */ + e_status = ih264e_api_function(NULL, (void *) &s_ip, + (void *) &s_op); + + if (IV_FAIL == e_status) + { + ps_op->s_ive_op.u4_error_code = s_op.s_ive_op.u4_error_code; + return (IV_FAIL); + } + + /* verify mem records */ + for (i = 0; i < MEM_REC_CNT; i++) + { + if (ps_mem_rec[i].u4_mem_size + < s_mem_rec_ittiam_api[i].u4_mem_size) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_MEM_REC_INSUFFICIENT_SIZE; + + return IV_FAIL; + } + + if (ps_mem_rec[i].u4_mem_alignment + != s_mem_rec_ittiam_api[i].u4_mem_alignment) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_MEM_REC_ALIGNMENT_ERR; + + return IV_FAIL; + } + + if (ps_mem_rec[i].e_mem_type + != s_mem_rec_ittiam_api[i].e_mem_type) + { + UWORD32 check = IV_SUCCESS; + UWORD32 diff = s_mem_rec_ittiam_api[i].e_mem_type + - ps_mem_rec[i].e_mem_type; + + if ((ps_mem_rec[i].e_mem_type + <= IV_EXTERNAL_CACHEABLE_SCRATCH_MEM) + && (s_mem_rec_ittiam_api[i].e_mem_type + >= IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM)) + { + check = IV_FAIL; + } + + if (3 != (s_mem_rec_ittiam_api[i].e_mem_type % 4)) + { + /* It is not IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM or + * IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM */ + + if ((diff < 1) || (diff > 3)) + { + /* Difference between 1 and 3 is okay for all cases other than the + * two filtered with the MOD condition above */ + check = IV_FAIL; + } + } + else + { + if (diff == 1) + { + /* This particular case is when codec asked for External Persistent, + * but got Internal Scratch */ + check = IV_FAIL; + } + if ((diff != 2) && (diff != 3)) + { + check = IV_FAIL; + } + } + + if (check == IV_FAIL) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + 
IVE_ERR_MEM_REC_INCORRECT_TYPE; + + return IV_FAIL; + } + } + } + } + break; + } + + case IVE_CMD_QUEUE_INPUT: + case IVE_CMD_QUEUE_OUTPUT: + case IVE_CMD_DEQUEUE_OUTPUT: + case IVE_CMD_GET_RECON: + break; + + case IV_CMD_RETRIEVE_MEMREC: + { + ih264e_retrieve_mem_rec_ip_t *ps_ip = pv_api_ip; + ih264e_retrieve_mem_rec_op_t *ps_op = pv_api_op; + + iv_mem_rec_t *ps_mem_rec = NULL; + + ps_op->s_ive_op.u4_error_code = 0; + + if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_retrieve_mem_rec_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if (ps_op->s_ive_op.u4_size != sizeof(ih264e_retrieve_mem_rec_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if (NULL == ps_ip->s_ive_ip.ps_mem_rec) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL; + return (IV_FAIL); + } + + ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec; + + /* check memrecords sizes are correct */ + for (i = 0; i < MEM_REC_CNT; i++) + { + if (ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + } + + case IVE_CMD_VIDEO_ENCODE: + { + ih264e_video_encode_ip_t *ps_ip = pv_api_ip; + ih264e_video_encode_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_video_encode_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_ENCODE_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + + if (ps_op->s_ive_op.u4_size != sizeof(ih264e_video_encode_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 << 
IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_ENCODE_API_STRUCT_SIZE_INCORRECT; + return (IV_FAIL); + } + break; + } + + case IVE_CMD_VIDEO_CTL: + { + /* ptr to input structure */ + WORD32 *pu4_ptr_cmd = pv_api_ip; + + /* sub command */ + WORD32 sub_command = pu4_ptr_cmd[2]; + + switch (sub_command) + { + case IVE_CMD_CTL_SETDEFAULT: + { + ih264e_ctl_setdefault_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_setdefault_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_setdefault_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_setdefault_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + break; + } + + case IVE_CMD_CTL_GETBUFINFO: + { + codec_t *ps_codec = (codec_t *) (ps_handle->pv_codec_handle); + + ih264e_ctl_getbufinfo_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_getbufinfo_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_getbufinfo_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_getbufinfo_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_ip->s_ive_ip.u4_max_wd < MIN_WD) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_WIDTH_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_max_wd > ps_codec->s_cfg.u4_max_wd) + { + ps_op->s_ive_op.u4_error_code |= 1 + << 
IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_WIDTH_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_max_ht < MIN_HT) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_HEIGHT_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_max_ht > ps_codec->s_cfg.u4_max_ht) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_HEIGHT_NOT_SUPPORTED; + return (IV_FAIL); + } + + if ((ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420P) + && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_422ILE) + && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_UV) + && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_VU)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED; + return (IV_FAIL); + } + break; + } + + case IVE_CMD_CTL_GETVERSION: + { + ih264e_ctl_getversioninfo_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_getversioninfo_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_getversioninfo_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_getversioninfo_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_ip->s_ive_ip.pu1_version == NULL) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_CTL_GET_VERSION_BUFFER_IS_NULL; + return IV_FAIL; + } + + break; + } + + case IVE_CMD_CTL_FLUSH: + { + ih264e_ctl_flush_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_flush_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != 
sizeof(ih264e_ctl_flush_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_flush_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + break; + } + + case IVE_CMD_CTL_RESET: + { + ih264e_ctl_reset_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_reset_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_reset_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_RESET_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_reset_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_RESET_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + break; + } + + case IVE_CMD_CTL_SET_NUM_CORES: + { + ih264e_ctl_set_num_cores_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_num_cores_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_set_num_cores_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_num_cores_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_num_cores < 1) + || (ps_ip->s_ive_ip.u4_num_cores > MAX_NUM_CORES)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_NUM_CORES; + return IV_FAIL; + } + + break; + 
} + + case IVE_CMD_CTL_SET_DIMENSIONS: + { + codec_t *ps_codec = (codec_t *) (ps_handle->pv_codec_handle); + + ih264e_ctl_set_dimensions_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_dimensions_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_set_dimensions_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_dimensions_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_ip->s_ive_ip.u4_wd < MIN_WD) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_WIDTH_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_wd > ps_codec->s_cfg.u4_max_wd) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_WIDTH_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_ht < MIN_HT) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_HEIGHT_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_ht > ps_codec->s_cfg.u4_max_ht) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_HEIGHT_NOT_SUPPORTED; + return (IV_FAIL); + } + + break; + } + + case IVE_CMD_CTL_SET_FRAMERATE: + { + ih264e_ctl_set_frame_rate_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_frame_rate_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_set_frame_rate_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if 
(ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_frame_rate_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (((ps_ip->s_ive_ip.u4_src_frame_rate * 1000) > DEFAULT_MAX_FRAMERATE) + || ((ps_ip->s_ive_ip.u4_tgt_frame_rate * 1000) > DEFAULT_MAX_FRAMERATE)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_FRAME_RATE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if ((ps_ip->s_ive_ip.u4_src_frame_rate == 0) + || (ps_ip->s_ive_ip.u4_tgt_frame_rate == 0)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_FRAME_RATE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_tgt_frame_rate + > ps_ip->s_ive_ip.u4_src_frame_rate) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_TGT_FRAME_RATE_EXCEEDS_SRC_FRAME_RATE; + return (IV_FAIL); + } + + break; + } + + case IVE_CMD_CTL_SET_BITRATE: + { + ih264e_ctl_set_bitrate_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_bitrate_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_set_bitrate_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_bitrate_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_target_bitrate > DEFAULT_MAX_BITRATE) + || (ps_ip->s_ive_ip.u4_target_bitrate == 0)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_BITRATE_NOT_SUPPORTED; + return 
(IV_FAIL); + } + + break; + } + + case IVE_CMD_CTL_SET_FRAMETYPE: + { + ih264e_ctl_set_frame_type_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_frame_type_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_set_frame_type_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_frame_type_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.e_frame_type != IV_NA_FRAME) + && (ps_ip->s_ive_ip.e_frame_type != IV_I_FRAME) + && (ps_ip->s_ive_ip.e_frame_type != IV_P_FRAME) + && (ps_ip->s_ive_ip.e_frame_type != IV_IDR_FRAME)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_FORCE_FRAME_INPUT; + return IV_FAIL; + } + break; + } + + case IVE_CMD_CTL_SET_ME_PARAMS: + { + codec_t *ps_codec = (codec_t *) (ps_handle->pv_codec_handle); + + ih264e_ctl_set_me_params_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_me_params_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_set_me_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_me_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_me_speed_preset != FULL_SRCH) + && (ps_ip->s_ive_ip.u4_me_speed_preset != DMND_SRCH) + && (ps_ip->s_ive_ip.u4_me_speed_preset != HEX_SRCH)) + { + 
ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_ME_SPEED_PRESET; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_enable_hpel != 0) + && (ps_ip->s_ive_ip.u4_enable_hpel != 1)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_HALFPEL_OPTION; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_enable_qpel != 0) + && (ps_ip->s_ive_ip.u4_enable_qpel != 1)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_QPEL_OPTION; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_enable_fast_sad != 0) + && (ps_ip->s_ive_ip.u4_enable_fast_sad != 1)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_FAST_SAD_OPTION; + return IV_FAIL; + } + + if (ps_ip->s_ive_ip.u4_enable_alt_ref > 255) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_ALT_REF_OPTION; + return IV_FAIL; + } + + if (ps_ip->s_ive_ip.u4_srch_rng_x + > ps_codec->s_cfg.u4_max_srch_rng_x) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_srch_rng_y + > ps_codec->s_cfg.u4_max_srch_rng_y) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED; + return (IV_FAIL); + } + + break; + } + + case IVE_CMD_CTL_SET_IPE_PARAMS: + { + ih264e_ctl_set_ipe_params_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_ipe_params_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_set_ipe_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + 
IVE_ERR_IP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_ipe_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_enable_intra_4x4 != 0) + && (ps_ip->s_ive_ip.u4_enable_intra_4x4 != 1)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_INTRA4x4_OPTION; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_CONFIG) + && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_SLOWEST) + && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_NORMAL) + && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_FAST) + && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_HIGH_SPEED) + && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_FASTEST)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_ENC_SPEED_PRESET; + return IV_FAIL; + } + + break; + } + + case IVE_CMD_CTL_SET_GOP_PARAMS: + { + ih264e_ctl_set_gop_params_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_gop_params_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_set_gop_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_gop_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_i_frm_interval < DEFAULT_MIN_INTRA_FRAME_RATE) + || (ps_ip->s_ive_ip.u4_i_frm_interval > DEFAULT_MAX_INTRA_FRAME_RATE)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << 
IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_INTRA_FRAME_INTERVAL; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_idr_frm_interval < DEFAULT_MIN_INTRA_FRAME_RATE) + || (ps_ip->s_ive_ip.u4_idr_frm_interval > DEFAULT_MAX_INTRA_FRAME_RATE)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_IDR_FRAME_INTERVAL; + return IV_FAIL; + } + + if (ps_ip->s_ive_ip.u4_num_b_frames != 0) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_BFRAMES_NOT_SUPPORTED; + return IV_FAIL; + } + + break; + } + + case IVE_CMD_CTL_SET_DEBLOCK_PARAMS: + { + ih264e_ctl_set_deblock_params_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_deblock_params_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_set_deblock_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_deblock_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_0) + && (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_2) + && (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_3) + && (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_4)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_DEBLOCKING_TYPE_INPUT; + return IV_FAIL; + } + + break; + } + + case IVE_CMD_CTL_SET_QP: + { + ih264e_ctl_set_qp_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_qp_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != 
sizeof(ih264e_ctl_set_qp_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_qp_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_i_qp_max > MAX_H264_QP) + || (ps_ip->s_ive_ip.u4_p_qp_max > MAX_H264_QP) + || (ps_ip->s_ive_ip.u4_b_qp_max > MAX_H264_QP)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_MAX_FRAME_QP; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_i_qp_min > ps_ip->s_ive_ip.u4_i_qp_max) + || (ps_ip->s_ive_ip.u4_p_qp_min > ps_ip->s_ive_ip.u4_p_qp_max) + || (ps_ip->s_ive_ip.u4_b_qp_min > ps_ip->s_ive_ip.u4_b_qp_max)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_MIN_FRAME_QP; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_i_qp > ps_ip->s_ive_ip.u4_i_qp_max) + || (ps_ip->s_ive_ip.u4_p_qp > ps_ip->s_ive_ip.u4_p_qp_max) + || (ps_ip->s_ive_ip.u4_b_qp > ps_ip->s_ive_ip.u4_b_qp_max)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_INIT_QP; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_i_qp < ps_ip->s_ive_ip.u4_i_qp_min) + || (ps_ip->s_ive_ip.u4_p_qp < ps_ip->s_ive_ip.u4_p_qp_min) + || (ps_ip->s_ive_ip.u4_b_qp < ps_ip->s_ive_ip.u4_b_qp_min)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_INIT_QP; + return IV_FAIL; + } + + break; + } + + case IVE_CMD_CTL_SET_ENC_MODE: + { + ih264e_ctl_set_enc_mode_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_enc_mode_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != 
sizeof(ih264e_ctl_set_enc_mode_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_enc_mode_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.e_enc_mode != IVE_ENC_MODE_HEADER) + && (ps_ip->s_ive_ip.e_enc_mode != IVE_ENC_MODE_PICTURE)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_ENC_OPERATION_MODE; + return IV_FAIL; + } + + break; + } + + case IVE_CMD_CTL_SET_VBV_PARAMS: + { + ih264e_ctl_set_vbv_params_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_vbv_params_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_set_vbv_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_vbv_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.u4_vbv_buffer_delay < DEFAULT_MIN_BUFFER_DELAY) + || (ps_ip->s_ive_ip.u4_vbv_buffer_delay > DEFAULT_MAX_BUFFER_DELAY)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_BUFFER_DELAY; + return IV_FAIL; + } + + break; + } + + case IVE_CMD_CTL_SET_AIR_PARAMS: + { + ih264e_ctl_set_air_params_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_air_params_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_set_air_params_ip_t)) + { + 
ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_air_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if ((ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_NONE) + && (ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_CYCLIC) + && (ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_RANDOM)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_AIR_MODE; + return IV_FAIL; + } + + if (ps_ip->s_ive_ip.u4_air_refresh_period == 0) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_INVALID_AIR_REFRESH_PERIOD; + return IV_FAIL; + } + + break; + } + + case IVE_CMD_CTL_SET_PROFILE_PARAMS: + { + ih264e_ctl_set_profile_params_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_set_profile_params_op_t *ps_op = pv_api_op; + + if (ps_ip->s_ive_ip.u4_size + != sizeof(ih264e_ctl_set_profile_params_ip_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_IP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_op->s_ive_op.u4_size + != sizeof(ih264e_ctl_set_profile_params_op_t)) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IVE_ERR_OP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if (ps_ip->s_ive_ip.e_profile != IV_PROFILE_BASE) + { + ps_op->s_ive_op.u4_error_code |= 1 + << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= + IH264E_PROFILE_NOT_SUPPORTED; + return IV_FAIL; + } + + break; + } + + default: + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= 
IVE_ERR_INVALID_API_SUB_CMD; + return IV_FAIL; + } + + break; + } + + default: + *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_CMD; + return IV_FAIL; + } + + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief update encoder configuration parameters +* +* @par Description: +* updates encoder configuration parameters from the given config set. +* Initialize/reinitialize codec parameters according to new configurations. +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_cfg +* Pointer to config param set +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec, + cfg_params_t *ps_cfg) +{ + /* config params */ + cfg_params_t *ps_curr_cfg = &ps_codec->s_cfg; + + /* error status */ + IH264E_ERROR_T err = IH264E_SUCCESS; + + /* temp var */ + UWORD32 u4_init_rc = 0; + + /***********************/ + /* UPDATE CODEC CONFIG */ + /***********************/ + if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_DIMENSIONS) + { + UWORD32 wd_aln = ALIGN16(ps_cfg->u4_wd); + UWORD32 ht_aln = ALIGN16(ps_cfg->u4_ht); + + if (ps_curr_cfg->u4_wd != wd_aln || ps_curr_cfg->u4_ht != ht_aln + || ps_curr_cfg->u4_strd != ps_cfg->u4_strd + || ps_curr_cfg->u4_disp_wd != ps_cfg->u4_disp_wd + || ps_curr_cfg->u4_disp_ht != ps_cfg->u4_disp_ht) + { + ps_curr_cfg->u4_wd = wd_aln; + ps_curr_cfg->u4_ht = ht_aln; + ps_curr_cfg->u4_strd = ps_cfg->u4_strd; + + if (ps_curr_cfg->u4_strd == 0) + { + ps_curr_cfg->u4_strd = ps_curr_cfg->u4_wd; + } + + ps_curr_cfg->u4_disp_wd = ps_cfg->u4_disp_wd; + ps_curr_cfg->u4_disp_ht = ps_cfg->u4_disp_ht; + + ps_curr_cfg->i4_wd_mbs = ps_curr_cfg->u4_wd >> 4; + ps_curr_cfg->i4_ht_mbs = ps_curr_cfg->u4_ht >> 4; + + ps_codec->i4_src_strd = ps_codec->s_cfg.u4_strd; + ps_codec->i4_rec_strd = ALIGN16(ps_cfg->u4_wd) + PAD_WD; + + /* If number of 
MBs in a frame changes the air map also changes. + * Hence recompute air map also reset air pic cnt */ + if (ps_codec->s_cfg.e_air_mode != IVE_AIR_MODE_NONE) + { + /* re-init the air map */ + ih264e_init_air_map(ps_codec); + + /* reset air counter */ + ps_codec->i4_air_pic_cnt = -1; + } + + /* initialize mv bank buffer manager */ + err = ih264e_mv_buf_mgr_add_bufs(ps_codec); + if (err != IH264E_SUCCESS) + return err; + + /* initialize ref bank buffer manager */ + err = ih264e_pic_buf_mgr_add_bufs(ps_codec); + if (err != IH264E_SUCCESS) + return err; + + /* since dimension changed, start new sequence by forcing IDR */ + ps_codec->force_curr_frame_type = IV_IDR_FRAME; + + /* in case dimension changes, we need to reinitialize RC as the + * old model shall not fit further */ + u4_init_rc = 1; + + /* when the dimension changes, the header needs to be regenerated */ + ps_codec->i4_header_mode = 1; + } + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_FRAMERATE) + { + /* temp var */ + UWORD32 u4_src_ticks, u4_tgt_ticks; + + u4_src_ticks = ih264e_frame_time_get_src_ticks( + ps_codec->s_rate_control.pps_frame_time); + + u4_tgt_ticks = ih264e_frame_time_get_tgt_ticks( + ps_codec->s_rate_control.pps_frame_time); + + /* Change frame rate */ + if (ps_codec->s_cfg.u4_src_frame_rate + != ps_cfg->u4_src_frame_rate * 1000) + { + ps_codec->s_cfg.u4_src_frame_rate = ps_cfg->u4_src_frame_rate + * 1000; + + ih264e_frame_time_update_src_frame_rate( + ps_codec->s_rate_control.pps_frame_time, + ps_codec->s_cfg.u4_src_frame_rate); + + ih264_time_stamp_update_frame_rate( + ps_codec->s_rate_control.pps_time_stamp, + ps_codec->s_cfg.u4_src_frame_rate); + + irc_change_frame_rate(ps_codec->s_rate_control.pps_rate_control_api, + ps_codec->s_cfg.u4_src_frame_rate, + u4_src_ticks, u4_tgt_ticks); + } + + if (ps_codec->s_cfg.u4_tgt_frame_rate + != ps_cfg->u4_tgt_frame_rate * 1000) + { + ps_codec->s_cfg.u4_tgt_frame_rate = ps_cfg->u4_tgt_frame_rate + * 1000; + + ih264e_frame_time_update_tgt_frame_rate( 
+ ps_codec->s_rate_control.pps_frame_time, + ps_codec->s_cfg.u4_tgt_frame_rate); + + irc_change_frame_rate(ps_codec->s_rate_control.pps_rate_control_api, + ps_codec->s_cfg.u4_src_frame_rate, + u4_src_ticks, u4_tgt_ticks); + + irc_change_frm_rate_for_bit_alloc( + ps_codec->s_rate_control.pps_rate_control_api, + ps_codec->s_cfg.u4_tgt_frame_rate); + } + + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_BITRATE) + { + if (ps_curr_cfg->u4_target_bitrate != ps_cfg->u4_target_bitrate) + { + if (IVE_RC_NONE != ps_curr_cfg->e_rc_mode) + irc_change_avg_bit_rate( + ps_codec->s_rate_control.pps_rate_control_api, + ps_cfg->u4_target_bitrate); + + ps_curr_cfg->u4_target_bitrate = ps_cfg->u4_target_bitrate; + } + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_FRAMETYPE) + { + switch (ps_cfg->e_frame_type) + { + case IV_I_FRAME: + ps_codec->force_curr_frame_type = IV_I_FRAME; + break; + + case IV_IDR_FRAME: + ps_codec->force_curr_frame_type = IV_IDR_FRAME; + break; + + case IV_P_FRAME: + default: + break; + } + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_ME_PARAMS) + { + if (ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG) + { + ps_codec->s_cfg.u4_enable_hpel = ps_cfg->u4_enable_hpel; + ps_codec->s_cfg.u4_enable_fast_sad = ps_cfg->u4_enable_fast_sad; + ps_codec->s_cfg.u4_me_speed_preset = ps_cfg->u4_me_speed_preset; + ps_codec->s_cfg.u4_enable_qpel = ps_cfg->u4_enable_qpel; + } + else if (ps_curr_cfg->u4_enc_speed_preset == IVE_FASTEST) + { + ps_codec->s_cfg.u4_enable_fast_sad = ps_cfg->u4_enable_fast_sad; + } + ps_codec->s_cfg.u4_srch_rng_x = ps_cfg->u4_srch_rng_x; + ps_codec->s_cfg.u4_srch_rng_y = ps_cfg->u4_srch_rng_y; + + if (ps_codec->s_cfg.u4_enable_alt_ref != ps_cfg->u4_enable_alt_ref) + { + ps_codec->s_cfg.u4_enable_alt_ref = ps_cfg->u4_enable_alt_ref; + ps_codec->u4_is_curr_frm_ref = 1; + } + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_IPE_PARAMS) + { + ps_curr_cfg->u4_enc_speed_preset = ps_cfg->u4_enc_speed_preset; + + if (ps_curr_cfg->u4_enc_speed_preset == 
IVE_SLOWEST) + {/* high quality */ + /* enable diamond search */ + ps_curr_cfg->u4_me_speed_preset = DMND_SRCH; + ps_curr_cfg->u4_enable_fast_sad = 0; + + /* enable intra 4x4 */ + ps_curr_cfg->u4_enable_intra_4x4 = 1; + ps_codec->luma_energy_compaction[1] = + ih264e_code_luma_intra_macroblock_4x4_rdopt_on; + + /* half pel on */ + ps_curr_cfg->u4_enable_hpel = 1; + + /* deblocking disable level 0 */ + ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 0; + } + else if (ps_curr_cfg->u4_enc_speed_preset == IVE_NORMAL) + {/* normal */ + /* enable diamond search */ + ps_curr_cfg->u4_me_speed_preset = DMND_SRCH; + ps_curr_cfg->u4_enable_fast_sad = 0; + + /* enable intra 4x4 */ + ps_curr_cfg->u4_enable_intra_4x4 = 1; + + /* half pel on */ + ps_curr_cfg->u4_enable_hpel = 1; + + /* deblocking disable level 0 */ + ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 0; + } + else if (ps_curr_cfg->u4_enc_speed_preset == IVE_FAST) + {/* fast */ + /* enable diamond search */ + ps_curr_cfg->u4_me_speed_preset = DMND_SRCH; + ps_curr_cfg->u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_curr_cfg->u4_enable_intra_4x4 = 0; + + /* half pel on */ + ps_curr_cfg->u4_enable_hpel = 1; + + /* deblocking disable level 0 */ + ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0; + + /* enable intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 1; + } + else if (ps_curr_cfg->u4_enc_speed_preset == IVE_HIGH_SPEED) + {/* high speed */ + /* enable diamond search */ + ps_curr_cfg->u4_me_speed_preset = DMND_SRCH; + ps_curr_cfg->u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_curr_cfg->u4_enable_intra_4x4 = 0; + + /* sub pel off */ + ps_curr_cfg->u4_enable_hpel = 0; + + /* deblocking off */ + ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4; + + /* disabled intra inter gating in Inter slices */ +
ps_codec->u4_inter_gate = 0; + } + else if (ps_curr_cfg->u4_enc_speed_preset == IVE_FASTEST) + {/* fastest */ + /* enable diamond search */ + ps_curr_cfg->u4_me_speed_preset = DMND_SRCH; + //u4_num_layers = 4; + + /* disable intra 4x4 */ + ps_curr_cfg->u4_enable_intra_4x4 = 0; + + /* sub pel off */ + ps_curr_cfg->u4_enable_hpel = 0; + + /* deblocking off */ + ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 1; + } + else if (ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG) + { + ps_curr_cfg->u4_enable_intra_4x4 = ps_cfg->u4_enable_intra_4x4; + } + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_GOP_PARAMS) + { + if (ps_curr_cfg->u4_i_frm_interval != ps_cfg->u4_i_frm_interval) + { + ps_curr_cfg->u4_i_frm_interval = ps_cfg->u4_i_frm_interval; + + /* reset air counter */ + ps_codec->i4_air_pic_cnt = -1; + + /* re-init air map */ + ih264e_init_air_map(ps_codec); + + /*Effect intra frame interval change*/ + + irc_change_intra_frm_int_call( + ps_codec->s_rate_control.pps_rate_control_api, + ps_curr_cfg->u4_i_frm_interval); + } + + ps_curr_cfg->u4_idr_frm_interval = ps_cfg->u4_idr_frm_interval; + + ps_curr_cfg->u4_num_b_frames = ps_cfg->u4_num_b_frames; + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_DEBLOCK_PARAMS) + { + if (ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG) + { + ps_curr_cfg->u4_disable_deblock_level = + ps_cfg->u4_disable_deblock_level; + } + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_QP) + { + UWORD8 au1_init_qp[MAX_PIC_TYPE]; + UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE]; + + ps_codec->s_cfg.u4_i_qp_max = ps_cfg->u4_i_qp_max; + ps_codec->s_cfg.u4_i_qp_min = ps_cfg->u4_i_qp_min; + ps_codec->s_cfg.u4_i_qp = ps_cfg->u4_i_qp; + + ps_codec->s_cfg.u4_p_qp_max = ps_cfg->u4_p_qp_max; + ps_codec->s_cfg.u4_p_qp_min = ps_cfg->u4_p_qp_min; + ps_codec->s_cfg.u4_p_qp = ps_cfg->u4_p_qp; + + ps_codec->s_cfg.u4_b_qp_max = ps_cfg->u4_b_qp_max; + ps_codec->s_cfg.u4_b_qp_min = 
ps_cfg->u4_b_qp_min; + ps_codec->s_cfg.u4_b_qp = ps_cfg->u4_b_qp; + + /* update rc lib with modified qp */ + au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp]; + au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp]; + au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp]; + + irc_change_init_qp(ps_codec->s_rate_control.pps_rate_control_api, + au1_init_qp); + + au1_min_max_qp[2 * I_PIC] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min]; + au1_min_max_qp[2 * I_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max]; + + au1_min_max_qp[2 * P_PIC] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min]; + au1_min_max_qp[2 * P_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max]; + + au1_min_max_qp[2 * B_PIC] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min]; + au1_min_max_qp[2 * B_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max]; + + irc_change_min_max_qp(ps_codec->s_rate_control.pps_rate_control_api, + au1_min_max_qp); + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_ENC_MODE) + { + ps_codec->s_cfg.e_enc_mode = ps_cfg->e_enc_mode; + + if (ps_codec->s_cfg.e_enc_mode == IVE_ENC_MODE_HEADER) + { + ps_codec->i4_header_mode = 1; + ps_codec->s_cfg.e_enc_mode = IVE_ENC_MODE_PICTURE; + } + else + { + ps_codec->i4_header_mode = 0; + } + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_VBV_PARAMS + && IVE_RC_NONE != ps_codec->s_cfg.e_rc_mode) + { + ps_codec->s_cfg.u4_vbv_buf_size = ps_cfg->u4_vbv_buf_size; + ps_codec->s_cfg.u4_vbv_buffer_delay = ps_cfg->u4_vbv_buffer_delay; + + // irc_change_buffer_delay(ps_codec->s_rate_control.pps_rate_control_api, ps_codec->s_cfg.u4_vbv_buffer_delay); + + // TODO: remove this when the support for changing buffer dynamically + // is yet to be added. 
+ u4_init_rc = 1; + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_AIR_PARAMS) + { + if (ps_curr_cfg->e_air_mode != ps_cfg->e_air_mode + || ps_curr_cfg->u4_air_refresh_period + != ps_cfg->u4_air_refresh_period) + { + ps_curr_cfg->e_air_mode = ps_cfg->e_air_mode; + ps_curr_cfg->u4_air_refresh_period = ps_cfg->u4_air_refresh_period; + + ih264e_init_air_map(ps_codec); + + /* reset air counter */ + ps_codec->i4_air_pic_cnt = -1; + } + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_PROFILE_PARAMS) + { + ps_codec->s_cfg.e_profile = ps_cfg->e_profile; + } + else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_NUM_CORES) + { + ps_codec->s_cfg.u4_num_cores = ps_cfg->u4_num_cores; + } + + /* reset RC model */ + if (u4_init_rc) + { + /* init qp */ + UWORD8 au1_init_qp[MAX_PIC_TYPE]; + + /* min max qp */ + UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE]; + + /* init i,p,b qp */ + au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp]; + au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp]; + au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp]; + + /* init min max qp */ + au1_min_max_qp[2 * I_PIC] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min]; + au1_min_max_qp[2 * I_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max]; + + au1_min_max_qp[2 * P_PIC] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min]; + au1_min_max_qp[2 * P_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max]; + + au1_min_max_qp[2 * B_PIC] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min]; + au1_min_max_qp[2 * B_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max]; + + /* get rc mode */ + switch (ps_codec->s_cfg.e_rc_mode) + { + case IVE_RC_STORAGE: + ps_codec->s_rate_control.e_rc_type = VBR_STORAGE; + break; + + case IVE_RC_CBR_NON_LOW_DELAY: + ps_codec->s_rate_control.e_rc_type = CBR_NLDRC; + break; + + case IVE_RC_CBR_LOW_DELAY: + ps_codec->s_rate_control.e_rc_type = CBR_LDRC; + break; + + case IVE_RC_NONE: + 
ps_codec->s_rate_control.e_rc_type = CONST_QP;
+                break;
+
+            default:
+                break;
+        }
+
+        /* init rate control */
+        ih264e_rc_init(ps_codec->s_rate_control.pps_rate_control_api,
+                       ps_codec->s_rate_control.pps_frame_time,
+                       ps_codec->s_rate_control.pps_time_stamp,
+                       ps_codec->s_rate_control.pps_pd_frm_rate,
+                       ps_codec->s_cfg.u4_max_framerate,
+                       ps_codec->s_cfg.u4_src_frame_rate,
+                       ps_codec->s_cfg.u4_tgt_frame_rate,
+                       ps_codec->s_rate_control.e_rc_type,
+                       ps_codec->s_cfg.u4_target_bitrate,
+                       ps_codec->s_cfg.u4_max_bitrate,
+                       ps_codec->s_cfg.u4_vbv_buffer_delay,
+                       ps_codec->s_cfg.u4_i_frm_interval, au1_init_qp,
+                       H264_ALLOC_INTER_FRM_INTV, au1_min_max_qp,
+                       ps_codec->s_cfg.u4_max_level);
+    }
+
+    return err;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets default encoder config parameters
+*
+* @par Description:
+*  Fills every field assigned below with its default value. Intended to be
+*  called at init time so that, even if set_params is not called, codec
+*  continues to work
+*
+* @param[out] ps_cfg
+*  Pointer to encoder config params to be filled with defaults
+*
+* @returns IV_SUCCESS (the status is never changed inside this function)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_set_default_params(cfg_params_t *ps_cfg)
+{
+    WORD32 ret = IV_SUCCESS;
+
+    ps_cfg->u4_max_wd = MAX_WD;
+    ps_cfg->u4_max_ht = MAX_HT;
+    ps_cfg->u4_max_ref_cnt = MAX_REF_CNT;
+    ps_cfg->u4_max_reorder_cnt = MAX_REF_CNT;
+    ps_cfg->u4_max_level = DEFAULT_MAX_LEVEL;
+    ps_cfg->e_inp_color_fmt = IV_YUV_420SP_UV;
+    ps_cfg->u4_enable_recon = DEFAULT_RECON_ENABLE;
+    ps_cfg->e_recon_color_fmt = IV_YUV_420P;
+    ps_cfg->u4_enc_speed_preset = IVE_FASTEST;
+    ps_cfg->e_rc_mode = DEFAULT_RC;
+    ps_cfg->u4_max_framerate = DEFAULT_MAX_FRAMERATE;
+    ps_cfg->u4_max_bitrate = DEFAULT_MAX_BITRATE;
+    ps_cfg->u4_max_num_bframes = 0;
+    ps_cfg->e_content_type = IV_PROGRESSIVE;
+    ps_cfg->u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X;
+    ps_cfg->u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y;
+    ps_cfg->e_slice_mode = IVE_SLICE_MODE_NONE;
+    ps_cfg->u4_slice_param = DEFAULT_SLICE_PARAM;
+    ps_cfg->e_arch = ih264e_default_arch();
+    ps_cfg->e_soc = SOC_GENERIC;
+    ps_cfg->u4_disp_wd = MAX_WD;
+    ps_cfg->u4_disp_ht = MAX_HT;
+    ps_cfg->u4_wd = MAX_WD;
+    ps_cfg->u4_ht = MAX_HT;
+    ps_cfg->u4_strd = ALIGN16(MAX_WD);
+    ps_cfg->u4_src_frame_rate = DEFAULT_SRC_FRAME_RATE;
+    ps_cfg->u4_tgt_frame_rate = DEFAULT_TGT_FRAME_RATE;
+    ps_cfg->u4_target_bitrate = DEFAULT_BITRATE;
+    ps_cfg->e_frame_type = IV_NA_FRAME;
+    ps_cfg->e_enc_mode = IVE_ENC_MODE_DEFAULT;
+    ps_cfg->u4_i_qp = DEFAULT_I_QP;
+    ps_cfg->u4_p_qp = DEFAULT_P_QP;
+    ps_cfg->u4_b_qp = DEFAULT_B_QP;
+    ps_cfg->u4_i_qp_min = DEFAULT_QP_MIN;
+    ps_cfg->u4_i_qp_max = DEFAULT_QP_MAX;
+    ps_cfg->u4_p_qp_min = DEFAULT_QP_MIN;
+    ps_cfg->u4_p_qp_max = DEFAULT_QP_MAX;
+    ps_cfg->u4_b_qp_min = DEFAULT_QP_MIN;
+    ps_cfg->u4_b_qp_max = DEFAULT_QP_MAX;
+    ps_cfg->e_air_mode = DEFAULT_AIR_MODE;
+    ps_cfg->u4_air_refresh_period = DEFAULT_AIR_REFRESH_PERIOD;
+    ps_cfg->u4_vbv_buffer_delay = DEFAULT_VBV_DELAY;
+    ps_cfg->u4_vbv_buf_size = DEFAULT_VBV_SIZE;
+    ps_cfg->u4_num_cores = DEFAULT_NUM_CORES;
+    ps_cfg->u4_me_speed_preset = DEFAULT_ME_SPEED_PRESET;
+    ps_cfg->u4_enable_hpel = DEFAULT_HPEL;
+    ps_cfg->u4_enable_qpel = DEFAULT_QPEL;
+    ps_cfg->u4_enable_intra_4x4 = DEFAULT_I4;
+    ps_cfg->u4_enable_intra_8x8 = DEFAULT_I8;
+    ps_cfg->u4_enable_intra_16x16 = DEFAULT_I16;
+    ps_cfg->u4_enable_fast_sad = DEFAULT_ENABLE_FAST_SAD;
+    ps_cfg->u4_enable_satqd = DEFAULT_ENABLE_SATQD;
+    /* u4_enable_satqd was assigned just above, so the first branch is taken */
+    ps_cfg->i4_min_sad =
+                    (ps_cfg->u4_enable_satqd == DEFAULT_ENABLE_SATQD) ?
+                                    DEFAULT_MIN_SAD_ENABLE :
+                                    DEFAULT_MIN_SAD_DISABLE;
+    ps_cfg->u4_srch_rng_x = DEFAULT_SRCH_RNG_X;
+    ps_cfg->u4_srch_rng_y = DEFAULT_SRCH_RNG_Y;
+    ps_cfg->u4_i_frm_interval = DEFAULT_I_INTERVAL;
+    ps_cfg->u4_idr_frm_interval = DEFAULT_IDR_INTERVAL;
+    ps_cfg->u4_num_b_frames = DEFAULT_B_FRAMES;
+    ps_cfg->u4_disable_deblock_level = DEFAULT_DISABLE_DEBLK_LEVEL;
+    ps_cfg->e_profile = DEFAULT_PROFILE;
+    ps_cfg->u4_timestamp_low = 0;
+    ps_cfg->u4_timestamp_high = 0;
+    ps_cfg->u4_is_valid = 1;
+    ps_cfg->e_cmd = IVE_CMD_CT_NA;
+    ps_cfg->i4_wd_mbs = ps_cfg->u4_max_wd >> 4;
+    ps_cfg->i4_ht_mbs = ps_cfg->u4_max_ht >> 4;
+    ps_cfg->u4_entropy_coding_mode = CAVLC;
+    ps_cfg->u4_weighted_prediction = 0;
+    ps_cfg->u4_constrained_intra_pred = 0;
+    ps_cfg->u4_pic_info_type = 0;
+    ps_cfg->u4_mb_info_type = 0;
+
+    return ret;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initialize encoder context. This will be called by init_mem_rec and during
+*  codec reset
+*
+* @par Description:
+*  Resets the counters and flags of the context, initializes the control and
+*  entropy mutexes, the process and entropy job queues, the buffer managers,
+*  the dpb manager, the reference sets and the function pointers
+*
+* @param[in,out] ps_codec
+*  Codec context pointer
+*
+* @returns IV_SUCCESS on success; IV_FAIL (through RETURN_IF) when one of the
+*  job queues could not be initialized
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_init(codec_t *ps_codec)
+{
+    /* enc config param set */
+    cfg_params_t *ps_cfg = &(ps_codec->s_cfg);
+
+    /* temp var */
+    WORD32 i;
+
+    /* coded pic count */
+    ps_codec->i4_coded_pic_cnt = 0;
+
+    /* Number of API calls to encode are made */
+    ps_codec->i4_encode_api_call_cnt = -1;
+
+    /* Indicates no header has been generated yet */
+    ps_codec->u4_header_generated = 0;
+
+    /* Number of pictures encoded */
+    ps_codec->i4_pic_cnt = -1;
+
+    /* Number of threads created */
+    ps_codec->i4_proc_thread_cnt = 0;
+
+    /* ctl mutex init */
+    ithread_mutex_init(ps_codec->pv_ctl_mutex);
+
+    /* Set encoder chroma format */
+    ps_codec->e_codec_color_format =
+                    (ps_cfg->e_inp_color_fmt == IV_YUV_420SP_VU) ?
+                                    IV_YUV_420SP_VU : IV_YUV_420SP_UV;
+
+    /* Number of continuous frames where deblocking was disabled */
+    ps_codec->i4_disable_deblk_pic_cnt = 0;
+
+    /* frame num */
+    ps_codec->i4_frame_num = -1;
+
+    /* no frame type is forced at start up (IV_NA_FRAME) */
+    ps_codec->force_curr_frame_type = IV_NA_FRAME;
+
+    /* idr_pic_id */
+    ps_codec->i4_idr_pic_id = -1;
+
+    /* Flush mode */
+    ps_codec->i4_flush_mode = 0;
+
+    /* Encode header mode */
+    ps_codec->i4_header_mode = 0;
+
+    /* Encode generate header */
+    ps_codec->i4_gen_header = 0;
+
+    /* To signal successful completion of init */
+    ps_codec->i4_init_done = 1;
+
+    /* Cleared here; to signal that at least one picture was encoded */
+    ps_codec->i4_first_pic_done = 0;
+
+    /* Reset Codec */
+    ps_codec->i4_reset_flag = 0;
+
+    /* Current error code */
+    ps_codec->i4_error_code = IH264E_SUCCESS;
+
+    /* threshold residue */
+    ps_codec->u4_thres_resi = 1;
+
+    /* inter gating enable */
+    ps_codec->u4_inter_gate = 0;
+
+    /* entropy mutex init */
+    ithread_mutex_init(ps_codec->pv_entropy_mutex);
+
+    /* sps id */
+    ps_codec->i4_sps_id = 0;
+
+    /* pps id */
+    ps_codec->i4_pps_id = 0;
+
+    /* Process thread created status.
+     * memset() takes a size in bytes, so scale the element count by the
+     * element size; passing the bare count clears only part of the table */
+    memset(ps_codec->ai4_process_thread_created, 0,
+           MAX_PROCESS_THREADS
+                           * sizeof(ps_codec->ai4_process_thread_created[0]));
+
+    /* Number of MBs processed together */
+    ps_codec->i4_proc_nmb = 8;
+
+    /* Previous POC msb */
+    ps_codec->i4_prev_poc_msb = 0;
+
+    /* Previous POC lsb */
+    ps_codec->i4_prev_poc_lsb = -1;
+
+    /* max Previous POC lsb */
+    ps_codec->i4_max_prev_poc_lsb = -1;
+
+    /* sps, pps status */
+    {
+        sps_t *ps_sps = ps_codec->ps_sps_base;
+        pps_t *ps_pps = ps_codec->ps_pps_base;
+
+        for (i = 0; i < MAX_SPS_CNT; i++)
+        {
+            ps_sps->i1_sps_valid = 0;
+            ps_sps++;
+        }
+
+        for (i = 0; i < MAX_PPS_CNT; i++)
+        {
+            ps_pps->i1_pps_valid = 0;
+            ps_pps++;
+        }
+    }
+
+    {
+        WORD32 max_mb_rows = ps_cfg->i4_ht_mbs;
+
+        WORD32 num_jobs = max_mb_rows * 2;
+        WORD32 clz;
+
+        /* Use next power of two number of entries*/
+        clz = CLZ(num_jobs);
+        num_jobs = 1 << (32 - clz);
+
+        /* init process jobq */
+        ps_codec->pv_proc_jobq = ih264_list_init(
+                        ps_codec->pv_proc_jobq_buf,
+                        ps_codec->i4_proc_jobq_buf_size, num_jobs,
+                        sizeof(job_t), 10);
+        RETURN_IF((ps_codec->pv_proc_jobq == NULL), IV_FAIL);
+        ih264_list_reset(ps_codec->pv_proc_jobq);
+
+        /* init entropy jobq */
+        ps_codec->pv_entropy_jobq = ih264_list_init(
+                        ps_codec->pv_entropy_jobq_buf,
+                        ps_codec->i4_entropy_jobq_buf_size, num_jobs,
+                        sizeof(job_t), 10);
+        RETURN_IF((ps_codec->pv_entropy_jobq == NULL), IV_FAIL);
+        ih264_list_reset(ps_codec->pv_entropy_jobq);
+    }
+
+    /* Update the jobq context to all the threads */
+    for (i = 0; i < MAX_PROCESS_CTXT; i++)
+    {
+        ps_codec->as_process[i].pv_proc_jobq = ps_codec->pv_proc_jobq;
+        ps_codec->as_process[i].pv_entropy_jobq = ps_codec->pv_entropy_jobq;
+
+        /* i4_id always stays between 0 and MAX_PROCESS_THREADS */
+        ps_codec->as_process[i].i4_id =
+                        (i >= MAX_PROCESS_THREADS) ?
+                                        (i - MAX_PROCESS_THREADS) : i;
+        ps_codec->as_process[i].ps_codec = ps_codec;
+
+        ps_codec->as_process[i].s_entropy.pv_proc_jobq = ps_codec->pv_proc_jobq;
+        ps_codec->as_process[i].s_entropy.pv_entropy_jobq =
+                        ps_codec->pv_entropy_jobq;
+        ps_codec->as_process[i].s_entropy.i4_abs_pic_order_cnt = -1;
+    }
+
+    /* Initialize MV Bank buffer manager */
+    ps_codec->pv_mv_buf_mgr = ih264_buf_mgr_init(ps_codec->pv_mv_buf_mgr_base);
+
+    /* Initialize Picture buffer manager for reference buffers*/
+    ps_codec->pv_ref_buf_mgr = ih264_buf_mgr_init(
+                    ps_codec->pv_ref_buf_mgr_base);
+
+    /* Initialize Picture buffer manager for input buffers*/
+    ps_codec->pv_inp_buf_mgr = ih264_buf_mgr_init(
+                    ps_codec->pv_inp_buf_mgr_base);
+
+    /* Initialize buffer manager for output buffers*/
+    ps_codec->pv_out_buf_mgr = ih264_buf_mgr_init(
+                    ps_codec->pv_out_buf_mgr_base);
+
+    /* buffer cnt in buffer manager */
+    ps_codec->i4_inp_buf_cnt = 0;
+    ps_codec->i4_out_buf_cnt = 0;
+    ps_codec->i4_ref_buf_cnt = 0;
+
+    ps_codec->ps_pic_buf = (pic_buf_t *) ps_codec->pv_pic_buf_base;
+    memset(ps_codec->ps_pic_buf, 0, BUF_MGR_MAX_CNT * sizeof(pic_buf_t));
+
+    /* Initialize dpb manager */
+    ih264_dpb_mgr_init((dpb_mgr_t*) ps_codec->pv_dpb_mgr);
+
+    memset(ps_codec->as_ref_set, 0,
+           sizeof(ref_set_t) * (MAX_DPB_SIZE + MAX_CTXT_SETS));
+    for (i = 0; i < (MAX_DPB_SIZE + MAX_CTXT_SETS); i++)
+    {
+        ps_codec->as_ref_set[i].i4_pic_cnt = -1;
+    }
+
+    /* fn ptr init */
+    ih264e_init_function_ptr(ps_codec);
+
+    /* reset status flags */
+    for (i = 0; i < MAX_CTXT_SETS; i++)
+    {
+        ps_codec->au4_entropy_thread_active[i] = 0;
+        ps_codec->ai4_pic_cnt[i] = -1;
+
+        ps_codec->s_rate_control.pre_encode_skip[i] = 0;
+        ps_codec->s_rate_control.post_encode_skip[i] = 0;
+    }
+
+    ps_codec->s_rate_control.num_intra_in_prev_frame = 0;
+    ps_codec->s_rate_control.i4_avg_activity = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Gets number of memory records required by the codec
+*
+* @par Description:
+*  Writes MEM_REC_CNT to the u4_num_mem_rec field of the output structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (not used)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_get_num_rec(void *pv_api_ip, void *pv_api_op)
+{
+    /* api call I/O structures */
+    ih264e_num_mem_rec_op_t *ps_op = pv_api_op;
+
+    UNUSED(pv_api_ip);
+
+    ps_op->s_ive_op.u4_num_mem_rec = MEM_REC_CNT;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Fills memory records of the codec
+*
+* @par Description:
+*  Fills codec memory requirements
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op) +{ + /* api call I/O structures */ + ih264e_fill_mem_rec_ip_t *ps_ip = pv_api_ip; + ih264e_fill_mem_rec_op_t *ps_op = pv_api_op; + + /* profile / level info */ + WORD32 level; + WORD32 num_reorder_frames; + WORD32 num_ref_frames; + + /* mem records */ + WORD32 no_of_mem_rec; + iv_mem_rec_t *ps_mem_rec_base, *ps_mem_rec; + + /* frame dimensions */ + WORD32 max_wd_luma, max_ht_luma; + WORD32 max_mb_rows, max_mb_cols, max_mb_cnt; + + /* temp var */ + WORD32 i; + + /* error status */ + IV_STATUS_T status = IV_SUCCESS; + + /* profile / level info */ + level = ps_ip->s_ive_ip.u4_max_level; + num_reorder_frames = ps_ip->s_ive_ip.u4_max_reorder_cnt; + num_ref_frames = ps_ip->s_ive_ip.u4_max_ref_cnt; + + /* mem records */ + ps_mem_rec_base = ps_ip->s_ive_ip.ps_mem_rec; + no_of_mem_rec = ps_ip->s_ive_ip.u4_num_mem_rec; + + /* frame dimensions */ + max_ht_luma = ps_ip->s_ive_ip.u4_max_ht; + max_wd_luma = ps_ip->s_ive_ip.u4_max_wd; + max_ht_luma = ALIGN16(max_ht_luma); + max_wd_luma = ALIGN16(max_wd_luma); + max_mb_rows = max_ht_luma / MB_SIZE; + max_mb_cols = max_wd_luma / MB_SIZE; + max_mb_cnt = max_mb_rows * max_mb_cols; + + /* validate params */ + if ((level < MIN_LEVEL) || (level > MAX_LEVEL)) + { + ps_op->s_ive_op.u4_error_code |= IH264E_CODEC_LEVEL_NOT_SUPPORTED; + level = MAX_LEVEL; + } + + if (num_ref_frames > MAX_REF_CNT) + { + ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED; + num_ref_frames = MAX_REF_CNT; + } + + if (num_reorder_frames > MAX_REF_CNT) + { + ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED; + num_reorder_frames = MAX_REF_CNT; + } + + /* Set all memory records as persistent and alignment as 128 by default */ + ps_mem_rec = ps_mem_rec_base; + for (i = 0; i < no_of_mem_rec; i++) + { + ps_mem_rec->u4_mem_alignment = 128; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + ps_mem_rec++; + } + + 
/************************************************************************ + * Request memory for h264 encoder handle * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_IV_OBJ]; + { + ps_mem_rec->u4_mem_size = sizeof(iv_obj_t); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_IV_OBJ, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for h264 encoder context * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_CODEC]; + { + ps_mem_rec->u4_mem_size = sizeof(codec_t); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CODEC, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for entropy context * + * In multi core encoding, each row is assumed to be launched on a * + * thread. The rows below can only start after its neighbors are coded * + * The status of an mb coded/uncoded is signaled via entropy map. * + * 1. One word32 to store skip run cnt * + * 2. mb entropy map (mb status entropy coded/uncoded). The size* + * of the entropy map is max mb cols. Further allocate one * + * more additional row to evade checking for row -1. * + * 3. size of bit stream buffer to store bit stream ctxt. * + * 4. Entropy coding is dependent on nnz coefficient count for * + * the neighbor blocks. 
It is sufficient to maintain one row * + * worth of nnz as entropy for lower row waits on entropy map* + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size of skip mb run */ + total_size += sizeof(WORD32); + total_size = ALIGN8(total_size); + + /* size in bytes to store entropy status of an entire frame */ + total_size += (max_mb_cols * max_mb_rows); + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + total_size = ALIGN128(total_size); + + /* size of bit stream buffer */ + total_size += sizeof(bitstrm_t); + total_size = ALIGN128(total_size); + + /* top nnz luma */ + total_size += (max_mb_cols * 4 * sizeof(UWORD8)); + total_size = ALIGN128(total_size); + + /* top nnz cbcr */ + total_size += (max_mb_cols * 4 * sizeof(UWORD8)); + total_size = ALIGN128(total_size); + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ENTROPY, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * The residue coefficients that needs to be entropy coded are packed * + * at a buffer space by the proc threads. The entropy thread shall * + * read from the buffer space, unpack them and encode the same. The * + * buffer space required to pack a row of mbs are as follows. * + * Assuming transform_8x8_flag is disabled, * + * In the worst case, 1 mb contains 1 dc 4x4 luma sub block, followed * + * by 16 ac 4x4 luma sub blocks, 2 dc chroma 2x2 sub blocks, followed * + * by 8 ac 4x4 chroma sub blocks. * + * For the sake of simplicity we assume that all sub blocks are of * + * type 4x4. 
The packing of each 4x4 is depicted by the structure * + * tu_sblk_coeff_data_t * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_COEFF_DATA]; + { + /* temp var */ + WORD32 size = 0; + + /* size of coeff data of 1 mb */ + size += sizeof(tu_sblk_coeff_data_t) * MAX_4x4_SUBBLKS; + + /* size of coeff data of 1 row of mb's */ + size *= max_mb_cols; + + /* align to avoid any false sharing across threads */ + size = ALIGN64(size); + + /* size for one full frame */ + size *= max_mb_rows; + + /* size of each proc buffer set (ping, pong) */ + size *= MAX_CTXT_SETS; + + ps_mem_rec->u4_mem_size = size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_COEFF_DATA, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * while encoding an mb, the mb header data is signaled to the entropy* + * thread by writing to a buffer space. the size of header data per mb * + * is assumed to be 40 bytes * + * TODO: revisit this inference * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_HEADER_DATA]; + { + /* temp var */ + WORD32 size; + + /* size per MB */ + size = 40; + + /* size for 1 row of mbs */ + size = size * max_mb_cols; + + /* align to avoid any false sharing across threads */ + size = ALIGN64(size); + + /* size for one full frame */ + size *= max_mb_rows; + + /* size of each proc buffer set (ping, pong) */ + size *= MAX_CTXT_SETS; + + ps_mem_rec->u4_mem_size = size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_HEADER_DATA, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Size for holding mv_buf_t for each MV Bank. 
* + * Note this allocation is done for BUF_MGR_MAX_CNT instead of * + * MAX_DPB_SIZE or max_dpb_size for following reasons * + * max_dpb_size will be based on max_wd and max_ht * + * For higher max_wd and max_ht this number will be smaller than * + * MAX_DPB_SIZE But during actual initialization number of buffers * + * allocated can be more. * + * * + * One extra MV Bank is needed to hold current pics MV bank. * + * Since this is only a structure allocation and not actual buffer * + * allocation, it is allocated for BUF_MGR_MAX_CNT entries * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBANK]; + { + /* max luma samples */ + WORD32 max_luma_samples = 0; + + /* determine max luma samples */ + for (i = 0; i < 16; i++) + if (level ==(WORD32)gas_ih264_lvl_tbl[i].u4_level_idc) + max_luma_samples = gas_ih264_lvl_tbl[i].u4_max_fs + << (BLK_SIZE + BLK_SIZE); + + ps_mem_rec->u4_mem_size = ih264_buf_mgr_size(); + + /************************************************************************ + * Allocate for pu_map, enc_pu_t and pic_pu_idx for each MV bank * + * Note: Number of luma samples is not max_wd * max_ht here, instead it * + * is set to maximum number of luma samples allowed at the given level. * + * This is done to ensure that any stream with width and height lesser * + * than max_wd and max_ht is supported. Number of buffers required can * + * be greater for lower width and heights at a given level and this * + * increased number of buffers might require more memory than what * + * max_wd and max_ht buffer would have required Also note one extra * + * buffer is allocated to store current pictures MV bank. 
* + ***********************************************************************/ + + ps_mem_rec->u4_mem_size += BUF_MGR_MAX_CNT * sizeof(mv_buf_t); + + ps_mem_rec->u4_mem_size += (num_ref_frames + num_reorder_frames + + MAX_CTXT_SETS) + * ih264e_get_pic_mv_bank_size(max_luma_samples); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MVBANK, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * While encoding inter slices, to compute the cost of encoding an mb * + * with the mv's at hand, we employ the expression cost = sad + lambda * + * x mv_bits. Here mv_bits is the total number of bits taken to represe* + * nt the mv in the stream. The mv bits for all the possible mv are * + * stored in the look up table. The mem record for this look up table * + * is given below. * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBITS]; + { + /* max srch range x */ + UWORD32 u4_srch_range_x = ps_ip->s_ive_ip.u4_max_srch_rng_x; + + /* max srch range y */ + UWORD32 u4_srch_range_y = ps_ip->s_ive_ip.u4_max_srch_rng_y; + + /* max srch range */ + UWORD32 u4_max_srch_range = MAX(u4_srch_range_x, u4_srch_range_y); + + /* due to subpel */ + u4_max_srch_range <<= 2; + + /* due to mv on either direction */ + u4_max_srch_range = (u4_max_srch_range << 1); + + /* due to pred mv + zero */ + u4_max_srch_range = (u4_max_srch_range << 1) + 1; + + u4_max_srch_range = ALIGN128(u4_max_srch_range); + + ps_mem_rec->u4_mem_size = u4_max_srch_range; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MVBITS, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for SPS * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_SPS]; + { + ps_mem_rec->u4_mem_size = MAX_SPS_CNT * sizeof(sps_t); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_SPS, 
ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for PPS * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_PPS]; + { + ps_mem_rec->u4_mem_size = MAX_PPS_CNT * sizeof(pps_t); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PPS, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for Slice Header * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_HDR]; + { + ps_mem_rec->u4_mem_size = MAX_CTXT_SETS * MAX_SLICE_HDR_CNT + * sizeof(slice_header_t); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_SLICE_HDR, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for Adaptive Intra Refresh * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_AIR_MAP]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* intra coded map */ + total_size += max_mb_cnt; + total_size *= MAX_CTXT_SETS; + + /* mb refresh map */ + total_size += sizeof(UWORD16) * max_mb_cnt; + + /* alignment */ + total_size = ALIGN128(total_size); + + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_AIR_MAP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * In multi slice encoding, this memory record helps tracking the start* + * of slice with reference to mb. * + * MEM RECORD for holding * + * 1. 
mb slice map * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_MAP]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to slice index of all mbs of a frame */ + total_size = ALIGN64(max_mb_cnt); + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_SLICE_MAP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory to hold thread handles for each processing thread * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_THREAD_HANDLE]; + { + WORD32 handle_size = ithread_get_handle_size(); + + ps_mem_rec->u4_mem_size = MAX_PROCESS_THREADS * handle_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_THREAD_HANDLE, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory to hold mutex for control calls * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_CTL_MUTEX]; + { + ps_mem_rec->u4_mem_size = ithread_get_mutex_lock_size(); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CTL_MUTEX, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory to hold mutex for entropy calls * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_MUTEX]; + { + ps_mem_rec->u4_mem_size = ithread_get_mutex_lock_size(); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ENTROPY_MUTEX, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory to hold process jobs * + 
***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_JOBQ]; + { + /* One process job per row of MBs */ + /* Allocate for two pictures, so that wrap around can be handled easily */ + WORD32 num_jobs = max_mb_rows * 2; + + WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t)); + + ps_mem_rec->u4_mem_size = job_queue_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PROC_JOBQ, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory to hold entropy jobs * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_JOBQ]; + { + /* One process job per row of MBs */ + /* Allocate for two pictures, so that wrap around can be handled easily */ + WORD32 num_jobs = max_mb_rows * 2; + + WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t)); + + ps_mem_rec->u4_mem_size = job_queue_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ENTROPY_JOBQ, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * In multi core encoding, each row is assumed to be launched on a * + * thread. The rows below can only start after its neighbors are coded * + * The status of an mb coded/uncoded is signaled via proc map. * + * MEM RECORD for holding * + * 1. 
mb proc map (mb status core coded/uncoded) * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_MAP]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to mb core coding status of an entire frame */ + total_size = max_mb_cnt; + + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PROC_MAP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * mem record for holding a particular MB is deblocked or not * + * 1. mb deblk map (mb status deblocked/not deblocked) * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_DBLK_MAP]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to mb core coding status of an entire frame */ + total_size = max_mb_cnt; + + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + + total_size = ALIGN64(total_size); + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_DBLK_MAP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * mem record for holding a particular MB's me is done or not * + * 1. 
mb me map * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_ME_MAP]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to mb core coding status of an entire frame */ + total_size = max_mb_cnt; + + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ME_MAP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * size for holding dpb manager context * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_DPB_MGR]; + { + ps_mem_rec->u4_mem_size = sizeof(dpb_mgr_t); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_DPB_MGR, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * luma or chroma core coding involves mb estimation, error computation* + * between the estimated singnal and the actual signal, transform the * + * error, quantize the error, then inverse transform and inverse quant * + * ize the residue and add the result back to estimated signal. * + * To perform all these, a set of temporary buffers are needed. * + * MEM RECORD for holding scratch buffers * + * 1. prediction buffer used during mb mode analysis * + * 2 temp. reference buffer when intra 4x4 with rdopt on is * + * enabled * + * - when intra 4x4 is enabled, rdopt is on, to store the * + * reconstructed values and use them later this temp. buffer * + * is used. * + * 3. prediction buffer used during intra mode analysis * + * 4. prediction buffer used during intra 16x16 plane mode * + * analysis + * 5. prediction buffer used during intra chroma mode analysis * + * 6. 
prediction buffer used during intra chroma 16x16 plane * + * mode analysis + * 7. forward transform output buffer * + * - to store the error between estimated and the actual inp * + * ut and to store the fwd transformed quantized output * + * 8. forward transform output buffer * + * - when intra 4x4 is enabled, rdopt is on, to store the * + * fwd transform values and use them later this temp. buffer * + * is used. * + * 9. temporary buffer for inverse transform * + * - temporary buffer used in inverse transform and inverse * + * quantization * + * A. Buffers for holding half_x , half_y and half_xy planes * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_SCRATCH]; + { + WORD32 total_size = 0; + + /* size to hold prediction buffer */ + total_size += sizeof(UWORD8) * 16 * 16; + total_size = ALIGN64(total_size); + + /* size to hold recon for intra 4x4 buffer */ + total_size += sizeof(UWORD8) * 16 * 16; + total_size = ALIGN64(total_size); + + /* prediction buffer intra 16x16 */ + total_size += sizeof(UWORD8) * 16 * 16; + total_size = ALIGN64(total_size); + + /* prediction buffer intra 16x16 plane*/ + total_size += sizeof(UWORD8) * 16 * 16; + total_size = ALIGN64(total_size); + + /* prediction buffer intra chroma*/ + total_size += sizeof(UWORD8) * 16 * 8; + total_size = ALIGN64(total_size); + + /* prediction buffer intra chroma plane*/ + total_size += sizeof(UWORD8) * 16 * 8; + total_size = ALIGN64(total_size); + + /* size to hold fwd transform output */ + total_size += sizeof(WORD16) * SIZE_TRANS_BUFF; + total_size = ALIGN64(total_size); + + /* size to hold fwd transform output */ + total_size += sizeof(WORD16) * SIZE_TRANS_BUFF; + total_size = ALIGN64(total_size); + + /* size to hold temporary data during inverse transform */ + total_size += sizeof(WORD32) * SIZE_TMP_BUFF_ITRANS; + total_size = ALIGN64(total_size); + + /* Buffers for holding half_x , half_y and half_xy planes */ + total_size += 
sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT); + total_size = ALIGN64(total_size); + + total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT); + total_size = ALIGN64(total_size); + + total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT); + total_size = ALIGN64(total_size); + + /* Allocate for each process thread */ + total_size *= MAX_PROCESS_CTXT; + + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PROC_SCRATCH, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * When transform_8x8_flag is disabled, the size of a sub block is * + * 4x4 and when the transform_8x8_flag is enabled the size of the sub * + * block is 8x8. The threshold matrix and the forward scaling list * + * is of the size of the sub block. * + * MEM RECORD for holding * + * 1. quantization parameters for plane y, cb, cr * + * - threshold matrix for quantization * + * - forward weight matrix * + * - satqd threshold matrix * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_QUANT_PARAM]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* quantization parameter list for planes y,cb and cr */ + total_size += ALIGN64(sizeof(quant_params_t)) * 3; + + /* size of threshold matrix for quantization + * (assuming the transform_8x8_flag is disabled). + * for all 3 planes */ + total_size += ALIGN64(sizeof(WORD16) * 4 * 4) * 3; + + /* size of forward weight matrix for quantization + * (assuming the transform_8x8_flag is disabled). 
+ * for all 3 planes */ + total_size += ALIGN64(sizeof(WORD16) * 4 * 4) * 3; + + /* Size for SATDQ threshold matrix for palnes y, cb and cr */ + total_size += ALIGN64(sizeof(UWORD16) * 9) * 3; + + /* total size per each proc thread */ + total_size *= MAX_PROCESS_CTXT; + + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_QUANT_PARAM, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * While computing blocking strength for the current mb, the csbp, mb * + * type for the neighboring mbs are necessary. memtab for storing top * + * row mbtype and csbp is evaluated here. * + * * + * when encoding intra 4x4 or intra 8x8 the submb types are estimated * + * and sent. The estimation is dependent on neighbor mbs. For this * + * store the top row sub mb types for intra mbs * + * * + * During motion vector prediction, the curr mb mv is predicted from * + * neigbors left, top, top right and sometimes top left depending on * + * the availability. The top and top right content is accessed from * + * the memtab specified below. 
* + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_TOP_ROW_SYN_INFO]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to store 1 row of mb_info_t */ + /* one additional mb, to avoid checking end of row condition */ + total_size += (max_mb_cols + 1) * sizeof(mb_info_t); + + /* size in bytes to store 1 row of intra macroblock sub modes */ + total_size += max_mb_cols * sizeof(UWORD8) * 16; + + /* size in bytes to store 1 row + 1 of enc_pu_t */ + /* one additional mb, to avoid checking end of row condition */ + total_size += (max_mb_cols + 1) * sizeof(enc_pu_t); + + /* total size per proc ctxt */ + total_size = ALIGN128(total_size); + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_TOP_ROW_SYN_INFO, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * When transform_8x8_flag is disabled, the mb is partitioned into * + * 4 sub blocks. This corresponds to 1 vertical left edge and 1 * + * vertical inner edge, 1 horizontal top edge and 1 horizontal * + * inner edge per mb. Further, When transform_8x8_flag is enabled, * + * the mb is partitioned in to 16 sub blocks. This corresponds to * + * 1 vertical left edge and 3 vertical inner edges, 1 horizontal top * + * edge and 3 horizontal inner edges per mb. * + * MEM RECORD for holding * + * 1. vertical edge blocking strength * + * 2. horizontal edge blocking strength * + * 3. 
mb qp * + * all are frame level * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_BS_QP]; + { + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to store vertical edge bs, horizontal edge bs and qp of every mb*/ + WORD32 vert_bs_size, horz_bs_size, qp_size; + + /* vertical edge bs = total number of vertical edges * number of bytes per each edge */ + /* total num of v edges = total mb * 4 (assuming transform_8x8_flag = 0), + * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */ + vert_bs_size = ALIGN64(max_mb_cnt * 4 * 4); + + /* horizontal edge bs = total number of horizontal edges * number of bytes per each edge */ + /* total num of h edges = total mb * 4 (assuming transform_8x8_flag = 0), + * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */ + horz_bs_size = ALIGN64(max_mb_cnt * 4 * 4); + + /* qp of each mb requires 1 byte */ + qp_size = ALIGN64(max_mb_cnt); + + /* total size */ + total_size = vert_bs_size + horz_bs_size + qp_size; + + /* total size per each proc ctxt */ + total_size *= MAX_CTXT_SETS; + + ps_mem_rec->u4_mem_size = total_size; + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_BS_QP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * size for holding dpb manager context * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_INP_PIC]; + { + ps_mem_rec->u4_mem_size = ih264_buf_mgr_size(); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_INP_PIC, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * size for holding dpb manager context * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_OUT]; + { + ps_mem_rec->u4_mem_size = ih264_buf_mgr_size(); + } + 
DEBUG("\nMemory record Id %d = %d \n", MEM_REC_OUT, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Size for color space conversion * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_CSC]; + { + /* We need a total a memory for a single frame of 420 sp, ie + * (wd * ht) for luma and (wd * ht / 2) for chroma*/ + ps_mem_rec->u4_mem_size = MAX_CTXT_SETS + * ((3 * max_ht_luma * max_wd_luma) >> 1); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CSC, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Size for holding pic_buf_t for each reference picture * + * Note this allocation is done for BUF_MGR_MAX_CNT instead of * + * MAX_DPB_SIZE or max_dpb_size for following reasons * + * max_dpb_size will be based on max_wd and max_ht * + * For higher max_wd and max_ht this number will be smaller than * + * MAX_DPB_SIZE But during actual initialization number of buffers * + * allocated can be more. * + * * + * Also to handle display depth application can allocate more than * + * what codec asks for in case of non-shared mode * + * Since this is only a structure allocation and not actual buffer * + * allocation, it is allocated for BUF_MGR_MAX_CNT entries * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_REF_PIC]; + { + ps_mem_rec->u4_mem_size = ih264_buf_mgr_size(); + ps_mem_rec->u4_mem_size += BUF_MGR_MAX_CNT * sizeof(pic_buf_t); + + /************************************************************************ + * Note: Number of luma samples is not max_wd * max_ht here, instead it * + * is set to maximum number of luma samples allowed at the given level. * + * This is done to ensure that any stream with width and height lesser * + * than max_wd and max_ht is supported. 
Number of buffers required can * + * be greater for lower width and heights at a given level and this * + * increased number of buffers might require more memory than what * + * max_wd and max_ht buffer would have required. Number of buffers is * + * doubled in order to return one frame at a time instead of sending * + * multiple outputs during dpb full case. Also note one extra buffer is * + * allocted to store current picture. * + * * + * Half-pel planes for each reference buffer are allocated along with * + * the reference buffer. So each reference buffer is 4 times the * + * required size. This way buffer management for the half-pel planes is * + * easier and while using the half-pel planes in MC, an offset can be * + * used from a single pointer * + ***********************************************************************/ + ps_mem_rec->u4_mem_size += HPEL_PLANES_CNT + * ih264e_get_total_pic_buf_size( + max_wd_luma * max_ht_luma, level, + PAD_WD, PAD_HT, num_ref_frames, + num_reorder_frames); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_REF_PIC, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory to hold mem recs to be returned during retrieve call * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_BACKUP]; + { + ps_mem_rec->u4_mem_size = MEM_REC_CNT * sizeof(iv_mem_rec_t); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_BACKUP, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * size for memory required by NMB info structs and buffer for storing * + * half pel plane * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_INFO_NMB]; + { + ps_mem_rec->u4_mem_size = MAX_PROCESS_CTXT * MAX_NMB + * (sizeof(mb_info_nmb_t) + + MB_SIZE * MB_SIZE * sizeof(UWORD8)); + } + DEBUG("\nMemory record Id %d = 
%d \n", MEM_REC_MB_INFO_NMB, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * RC mem records * + ************************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_RC]; + { + ih264e_get_rate_control_mem_tab(NULL, ps_mem_rec, FILL_MEMTAB); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_RC, ps_mem_rec->u4_mem_size); + + /* Each memtab size is aligned to next multiple of 128 bytes */ + /* This is to ensure all the memtabs start at different cache lines */ + ps_mem_rec = ps_mem_rec_base; + for (i = 0; i < MEM_REC_CNT; i++) + { + ps_mem_rec->u4_mem_size = ALIGN128(ps_mem_rec->u4_mem_size); + ps_mem_rec++; + } + + ps_op->s_ive_op.u4_num_mem_rec = MEM_REC_CNT; + + DEBUG("Num mem recs in fill call : %d\n", ps_op->s_ive_op.u4_num_mem_rec); + + return (status); +} + +/** +******************************************************************************* +* +* @brief +* Initializes from mem records passed to the codec +* +* @par Description: +* Initializes pointers based on mem records passed +* +* @param[in] ps_codec_obj +* Pointer to codec object at API level +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj, + void *pv_api_ip, + void *pv_api_op) +{ + /* api call I/O structures */ + ih264e_init_ip_t *ps_ip = pv_api_ip; + ih264e_init_op_t *ps_op = pv_api_op; + + /* mem records */ + iv_mem_rec_t *ps_mem_rec_base, *ps_mem_rec; + + /* codec variables */ + codec_t * ps_codec; + cfg_params_t *ps_cfg; + + /* frame dimensions */ + WORD32 max_wd_luma, max_ht_luma; + WORD32 max_mb_rows, max_mb_cols, max_mb_cnt; + + /* temp var */ + WORD32 i; + WORD32 status = IV_SUCCESS; + + /* frame dimensions */ + 
max_ht_luma = ALIGN16(ps_ip->s_ive_ip.u4_max_ht); + max_wd_luma = ALIGN16(ps_ip->s_ive_ip.u4_max_wd); + max_mb_rows = max_ht_luma / MB_SIZE; + max_mb_cols = max_wd_luma / MB_SIZE; + max_mb_cnt = max_mb_rows * max_mb_cols; + + /* mem records */ + ps_mem_rec_base = ps_ip->s_ive_ip.ps_mem_rec; + + /* Init mem records */ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_CODEC]; + { + ps_codec_obj->pv_codec_handle = ps_mem_rec->pv_base; + ps_codec = (codec_t *) (ps_codec_obj->pv_codec_handle); + } + + /* Note this memset can not be done in init() call, since init will called + during reset as well. And calling this during reset will mean all pointers + need to reinitialized */ + memset(ps_codec, 0, sizeof(codec_t)); + + /* Set default Config Params */ + ps_cfg = &ps_codec->s_cfg; + ih264e_set_default_params(ps_cfg); + + /* Update config params as per input */ + ps_cfg->u4_max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd); + ps_cfg->u4_max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht); + ps_cfg->i4_wd_mbs = ps_cfg->u4_max_wd >> 4; + ps_cfg->i4_ht_mbs = ps_cfg->u4_max_ht >> 4; + ps_cfg->u4_max_ref_cnt = ps_ip->s_ive_ip.u4_max_ref_cnt; + ps_cfg->u4_max_reorder_cnt = ps_ip->s_ive_ip.u4_max_reorder_cnt; + ps_cfg->u4_max_level = ps_ip->s_ive_ip.u4_max_level; + ps_cfg->e_inp_color_fmt = ps_ip->s_ive_ip.e_inp_color_fmt; + ps_cfg->e_recon_color_fmt = ps_ip->s_ive_ip.e_recon_color_fmt; + ps_cfg->u4_max_framerate = ps_ip->s_ive_ip.u4_max_framerate; + ps_cfg->u4_max_bitrate = ps_ip->s_ive_ip.u4_max_bitrate; + ps_cfg->u4_max_num_bframes = ps_ip->s_ive_ip.u4_max_num_bframes; + ps_cfg->e_content_type = ps_ip->s_ive_ip.e_content_type; + ps_cfg->u4_max_srch_rng_x = ps_ip->s_ive_ip.u4_max_srch_rng_x; + ps_cfg->u4_max_srch_rng_y = ps_ip->s_ive_ip.u4_max_srch_rng_y; + ps_cfg->e_slice_mode = ps_ip->s_ive_ip.e_slice_mode; + ps_cfg->u4_slice_param = ps_ip->s_ive_ip.u4_slice_param; + ps_cfg->e_arch = ps_ip->s_ive_ip.e_arch; + ps_cfg->e_soc = ps_ip->s_ive_ip.e_soc; + ps_cfg->u4_enable_recon = 
ps_ip->s_ive_ip.u4_enable_recon; + ps_cfg->e_rc_mode = ps_ip->s_ive_ip.e_rc_mode; + + /* Validate params */ + if ((ps_ip->s_ive_ip.u4_max_level < MIN_LEVEL) + || (ps_ip->s_ive_ip.u4_max_level > MAX_LEVEL)) + { + ps_op->s_ive_op.u4_error_code |= IH264E_CODEC_LEVEL_NOT_SUPPORTED; + ps_cfg->u4_max_level = DEFAULT_MAX_LEVEL; + } + + if (ps_ip->s_ive_ip.u4_max_ref_cnt > MAX_REF_CNT) + { + ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED; + ps_cfg->u4_max_ref_cnt = MAX_REF_CNT; + } + + if (ps_ip->s_ive_ip.u4_max_reorder_cnt > MAX_REF_CNT) + { + ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED; + ps_cfg->u4_max_reorder_cnt = MAX_REF_CNT; + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_BACKUP]; + { + ps_codec->ps_mem_rec_backup = (iv_mem_rec_t *) ps_mem_rec->pv_base; + + memcpy(ps_codec->ps_mem_rec_backup, ps_mem_rec_base, + MEM_REC_CNT * sizeof(iv_mem_rec_t)); + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY]; + { + /* temp var */ + WORD32 size = 0, offset; + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + if (i < MAX_PROCESS_CTXT / 2) + { + /* base ptr */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* reset size */ + size = 0; + + /* skip mb run */ + ps_codec->as_process[i].s_entropy.pi4_mb_skip_run = + (void *) (pu1_buf + size); + size += sizeof(WORD32); + size = ALIGN8(size); + + /* entropy map */ + ps_codec->as_process[i].s_entropy.pu1_entropy_map = + (void *) (pu1_buf + size + max_mb_cols); + /* size in bytes to store entropy status of an entire frame */ + size += (max_mb_cols * max_mb_rows); + /* add an additional 1 row of bytes to evade the special case of row 0 */ + size += max_mb_cols; + size = ALIGN128(size); + + /* bit stream ptr */ + ps_codec->as_process[i].s_entropy.ps_bitstrm = (void *) (pu1_buf + + size); + size += sizeof(bitstrm_t); + size = ALIGN128(size); + + /* nnz luma */ + ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma = + (void *) (pu1_buf + size); + size += (max_mb_cols * 4 * sizeof(UWORD8)); + size = ALIGN128(size); 
+ + /* nnz chroma */ + ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr = + (void *) (pu1_buf + size); + size += (max_mb_cols * 4 * sizeof(UWORD8)); + size = ALIGN128(size); + offset = size; + } + else + { + /* base ptr */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* reset size */ + size = offset; + + /* skip mb run */ + ps_codec->as_process[i].s_entropy.pi4_mb_skip_run = + (void *) (pu1_buf + size); + size += sizeof(WORD32); + size = ALIGN8(size); + + /* entropy map */ + ps_codec->as_process[i].s_entropy.pu1_entropy_map = + (void *) (pu1_buf + size + max_mb_cols); + /* size in bytes to store entropy status of an entire frame */ + size += (max_mb_cols * max_mb_rows); + /* add an additional 1 row of bytes to evade the special case of row 0 */ + size += max_mb_cols; + size = ALIGN128(size); + + /* bit stream ptr */ + ps_codec->as_process[i].s_entropy.ps_bitstrm = (void *) (pu1_buf + + size); + size += sizeof(bitstrm_t); + size = ALIGN128(size); + + /* nnz luma */ + ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma = + (void *) (pu1_buf + size); + size += (max_mb_cols * 4 * sizeof(UWORD8)); + size = ALIGN128(size); + + /* nnz chroma */ + ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr = + (void *) (pu1_buf + size); + size += (max_mb_cols * 4 * sizeof(UWORD8)); + size = ALIGN128(size); + } + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_COEFF_DATA]; + { + /* temp var */ + WORD32 size = 0, size_of_row; + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* size of coeff data of 1 mb */ + size += sizeof(tu_sblk_coeff_data_t) * MAX_4x4_SUBBLKS; + + /* size of coeff data of 1 row of mb's */ + size *= max_mb_cols; + + /* align to avoid false sharing */ + size = ALIGN64(size); + size_of_row = size; + + /* size for one full frame */ + size *= max_mb_rows; + + ps_codec->u4_size_coeff_data = size_of_row; + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + if (i < MAX_PROCESS_CTXT / 2) + { + ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf; + 
ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data = + pu1_buf; + } + else + { + ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf + size; + ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data = pu1_buf + + size; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_HEADER_DATA]; + { + /* temp var */ + WORD32 size, size_of_row; + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* size of header data of 1 mb */ + size = 40; + + /* size for 1 row of mbs */ + size = size * max_mb_cols; + + /* align to avoid any false sharing across threads */ + size = ALIGN64(size); + size_of_row = size; + + /* size for one full frame */ + size *= max_mb_rows; + + ps_codec->u4_size_header_data = size_of_row; + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + if (i < MAX_PROCESS_CTXT / 2) + { + ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf; + ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data = + pu1_buf; + } + else + { + ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf + size; + ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data = + pu1_buf + size; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBANK]; + { + /* size of buf mgr struct */ + WORD32 size = ih264_buf_mgr_size(); + + /* temp var */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* mv buffer mgr */ + ps_codec->pv_mv_buf_mgr_base = pu1_buf; + + /* mv bank */ + ps_codec->pv_mv_bank_buf_base = pu1_buf + size; + ps_codec->i4_total_mv_bank_size = ps_mem_rec->u4_mem_size - size; + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBITS]; + { + /* max srch range x */ + UWORD32 u4_srch_range_x = ps_ip->s_ive_ip.u4_max_srch_rng_x; + + /* max srch range y */ + UWORD32 u4_srch_range_y = ps_ip->s_ive_ip.u4_max_srch_rng_y; + + /* max srch range */ + UWORD32 u4_max_srch_range = MAX(u4_srch_range_x, u4_srch_range_y); + + /* temp var */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* due to subpel */ + u4_max_srch_range <<= 2; + +// /* due to mv on either direction */ +// u4_max_srch_range = 
(u4_max_srch_range << 1); + + /* due to pred mv + zero */ + u4_max_srch_range = (u4_max_srch_range << 1) + 1; + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + /* me ctxt */ + me_ctxt_t *ps_mem_ctxt = &(ps_codec->as_process[i].s_me_ctxt); + + /* init at zero mv */ + ps_mem_ctxt->pu1_mv_bits = pu1_buf + u4_max_srch_range; + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_SPS]; + { + ps_codec->ps_sps_base = (sps_t *) ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_PPS]; + { + ps_codec->ps_pps_base = (pps_t *) ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_HDR]; + { + ps_codec->ps_slice_hdr_base = ps_mem_rec->pv_base; + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + if (i < MAX_PROCESS_CTXT / 2) + { + ps_codec->as_process[i].ps_slice_hdr_base = ps_mem_rec->pv_base; + } + else + { + /* temp var */ + WORD32 size = MAX_SLICE_HDR_CNT * sizeof(slice_header_t); + void *pv_buf = (UWORD8 *) ps_mem_rec->pv_base + size; + + ps_codec->as_process[i].ps_slice_hdr_base = pv_buf; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_AIR_MAP]; + { + /* temp var */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + if (i < MAX_PROCESS_CTXT / 2) + { + ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf; + } + else + { + ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf + + max_mb_cnt; + } + } + + ps_codec->pu2_intr_rfrsh_map = (UWORD16 *) (pu1_buf + max_mb_cnt * 2); + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_MAP]; + { + /* pointer to storage space */ + UWORD8 *pu1_buf_ping, *pu1_buf_pong; + + /* init pointer */ + pu1_buf_ping = ps_mem_rec->pv_base; + pu1_buf_pong = pu1_buf_ping + ALIGN64(max_mb_cnt); + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + if (i < MAX_PROCESS_CTXT / 2) + { + ps_codec->as_process[i].pu1_slice_idx = pu1_buf_ping; + } + else + { + ps_codec->as_process[i].pu1_slice_idx = pu1_buf_pong; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_THREAD_HANDLE]; + { + WORD32 
handle_size = ithread_get_handle_size(); + + for (i = 0; i < MAX_PROCESS_THREADS; i++) + { + ps_codec->apv_proc_thread_handle[i] = (UWORD8 *) ps_mem_rec->pv_base + + (i * handle_size); + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_CTL_MUTEX]; + { + ps_codec->pv_ctl_mutex = ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_MUTEX]; + { + ps_codec->pv_entropy_mutex = ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_JOBQ]; + { + ps_codec->pv_proc_jobq_buf = ps_mem_rec->pv_base; + ps_codec->i4_proc_jobq_buf_size = ps_mem_rec->u4_mem_size; + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_JOBQ]; + { + ps_codec->pv_entropy_jobq_buf = ps_mem_rec->pv_base; + ps_codec->i4_entropy_jobq_buf_size = ps_mem_rec->u4_mem_size; + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_MAP]; + { + /* pointer to storage space */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to mb core coding status of an entire frame */ + total_size = max_mb_cnt; + + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + if (i < MAX_PROCESS_CTXT / 2) + { + ps_codec->as_process[i].pu1_proc_map = pu1_buf + max_mb_cols; + } + else + { + ps_codec->as_process[i].pu1_proc_map = pu1_buf + total_size + + max_mb_cols; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_DBLK_MAP]; + { + /* pointer to storage space */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to mb core coding status of an entire frame */ + total_size = max_mb_cnt; + + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + + /*Align the memory offsets*/ + total_size = ALIGN64(total_size); + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + if (i < MAX_PROCESS_CTXT / 2) + { + 
ps_codec->as_process[i].pu1_deblk_map = pu1_buf + max_mb_cols; + + } + else + { + ps_codec->as_process[i].pu1_deblk_map = pu1_buf + total_size + + max_mb_cols; + + } + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_ME_MAP]; + { + /* pointer to storage space */ + UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base; + + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in bytes to mb core coding status of an entire frame */ + total_size = max_mb_cnt; + + /* add an additional 1 row of bytes to evade the special case of row 0 */ + total_size += max_mb_cols; + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + if (i < MAX_PROCESS_CTXT / 2) + { + ps_codec->as_process[i].pu1_me_map = pu1_buf + max_mb_cols; + } + else + { + ps_codec->as_process[i].pu1_me_map = pu1_buf + total_size + + max_mb_cols; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_DPB_MGR]; + { + ps_codec->pv_dpb_mgr = ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_SCRATCH]; + { + /* pointer to storage space */ + UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base; + + /* size of pred buffer, fwd transform output, temp buffer for inv tra */ + WORD32 size_pred_luma, size_pred_chroma, size_fwd, size_inv, size_hp; + + /* temp var */ + WORD32 size = 0; + + /* size to hold intra/inter prediction buffer */ + size_pred_luma = sizeof(UWORD8) * 16 * 16; + size_pred_chroma = sizeof(UWORD8) * 8 * 16; + + /* size to hold fwd transform output */ + size_fwd = sizeof(WORD16) * SIZE_TRANS_BUFF; + + /* size to hold temporary data during inverse transform */ + size_inv = sizeof(WORD32) * SIZE_TMP_BUFF_ITRANS; + + /* size to hold half pel plane buffers */ + size_hp = sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT); + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + /* prediction buffer */ + ps_codec->as_process[i].pu1_pred_mb = (void *) (pu1_buf + size); + ps_codec->as_process[i].i4_pred_strd = 16; + size += size_pred_luma; + size = ALIGN64(size); + + /* prediction buffer */ + 
ps_codec->as_process[i].pu1_ref_mb_intra_4x4 = (void *) (pu1_buf + + size); + size += size_pred_luma; + size = ALIGN64(size); + + /* prediction buffer intra 16x16 */ + ps_codec->as_process[i].pu1_pred_mb_intra_16x16 = (void *) (pu1_buf + + size); + size += size_pred_luma; + size = ALIGN64(size); + + /* prediction buffer intra 16x16 plane*/ + ps_codec->as_process[i].pu1_pred_mb_intra_16x16_plane = + (void *) (pu1_buf + size); + size += size_pred_luma; + size = ALIGN64(size); + + /* prediction buffer intra chroma*/ + ps_codec->as_process[i].pu1_pred_mb_intra_chroma = (void *) (pu1_buf + + size); + size += size_pred_chroma; + size = ALIGN64(size); + + /* prediction buffer intra chroma plane*/ + ps_codec->as_process[i].pu1_pred_mb_intra_chroma_plane = + (void *) (pu1_buf + size); + size += size_pred_chroma; + size = ALIGN64(size); + + /* Fwd transform output */ + ps_codec->as_process[i].pi2_res_buf = (void *) (pu1_buf + size); + ps_codec->as_process[i].i4_res_strd = 16; + size += size_fwd; + size = ALIGN64(size); + + /* Fwd transform output */ + ps_codec->as_process[i].pi2_res_buf_intra_4x4 = (void *) (pu1_buf + + size); + size += size_fwd; + size = ALIGN64(size); + + /* scratch buffer used during inverse transform */ + ps_codec->as_process[i].pv_scratch_buff = (void *) (pu1_buf + size); + size += size_inv; + size = ALIGN64(size); + + /* Buffers for holding half_x , half_y and half_xy values */ + ps_codec->as_process[i].pu1_half_x = (void *) (pu1_buf + size); + size += size_hp; + size = ALIGN64(size); + + ps_codec->as_process[i].pu1_half_y = (void *) (pu1_buf + size); + size += size_hp; + size = ALIGN64(size); + + ps_codec->as_process[i].pu1_half_xy = (void *) (pu1_buf + size); + size += size_hp; + size = ALIGN64(size); + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_QUANT_PARAM]; + { + /* pointer to storage space */ + UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base; + + /* size of qp, threshold matrix, fwd scaling list for one plane */ + WORD32 size_quant_param, 
size_thres_mat, size_fwd_weight_mat, + size_satqd_weight_mat; + + /* temp var */ + WORD32 total_size = 0; + + /* size of quantization parameter list of 1 plane */ + size_quant_param = ALIGN64(sizeof(quant_params_t)); + + /* size of threshold matrix for quantization + * (assuming the transform_8x8_flag is disabled). + * for 1 plane */ + size_thres_mat = ALIGN64(sizeof(WORD16) * 4 * 4); + + /* size of forward weight matrix for quantization + * (assuming the transform_8x8_flag is disabled). + * for 1 plane */ + size_fwd_weight_mat = ALIGN64(sizeof(WORD16) * 4 * 4); + + /* size of SATQD matrix*/ + size_satqd_weight_mat = ALIGN64(sizeof(UWORD16) * 9); + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + quant_params_t **ps_qp_params = ps_codec->as_process[i].ps_qp_params; + + /* quantization param structure */ + ps_qp_params[0] = (quant_params_t *) (pu1_buf + total_size); + total_size = total_size + size_quant_param; + ps_qp_params[1] = (quant_params_t *) (pu1_buf + total_size); + total_size = total_size + size_quant_param; + ps_qp_params[2] = (quant_params_t *) (pu1_buf + total_size); + total_size = total_size + size_quant_param; + + /* threshold matrix for quantization */ + ps_qp_params[0]->pu2_thres_mat = (void *) (pu1_buf + total_size); + total_size = total_size + size_thres_mat; + ps_qp_params[1]->pu2_thres_mat = (void *) (pu1_buf + total_size); + total_size = total_size + size_thres_mat; + ps_qp_params[2]->pu2_thres_mat = (void *) (pu1_buf + total_size); + total_size = total_size + size_thres_mat; + + /* fwd weight matrix */ + ps_qp_params[0]->pu2_weigh_mat = (void *) (pu1_buf + total_size); + total_size = total_size + size_fwd_weight_mat; + ps_qp_params[1]->pu2_weigh_mat = (void *) (pu1_buf + total_size); + total_size = total_size + size_fwd_weight_mat; + ps_qp_params[2]->pu2_weigh_mat = (void *) (pu1_buf + total_size); + total_size = total_size + size_fwd_weight_mat; + + /* threshold matrix for SATQD */ + ps_qp_params[0]->pu2_sad_thrsh = (void *) (pu1_buf + 
total_size); + total_size = total_size + size_satqd_weight_mat; + ps_qp_params[1]->pu2_sad_thrsh = (void *) (pu1_buf + total_size); + total_size = total_size + size_satqd_weight_mat; + ps_qp_params[2]->pu2_sad_thrsh = (void *) (pu1_buf + total_size); + total_size = total_size + size_satqd_weight_mat; + + total_size = ALIGN128(total_size); + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_TOP_ROW_SYN_INFO]; + { + /* total size of the mem record */ + WORD32 total_size = 0, size_csbp, size_intra_modes, size_mv; + + /* pointer to buffer */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* size in bytes to store 1 row of mb_info_t */ + /* one additional mb, to avoid checking end of row condition */ + size_csbp = (max_mb_cols + 1) * sizeof(mb_info_t); + + /* size in bytes to store 1 row of intra macroblock sub modes */ + size_intra_modes = max_mb_cols * sizeof(UWORD8) * 16; + + /* size in bytes to store 1 row + 1 of enc_pu_t */ + /* one additional mb, to avoid checking end of row condition */ + size_mv = (max_mb_cols + 1) * sizeof(enc_pu_t); + + /* total size per proc ctxt */ + total_size = size_csbp + size_intra_modes + size_mv; + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + if (i < MAX_PROCESS_CTXT / 2) + { + ps_codec->as_process[i].ps_top_row_mb_syntax_ele_base = + (mb_info_t *) pu1_buf; + ps_codec->as_process[i].pu1_top_mb_intra_modes_base = pu1_buf + + size_csbp; + ps_codec->as_process[i].ps_top_row_pu_base = + (enc_pu_t *) (pu1_buf + size_csbp + + size_intra_modes); + } + else + { + ps_codec->as_process[i].ps_top_row_mb_syntax_ele_base = + (mb_info_t *) (pu1_buf + total_size); + ps_codec->as_process[i].pu1_top_mb_intra_modes_base = pu1_buf + + total_size + size_csbp; + ps_codec->as_process[i].ps_top_row_pu_base = + (enc_pu_t *) (pu1_buf + total_size + size_csbp + + size_intra_modes); + } + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_BS_QP]; + { + UWORD8 *pu1_buf_ping, *pu1_buf_pong; + + /* total size of the mem record */ + WORD32 total_size = 0; + + /* size in 
bytes to store vertical edge bs, horizontal edge bs and qp of every mb*/ + WORD32 vert_bs_size, horz_bs_size, qp_size; + + /* vertical edge bs = total number of vertical edges * number of bytes per each edge */ + /* total num of v edges = total mb * 4 (assuming transform_8x8_flag = 0), + * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */ + vert_bs_size = ALIGN64(max_mb_cnt * 4 * 4); + + /* horizontal edge bs = total number of horizontal edges * number of bytes per each edge */ + /* total num of h edges = total mb * 4 (assuming transform_8x8_flag = 0), + * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */ + horz_bs_size = ALIGN64(max_mb_cnt * 4 * 4); + + /* qp of each mb requires 1 byte */ + qp_size = ALIGN64(max_mb_cnt); + + /* total size */ + total_size = vert_bs_size + horz_bs_size + qp_size; + + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + if (i < MAX_PROCESS_CTXT / 2) + { + pu1_buf_ping = (UWORD8 *) ps_mem_rec->pv_base; + + /* vertical edge bs storage space */ + ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_vert_bs = + (UWORD32 *) pu1_buf_ping; + pu1_buf_ping += vert_bs_size; + + /* horizontal edge bs storage space */ + ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_horz_bs = + (UWORD32 *) pu1_buf_ping; + pu1_buf_ping += horz_bs_size; + + /* qp */ + ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp = + (UWORD8 *) pu1_buf_ping; + pu1_buf_ping += qp_size; + } + else + { + pu1_buf_pong = (UWORD8 *) ps_mem_rec->pv_base; + pu1_buf_pong += total_size; + + /* vertical edge bs storage space */ + ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_vert_bs = + (UWORD32 *) pu1_buf_pong; + pu1_buf_pong += vert_bs_size; + + /* horizontal edge bs storage space */ + ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_horz_bs = + (UWORD32 *) pu1_buf_pong; + pu1_buf_pong += horz_bs_size; + + /* qp */ + ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp = + (UWORD8 *) pu1_buf_pong; + 
pu1_buf_pong += qp_size; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_INP_PIC]; + { + ps_codec->pv_inp_buf_mgr_base = ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_OUT]; + { + ps_codec->pv_out_buf_mgr_base = ps_mem_rec->pv_base; + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_CSC]; + { + ps_codec->pu1_y_csc_buf_base = ps_mem_rec->pv_base; + ps_codec->pu1_uv_csc_buf_base = (UWORD8 *) ps_mem_rec->pv_base + + (max_ht_luma * max_wd_luma); + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_REF_PIC]; + { + /* size of buf mgr struct */ + WORD32 size = ih264_buf_mgr_size(); + + /* temp var */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* pic buffer mgr */ + ps_codec->pv_ref_buf_mgr_base = pu1_buf; + + /* picture bank */ + ps_codec->pv_pic_buf_base = pu1_buf + size; + ps_codec->i4_total_pic_buf_size = ps_mem_rec->u4_mem_size - size; + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_INFO_NMB]; + { + /* temp var */ + UWORD8 *pu1_buf = ps_mem_rec->pv_base; + + /* size of nmb ctxt */ + WORD32 size = MAX_NMB * sizeof(mb_info_nmb_t); + + UWORD32 nmb_cntr, subpel_buf_size; + + /* init nmb info structure pointer in all proc ctxts */ + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + ps_codec->as_process[i].ps_nmb_info = (mb_info_nmb_t *) (pu1_buf); + + pu1_buf += size; + } + + subpel_buf_size = MB_SIZE * MB_SIZE * sizeof(UWORD8); + + /* adjusting pointers for nmb halfpel buffer */ + for (i = 0; i < MAX_PROCESS_CTXT; i++) + { + mb_info_nmb_t* ps_mb_info_nmb = + &ps_codec->as_process[i].ps_nmb_info[0]; + + for (nmb_cntr = 0; nmb_cntr < MAX_NMB; nmb_cntr++) + { + ps_mb_info_nmb[nmb_cntr].pu1_best_sub_pel_buf = pu1_buf; + + pu1_buf = pu1_buf + subpel_buf_size; + + ps_mb_info_nmb[nmb_cntr].u4_bst_spel_buf_strd = MB_SIZE; + } + } + } + + ps_mem_rec = &ps_mem_rec_base[MEM_REC_RC]; + { + ih264e_get_rate_control_mem_tab(&ps_codec->s_rate_control, ps_mem_rec, + USE_BASE); + } + + /* init codec ctxt */ + status = ih264e_init(ps_codec); + + return status; +} + +/** 
+*******************************************************************************
+*
+* @brief
+*  Retrieves mem records passed to the codec
+*
+* @par Description:
+*  Calls ih264e_join_threads(), copies the MEM_REC_CNT records kept in
+*  ps_mem_rec_backup into the caller supplied record array and then hands the
+*  job queues, the control/entropy mutexes and the four buffer managers to
+*  their respective free/destroy routines.
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_retrieve_mem_rec_ip_t); the
+*  array it points to through ps_mem_rec receives the records
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_retrieve_mem_rec_op_t)
+*
+* @returns IV_SUCCESS, or IV_FAIL with IVE_FATALERROR and IH264E_INIT_NOT_DONE
+*  flagged in u4_error_code when i4_init_done is not 1
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_retrieve_memrec(iv_obj_t *ps_codec_obj,
+                                     void *pv_api_ip,
+                                     void *pv_api_op)
+{
+    /* typed views of the API level arguments */
+    ih264e_retrieve_mem_rec_ip_t *ps_retrieve_ip = pv_api_ip;
+    ih264e_retrieve_mem_rec_op_t *ps_retrieve_op = pv_api_op;
+
+    /* encoder context attached to the API object */
+    codec_t *ps_enc = (codec_t *) ps_codec_obj->pv_codec_handle;
+
+    /* nothing can be handed back unless init has completed */
+    if (1 != ps_enc->i4_init_done)
+    {
+        ps_retrieve_op->s_ive_op.u4_error_code |=
+                        (1 << IVE_FATALERROR) | IH264E_INIT_NOT_DONE;
+        return IV_FAIL;
+    }
+
+    /* join the encoder threads before anything is torn down */
+    ih264e_join_threads(ps_enc);
+
+    /* return the list of memory records used by the encoder library */
+    memcpy(ps_retrieve_ip->s_ive_ip.ps_mem_rec, ps_enc->ps_mem_rec_backup,
+           sizeof(iv_mem_rec_t) * MEM_REC_CNT);
+    ps_retrieve_op->s_ive_op.u4_num_mem_rec_filled = MEM_REC_CNT;
+
+    /* job queues */
+    ih264_list_free(ps_enc->pv_entropy_jobq);
+    ih264_list_free(ps_enc->pv_proc_jobq);
+
+    /* mutexes */
+    ithread_mutex_destroy(ps_enc->pv_ctl_mutex);
+    ithread_mutex_destroy(ps_enc->pv_entropy_mutex);
+
+    /* buffer managers: mv, reference, input and output */
+    ih264_buf_mgr_free((buf_mgr_t *) ps_enc->pv_mv_buf_mgr);
+    ih264_buf_mgr_free((buf_mgr_t *) ps_enc->pv_ref_buf_mgr);
+    ih264_buf_mgr_free((buf_mgr_t *) ps_enc->pv_inp_buf_mgr);
+    ih264_buf_mgr_free((buf_mgr_t *) ps_enc->pv_out_buf_mgr);
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets the encoder in flush mode.
+*
+* @par Description:
+*  Records the flush request by setting i4_flush_mode in the codec context to
+*  1 and clears the error code of the output structure.
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (not used)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_flush_op_t)
+*
+* @returns IV_SUCCESS always
+*
+* @remarks Only the flag is written here; how and when the encoder acts on
+*  it is not visible from this routine
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_set_flush_mode(iv_obj_t *ps_codec_obj,
+                                    void *pv_api_ip,
+                                    void *pv_api_op)
+{
+    /* encoder context attached to the API object */
+    codec_t *ps_enc = (codec_t *) ps_codec_obj->pv_codec_handle;
+
+    /* output structure of the flush control call */
+    ih264e_ctl_flush_op_t *ps_flush_op = pv_api_op;
+
+    /* the input structure carries nothing this call needs */
+    UNUSED(pv_api_ip);
+
+    /* raise the flush flag in the codec context */
+    ps_enc->i4_flush_mode = 1;
+
+    /* no error to report */
+    ps_flush_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Gets encoder buffer requirements
+*
+* @par Description:
+*  Gets the encoder buffer requirements. Based on the max width and max height
+*  configuration settings, this routine computes the sizes of the necessary
+*  input and output buffers and returns this info to the caller.
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level (not used)
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_getbufinfo_ip_t); supplies
+*  u4_max_wd, u4_max_ht and e_inp_color_fmt
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_getbufinfo_op_t)
+*
+* @returns IV_SUCCESS always
+*
+* @remarks For a colour format that matches none of the handled ones the
+*  input component count and the minimum input buffer sizes are reported as 0
+*  instead of being left unwritten
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_get_buf_info(iv_obj_t *ps_codec_obj,
+                                  void *pv_api_ip,
+                                  void *pv_api_op)
+{
+    UNUSED(ps_codec_obj);
+    /* ctrl call I/O structures */
+    ih264e_ctl_getbufinfo_ip_t *ps_ip = pv_api_ip;
+    ih264e_ctl_getbufinfo_op_t *ps_op = pv_api_op;
+
+    /* max dimensions after ALIGN16 */
+    WORD32 wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+    WORD32 ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+    WORD32 i;
+
+    ps_op->s_ive_op.u4_error_code = 0;
+
+    /* Number of components in input buffers required for codec &
+     * Minimum sizes of each component in input buffer required */
+    if (ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_420P)
+    {
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_420_COMP;
+
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] = (wd >> 1) * (ht >> 1);
+        ps_op->s_ive_op.au4_min_in_buf_size[2] = (wd >> 1) * (ht >> 1);
+    }
+    else if (ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_422ILE)
+    {
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_422ILE_COMP;
+
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 2;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] =
+                        ps_op->s_ive_op.au4_min_in_buf_size[2] = 0;
+    }
+    else if (ps_ip->s_ive_ip.e_inp_color_fmt == IV_RGB_565)
+    {
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_RGB565_COMP;
+
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 2;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] =
+                        ps_op->s_ive_op.au4_min_in_buf_size[2] = 0;
+    }
+    else if (ps_ip->s_ive_ip.e_inp_color_fmt == IV_RGBA_8888)
+    {
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_RGBA8888_COMP;
+
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 4;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] =
+                        ps_op->s_ive_op.au4_min_in_buf_size[2] = 0;
+    }
+    else if ((ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_420SP_UV)
+                    || (ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_420SP_VU))
+    {
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_420SP_COMP;
+
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] = wd * (ht >> 1);
+        ps_op->s_ive_op.au4_min_in_buf_size[2] = 0;
+    }
+    else
+    {
+        /* unrecognised colour format: report nothing rather than leave the
+         * fields unwritten for the caller to read.
+         * NOTE(review): presumably such formats are rejected before this
+         * routine is reached - confirm against the API sanity checks */
+        ps_op->s_ive_op.u4_inp_comp_cnt = 0;
+
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = 0;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] = 0;
+        ps_op->s_ive_op.au4_min_in_buf_size[2] = 0;
+    }
+
+    /* Number of components in output buffers required for codec &
+     * Minimum sizes of each component in output buffer required */
+    ps_op->s_ive_op.u4_out_comp_cnt = MIN_BITS_BUFS_COMP;
+
+    for (i = 0; i < (WORD32) ps_op->s_ive_op.u4_out_comp_cnt; i++)
+    {
+        /* 1.5 bytes per aligned pixel */
+        ps_op->s_ive_op.au4_min_out_buf_size[i] = (wd * ht * 3) >> 1;
+    }
+
+    ps_op->s_ive_op.u4_min_inp_bufs = MIN_INP_BUFS;
+    ps_op->s_ive_op.u4_min_out_bufs = MIN_OUT_BUFS;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets the picture dimensions
+*
+* @par Description:
+*  Stores width and height (both passed through ALIGN16), the stride, the
+*  width and height in units of 16 pixels, and the display width and height
+*  (the values exactly as supplied) in the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_dimensions(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* ctrl call I/O structures */
+    ih264e_ctl_set_dimensions_ip_t *ps_ip = pv_api_ip;
+    ih264e_ctl_set_dimensions_op_t *ps_op = pv_api_op;
+
+    ps_op->s_ive_op.u4_error_code = 0;
+
+    ps_cfg->u4_wd = ALIGN16(ps_ip->s_ive_ip.u4_wd);
+    ps_cfg->u4_ht = ALIGN16(ps_ip->s_ive_ip.u4_ht);
+    ps_cfg->u4_strd = ps_ip->s_ive_ip.u4_strd;
+    ps_cfg->i4_wd_mbs = ps_cfg->u4_wd >> 4;
+    ps_cfg->i4_ht_mbs = ps_cfg->u4_ht >> 4;
+    ps_cfg->u4_disp_wd = ps_ip->s_ive_ip.u4_wd;
+    ps_cfg->u4_disp_ht = ps_ip->s_ive_ip.u4_ht;
+
+    ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets source and target frame rates
+*
+* @par Description:
+*  Copies the source and target frame rates and the timestamp of the request
+*  into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_frame_rate(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* ctrl call I/O structures */
+    ih264e_ctl_set_frame_rate_ip_t *ps_ip = pv_api_ip;
+    ih264e_ctl_set_frame_rate_op_t *ps_op = pv_api_op;
+
+    ps_op->s_ive_op.u4_error_code = 0;
+
+    ps_cfg->u4_src_frame_rate = ps_ip->s_ive_ip.u4_src_frame_rate;
+    ps_cfg->u4_tgt_frame_rate = ps_ip->s_ive_ip.u4_tgt_frame_rate;
+
+    ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets target bit rate
+*
+* @par Description:
+*  Copies the target bit rate and the timestamp of the request into the
+*  config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_bit_rate(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* ctrl call I/O structures */
+    ih264e_ctl_set_bitrate_ip_t *ps_ip = pv_api_ip;
+    ih264e_ctl_set_bitrate_op_t *ps_op = pv_api_op;
+
+    ps_op->s_ive_op.u4_error_code = 0;
+
+    ps_cfg->u4_target_bitrate = ps_ip->s_ive_ip.u4_target_bitrate;
+
+    ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets frame type
+*
+* @par Description:
+*  Copies the requested frame type and the timestamp of the request into the
+*  config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks not a sticky tag
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_frame_type(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* ctrl call I/O structures */
+    ih264e_ctl_set_frame_type_ip_t *ps_ip = pv_api_ip;
+    ih264e_ctl_set_frame_type_op_t *ps_op = pv_api_op;
+
+    ps_op->s_ive_op.u4_error_code = 0;
+
+    ps_cfg->e_frame_type = ps_ip->s_ive_ip.e_frame_type;
+
+    ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets quantization params
+*
+* @par Description:
+*  Sets the max, min and default qp for I frame, P frame and B frame
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks The values are copied as supplied; no range check is made here
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_qp(void *pv_api_ip,
+                                 void *pv_api_op,
+                                 cfg_params_t *ps_cfg)
+{
+    /* ctrl call I/O structures */
+    ih264e_ctl_set_qp_ip_t *ps_set_qp_ip = pv_api_ip;
+    ih264e_ctl_set_qp_op_t *ps_set_qp_op = pv_api_op;
+
+    ps_set_qp_op->s_ive_op.u4_error_code = 0;
+
+    ps_cfg->u4_i_qp_max = ps_set_qp_ip->s_ive_ip.u4_i_qp_max;
+    ps_cfg->u4_i_qp_min = ps_set_qp_ip->s_ive_ip.u4_i_qp_min;
+    ps_cfg->u4_i_qp = ps_set_qp_ip->s_ive_ip.u4_i_qp;
+    ps_cfg->u4_p_qp_max = ps_set_qp_ip->s_ive_ip.u4_p_qp_max;
+    ps_cfg->u4_p_qp_min = ps_set_qp_ip->s_ive_ip.u4_p_qp_min;
+    ps_cfg->u4_p_qp = ps_set_qp_ip->s_ive_ip.u4_p_qp;
+    ps_cfg->u4_b_qp_max = ps_set_qp_ip->s_ive_ip.u4_b_qp_max;
+    ps_cfg->u4_b_qp_min = ps_set_qp_ip->s_ive_ip.u4_b_qp_min;
+    ps_cfg->u4_b_qp = ps_set_qp_ip->s_ive_ip.u4_b_qp;
+
+    ps_cfg->u4_timestamp_high = ps_set_qp_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_set_qp_ip->s_ive_ip.u4_timestamp_low;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets encoding mode
+*
+* @par Description:
+*  Copies the encoding mode and the timestamp of the request into the config
+*  structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_enc_mode(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* ctrl call I/O structures */
+    ih264e_ctl_set_enc_mode_ip_t *ps_ip = pv_api_ip;
+    ih264e_ctl_set_enc_mode_op_t *ps_op = pv_api_op;
+
+    ps_op->s_ive_op.u4_error_code = 0;
+
+    ps_cfg->e_enc_mode = ps_ip->s_ive_ip.e_enc_mode;
+
+    ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets vbv parameters
+*
+* @par Description:
+*
Sets vbv parameters
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_vbv_params_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_vbv_params_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated; receives the vbv buffer size,
+*  the vbv buffer delay and the timestamp of the request
+*
+* @returns IV_SUCCESS always
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_vbv_params(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_vbv_params_ip_t *ps_vbv_ip = pv_api_ip;
+    ih264e_ctl_set_vbv_params_op_t *ps_vbv_op = pv_api_op;
+
+    /* moment from which the new values apply */
+    ps_cfg->u4_timestamp_low = ps_vbv_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_vbv_ip->s_ive_ip.u4_timestamp_high;
+
+    /* vbv settings */
+    ps_cfg->u4_vbv_buffer_delay = ps_vbv_ip->s_ive_ip.u4_vbv_buffer_delay;
+    ps_cfg->u4_vbv_buf_size = ps_vbv_ip->s_ive_ip.u4_vbv_buf_size;
+
+    /* no error to report */
+    ps_vbv_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets AIR parameters
+*
+* @par Description:
+*  Copies the AIR mode, the AIR refresh period and the timestamp of the
+*  request into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_air_params_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_air_params_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_air_params(void *pv_api_ip,
+                                        void *pv_api_op,
+                                        cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_air_params_ip_t *ps_air_ip = pv_api_ip;
+    ih264e_ctl_set_air_params_op_t *ps_air_op = pv_api_op;
+
+    /* moment from which the new values apply */
+    ps_cfg->u4_timestamp_low = ps_air_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_air_ip->s_ive_ip.u4_timestamp_high;
+
+    /* AIR settings */
+    ps_cfg->u4_air_refresh_period = ps_air_ip->s_ive_ip.u4_air_refresh_period;
+    ps_cfg->e_air_mode = ps_air_ip->s_ive_ip.e_air_mode;
+
+    /* no error to report */
+    ps_air_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets motion estimation parameters
+*
+* @par Description:
+*  Copies the half pel, quarter pel, fast sad and alt ref enables, the search
+*  range in x and y, the me speed preset and the timestamp of the request
+*  into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_me_params_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_me_params_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_me_params(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_me_params_ip_t *ps_me_ip = pv_api_ip;
+    ih264e_ctl_set_me_params_op_t *ps_me_op = pv_api_op;
+
+    /* moment from which the new values apply */
+    ps_cfg->u4_timestamp_low = ps_me_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_me_ip->s_ive_ip.u4_timestamp_high;
+
+    /* search window and speed preset */
+    ps_cfg->u4_srch_rng_x = ps_me_ip->s_ive_ip.u4_srch_rng_x;
+    ps_cfg->u4_srch_rng_y = ps_me_ip->s_ive_ip.u4_srch_rng_y;
+    ps_cfg->u4_me_speed_preset = ps_me_ip->s_ive_ip.u4_me_speed_preset;
+
+    /* feature enables */
+    ps_cfg->u4_enable_hpel = ps_me_ip->s_ive_ip.u4_enable_hpel;
+    ps_cfg->u4_enable_qpel = ps_me_ip->s_ive_ip.u4_enable_qpel;
+    ps_cfg->u4_enable_fast_sad = ps_me_ip->s_ive_ip.u4_enable_fast_sad;
+    ps_cfg->u4_enable_alt_ref = ps_me_ip->s_ive_ip.u4_enable_alt_ref;
+
+    /* no error to report */
+    ps_me_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets Intra/Inter Prediction estimation parameters
+*
+* @par Description:
+*  Sets Intra/Inter Prediction estimation parameters
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_ipe_params(void *pv_api_ip,
+                                        void *pv_api_op,
+                                        cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_ipe_params_ip_t *ps_ipe_ip = pv_api_ip;
+    ih264e_ctl_set_ipe_params_op_t *ps_ipe_op = pv_api_op;
+
+    /* moment from which the new values apply */
+    ps_cfg->u4_timestamp_low = ps_ipe_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_ipe_ip->s_ive_ip.u4_timestamp_high;
+
+    /* encoder speed preset and intra 4x4 enable */
+    ps_cfg->u4_enc_speed_preset = ps_ipe_ip->s_ive_ip.u4_enc_speed_preset;
+    ps_cfg->u4_enable_intra_4x4 = ps_ipe_ip->s_ive_ip.u4_enable_intra_4x4;
+
+    /* no error to report */
+    ps_ipe_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets GOP parameters
+*
+* @par Description:
+*  Copies the I frame interval, the IDR frame interval, the number of B
+*  frames and the timestamp of the request into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_gop_params_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_gop_params_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_gop_params(void *pv_api_ip,
+                                        void *pv_api_op,
+                                        cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_gop_params_ip_t *ps_gop_ip = pv_api_ip;
+    ih264e_ctl_set_gop_params_op_t *ps_gop_op = pv_api_op;
+
+    /* moment from which the new values apply */
+    ps_cfg->u4_timestamp_low = ps_gop_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_gop_ip->s_ive_ip.u4_timestamp_high;
+
+    /* gop structure */
+    ps_cfg->u4_num_b_frames = ps_gop_ip->s_ive_ip.u4_num_b_frames;
+    ps_cfg->u4_idr_frm_interval = ps_gop_ip->s_ive_ip.u4_idr_frm_interval;
+    ps_cfg->u4_i_frm_interval = ps_gop_ip->s_ive_ip.u4_i_frm_interval;
+
+    /* no error to report */
+    ps_gop_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets profile parameters
+*
+* @par Description:
+*  Sets profile parameters
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*
Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated; receives the profile and the
+*  timestamp of the request
+*
+* @returns IV_SUCCESS always
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_profile_params(void *pv_api_ip,
+                                            void *pv_api_op,
+                                            cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_profile_params_ip_t *ps_profile_ip = pv_api_ip;
+    ih264e_ctl_set_profile_params_op_t *ps_profile_op = pv_api_op;
+
+    /* moment from which the new value applies */
+    ps_cfg->u4_timestamp_low = ps_profile_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_profile_ip->s_ive_ip.u4_timestamp_high;
+
+    /* requested profile */
+    ps_cfg->e_profile = ps_profile_ip->s_ive_ip.e_profile;
+
+    /* no error to report */
+    ps_profile_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets disable deblock level
+*
+* @par Description:
+*  Sets disable deblock level. Level 0 means no disabling and level 4 means
+*  disable completely. 1, 2, 3 are intermediate levels that control amount
+*  of deblocking done.
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_deblock_params_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_deblock_params_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated; receives the disable deblock
+*  level and the timestamp of the request
+*
+* @returns IV_SUCCESS always
+*
+* @remarks The level is copied as supplied; no range check is made here
+*
+*******************************************************************************
+*/
+static WORD32 ih264_set_deblock_params(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_deblock_params_ip_t *ps_deblk_ip = pv_api_ip;
+    ih264e_ctl_set_deblock_params_op_t *ps_deblk_op = pv_api_op;
+
+    /* moment from which the new value applies */
+    ps_cfg->u4_timestamp_low = ps_deblk_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_deblk_ip->s_ive_ip.u4_timestamp_high;
+
+    /* requested disable level */
+    ps_cfg->u4_disable_deblock_level =
+                    ps_deblk_ip->s_ive_ip.u4_disable_deblock_level;
+
+    /* no error to report */
+    ps_deblk_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets number of cores
+*
+* @par Description:
+*  Stores MIN(requested number of cores, MAX_PROCESS_THREADS) and the
+*  timestamp of the request in the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_num_cores_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_num_cores_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always
+*
+* @remarks The number of encoder threads is limited to MAX_PROCESS_THREADS
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_set_num_cores(void *pv_api_ip,
+                                   void *pv_api_op,
+                                   cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_num_cores_ip_t *ps_cores_ip = pv_api_ip;
+    ih264e_ctl_set_num_cores_op_t *ps_cores_op = pv_api_op;
+
+    /* moment from which the new value applies */
+    ps_cfg->u4_timestamp_low = ps_cores_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_cores_ip->s_ive_ip.u4_timestamp_high;
+
+    /* never more cores than there are process threads */
+    ps_cfg->u4_num_cores = MIN(ps_cores_ip->s_ive_ip.u4_num_cores, MAX_PROCESS_THREADS);
+
+    /* no error to report */
+    ps_cores_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Resets encoder state
+*
+* @par Description:
+*  Resets encoder state by calling ih264e_init()
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (not used)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_reset_op_t)
+*
+* @returns IV_SUCCESS after ih264e_init() has been called; IV_FAIL with
+*  u4_error_code set to IH264E_INIT_NOT_DONE when the object carries no codec
+*  handle
+*
+* @remarks The value returned by ih264e_init() is not examined
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_reset(iv_obj_t *ps_codec_obj,
+                           void *pv_api_ip,
+                           void *pv_api_op)
+{
+    UNUSED(pv_api_ip);
+    /* codec ctxt */
+    codec_t * ps_codec = (codec_t *) (ps_codec_obj->pv_codec_handle);
+
+    /* ctrl call I/O structures */
+    ih264e_ctl_reset_op_t *ps_op = pv_api_op;
+
+    ps_op->s_ive_op.u4_error_code = 0;
+
+    /* without a codec context there is nothing to reset; report the failure
+     * through the return value as well, the way the other entry points do
+     * for IH264E_INIT_NOT_DONE */
+    if (NULL == ps_codec)
+    {
+        ps_op->s_ive_op.u4_error_code = IH264E_INIT_NOT_DONE;
+        return IV_FAIL;
+    }
+
+    ih264e_init(ps_codec);
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Codec control call
+*
+* @par Description:
+*  Codec control call which in turn calls appropriate calls based on sub-command
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_ctl(iv_obj_t *ps_codec_obj,
+                         void *pv_api_ip,
+                         void *pv_api_op)
+{
+    /* codec ctxt */
+    codec_t *ps_codec = (codec_t *) ps_codec_obj->pv_codec_handle;
+
+    /* ctrl call I/O structures */
+    ih264e_ctl_setdefault_ip_t *ps_ctl_ip = pv_api_ip;
+    ih264e_ctl_setdefault_op_t *ps_ctl_op = pv_api_op;
+
+    /* ctrl call sub cmd */
+    IVE_CONTROL_API_COMMAND_TYPE_T sub_cmd = ps_ctl_ip->s_ive_ip.e_sub_cmd;
+
+    /*
error status */ + IV_STATUS_T ret = 0; + + /* temp var */ + WORD32 i; + cfg_params_t *ps_cfg = NULL; + + /* control call is for configuring encoding params, this is not to be called + * before a successful init call */ + if (ps_codec->i4_init_done != 1) + { + ps_ctl_op->s_ive_op.u4_error_code |= 1 << IVE_FATALERROR; + ps_ctl_op->s_ive_op.u4_error_code |= IH264E_INIT_NOT_DONE; + return IV_FAIL; + } + + /* make it thread safe */ + ithread_mutex_lock(ps_codec->pv_ctl_mutex); + + /* find a free config param set to hold current parameters */ + for (i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++) + { + if (0 == ps_codec->as_cfg[i].u4_is_valid) + { + ps_cfg = &ps_codec->as_cfg[i]; + break; + } + } + + /* If all are invalid, then start overwriting from the head config params */ + if (NULL == ps_cfg) + { + ps_cfg = &ps_codec->as_cfg[0]; + } + + ps_cfg->u4_is_valid = 1; + + ps_cfg->e_cmd = sub_cmd; + + switch (sub_cmd) + { + case IVE_CMD_CTL_SET_DIMENSIONS: + ret = ih264e_set_dimensions(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_SET_FRAMERATE: + ret = ih264e_set_frame_rate(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_SET_BITRATE: + ret = ih264e_set_bit_rate(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_SET_FRAMETYPE: + ret = ih264e_set_frame_type(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_SET_QP: + ret = ih264e_set_qp(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_SET_ENC_MODE: + ret = ih264e_set_enc_mode(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_SET_VBV_PARAMS: + ret = ih264e_set_vbv_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_SET_AIR_PARAMS: + ret = ih264_set_air_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_SET_ME_PARAMS: + ret = ih264_set_me_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_SET_IPE_PARAMS: + ret = ih264_set_ipe_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_SET_GOP_PARAMS: + ret 
= ih264_set_gop_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_SET_PROFILE_PARAMS: + ret = ih264_set_profile_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_SET_DEBLOCK_PARAMS: + ret = ih264_set_deblock_params(pv_api_ip, pv_api_op, ps_cfg); + break; + + case IVE_CMD_CTL_RESET: + + /* invalidate config param struct as it is being served right away */ + ps_codec->as_cfg[i].u4_is_valid = 0; + + ret = ih264e_reset(ps_codec_obj, pv_api_ip, pv_api_op); + break; + + case IVE_CMD_CTL_SETDEFAULT: + { + /* ctrl call I/O structures */ + ih264e_ctl_setdefault_op_t *ps_op = pv_api_op; + + /* invalidate config param struct as it is being served right away */ + ps_codec->as_cfg[i].u4_is_valid = 0; + + /* error status */ + ret = ih264e_set_default_params(ps_cfg); + + ps_op->s_ive_op.u4_error_code = ret; + + break; + } + + case IVE_CMD_CTL_FLUSH: + + /* invalidate config param struct as it is being served right away */ + ps_codec->as_cfg[i].u4_is_valid = 0; + + ret = ih264e_set_flush_mode(ps_codec_obj, pv_api_ip, pv_api_op); + break; + + case IVE_CMD_CTL_GETBUFINFO: + + /* invalidate config param struct as it is being served right away */ + ps_codec->as_cfg[i].u4_is_valid = 0; + + ret = ih264e_get_buf_info(ps_codec_obj, pv_api_ip, pv_api_op); + break; + + case IVE_CMD_CTL_GETVERSION: + { + /* ctrl call I/O structures */ + ih264e_ctl_getversioninfo_ip_t *ps_ip = pv_api_ip; + ih264e_ctl_getversioninfo_op_t *ps_op = pv_api_op; + + /* invalidate config param struct as it is being served right away */ + ps_codec->as_cfg[i].u4_is_valid = 0; + + /* error status */ + ps_op->s_ive_op.u4_error_code = IV_SUCCESS; + + if (ps_ip->s_ive_ip.u4_version_bufsize <= 0) + { + ps_op->s_ive_op.u4_error_code = + IH264E_CXA_VERS_BUF_INSUFFICIENT; + ret = IV_FAIL; + } + else + { + ret = ih264e_get_version((CHAR *) ps_ip->s_ive_ip.pu1_version, + ps_ip->s_ive_ip.u4_version_bufsize); + + if (ret != IV_SUCCESS) + { + ps_op->s_ive_op.u4_error_code = + 
IH264E_CXA_VERS_BUF_INSUFFICIENT; + ret = IV_FAIL; + } + } + break; + } + + case IVE_CMD_CTL_SET_NUM_CORES: + ret = ih264e_set_num_cores(pv_api_ip, pv_api_op, ps_cfg); + break; + + default: + /* invalidate config param struct as it is being served right away */ + ps_codec->as_cfg[i].u4_is_valid = 0; + + DEBUG("Warning !! unrecognized control api command \n"); + break; + } + + ithread_mutex_unlock(ps_codec->pv_ctl_mutex); + + return ret; +} + +/** +******************************************************************************* +* +* @brief +* Codec entry point function. All the function calls to the codec are done +* using this function with different values specified in command +* +* @par Description: +* Arguments are tested for validity and then based on the command +* appropriate function is called +* +* @param[in] ps_handle +* API level handle for codec +* +* @param[in] pv_api_ip +* Input argument structure +* +* @param[out] pv_api_op +* Output argument structure +* +* @returns error_status +* +* @remarks +* +******************************************************************************* +*/ +IV_STATUS_T ih264e_api_function(iv_obj_t *ps_handle, + void *pv_api_ip, + void *pv_api_op) +{ + /* api command */ + WORD32 command = IV_CMD_NA; + + /* error status */ + IV_STATUS_T e_status; + WORD32 ret; + + /* tmp var */ + WORD32 *pu4_ptr_cmd = (WORD32 *) pv_api_ip; + + /* validate input / output structures */ + e_status = api_check_struct_sanity(ps_handle, pv_api_ip, pv_api_op); + + if (e_status != IV_SUCCESS) + { + DEBUG("error code = %d\n", *((UWORD32 *)pv_api_op + 1)); + return IV_FAIL; + } + + pu4_ptr_cmd++; + + command = *pu4_ptr_cmd; + + switch (command) + { + case IV_CMD_GET_NUM_MEM_REC: + ret = ih264e_get_num_rec(pv_api_ip, pv_api_op); + break; + + case IV_CMD_FILL_NUM_MEM_REC: + ret = ih264e_fill_num_mem_rec(pv_api_ip, pv_api_op); + break; + + case IV_CMD_INIT: + ret = ih264e_init_mem_rec(ps_handle, pv_api_ip, pv_api_op); + break; + + case 
IV_CMD_RETRIEVE_MEMREC: + ret = ih264e_retrieve_memrec(ps_handle, pv_api_ip, pv_api_op); + break; + + case IVE_CMD_VIDEO_CTL: + ret = ih264e_ctl(ps_handle, pv_api_ip, pv_api_op); + break; + + case IVE_CMD_VIDEO_ENCODE: + ret = ih264e_encode(ps_handle, pv_api_ip, pv_api_op); + break; + + default: + ret = IV_FAIL; + break; + } + + return (IV_STATUS_T) ret; +} diff --git a/encoder/ih264e_bitstream.c b/encoder/ih264e_bitstream.c new file mode 100755 index 0000000..e5bfbe4 --- /dev/null +++ b/encoder/ih264e_bitstream.c @@ -0,0 +1,472 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file +* ih264e_bitstream.c +* +* @brief +* This file contains function definitions related to bitstream generation +* +* @author +* ittiam +* +* @par List of Functions: +* - ih264e_bitstrm_init() +* - ih264e_put_bits() +* - ih264e_put_bit() +* - ih264e_put_rbsp_trailing_bits() +* - ih264e_put_uev() +* - ih264e_put_sev() +* - ih264e_put_nal_start_code_prefix() +* +****************************************************************************** +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include <stdarg.h> +#include <math.h> + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "ih264_platform_macros.h" +#include "ih264_debug.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ih264_defs.h" +#include "ih264_macros.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief Initializes the encoder bitstream engine +* +* @par Description +* This routine needs to be called at start of slice/frame encode +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] p1_bitstrm_buf +* bitstream buffer pointer where the encoded stream is generated in byte order +* +* @param[in] u4_max_bitstrm_size +* indicates maximum bitstream buffer size. (in bytes) +* If actual stream size exceeds the maximum size, encoder should +* 1. Not corrupt data beyond u4_max_bitstrm_size bytes +* 2. 
Report an error back to application indicating overflow +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_bitstrm_init(bitstrm_t *ps_bitstrm, + UWORD8 *pu1_bitstrm_buf, + UWORD32 u4_max_bitstrm_size) +{ + ps_bitstrm->pu1_strm_buffer = pu1_bitstrm_buf; + ps_bitstrm->u4_max_strm_size = u4_max_bitstrm_size; + + /* Default init values for other members of bitstream context */ + ps_bitstrm->u4_strm_buf_offset = 0; + ps_bitstrm->u4_cur_word = 0; + ps_bitstrm->i4_bits_left_in_cw = WORD_SIZE; + ps_bitstrm->i4_zero_bytes_run = 0; + + return(IH264E_SUCCESS); +} + +/** +****************************************************************************** +* +* @brief puts a code with specified number of bits into the bitstream +* +* @par Description +* inserts code_len number of bits from lsb of code_val into the +* bitstream. updates context members like u4_cur_word, u4_strm_buf_offset and +* i4_bits_left_in_cw. If the total words (u4_strm_buf_offset) exceeds max +* available size (u4_max_strm_size), returns error without corrupting data +* beyond it +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] u4_code_val +* code value that needs to be inserted in the stream. +* +* @param[in] code_len +* indicates code length (in bits) of code_val that would be inserted in +* bitstream buffer size. 
Range of length[1:WORD_SIZE] +* +* @remarks Assumptions: all bits from bit position code_len to msb of +* code_val shall be zero +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_put_bits(bitstrm_t *ps_bitstrm, + UWORD32 u4_code_val, + WORD32 code_len) +{ + UWORD32 u4_cur_word = ps_bitstrm->u4_cur_word; + WORD32 bits_left_in_cw = ps_bitstrm->i4_bits_left_in_cw; + + + /* check assumptions made in the module */ + ASSERT(code_len > 0 && code_len <= WORD_SIZE); + + if(code_len < WORD_SIZE) + ASSERT((u4_code_val >> code_len) == 0); + + + /* sanity check on the bitstream engine state */ + ASSERT(bits_left_in_cw > 0 && bits_left_in_cw <= WORD_SIZE); + + ASSERT(ps_bitstrm->i4_zero_bytes_run <= EPB_ZERO_BYTES); + + ASSERT(ps_bitstrm->pu1_strm_buffer != NULL); + + + if(bits_left_in_cw > code_len) + { + /*******************************************************************/ + /* insert the code in local bitstream word and return */ + /* code is inserted in position of bits left (post decrement) */ + /*******************************************************************/ + bits_left_in_cw -= code_len; + u4_cur_word |= (u4_code_val << bits_left_in_cw); + + ps_bitstrm->u4_cur_word = u4_cur_word; + ps_bitstrm->i4_bits_left_in_cw = bits_left_in_cw; + + return(IH264E_SUCCESS); + } + else + { + /********************************************************************/ + /* 1. insert partial code corresponding to bits left in cur word */ + /* 2. flush all the bits of cur word to bitstream */ + /* 3. insert emulation prevention bytes while flushing the bits */ + /* 4. insert remaining bits of code starting from msb of cur word */ + /* 5. 
update bitsleft in current word and stream buffer offset */ + /********************************************************************/ + UWORD32 u4_strm_buf_offset = ps_bitstrm->u4_strm_buf_offset; + + UWORD32 u4_max_strm_size = ps_bitstrm->u4_max_strm_size; + + WORD32 zero_run = ps_bitstrm->i4_zero_bytes_run; + + UWORD8* pu1_strm_buf = ps_bitstrm->pu1_strm_buffer; + + WORD32 i, rem_bits = (code_len - bits_left_in_cw); + + + /*********************************************************************/ + /* Bitstream overflow check */ + /* NOTE: corner case of epb bytes (max 2 for 32bit word) not handled */ + /*********************************************************************/ + if((u4_strm_buf_offset + (WORD_SIZE>>3)) >= u4_max_strm_size) + { + /* return without corrupting the buffer beyond its size */ + return(IH264E_BITSTREAM_BUFFER_OVERFLOW); + } + + /* insert parital code corresponding to bits left in cur word */ + u4_cur_word |= u4_code_val >> rem_bits; + + for(i = WORD_SIZE; i > 0; i -= 8) + { + /* flush the bits in cur word byte by byte and copy to stream */ + UWORD8 u1_next_byte = (u4_cur_word >> (i-8)) & 0xFF; + + PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_next_byte, zero_run); + } + + /* insert the remaining bits from code val into current word */ + u4_cur_word = rem_bits ? (u4_code_val << (WORD_SIZE - rem_bits)) : 0; + + /* update the state variables and return success */ + ps_bitstrm->u4_cur_word = u4_cur_word; + ps_bitstrm->i4_bits_left_in_cw = WORD_SIZE - rem_bits; + ps_bitstrm->i4_zero_bytes_run = zero_run; + ps_bitstrm->u4_strm_buf_offset = u4_strm_buf_offset; + return (IH264E_SUCCESS); + } +} + +/** +****************************************************************************** +* +* @brief inserts a 1-bit code into the bitstream +* +* @par Description +* inserts 1bit lsb of code_val into the bitstream +* updates context members like u4_cur_word, u4_strm_buf_offset and +* i4_bits_left_in_cw. 
If the total words (u4_strm_buf_offset) exceeds max +* available size (u4_max_strm_size), returns error without corrupting data +* beyond it +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] u4_code_val +* code value that needs to be inserted in the stream. +* +* @remarks Assumptions: all bits from bit position 1 to msb of code_val +* shall be zero +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_put_bit(bitstrm_t *ps_bitstrm, UWORD32 u4_code_val) +{ + /* call the put bits function for 1 bit and return */ + return(ih264e_put_bits(ps_bitstrm, u4_code_val, 1)); +} + +/** +****************************************************************************** +* +* @brief inserts rbsp trailing bits at the end of stream buffer (NAL) +* +* @par Description +* inserts rbsp trailing bits, updates context members like u4_cur_word and +* i4_bits_left_in_cw and flushes the same in the bitstream buffer. 
If the +* total words (u4_strm_buf_offset) exceeds max available size +* (u4_max_strm_size), returns error without corrupting data beyond it +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_put_rbsp_trailing_bits(bitstrm_t *ps_bitstrm) +{ + WORD32 i; + UWORD32 u4_cur_word = ps_bitstrm->u4_cur_word; + WORD32 bits_left_in_cw = ps_bitstrm->i4_bits_left_in_cw; + WORD32 bytes_left_in_cw = (bits_left_in_cw - 1) >> 3; + + UWORD32 u4_strm_buf_offset = ps_bitstrm->u4_strm_buf_offset; + UWORD32 u4_max_strm_size = ps_bitstrm->u4_max_strm_size; + WORD32 zero_run = ps_bitstrm->i4_zero_bytes_run; + UWORD8* pu1_strm_buf = ps_bitstrm->pu1_strm_buffer; + + /*********************************************************************/ + /* Bitstream overflow check */ + /* NOTE: corner case of epb bytes (max 2 for 32bit word) not handled */ + /*********************************************************************/ + if((u4_strm_buf_offset + (WORD_SIZE>>3) - bytes_left_in_cw) >= + u4_max_strm_size) + { + /* return without corrupting the buffer beyond its size */ + return(IH264E_BITSTREAM_BUFFER_OVERFLOW); + } + + /* insert a 1 at the end of current word and flush all the bits */ + u4_cur_word |= (1 << (bits_left_in_cw - 1)); + + /* get the bits to be inserted in msbdb of the word */ + //u4_cur_word <<= (WORD_SIZE - bytes_left_in_cw + 1); + + for(i = WORD_SIZE; i > (bytes_left_in_cw*8); i -= 8) + { + /* flush the bits in cur word byte by byte and copy to stream */ + UWORD8 u1_next_byte = (u4_cur_word >> (i-8)) & 0xFF; + + PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_next_byte, zero_run); + } + + /* update the stream offset */ + ps_bitstrm->u4_strm_buf_offset = u4_strm_buf_offset; + + /* Default init values for scratch variables of bitstream context */ + ps_bitstrm->u4_cur_word = 0; + ps_bitstrm->i4_bits_left_in_cw = 
WORD_SIZE; + ps_bitstrm->i4_zero_bytes_run = 0; + + return (IH264E_SUCCESS); +} + +/** +****************************************************************************** +* +* @brief puts exponential golomb code of a unsigned integer into bitstream +* +* @par Description +* computes uev code for given syntax element and inserts the same into +* bitstream by calling ih264e_put_bits() interface. +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] u4_code_num +* unsigned integer input whose golomb code is written in stream +* +* @remarks Assumptions: code value can be represented in less than 16bits +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_put_uev(bitstrm_t *ps_bitstrm, UWORD32 u4_code_num) +{ + UWORD32 u4_bit_str, u4_range; + IH264E_ERROR_T e_error; + + /* convert the codenum to exp-golomb bit code: Table 9-2 JCTVC-J1003_d7 */ + u4_bit_str = u4_code_num + 1; + + /* get range of the bit string and put using put_bits() */ + GETRANGE(u4_range, u4_bit_str); + + e_error = ih264e_put_bits(ps_bitstrm, u4_bit_str, (2 * u4_range - 1)); + + return(e_error); +} + +/** +****************************************************************************** +* +* @brief puts exponential golomb code of a signed integer into bitstream +* +* @par Description +* computes sev code for given syntax element and inserts the same into +* bitstream by calling ih264e_put_bits() interface. 
+* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] syntax_elem +* signed integer input whose golomb code is written in stream +* +* @remarks Assumptions: code value can be represented in less than 16bits +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_put_sev(bitstrm_t *ps_bitstrm, WORD32 syntax_elem) +{ + UWORD32 u4_code_num, u4_bit_str, u4_range; + IH264E_ERROR_T e_error; + + /************************************************************************/ + /* convert the codenum to exp-golomb bit code for signed syntax element */ + /* See Table9-2 and Table 9-3 of standard JCTVC-J1003_d7 */ + /************************************************************************/ + if(syntax_elem <= 0) + { + /* codeNum for non-positive integer = 2*abs(x) : Table9-3 */ + u4_code_num = ((-syntax_elem) << 1); + } + else + { + /* codeNum for positive integer = 2x-1 : Table9-3 */ + u4_code_num = (syntax_elem << 1) - 1; + } + + /* convert the codenum to exp-golomb bit code: Table 9-2 JCTVC-J1003_d7 */ + u4_bit_str = u4_code_num + 1; + + /* get range of the bit string and put using put_bits() */ + GETRANGE(u4_range, u4_bit_str); + + e_error = ih264e_put_bits(ps_bitstrm, u4_bit_str, (2 * u4_range - 1)); + + return(e_error); +} + +/** +****************************************************************************** +* +* @brief insert NAL start code prefix (0x000001) into bitstream with an option +* of inserting leading_zero_8bits (which makes startcode prefix as 0x00000001) +* +* @par Description +* Although start code prefix could have been put by calling ih264e_put_bits(), +* ih264e_put_nal_start_code_prefix() is specially added to make sure emulation +* prevention insertion is not done for the NAL start code prefix which will +* surely happen otherwise by calling ih264e_put_bits() interface. 
+* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] insert_leading_zero_8bits +* flag indicating if one more zero bytes needs to prefixed before start code +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_put_nal_start_code_prefix(bitstrm_t *ps_bitstrm, + WORD32 insert_leading_zero_8bits) +{ + UWORD32 u4_strm_buf_offset = ps_bitstrm->u4_strm_buf_offset; + UWORD8* pu1_strm_buf = ps_bitstrm->pu1_strm_buffer; + + /* Bitstream buffer overflow check assuming worst case of 4 bytes */ + if((u4_strm_buf_offset + 4) >= ps_bitstrm->u4_max_strm_size) + { + return(IH264E_BITSTREAM_BUFFER_OVERFLOW); + } + + /* Insert leading zero 8 bits conditionally */ + if(insert_leading_zero_8bits) + { + pu1_strm_buf[u4_strm_buf_offset] = 0x00; + u4_strm_buf_offset++; + } + + /* Insert NAL start code prefix 0x00 00 01 */ + pu1_strm_buf[u4_strm_buf_offset] = 0x00; + u4_strm_buf_offset++; + + pu1_strm_buf[u4_strm_buf_offset] = 0x00; + u4_strm_buf_offset++; + + pu1_strm_buf[u4_strm_buf_offset] = 0x01; + u4_strm_buf_offset++; + + /* update the stream offset */ + ps_bitstrm->u4_strm_buf_offset = u4_strm_buf_offset; + + return (IH264E_SUCCESS); +} + diff --git a/encoder/ih264e_bitstream.h b/encoder/ih264e_bitstream.h new file mode 100755 index 0000000..21360cc --- /dev/null +++ b/encoder/ih264e_bitstream.h @@ -0,0 +1,401 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_bitstream.h +* +* @brief +* This file contains encoder bitstream engine related structures and +* interface prototypes +* +* @author +* ittiam +* +* @remarks +* none +* +******************************************************************************* +*/ + +#ifndef IH264E_BITSTREAM_H_ +#define IH264E_BITSTREAM_H_ + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief defines the maximum number of bits in a bitstream word +****************************************************************************** + */ +#define WORD_SIZE 32 + +/** +****************************************************************************** + * @brief The number of consecutive zero bytes for emulation prevention check +****************************************************************************** + */ +#define EPB_ZERO_BYTES 2 + +/** +****************************************************************************** + * @brief Emulation prevention insertion byte +****************************************************************************** + */ +#define EPB_BYTE 0x03 + + 
+/*****************************************************************************/ +/* Function Macros */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief Macro to check if emulation prevention byte insertion is required +****************************************************************************** + */ +#define INSERT_EPB(zero_run, next_byte) \ + ((zero_run) == EPB_ZERO_BYTES) && (0 == ((next_byte) & 0xFC)) + +/** +****************************************************************************** + * @brief returns the bit position of a leading 1 (msb) in a code value +****************************************************************************** + */ +#if !MSVC +#define GETRANGE(r,value) \ +{ \ + r = 0; \ + if(0 == value) \ + r = 1; \ + else \ + { \ + r = 32-CLZ(value); \ + }\ +} +#else +#define GETRANGE(r,value) \ +{ \ + unsigned long msb_one_bit = 0; \ + r = _BitScanReverse(&msb_one_bit, value) ? (UWORD32)(msb_one_bit + 1) : 1 ; \ +} +#endif + +/** +****************************************************************************** + * @brief returns bits required to code a value +****************************************************************************** + */ +#define UE_LENGTH(bits,x) \ +{ \ + UWORD32 r_bit; \ + GETRANGE(r_bit,x+1) \ + bits =(((r_bit - 1) << 1)+1); \ +} \ + +/** +****************************************************************************** + * @brief Inserts 1 byte and Emulation Prevention Byte(if any) into bitstream + * Increments the stream offset and zero run correspondingly +****************************************************************************** + */ +#define PUTBYTE_EPB(ptr,off,byte,zero_run) \ +{ \ + if( INSERT_EPB(zero_run, byte) ) \ + { \ + ptr[off] = EPB_BYTE; \ + off++; \ + zero_run = 0; \ + } \ + \ + ptr[off] = byte; \ + off++; \ + zero_run = byte ? 
0 : zero_run+1; \ +} \ + +/** +****************************************************************************** + * @brief Ensures Byte alignment of the slice header +****************************************************************************** + */ +#define BYTE_ALIGNMENT(ps_bitstrm) ih264e_put_rbsp_trailing_bits(ps_bitstrm) + + +/*****************************************************************************/ +/* Structures */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief Bitstream context for encoder +****************************************************************************** + */ +typedef struct bitstrm +{ + /** points to start of stream buffer. */ + UWORD8 *pu1_strm_buffer; + + /** + * max bitstream size (in bytes). + * Encoded stream shall not exceed this size. + */ + UWORD32 u4_max_strm_size; + + /** + * byte offset (w.r.t pu1_strm_buffer) where next byte would be written + * Bitstream engine makes sure it would not corrupt data beyond + * u4_max_strm_size bytes + */ + UWORD32 u4_strm_buf_offset; + + /** + * current bitstream word; It is a scratch word containing max of + * WORD_SIZE bits. Will be copied to stream buffer when the word is + * full + */ + UWORD32 u4_cur_word; + + /** + * signifies number of bits available in u4_cur_word + * bits from msb to i4_bits_left_in_cw of u4_cur_word have already been + * inserted next bits would be inserted from pos [i4_bits_left_in_cw-1] + * Range of this variable [1 : WORD_SIZE] + */ + WORD32 i4_bits_left_in_cw; + + /** + * signifies the number of consecutive zero bytes propogated from previous + * word. 
It is used for emulation prevention byte insertion in the stream + */ + WORD32 i4_zero_bytes_run; + +} bitstrm_t; + + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief Initializes the encoder bitstream engine +* +* @par Description +* This routine needs to be called at start of slice/frame encode +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] p1_bitstrm_buf +* bitstream buffer pointer where the encoded stream is generated in byte order +* +* @param[in] u4_max_bitstrm_size +* indicates maximum bitstream buffer size. (in bytes) +* If actual stream size exceeds the maximum size, encoder should +* 1. Not corrupt data beyond u4_max_bitstrm_size bytes +* 2. Report an error back to application indicating overflow +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_bitstrm_init + ( + bitstrm_t *ps_bitstrm, + UWORD8 *pu1_bitstrm_buf, + UWORD32 u4_max_bitstrm_size + ); + +/** +****************************************************************************** +* +* @brief puts a code with specified number of bits into the bitstream +* +* @par Description +* inserts code_len number of bits from lsb of code_val into the +* bitstream. If the total bytes (u4_strm_buf_offset) exceeds max +* available size (u4_max_strm_size), returns error without corrupting data +* beyond it +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] u4_code_val +* code value that needs to be inserted in the stream. +* +* @param[in] code_len +* indicates code length (in bits) of code_val that would be inserted in +* bitstream buffer size. 
+* +* @remarks Assumptions: all bits from bit position code_len to msb of +* code_val shall be zero +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_put_bits + ( + bitstrm_t *ps_bitstrm, + UWORD32 u4_code_val, + WORD32 code_len + ); + +/** +****************************************************************************** +* +* @brief inserts a 1-bit code into the bitstream +* +* @par Description +* inserts 1bit lsb of code_val into the bitstream +* updates context members like u4_cur_word, u4_strm_buf_offset and +* i4_bits_left_in_cw. If the total words (u4_strm_buf_offset) exceeds max +* available size (u4_max_strm_size), returns error without corrupting data +* beyond it +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] u4_code_val +* code value that needs to be inserted in the stream. +* +* @remarks Assumptions: all bits from bit position 1 to msb of code_val +* shall be zero +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_put_bit + ( + bitstrm_t *ps_bitstrm, + UWORD32 u4_code_val + ); + +/** +****************************************************************************** +* +* @brief inserts rbsp trailing bits at the end of stream buffer (NAL) +* +* @par Description +* inserts rbsp trailing bits, updates context members like u4_cur_word and +* i4_bits_left_in_cw and flushes the same in the bitstream buffer. 
If the +* total words (u4_strm_buf_offset) exceeds max available size +* (u4_max_strm_size), returns error without corrupting data beyond it +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_put_rbsp_trailing_bits + ( + bitstrm_t *ps_bitstrm + ); + +/** +****************************************************************************** +* +* @brief puts exponential golomb code of a unsigned integer into bitstream +* +* @par Description +* computes uev code for given syntax element and inserts the same into +* bitstream by calling ih264e_put_bits() interface. +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] u4_code_num +* unsigned integer input whose golomb code is written in stream +* +* @remarks Assumptions: code value can be represented in less than 16bits +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_put_uev + ( + bitstrm_t *ps_bitstrm, + UWORD32 u4_code_num + ); + +/** +****************************************************************************** +* +* @brief puts exponential golomb code of a signed integer into bitstream +* +* @par Description +* computes sev code for given syntax element and inserts the same into +* bitstream by calling ih264e_put_bits() interface. 
+* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] syntax_elem +* signed integer input whose golomb code is written in stream +* +* @remarks Assumptions: code value can be represented in less than 16bits +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_put_sev + ( + bitstrm_t *ps_bitstrm, + WORD32 syntax_elem + ); + +/** +****************************************************************************** +* +* @brief insert NAL start code prefix (0x000001) into bitstream with an option +* of inserting leading_zero_8bits (which makes startcode prefix as 0x00000001) +* +* @par Description +* Although start code prefix could have been put by calling ih264e_put_bits(), +* ih264e_put_nal_start_code_prefix() is specially added to make sure emulation +* prevention insertion is not done for the NAL start code prefix which will +* surely happen otherwise by calling ih264e_put_bits() interface. +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] insert_leading_zero_8bits +* flag indicating if one more zero bytes needs to prefixed before start code +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_put_nal_start_code_prefix + ( + bitstrm_t *ps_bitstrm, + WORD32 insert_leading_zero_8bits + ); + +#endif /* IH264E_BITSTREAM_H_ */ diff --git a/encoder/ih264e_cavlc.c b/encoder/ih264e_cavlc.c new file mode 100755 index 0000000..1341dcd --- /dev/null +++ b/encoder/ih264e_cavlc.c @@ -0,0 +1,1448 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_cavlc.c +* +* @brief +* Contains all the routines to code syntax elements and residuals when entropy +* coding chosen is CAVLC +* +* @author +* ittiam +* +* @par List of Functions: +* - ih264e_compute_zeroruns_and_trailingones() +* - ih264e_write_coeff4x4_cavlc() +* - ih264e_write_coeff8x8_cavlc() +* - ih264e_encode_residue() +* - ih264e_write_islice_mb() +* - ih264e_write_pslice_mb() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <assert.h> +#include <limits.h> + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_debug.h" +#include "ih264_defs.h" +#include "ih264e_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include 
"ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_encode_header.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_cavlc.h" +#include "ih264e_statistics.h" +#include "ih264e_trace.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* This function computes run of zero, number of trailing ones and sign of +* trailing ones basing on the significant coeff map, residual block and +* total nnz. +* +* @param[in] pi2_res_block +* Pointer to residual block containing levels in scan order +* +* @param[in] u4_total_coeff +* Total non-zero coefficients in that sub block +* +* @param[in] pu1_zero_run +* Pointer to array to store run of zeros +* +* @param[in] u4_sig_coeff_map +* significant coefficient map +* +* @returns u4_totzero_sign_trailone +* Bits 0-8 contains number of trailing ones. +* Bits 8-16 contains bitwise sign information of trailing one +* Bits 16-24 contains total number of zeros. 
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+static UWORD32 ih264e_compute_zeroruns_and_trailingones(WORD16 *pi2_res_block,
+                                                        UWORD32 u4_total_coeff,
+                                                        UWORD8 *pu1_zero_run,
+                                                        UWORD32 u4_sig_coeff_map)
+{
+    /*
+     * Walks the significance map from bit 0 upwards. For every set bit the
+     * number of zeros seen since the previous set bit is stored in
+     * pu1_zero_run[k], k being the index of that non-zero coefficient.
+     * Trailing ones (levels equal to +1/-1) are only looked for among the
+     * last three non-zero coefficients.
+     *
+     * Packed return value:
+     *   bits  0..7  : number of trailing ones
+     *   bits  8..15 : sign bits of the trailing ones (bit set == level is -1)
+     *   bits 16..23 : total number of zeros preceding the last coefficient
+     *
+     * Precondition: u4_sig_coeff_map has at least u4_total_coeff bits set and
+     * pu1_zero_run has room for 16 entries; both loops below only terminate
+     * once u4_total_coeff set bits have been consumed.
+     */
+
+    /* scan position examined so far */
+    UWORD32 i;
+    /* number of non-zero coefficients located so far */
+    UWORD32 u4_nnz_coeff = 0;
+    /* zeros seen since the previous non-zero coefficient, biased by -1 */
+    WORD32 i4_run = -1;
+    UWORD32 u4_sign = 0;
+    UWORD32 u4_tot_zero;
+    UWORD32 u4_trailing1 = 0;
+    UWORD32 u4_totzero_sign_trailone;
+
+    /*
+     * Clear the 16 run entries one byte at a time. Writing them through a
+     * UWORD32 pointer would access a UWORD8 buffer with an incompatible type
+     * and would require 4-byte alignment that the caller does not guarantee.
+     */
+    for (i = 0; i < 16; i++)
+    {
+        pu1_zero_run[i] = 0;
+    }
+    i = 0;
+
+    /* Compute runs of zeros for all nnz coefficients except the last 3 */
+    if (u4_total_coeff > 3)
+    {
+        while (u4_nnz_coeff < (u4_total_coeff - 3))
+        {
+            i4_run++;
+
+            if (u4_sig_coeff_map & 0x1)
+            {
+                pu1_zero_run[u4_nnz_coeff++] = (UWORD8)i4_run;
+                i4_run = -1;
+            }
+            u4_sig_coeff_map >>= 1;
+            i++;
+        }
+    }
+
+    /* Compute T1's, sign of T1's and runs of zeros for the last 3 */
+    while (u4_nnz_coeff != u4_total_coeff)
+    {
+        i4_run++;
+
+        if (u4_sig_coeff_map & 0x1)
+        {
+            WORD32 i4_level = pi2_res_block[u4_nnz_coeff];
+
+            pu1_zero_run[u4_nnz_coeff] = (UWORD8)i4_run;
+
+            if (i4_level == 1)
+            {
+                u4_trailing1++;
+            }
+            else if (i4_level == -1)
+            {
+                /* unsigned constant keeps the shift free of signed overflow */
+                u4_sign |= 1u << u4_trailing1;
+                u4_trailing1++;
+            }
+            else
+            {
+                /* a larger level breaks the chain of trailing ones */
+                u4_trailing1 = 0;
+                u4_sign = 0;
+            }
+
+            i4_run = -1;
+            u4_nnz_coeff++;
+        }
+        u4_sig_coeff_map >>= 1;
+        i++;
+    }
+
+    /* positions scanned minus non-zero coefficients == zeros in between */
+    u4_tot_zero = i - u4_total_coeff;
+    u4_totzero_sign_trailone = (u4_tot_zero << 16) | (u4_sign << 8) | u4_trailing1;
+
+    return (u4_totzero_sign_trailone);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for the given residual block
+*
+* @param[in] pi2_res_block
+*  Pointer to residual block containing levels in scan
order +* +* @param[in] u4_total_coeff +* Total non-zero coefficients in the sub block +* +* @param[in] u4_block_type +* block type +* +* @param[in] pu1_zero_run +* Pointer to array to store run of zeros (filled by +* ih264e_compute_zeroruns_and_trailingones() when the block is not empty) +* +* @param[in] u4_nc +* average of non zero coeff from top and left blocks (when available) +* +* @param[in, out] ps_bit_stream +* structure pointing to a buffer holding output bit stream +* +* @param[in] u4_sig_coeff_map +* significant coefficient map of the residual block +* +* @returns +* status of the last ih264e_put_bits() call issued for this block; the status +* of earlier calls is overwritten +* +* @remarks +* If the block type is CAVLC_CHROMA_4x4_DC, then u4_nc is non-significant +* +******************************************************************************* +*/ +static IH264E_ERROR_T ih264e_write_coeff4x4_cavlc(WORD16 *pi2_res_block, + UWORD32 u4_total_coeff, + ENTROPY_BLK_TYPE u4_block_type, + UWORD8 *pu1_zero_run, + UWORD32 u4_nc, + bitstrm_t *ps_bit_stream, + UWORD32 u4_sig_coeff_map) +{ + IH264E_ERROR_T error_status = IH264E_SUCCESS; + UWORD32 u4_totzero_sign_trailone = 0; + UWORD32 u4_trailing_ones = 0; + UWORD32 u4_tot_zeros = 0; + UWORD32 u4_remaining_coeff = 0; + UWORD32 u4_sign1 = 0; + UWORD32 u4_max_num_coeff = 0; + const UWORD32 au4_max_num_nnz_coeff[] = {16, 15, 16, 4, 15}; /* maximum coefficient count of a block, indexed by u4_block_type */ + + /* validate inputs */ + ASSERT(u4_block_type <= CAVLC_CHROMA_4x4_AC); + + u4_max_num_coeff = au4_max_num_nnz_coeff[u4_block_type]; + + ASSERT(u4_total_coeff <= u4_max_num_coeff); + + if (!u4_total_coeff) + { /* empty block: only the coeff_token is written, then the function returns */ + UWORD32 u4_codeword = 15; + UWORD32 u4_codesize = 1; + if (u4_block_type == CAVLC_CHROMA_4x4_DC) + { + u4_codeword = 1; + u4_codesize = 2; + DEBUG("\n[%d numcoeff, %d numtrailing ones]",u4_total_coeff, 0); + ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff); + ENTROPY_TRACE("\tnumber of trailing ones ",0); + } + else + { + UWORD32 u4_vlcnum = u4_nc >> 1; + + /* write coeff_token */ + if (u4_vlcnum > 3) + { + /* Num-FLC */ + u4_codeword = 3; + u4_codesize = 6; + } + else + { + /* Num-VLC 0, 1, 2 */ + if (u4_vlcnum > 1) + { + u4_vlcnum = 2; + } +
u4_codesize <<= u4_vlcnum; + u4_codeword >>= (4 - u4_codesize); + } + + DEBUG("\n[%d numcoeff, %d numtrailing ones, %d nnz]",u4_total_coeff, 0, u4_nc); + ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff); + ENTROPY_TRACE("\tnC ",u4_nc); + } + + + DEBUG("\nCOEFF TOKEN 0: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize); + ENTROPY_TRACE("\tcodeword ",u4_codeword); + ENTROPY_TRACE("\tcodesize ",u4_codesize); + + error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize); + + return error_status; + } + else + { + /* Compute zero run, number of trailing ones and their sign. */ + u4_totzero_sign_trailone = + ih264e_compute_zeroruns_and_trailingones(pi2_res_block, + u4_total_coeff, + pu1_zero_run, + u4_sig_coeff_map); + u4_trailing_ones = u4_totzero_sign_trailone & 0xFF; /* unpack: bits 0-7 trailing ones, bits 8-15 their signs, bits 16-23 total zeros */ + u4_sign1 = (u4_totzero_sign_trailone >> 8)& 0xFF; + u4_tot_zeros = (u4_totzero_sign_trailone >> 16) & 0xFF; + u4_remaining_coeff = u4_total_coeff - u4_trailing_ones; /* levels still to be coded explicitly after the trailing ones */ + + /* write coeff_token */ + { + UWORD32 u4_codeword; + UWORD32 u4_codesize; + if (u4_block_type == CAVLC_CHROMA_4x4_DC) + { + u4_codeword = gu1_code_coeff_token_table_chroma[u4_trailing_ones][u4_total_coeff-1]; + u4_codesize = gu1_size_coeff_token_table_chroma[u4_trailing_ones][u4_total_coeff-1]; + + DEBUG("\n[%d numcoeff, %d numtrailing ones]",u4_total_coeff, u4_trailing_ones); + ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff); + ENTROPY_TRACE("\tnumber of trailing ones ",u4_trailing_ones); + } + else + { + UWORD32 u4_vlcnum = u4_nc >> 1; + + if (u4_vlcnum > 3) + { + /* Num-FLC */ + u4_codeword = ((u4_total_coeff-1) << 2 ) + u4_trailing_ones; + u4_codesize = 6; + } + else + { + /* Num-VLC 0, 1, 2 */ + if (u4_vlcnum > 1) + { + u4_vlcnum = 2; + } + u4_codeword = gu1_code_coeff_token_table[u4_vlcnum][u4_trailing_ones][u4_total_coeff-1]; + u4_codesize = gu1_size_coeff_token_table[u4_vlcnum][u4_trailing_ones][u4_total_coeff-1]; + } + + DEBUG("\n[%d numcoeff, %d numtrailing ones, %d
nnz]",u4_total_coeff, u4_trailing_ones, u4_nc); + ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff); + ENTROPY_TRACE("\tnumber of trailing ones ",u4_trailing_ones); + ENTROPY_TRACE("\tnC ",u4_nc); + } + + DEBUG("\nCOEFF TOKEN 0: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize); + ENTROPY_TRACE("\tcodeword ",u4_codeword); + ENTROPY_TRACE("\tcodesize ",u4_codesize); + + error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize); + } + + /* write sign of trailing ones (one bit per trailing one) */ + if (u4_trailing_ones) + { + DEBUG("\nT1's: %d u4_codeword, %d u4_codesize",u4_sign1, u4_trailing_ones); + error_status = ih264e_put_bits(ps_bit_stream, u4_sign1, u4_trailing_ones); + ENTROPY_TRACE("\tnumber of trailing ones ",u4_trailing_ones); + ENTROPY_TRACE("\tsign of trailing ones ",u4_sign1); + } + + /* write level codes, starting from the last remaining coefficient and moving towards index 0 */ + if (u4_remaining_coeff) + { + WORD32 i4_level = pi2_res_block[u4_remaining_coeff-1]; + UWORD32 u4_escape; + UWORD32 u4_suffix_length = 0; // Level-VLC[N] + UWORD32 u4_abs_level, u4_abs_level_actual = 0; + WORD32 i4_sign; + const UWORD32 u4_rndfactor[] = {0, 0, 1, 3, 7, 15, 31}; + + DEBUG("\n \t%d coeff,",i4_level); + ENTROPY_TRACE("\tcoeff ",i4_level); + + if (u4_trailing_ones < 3) + { + /* If there are fewer than 3 T1s, then the first non-T1 level is incremented if negative (decremented if positive) */ + if (i4_level < 0) + { + i4_level += 1; + } + else + { + i4_level -= 1; + } + + u4_abs_level_actual = 1; + + /* Initialize VLC table (Suffix Length) to encode the level */ + if (u4_total_coeff > 10) + { + u4_suffix_length = 1; + } + } + + /* i4_sign is 0 for a non-negative level and all ones for a negative one (assumes an arithmetic right shift); the add/xor pair below then yields the absolute value */ + i4_sign = (i4_level >> (sizeof(WORD32) * CHAR_BIT - 1)); + u4_abs_level = ((i4_level + i4_sign) ^ i4_sign); + + u4_abs_level_actual += u4_abs_level; + + u4_escape = (u4_abs_level + u4_rndfactor[u4_suffix_length]) >> u4_suffix_length; + + while (1) + { + UWORD32 u4_codesize; + UWORD32 u4_codeword; + UWORD32 u4_codeval; + + u4_remaining_coeff--; + +GATHER_CAVLC_STATS1(); + + { + u4_codeval = u4_abs_level << 1;
+ u4_codeval = u4_codeval - 2 - i4_sign; + + if ((!u4_suffix_length) && (u4_escape > 7) && (u4_abs_level < 16)) + { + u4_codeword = (1 << 4) + (u4_codeval - 14); + u4_codesize = 19; + } + else if (u4_escape > 7) + { + u4_codeword = (1 << 12) + (u4_codeval - (15 << u4_suffix_length)); + u4_codesize = 28; + if (!u4_suffix_length) + { + u4_codeword -= 15; + } + } + else + { + u4_codeword = (1 << u4_suffix_length) + (u4_codeval & ((1 << u4_suffix_length)-1)); + u4_codesize = (u4_codeval >> u4_suffix_length) + 1 + u4_suffix_length; + } + } + + /* put the level code in bitstream */ + DEBUG("\nLEVEL: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize); + ENTROPY_TRACE("\tcodeword ",u4_codeword); + ENTROPY_TRACE("\tcodesize ",u4_codesize); + error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize); + + if (u4_remaining_coeff == 0) break; + + /* update suffix length for next level */ + if (u4_suffix_length == 0) + { + u4_suffix_length++; + } + if (u4_suffix_length < 6) + { + if (u4_abs_level_actual > gu1_threshold_vlc_level[u4_suffix_length]) + { + u4_suffix_length++; + } + } + + /* next level */ + i4_level = pi2_res_block[u4_remaining_coeff-1]; + + DEBUG("\n \t%d coeff,",i4_level); + ENTROPY_TRACE("\tcoeff ",i4_level); + + i4_sign = (i4_level >> (sizeof(WORD32) * CHAR_BIT - 1)); + u4_abs_level = ((i4_level + i4_sign) ^ i4_sign); + + u4_abs_level_actual = u4_abs_level; + + u4_escape = (u4_abs_level + u4_rndfactor[u4_suffix_length]) >> u4_suffix_length; + } + } + + DEBUG("\n \t %d totalzeros",u4_tot_zeros); + ENTROPY_TRACE("\ttotal zeros ",u4_tot_zeros); + + /* Write Total Zeros (skipped when the block already holds the maximum number of coefficients) */ + if (u4_total_coeff < u4_max_num_coeff) + { + WORD32 index; + UWORD32 u4_codeword; + UWORD32 u4_codesize; + + if (u4_block_type == CAVLC_CHROMA_4x4_DC) + { + UWORD8 gu1_index_zero_table_chroma[] = {0, 4, 7}; + index = gu1_index_zero_table_chroma[u4_total_coeff-1] + u4_tot_zeros; + u4_codesize = gu1_size_zero_table_chroma[index]; + u4_codeword = gu1_code_zero_table_chroma[index];
+ } + else + { + index = gu1_index_zero_table[u4_total_coeff-1] + u4_tot_zeros; + u4_codesize = gu1_size_zero_table[index]; + u4_codeword = gu1_code_zero_table[index]; + } + + DEBUG("\nTOTAL ZEROS: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize); + ENTROPY_TRACE("\tcodeword ",u4_codeword); + ENTROPY_TRACE("\tcodesize ",u4_codesize); + error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize); + } + + /* Write Run Before: walks the stored runs from the last coefficient down to index 1 and stops early once no zeros are left */ + if (u4_tot_zeros) + { + UWORD32 u4_max_num_coef = u4_total_coeff-1; + UWORD32 u4_codeword; + UWORD32 u4_codesize; + UWORD32 u4_zeros_left = u4_tot_zeros; + + while (u4_max_num_coef) + { + UWORD32 u4_run_before = pu1_zero_run[u4_max_num_coef]; + UWORD32 u4_index; + + if (u4_zeros_left > MAX_ZERO_LEFT) + { + u4_index = gu1_index_run_table[MAX_ZERO_LEFT]; + } + else + { + u4_index = gu1_index_run_table[u4_zeros_left - 1]; + } + + u4_codesize = gu1_size_run_table[u4_index + u4_run_before]; + u4_codeword = gu1_code_run_table[u4_index + u4_run_before]; + + DEBUG("\nRUN BEFORE ZEROS: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize); + ENTROPY_TRACE("\tcodeword ",u4_codeword); + ENTROPY_TRACE("\tcodesize ",u4_codesize); + error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize); + + u4_zeros_left -= u4_run_before; + if (!u4_zeros_left) + { + break; + } + u4_max_num_coef--; + } + } + } + + return error_status; +} + +/** +******************************************************************************* +* +* @brief +* This function generates CAVLC coded bit stream for the given subblock +* +* @param[in] ps_ent_ctxt +* Pointer to entropy context +* +* @param[in] pi2_res_block +* Pointers to residual blocks of all the partitions for the current subblk +* (containing levels in scan order) +* +* @param[in] pu1_nnz +* Total non-zero coefficients of all the partitions for the current subblk +* +* @param[in] pu2_sig_coeff_map +* Significant coefficient map of all the partitions for the current subblk +*
@param[in] u4_block_type
+*  entropy coding block type, passed unchanged to every 4x4 block written
+*
+* @param[in] u4_ngbr_avlb
+*  packed neighbour availability: one byte per 4x4 block (read in memory
+*  order), low nibble non-zero when the left neighbour is available, high
+*  nibble non-zero when the top neighbour is available
+*
+* @param[in, out] pu1_top_nnz
+*  two nnz entries of the 4x4 blocks above; overwritten with the nnz of the
+*  blocks coded here
+*
+* @param[in, out] pu1_left_nnz
+*  two nnz entries of the 4x4 blocks to the left; overwritten with the nnz of
+*  the blocks coded here
+*
+* @returns status of the last 4x4 block written
+*
+* @remarks the four 4x4 blocks are coded in the order top-left, top-right,
+*  bottom-left, bottom-right
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T ih264e_write_coeff8x8_cavlc(entropy_ctxt_t *ps_ent_ctxt,
+                                                  WORD16 **pi2_res_block,
+                                                  UWORD8 *pu1_nnz,
+                                                  UWORD16 *pu2_sig_coeff_map,
+                                                  ENTROPY_BLK_TYPE u4_block_type,
+                                                  UWORD32 u4_ngbr_avlb,
+                                                  UWORD8 *pu1_top_nnz,
+                                                  UWORD8 *pu1_left_nnz)
+{
+    /* left / top nnz entry used by each of the four 4x4 blocks */
+    static const UWORD8 au1_left_pos[4] = {0, 0, 1, 1};
+    static const UWORD8 au1_top_pos[4] = {0, 1, 0, 1};
+
+    IH264E_ERROR_T e_status = IH264E_SUCCESS;
+    bitstrm_t *ps_strm = ps_ent_ctxt->ps_bitstrm;
+    UWORD8 *pu1_runs = ps_ent_ctxt->au1_zero_run;
+
+    /* byte-wise view of the packed availability word */
+    UWORD8 *pu1_avail = (void *)(&u4_ngbr_avlb);
+    WORD32 i4_blk;
+
+    for (i4_blk = 0; i4_blk < 4; i4_blk++)
+    {
+        UWORD8 *pu1_left = pu1_left_nnz + au1_left_pos[i4_blk];
+        UWORD8 *pu1_top = pu1_top_nnz + au1_top_pos[i4_blk];
+        UWORD8 u1_left_avail = pu1_avail[i4_blk] & 0x0F;
+        UWORD8 u1_top_avail = pu1_avail[i4_blk] & 0xF0;
+        UWORD32 u4_ctxt_nnz = 0;
+
+        /* nC: sum of the available neighbours, averaged when both exist */
+        if (u1_left_avail)
+        {
+            u4_ctxt_nnz += *pu1_left;
+        }
+        if (u1_top_avail)
+        {
+            u4_ctxt_nnz += *pu1_top;
+        }
+        if (u1_left_avail && u1_top_avail)
+        {
+            u4_ctxt_nnz = (u4_ctxt_nnz + 1) >> 1;
+        }
+
+        /* this block becomes the neighbour of the blocks that follow */
+        *pu1_top = pu1_nnz[i4_blk];
+        *pu1_left = pu1_nnz[i4_blk];
+
+        e_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[i4_blk],
+                                               pu1_nnz[i4_blk],
+                                               u4_block_type,
+                                               pu1_runs,
+                                               u4_ctxt_nnz,
+                                               ps_strm,
+                                               pu2_sig_coeff_map[i4_blk]);
+    }
+
+    return e_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function encodes luma and chroma residues of a macro block when
+*  the entropy coding mode chosen is cavlc.
+* +* @param[in] ps_ent_ctxt +* Pointer to entropy context +* +* @param[in] u4_mb_type +* current mb type +* +* @param[in] u4_cbp +* coded block pattern for the current mb (bits 0-3 luma, bits 4 and above chroma) +* +* @returns error code of the last residual block written (IH264E_SUCCESS when no +* block is coded); the status of earlier blocks is overwritten +* +* @remarks on return ps_ent_ctxt->pv_mb_coeff_data is set to the local coefficient +* data pointer used by PARSE_COEFF_DATA_BLOCK_4x4 (presumably advanced past the +* data consumed for this mb - confirm against the macro definition) +* +******************************************************************************* +*/ +static IH264E_ERROR_T ih264e_encode_residue(entropy_ctxt_t *ps_ent_ctxt, + UWORD32 u4_mb_type, + UWORD32 u4_cbp) +{ + /* error status */ + IH264E_ERROR_T error_status = IH264E_SUCCESS; + + /* packed residue */ + void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data; + + /* bit stream buffer */ + bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm; + + /* zero run */ + UWORD8 *pu1_zero_run = ps_ent_ctxt->au1_zero_run; + + /* temp var */ + UWORD32 u4_nC, u4_ngbr_avlb; + UWORD8 au1_nnz[4], *pu1_ngbr_avlb, *pu1_top_nnz, *pu1_left_nnz; + UWORD16 au2_sig_coeff_map[4]; + WORD16 *pi2_res_block[4]; + UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx; + tu_sblk_coeff_data_t *ps_mb_coeff_data; + ENTROPY_BLK_TYPE e_entropy_blk_type = CAVLC_LUMA_4x4; + + /* ngbr availability */ + UWORD8 u1_mb_a, u1_mb_b; + + /* cbp */ + UWORD32 u4_cbp_luma = u4_cbp & 0xF, u4_cbp_chroma = u4_cbp >> 4; + + /* mb indices */ + WORD32 i4_mb_x, i4_mb_y; + + /* derive neighbor availability */ + i4_mb_x = ps_ent_ctxt->i4_mb_x; + i4_mb_y = ps_ent_ctxt->i4_mb_y; + pu1_slice_idx += (i4_mb_y * ps_ent_ctxt->i4_wd_mbs); + /* left macroblock availability: a left mb must exist and carry the same slice index */ + u1_mb_a = (i4_mb_x == 0 || + (pu1_slice_idx[i4_mb_x - 1 ] != pu1_slice_idx[i4_mb_x]))? 0 : 1; + /* top macroblock availability: a top mb must exist and carry the same slice index */ + u1_mb_b = (i4_mb_y == 0 || + (pu1_slice_idx[i4_mb_x-ps_ent_ctxt->i4_wd_mbs] != pu1_slice_idx[i4_mb_x]))?
0 : 1; + + pu1_ngbr_avlb = (void *)(&u4_ngbr_avlb); /* byte-wise view of u4_ngbr_avlb: one availability byte per 4x4 block */ + pu1_top_nnz = ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x]; + pu1_left_nnz = (UWORD8 *)&ps_ent_ctxt->u4_left_nnz_luma; + + /* encode luma residue */ + + /* mb type intra 16x16 */ + if (u4_mb_type == I16x16) + { + /* parse packed coeff data structure for residual data */ + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]); + /* estimate nnz for the current mb */ + u4_nC = 0; + if (u1_mb_a) + u4_nC += pu1_left_nnz[0]; + if (u1_mb_b) + u4_nC += pu1_top_nnz[0]; + if (u1_mb_a && u1_mb_b) + u4_nC = (u4_nC + 1) >> 1; + + /* encode dc block */ + ENTROPY_TRACE("Luma DC blk idx %d",0); + error_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[0], au1_nnz[0], CAVLC_LUMA_4x4_DC, pu1_zero_run, u4_nC, ps_bitstream, au2_sig_coeff_map[0]); + + e_entropy_blk_type = CAVLC_LUMA_4x4_AC; + } + + if (u4_cbp_luma & 1) + { + /* encode ac block index 8x8 = 0*/ + /* parse packed coeff data structure for residual data */ + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]); + /* derive sub block neighbor availability: top flag in the high nibble, left flag in the low nibble; neighbours inside this 8x8 are always available */ + + pu1_ngbr_avlb[0] = (u1_mb_b << 4) | (u1_mb_a); + pu1_ngbr_avlb[1] = (u1_mb_b << 4) | 1; + pu1_ngbr_avlb[2] = (1 << 4) | (u1_mb_a); + pu1_ngbr_avlb[3] = 0x11; + /* encode sub blk */ + ENTROPY_TRACE("Luma blk idx %d",0); + error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz); + } + else + { /* 8x8 block not coded: clear its entries in the nnz context */ + pu1_top_nnz[0] = pu1_top_nnz[1] = 0; +
pu1_left_nnz[0] = pu1_left_nnz[1] = 0; + } + + if (u4_cbp_luma & 2) + { + /* encode ac block index 8x8 = 1*/ + /* parse packed coeff data structure for residual data */ + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]); + + /* derive sub block neighbor availability */ + pu1_ngbr_avlb[1] = pu1_ngbr_avlb[0] = (u1_mb_b << 4) | 1; + pu1_ngbr_avlb[3] = pu1_ngbr_avlb[2] = 0x11; + /* encode sub blk */ + ENTROPY_TRACE("Luma blk idx %d",1); + error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz+2, pu1_left_nnz); + } + else + { + (pu1_top_nnz + 2)[0] = (pu1_top_nnz + 2)[1] = 0; + pu1_left_nnz[0] = pu1_left_nnz[1] = 0; + } + + if (u4_cbp_luma & 0x4) + { + /* encode ac block index 8x8 = 2*/ + /* parse packed coeff data structure for residual data */ + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]); + + /* derive sub block neighbor availability */ + pu1_ngbr_avlb[2] = pu1_ngbr_avlb[0] = (1 << 4) | u1_mb_a; + pu1_ngbr_avlb[1] = pu1_ngbr_avlb[3] = 0x11; + /* encode sub blk */ + ENTROPY_TRACE("Luma blk idx %d",2); + error_status =
ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz, (pu1_left_nnz+2)); + } + else + { + pu1_top_nnz[0] = pu1_top_nnz[1] = 0; + (pu1_left_nnz + 2)[0] = (pu1_left_nnz + 2)[1] = 0; + } + + if (u4_cbp_luma & 0x8) + { + /* encode ac block index 8x8 = 3*/ + /* parse packed coeff data structure for residual data */ + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]); + + /* derive sub block neighbor availability: every neighbour lies inside the current mb */ + u4_ngbr_avlb = 0x11111111; + /* encode sub blk */ + ENTROPY_TRACE("Luma blk idx %d",3); + error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz+2, pu1_left_nnz+2); + } + else + { + (pu1_top_nnz + 2)[0] = (pu1_top_nnz + 2)[1] = 0; + (pu1_left_nnz + 2)[0] = (pu1_left_nnz + 2)[1] = 0; + } + + /* encode chroma residue */ + if (u4_cbp_chroma & 3) + { + /* parse packed coeff data structure for residual data */ + /* cb, cr */ + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]); + + /* encode dc block */ + /* cb, cr */ + ENTROPY_TRACE("Chroma DC blk idx %d",0); + error_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[0], au1_nnz[0], CAVLC_CHROMA_4x4_DC, pu1_zero_run, 0, ps_bitstream, au2_sig_coeff_map[0]); + ENTROPY_TRACE("Chroma DC blk idx %d",1); + error_status =
ih264e_write_coeff4x4_cavlc(pi2_res_block[1], au1_nnz[1], CAVLC_CHROMA_4x4_DC, pu1_zero_run, 0, ps_bitstream, au2_sig_coeff_map[1]); + } + + pu1_top_nnz = ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x]; + pu1_left_nnz = (UWORD8 *) &ps_ent_ctxt->u4_left_nnz_cbcr; + + /* encode sub blk (first chroma AC plane, presumably cb given the cb, cr order above) */ + if (u4_cbp_chroma & 0x2) + { + /* encode ac block index 8x8 = 0*/ + /* derive sub block neighbor availability */ + pu1_ngbr_avlb[0] = (u1_mb_b << 4) | (u1_mb_a); + pu1_ngbr_avlb[1] = (u1_mb_b << 4) | 1; + pu1_ngbr_avlb[2] = (1 << 4) | (u1_mb_a); + pu1_ngbr_avlb[3] = 0x11; + + /* parse packed coeff data structure for residual data */ + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]); + + ENTROPY_TRACE("Chroma AC blk idx %d",0); + error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, CAVLC_CHROMA_4x4_AC, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz); + } + else + { + pu1_top_nnz[0] = pu1_top_nnz[1] = 0; + pu1_left_nnz[0] = pu1_left_nnz[1] = 0; + } + + pu1_top_nnz += 2; + pu1_left_nnz += 2; + + /* encode sub blk (second chroma AC plane; reuses the availability bytes derived for the first plane under the same cbp condition) */ + if (u4_cbp_chroma & 0x2) + { + /* parse packed coeff data structure for residual data */ + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]); + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3],
au2_sig_coeff_map[3], pi2_res_block[3]); + + ENTROPY_TRACE("Chroma AC blk idx %d",1); + error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, CAVLC_CHROMA_4x4_AC, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz); + } + else + { + pu1_top_nnz[0] = pu1_top_nnz[1] = 0; + pu1_left_nnz[0] = pu1_left_nnz[1] = 0; + } + + /* store the index of the next mb coeff data */ + ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data; + + return error_status; +} + /* current bit position of the stream: u4_strm_buf_offset scaled by 8, plus the bits already used out of 32 in the current word */ +#define GET_NUM_BITS(ps_bitstream) ((ps_bitstream->u4_strm_buf_offset << 3) + 32 - ps_bitstream->i4_bits_left_in_cw) + +/** +******************************************************************************* +* +* @brief +* This function generates CAVLC coded bit stream for an Intra Slice. +* +* @description +* The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes +* (if present), mb qp delta, coded block pattern, chroma mb mode and +* luma/chroma residue. These syntax elements are written as directed by table +* 7.3.5 of h264 specification.
+* +* @param[in] ps_ent_ctxt +* pointer to entropy context +* +* @returns error code +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt) +{ + /* error status */ + IH264E_ERROR_T error_status = IH264E_SUCCESS; + + /* bit stream ptr */ + bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm; + + /* packed header data */ + UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data; + + /* mb header info */ + /* + * mb_tpm : mb type plus mode + * mb_type : luma mb type and chroma mb type are packed + * cbp : coded block pattern + * mb_qp_delta : mb qp delta + * chroma_intra_mode : chroma intra mode + * luma_intra_mode : luma intra mode + */ + WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode; + WORD8 mb_qp_delta; + + /* temp var */ + WORD32 i, mb_type_stream; + + WORD32 bitstream_start_offset, bitstream_end_offset; + + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = GET_NUM_BITS(ps_bitstream); + + + /********************************************************************/ + /* BEGIN HEADER GENERATION */ + /********************************************************************/ + + /* mb header info */ + mb_tpm = *pu1_byte++; + cbp = *pu1_byte++; + mb_qp_delta = *pu1_byte++; + + /* mb type */ + mb_type = mb_tpm & 0xF; + /* is intra ? 
*/ + if (mb_type == I16x16) + { + UWORD32 u4_cbp_l, u4_cbp_c; + + u4_cbp_c = (cbp >> 4); + u4_cbp_l = (cbp & 0xF); + luma_intra_mode = (mb_tpm >> 4) & 3; + chroma_intra_mode = (mb_tpm >> 6); + + mb_type_stream = luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12; + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type"); + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + } + else if (mb_type == I4x4) + { + /* mb sub blk modes */ + WORD32 intra_pred_mode_flag, rem_intra_mode; + WORD32 byte; + + chroma_intra_mode = (mb_tpm >> 6); + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, 0, error_status, "mb type"); + + for (i = 0; i < 16; i += 2) + { + /* sub blk idx 1 */ + byte = *pu1_byte++; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if (!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode"); + } + + /* sub blk idx 2 */ + byte >>= 4; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if (!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode"); + } + } + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + } + else if (mb_type == I8x8) + { + /* transform 8x8 flag */ + UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag; + + /* mb sub blk modes */ + WORD32 intra_pred_mode_flag, rem_intra_mode; + WORD32 byte; + + chroma_intra_mode = (mb_tpm >> 6); + + 
ASSERT(0); + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, 0, error_status, "mb type"); + + /* u4_transform_size_8x8_flag */ + PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status, "u4_transform_size_8x8_flag"); + + /* write sub block modes */ + for (i = 0; i < 4; i++) + { + /* sub blk idx 1 */ + byte = *pu1_byte++; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if (!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode"); + } + + /* sub blk idx 2 */ + byte >>= 4; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if (!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode"); + } + } + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + } + else + { + } + + /* coded_block_pattern */ + if (mb_type != I16x16) + { + PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][0], error_status, "coded_block_pattern"); + } + + if (cbp || mb_type == I16x16) + { + /* mb_qp_delta */ + PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta"); + } + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + + ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset - bitstream_start_offset; + + /* Starting bitstream offset for residue */ + bitstream_start_offset = bitstream_end_offset; + + /* residual */ + error_status = ih264e_encode_residue(ps_ent_ctxt, mb_type, cbp); + + /* Ending bitstream offset for reside in bits */ + bitstream_end_offset = 
GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_residue_bits[0] += bitstream_end_offset - bitstream_start_offset; + + /* store the index of the next mb syntax layer */ + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + return error_status; +} + +/** +******************************************************************************* +* +* @brief +* This function generates CAVLC coded bit stream for Inter slices +* +* @description +* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes +* (if present), mb qp delta, coded block pattern, chroma mb mode and +* luma/chroma residue. These syntax elements are written as directed by table +* 7.3.5 of h264 specification +* +* @param[in] ps_ent_ctxt +* pointer to entropy context +* +* @returns error code +* +* @remarks +* A PSKIP mb only bumps *pi4_mb_skip_run before the residue call and returns; +* the run is written and reset ahead of the next non-skipped mb +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt) +{ + /* error status */ + IH264E_ERROR_T error_status = IH264E_SUCCESS; + + /* bit stream ptr */ + bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm; + + /* packed header data */ + UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data; + + /* mb header info */ + /* + * mb_tpm : mb type plus mode (bits 0-3 mb type, bits 4-5 luma i16x16 mode, + * bits 6 and up chroma intra mode) + * mb_type : mb type, the low nibble of mb_tpm + * cbp : coded block pattern + * mb_qp_delta : mb qp delta + * chroma_intra_mode : chroma intra mode + * luma_intra_mode : luma intra mode + * cbptable : column of gu1_cbp_map_tables used for the cbp code + * (set to 0 for I4x4 / I8x8, 1 otherwise) + * is_inter : index into u4_header_bits / u4_residue_bits + * (0 for intra mbs, 1 for skip and inter mbs) + */ + WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode; + WORD8 mb_qp_delta; + + /* temp var */ + WORD32 i, mb_type_stream, 
cbptable = 1; + + WORD32 is_inter = 0; + + WORD32 bitstream_start_offset, bitstream_end_offset; + + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = GET_NUM_BITS(ps_bitstream); + + /********************************************************************/ + /* BEGIN HEADER GENERATION 
*/ + /********************************************************************/ + + /* mb header info */ + mb_tpm = *pu1_byte++; + + /* mb type: low nibble of mb_tpm */ + mb_type = mb_tpm & 0xF; + + /* check for skip: only the mb_tpm byte has been consumed from the packed header at this point */ + if (mb_type == PSKIP) + { + UWORD32 *nnz; + + is_inter = 1; + + /* increment skip counter */ + (*ps_ent_ctxt->pi4_mb_skip_run)++; + + /* store the index of the next mb syntax layer */ + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + /* set nnz to zero: left luma/chroma words and the top entries of this mb column, each cleared through one UWORD32 store */ + ps_ent_ctxt->u4_left_nnz_luma = 0; + nnz = (UWORD32 *)ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x]; + *nnz = 0; + ps_ent_ctxt->u4_left_nnz_cbcr = 0; + nnz = (UWORD32 *)ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x]; + *nnz = 0; + + /* residual: called with P16x16 and a cbp of 0 */ + error_status = ih264e_encode_residue(ps_ent_ctxt, P16x16, 0); + + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + + ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset; + + return error_status; + } + + /* remaining mb header info */ + cbp = *pu1_byte++; + mb_qp_delta = *pu1_byte++; + + /* mb skip run */ + PUT_BITS_UEV(ps_bitstream, *ps_ent_ctxt->pi4_mb_skip_run, error_status, "mb skip run"); + + /* reset skip counter */ + *ps_ent_ctxt->pi4_mb_skip_run = 0; + + /* is intra ? 
*/ + if (mb_type == I16x16) + { + UWORD32 u4_cbp_l, u4_cbp_c; + + is_inter = 0; + + u4_cbp_c = (cbp >> 4); + u4_cbp_l = (cbp & 0xF); + luma_intra_mode = (mb_tpm >> 4) & 3; + chroma_intra_mode = (mb_tpm >> 6); + + mb_type_stream = luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12; + + mb_type_stream += 5; + + /* write mb type: the intra value computed above, offset by 5 */ + PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type"); + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + } + else if (mb_type == I4x4) + { + /* mb sub blk modes */ + WORD32 intra_pred_mode_flag, rem_intra_mode; + WORD32 byte; + + is_inter = 0; + + chroma_intra_mode = (mb_tpm >> 6); + cbptable = 0; + + /* write mb type: I4x4 is coded as 5 here */ + PUT_BITS_UEV(ps_bitstream, 5, error_status, "mb type"); + + for (i = 0; i < 16; i += 2) + { + /* sub blk idx 1: low nibble of the packed byte */ + byte = *pu1_byte++; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode: bits 1-3 of the nibble, sent only when the flag is 0 */ + if (!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode"); + } + + /* sub blk idx 2: high nibble of the packed byte */ + byte >>= 4; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode: bits 1-3 of the nibble, sent only when the flag is 0 */ + if (!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode"); + } + } + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, 
"intra_chroma_pred_mode"); + } + else if (mb_type == I8x8) + { + /* transform 8x8 flag */ + UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag; + + /* mb sub blk modes */ + WORD32 intra_pred_mode_flag, 
rem_intra_mode; + WORD32 byte; + + is_inter = 0; + + chroma_intra_mode = (mb_tpm >> 6); + cbptable = 0; + + ASSERT(0); + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, 5, error_status, "mb type"); + + /* u4_transform_size_8x8_flag */ + PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status, "u4_transform_size_8x8_flag"); + + /* write sub block modes: four packed bytes, two modes per byte */ + for (i = 0; i < 4; i++) + { + /* sub blk idx 1: low nibble of the packed byte */ + byte = *pu1_byte++; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if (!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode"); + } + + /* sub blk idx 2: high nibble of the packed byte */ + byte >>= 4; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if (!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode"); + } + } + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + } + else + { + /* inter macro block partition cnt, indexed by (mb_type - 3) */ + const UWORD8 au1_part_cnt[] = { 1, 2, 2, 4 }; + + /* mv ptr: the packed header bytes that follow are read as WORD16 x/y pairs, one pair per partition */ + WORD16 *pi2_mv_ptr = (WORD16 *)pu1_byte; + + /* number of partitions for the current mb. NOTE(review): mb_type - 3 is not range checked against the 4-entry table; presumably the caller only packs valid inter types - confirm */ + UWORD32 u4_part_cnt = au1_part_cnt[mb_type - 3]; + + is_inter = 1; + + /* write mb type */ + 
PUT_BITS_UEV(ps_bitstream, mb_type - 3, error_status, "mb type"); + + for (i = 0; i < (WORD32)u4_part_cnt; i++) + { + PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv x"); + + PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv y"); + } + + pu1_byte = (UWORD8 *)pi2_mv_ptr; + } + + /* coded_block_pattern */ + if (mb_type != 
I16x16) + { + PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][cbptable], error_status, "coded_block_pattern"); + } + + if (cbp || mb_type == I16x16) + { + /* mb_qp_delta */ + PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta"); + } + + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + + ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset; + + /* start bitstream offset for residue in bits */ + bitstream_start_offset = bitstream_end_offset; + + /* residual */ + error_status = ih264e_encode_residue(ps_ent_ctxt, mb_type, cbp); + + /* Ending bitstream offset for residue in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + + ps_ent_ctxt->u4_residue_bits[is_inter] += bitstream_end_offset - bitstream_start_offset; + + /* store the index of the next mb syntax layer */ + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + return error_status; +} diff --git a/encoder/ih264e_cavlc.h b/encoder/ih264e_cavlc.h new file mode 100755 index 0000000..86f4cd4 --- /dev/null +++ b/encoder/ih264e_cavlc.h @@ -0,0 +1,112 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file +* ih264e_cavlc.h +* +* @brief +* This file contains enumerations, macros and extern declarations of H264 +* cavlc tables +* +* @author +* ittiam +* +* @remarks +* none +****************************************************************************** +*/ + +#ifndef IH264E_CAVLC_H_ +#define IH264E_CAVLC_H_ + +/*****************************************************************************/ +/* Function macro definitions */ +/*****************************************************************************/ + +#define PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u4_nnz, u4_sig_coeff_map, pi2_res_block) \ + {\ + ps_mb_coeff_data = pv_mb_coeff_data; \ + u4_nnz = ps_mb_coeff_data->i4_sig_map_nnz & 0xff; \ + if (u4_nnz)\ + {\ + u4_sig_coeff_map = ps_mb_coeff_data->i4_sig_map_nnz >> 16; \ + pi2_res_block = ps_mb_coeff_data->ai2_residue; \ + pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue + u4_nnz; \ + }\ + else\ + {\ + pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue;\ + }\ + } + + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* This function generates CAVLC coded bit stream for an Intra Slice. +* +* @description +* The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes +* (if present), mb qp delta, coded block pattern, chroma mb mode and +* luma/chroma residue. These syntax elements are written as directed by table +* 7.3.5 of h264 specification. 
+* +* @param[in] ps_ent_ctxt +* pointer to entropy context +* +* @returns error code +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt); + +/** +******************************************************************************* +* +* @brief +* This function generates CAVLC coded bit stream for Inter slices +* +* @description +* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes +* (if present), mb qp delta, coded block pattern, chroma mb mode and +* luma/chroma residue. These syntax elements are written as directed by table +* 7.3.5 of h264 specification +* +* @param[in] ps_ent_ctxt +* pointer to entropy context +* +* @returns error code +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt); + +#endif /* IH264E_CAVLC_H_ */ diff --git a/encoder/ih264e_config.h b/encoder/ih264e_config.h new file mode 100755 index 0000000..2446cdb --- /dev/null +++ b/encoder/ih264e_config.h @@ -0,0 +1,52 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file +* ih264e_config.h +* +* @brief +* contains any necessary declarations/definitions that are used during codec +* build +* +* @author +* ittiam +* +* @remarks +* none +****************************************************************************** +*/ + +#ifndef IH264E_CONFIG_H_ +#define IH264E_CONFIG_H_ + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +#define CAVLC_LEVEL_STATS 0 +#define GATING_STATS 0 +#define DEBUG_PRINT 0 +#define ENABLE_TRACE 0 +#define DEBUG_RC 0 +#define TRACE_SUPPORT 0 + +#endif /* IH264E_CONFIG_H_ */ diff --git a/encoder/ih264e_core_coding.c b/encoder/ih264e_core_coding.c new file mode 100755 index 0000000..5ba18de --- /dev/null +++ b/encoder/ih264e_core_coding.c @@ -0,0 +1,2365 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** + ******************************************************************************* + * @file + * ih264e_core_coding.c + * + * @brief + * This file contains routines that perform luma and chroma core coding for + * intra macroblocks + * + * @author + * ittiam + * + * @par List of Functions: + * - ih264e_pack_l_mb_i16() + * - ih264e_pack_c_mb_i8() + * - ih264e_code_luma_intra_macroblock_16x16() + * - ih264e_code_luma_intra_macroblock_4x4() + * - ih264e_code_chroma_intra_macroblock_8x8() + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <string.h> +#include <assert.h> + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "ih264_platform_macros.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264e_defs.h" +#include "ih264_trans_data.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_globals.h" +#include "ih264e_core_coding.h" +#include "ih264e_mc.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** 
+******************************************************************************* +* +* @brief +* This function performs the DCT transform then Hadamard transform +* and quantization for a macroblock when the mb mode is intra 16x16 mode +* +* @par Description: +* First cf4 is done on all 16 4x4 blocks of the 16x16 input block. +* Then hadamard transform is done on the DC coefficients +* Quantization is then performed on the 16x16 block, 4x4 wise +* +* @param[in] pu1_src +* Pointer to source sub-block +* +* @param[in] pu1_pred +* Pointer to prediction sub-block +* +* @param[out] pi2_out +* Pointer to residual sub-block +* The output will be in linear format +* The first 16 contiguous locations will contain the values of DC block +* After DC block and a stride 1st AC block will follow +* After one more stride next AC block will follow +* The blocks will be in raster scan order +* +* @param[in] src_strd +* Source stride +* +* @param[in] pred_strd +* Prediction stride +* +* @param[in] dst_strd +* Destination stride +* +* @param[in] pu2_scale_matrix +* The quantization matrix for 4x4 transform +* +* @param[in] pu2_threshold_matrix +* Threshold matrix +* +* @param[in] u4_qbits +* 15+QP/6 +* +* @param[in] u4_round_factor +* Round factor for quant +* +* @param[out] pu1_nnz +* Memory to store the non-zeros after transform +* The first byte will be the nnz of DC block +* From the next byte the AC nnzs will be stored in raster scan order +* +* @param[in] u4_dc_flag +* Signals if DC transform is to be done or not +* 1 -> DC transform will be done +* 0 -> DC transform will not be done +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t *ps_codec, + UWORD8 *pu1_src, + UWORD8 *pu1_pred, + WORD16 *pi2_out, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 dst_strd, + const UWORD16 *pu2_scale_matrix, + const UWORD16 *pu2_threshold_matrix, + UWORD32 u4_qbits, + UWORD32 
u4_round_factor, + UWORD8 *pu1_nnz, + UWORD32 u4_dc_flag) + +{ + WORD32 blk_cntr; + WORD32 i4_offsetx, i4_offsety; + UWORD8 *pu1_curr_src, *pu1_curr_pred; + + WORD16 *pi2_dc_str = pi2_out; + + /* Move to the ac addresses: entry 0 of pu1_nnz and the first dst_strd coefficients of pi2_out are kept for the DC block */ + pu1_nnz++; + pi2_out += dst_strd; + + for (blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++) + { + IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety); + + pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd; + pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd; + + ps_codec->pf_resi_trans_quant_4x4(pu1_curr_src, pu1_curr_pred, + pi2_out + blk_cntr * dst_strd, + src_strd, pred_strd, pu2_scale_matrix, + pu2_threshold_matrix, u4_qbits, + u4_round_factor, &pu1_nnz[blk_cntr], + &pi2_dc_str[blk_cntr]); + + } + + if (!u4_dc_flag) + return; + + /* + * In case of i16x16, we need to remove the contribution of dc coeffs into + * nnz of each block. 
We are doing that in the packing function + */ + + /* Adjust pointers to point to dc values; the DC pass below runs with one extra qbit and a doubled round factor */ + pi2_out -= dst_strd; + pu1_nnz--; + + u4_qbits++; + u4_round_factor <<= 1; + + ps_codec->pf_hadamard_quant_4x4(pi2_dc_str, pi2_out, pu2_scale_matrix, + pu2_threshold_matrix, u4_qbits, + u4_round_factor, &pu1_nnz[0]); +} + +/** +******************************************************************************* +* +* @brief +* This function performs the intra 16x16 inverse transform process for H264 +* it includes inverse DC transform, inverse quant and then inverse transform +* +* @par Description: +* +* @param[in] ps_codec +* Codec context, source of the pf_* function pointers used here +* +* @param[in] pi2_src +* Input data, 16x16 size +* First 16 mem locations will have the DC coeffs in raster scan order in linear fashion +* after a stride 1st AC block will be present again in raster scan order +* Then each AC block of the 16x16 block will follow in raster scan order +* +* @param[in] pu1_pred +* The predicted data, 16x16 size +* Block by block form +* +* @param[out] pu1_out +* Output 16x16 +* In block by block form +* +* @param[in] src_strd +* Source stride +* +* @param[in] 
pred_strd +* input stride for prediction buffer +* +* @param[in] out_strd +* input stride for output buffer +* +* @param[in] pu2_iscale_mat +* Inverse quantization matrix for 4x4 transform +* +* @param[in] pu2_weigh_mat +* weight matrix of 4x4 transform +* +* @param[in] qp_div +* QP/6 +* +* @param[in] u4_cntrl +* Controls the transform path +* total Last 17 bits are used +* the 16th bit will correspond to DC block +* and 32-17 will correspond to the ac blocks in raster scan order +* bit equaling zero indicates that the entire 4x4 block is zero for DC +* For AC blocks a bit equaling zero will mean that all 15 AC coeffs of the block are zero +* NOTE(review): the code takes the AC flags with CNTRL_FLAG_AC_MASK_LUMA and the +* DC flags with CNTRL_FLAG_DC_MASK_LUMA (then shifted up by 16); a block with +* neither flag set is filled from the prediction by pf_inter_pred_luma_copy. +* When u4_dc_trans_flag is set the DC flags are recomputed inside this function +* +* @param[in] u4_dc_trans_flag +* Non zero: the inverse hadamard is run on the first 16 coefficients, DC values +* are then read from there with a stride of 1 and inverse quant starts at index 1 +* Zero: DC values are read from the blocks themselves (stride src_strd) and +* inverse quant starts at index 0 +* +* @param[in] pi4_tmp +* Input temporary buffer +* needs to be at least 20 in size +* (a duplicate entry for this parameter asked for at least +* COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size; TODO confirm which bound holds) +* +* @returns +* none +* +* @remarks +* The all zero case must be taken care outside +* +******************************************************************************* +*/ +void ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t *ps_codec, + WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscale_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 qp_div, + UWORD32 u4_cntrl, + UWORD32 u4_dc_trans_flag, + WORD32 
*pi4_tmp) +{ + /* Start index for inverse quant in a 4x4 block */ + WORD32 iq_start_idx = (u4_dc_trans_flag == 0) ? 
0 : 1; + + /* Cntrl bits for 4x4 transforms + * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path + * u4_dc_cntrl : controls if a 4x4 block is to be processed in dc path + * : dc block must contain only single dc coefficient + * u4_empty_blk_cntrl : control for 4x4 block with no coeffs, ie no dc and ac + * : ie not (ac or dc) + */ + UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl; + + /* tmp registers for block ids */ + UWORD32 u4_blk_id; + + /* Subscripts */ + WORD32 i4_offset_x, i4_offset_y; + + UWORD8 *pu1_cur_prd_blk, *pu1_cur_out_blk; + + /* Src and stride for dc coeffs */ + UWORD32 u4_dc_inc; + WORD16 *pi2_dc_src; + + /* + * For intra blocks we need to do inverse dc transform + * In case of intra blocks, it is here that we populate the dc bits in cntrl + * as they cannot be populated any earlier + */ + if (u4_dc_trans_flag) + { + UWORD32 cntr, u4_dc_cntrl; + /* Do inv hadamard; both the source and the destination argument are pi2_src, and the dc values are read back from its first entries below */ + ps_codec->pf_ihadamard_scaling_4x4(pi2_src, pi2_src, pu2_iscale_mat, + pu2_weigh_mat, qp_div, pi4_tmp); + + /* Update the cntrl flag (this u4_dc_cntrl is local to the block and shadows the outer one) */ + u4_dc_cntrl = 0; + for (cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++) + { + u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr)); + } + /* Keep a dc bit only if the corresponding ac bit is 0 */ + u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl); + /* Combine both ac and dc bits */ + u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA) + | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA); + } + + /* Source for dc coeffs + * If the block is intra, we have to read dc values from first row of src + * then stride for each block is 1, otherwise it is src stride + */ + pi2_dc_src = (iq_start_idx == 0) ? (pi2_src + src_strd) : pi2_src; + u4_dc_inc = (iq_start_idx == 0) ? 
src_strd : 1; + + /* The AC blocks start from the 2nd row */ + pi2_src += src_strd; + + /* Get the block bits: dc flags are moved up by 16 before use; empty = neither ac nor dc flag set */ + u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA); + u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16; + u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFFFF0000; + + /* Get first DC block to process */ + DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); + while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB) + { + /* Compute address of src blocks */ + WORD32 i4_src_offset = u4_dc_inc * u4_blk_id; + + IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); + + /* Compute address of out and pred blocks */ + pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; + pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; + + /* Do inv dc transform */ + ps_codec->pf_iquant_itrans_recon_4x4_dc(pi2_dc_src + i4_src_offset, + pu1_cur_prd_blk, + pu1_cur_out_blk, pred_strd, + out_strd, pu2_iscale_mat, + pu2_weigh_mat, qp_div, NULL, + iq_start_idx, + pi2_dc_src + i4_src_offset); + /* Get next DC block to process */ + DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); + } + + /* now process ac/mixed blocks. 
NOTE(review): the dc pointer passed below is pi2_dc_src + u4_blk_id, i.e. a dc stride of 1 rather than u4_dc_inc - presumably harmless because the callee only needs it when iq_start_idx is 1; confirm */ + DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); + while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB) + { + + WORD32 i4_src_offset = src_strd * u4_blk_id; + + IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); + + pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; + pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; + + ps_codec->pf_iquant_itrans_recon_4x4(pi2_src + i4_src_offset, + pu1_cur_prd_blk, pu1_cur_out_blk, + pred_strd, out_strd, + pu2_iscale_mat, pu2_weigh_mat, + qp_div, (WORD16*) pi4_tmp, + iq_start_idx, + pi2_dc_src + u4_blk_id); + + DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); + } + + /* Now process empty blocks */ + DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); + while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB) + { + IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, 
i4_offset_y); + + pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; + pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; + + ps_codec->pf_inter_pred_luma_copy(pu1_cur_prd_blk, pu1_cur_out_blk, + pred_strd, out_strd, SIZE_4X4_BLK_HRZ, + SIZE_4X4_BLK_VERT, 0, 0); + + DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); + } +} + +/** +******************************************************************************* +* +* @brief +* This function performs the DCT transform then Hadamard transform +* and quantization for a chroma macroblock +* +* @par Description: +* First cf4 is done on all the 4x4 blocks of the 8x8 input block +* (NUM_CHROMA4x4_BLOCKS_IN_MB of them; the original text said 16, but the +* local DC scratch array holds 8 entries) +* Then hadamard transform is done on the DC coefficients +* Quantization is then performed on the 8x8 block, 4x4 wise +* +* @param[in] ps_codec +* Codec context, source of the pf_* function pointers used here +* +* @param[in] pu1_src +* Pointer to source sub-block +* The input is in interleaved format for two chroma planes +* +* @param[in] pu1_pred +* Pointer to prediction sub-block +* Prediction is in interleaved format +* +* @param[out] pi2_out +* Pointer to residual sub-block +* The output will be in linear format +* The first 4 contiguous locations will contain the values of DC block for U +* and then next 4 will contain for V. 
+* After DC block and a stride 1st AC block of U plane will follow +* After one more stride next AC block of U plane will follow +* The blocks will be in raster scan order +* +* After all the AC blocks of U plane AC blocks of V plane will follow in exact +* same way +* +* @param[in] src_strd +* Source stride +* +* @param[in] pred_strd +* Prediction stride +* +* @param[in] out_strd +* Destination stride +* +* @param[in] pu2_scale_matrix +* The quantization matrix for 4x4 transform +* +* @param[in] pu2_threshold_matrix +* Threshold matrix +* +* @param[in] u4_qbits +* 15+QP/6 +* +* @param[in] u4_round_factor +* Round factor for quant +* +* @param[out] pu1_nnz_c +* Memory to store the non-zeros after transform +* The first byte will be the nnz of DC block for U plane +* From the next byte the AC nnzs will be stored in raster scan order +* The byte at index 5 (the sixth byte) will be nnz of DC block of V plane +* Then AC blocks will follow +* +* @remarks +* This routine takes no u4_dc_flag parameter: the DC hadamard/quant step is +* always done +* +******************************************************************************* +*/ +void ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t *ps_codec, + UWORD8 *pu1_src, + UWORD8 *pu1_pred, + WORD16 *pi2_out, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_scale_matrix, + const UWORD16 *pu2_threshold_matrix, + UWORD32 u4_qbits, + UWORD32 u4_round_factor, + UWORD8 *pu1_nnz_c) +{ + WORD32 blk_cntr; + WORD32 i4_offsetx, i4_offsety; + UWORD8 *pu1_curr_src, *pu1_curr_pred; + + WORD16 pi2_dc_str[8]; + UWORD8 au1_dcnnz[2]; + + /* Move to the ac addresses: entry 0 of pu1_nnz_c and the first out_strd coefficients of pi2_out are kept for the DC values */ + pu1_nnz_c++; + pi2_out += out_strd; + + for (blk_cntr = 0; blk_cntr < NUM_CHROMA4x4_BLOCKS_IN_MB; blk_cntr++) + { + IND2SUB_CHROMA_MB(blk_cntr, i4_offsetx, i4_offsety); + + pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd; + pu1_curr_pred = pu1_pred + i4_offsetx 
+ i4_offsety * pred_strd; + + /* For chroma, 
v plane nnz is populated from position 5, hence the extra +1 on the index once blk_cntr exceeds 3 */ + ps_codec->pf_resi_trans_quant_chroma_4x4( + pu1_curr_src, pu1_curr_pred, + pi2_out + blk_cntr * out_strd, src_strd, pred_strd, + pu2_scale_matrix, pu2_threshold_matrix, u4_qbits, + u4_round_factor, &pu1_nnz_c[blk_cntr + (blk_cntr > 3)], + &pi2_dc_str[blk_cntr]); + } + + /* Adjust pointers to point to dc values; the DC pass below runs with one extra qbit and a doubled round factor */ + pi2_out -= out_strd; + pu1_nnz_c--; + + u4_qbits++; + u4_round_factor <<= 1; + + ps_codec->pf_hadamard_quant_2x2_uv(pi2_dc_str, pi2_out, pu2_scale_matrix, + pu2_threshold_matrix, u4_qbits, + u4_round_factor, au1_dcnnz); + + /* Copy the dc nnzs: first result to index 0, second to index 5 */ + pu1_nnz_c[0] = au1_dcnnz[0]; + pu1_nnz_c[5] = au1_dcnnz[1]; + +} + +/** +******************************************************************************* +* @brief +* This function performs the inverse transform process for chroma MB of H264 +* +* @par Description: +* Does inverse DC transform, inverse quantization and inverse transform +* +* @param[in] pi2_src +* Input data, 16x16 size +* The input is in the form of, first 4 locations will contain DC coeffs of +* U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane +* in raster scan order will follow, each block as linear array in raster scan order. +* After a stride next AC block will follow. After all AC blocks of U plane +* V plane AC blocks will follow in exact same order. 
+* +* @param[in] pu1_pred +* The predicted data, 8x16 size, U and V interleaved +* +* @param[out] pu1_out +* Output 8x16, U and V interleaved +* +* @param[in] src_strd +* Source stride +* +* @param[in] pred_strd +* input stride for prediction buffer +* +* @param[in] out_strd +* input stride for output buffer +* +* @param[in] pu2_iscale_mat +* Inverse quantization matrix for 4x4 transform +* +* @param[in] pu2_weigh_mat +* weight matrix of 4x4 transform +* +* @param[in] qp_div +* QP/6 +* +* @param[in] pi4_tmp +* Input temporary buffer +* needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of DC coeffs for chroma * number of planes +* in size +* +* @param[in] u4_cntrl +* Controls the transform path +* the 15th bit will correspond to DC block of U plane, 14th will indicate the V plane DC block +* 32-28 bits will indicate AC blocks of U plane in raster scan order +* 27-23 bits will indicate AC blocks of V plane in raster scan order +* The bit 1 implies that there is at least one non zero coeff in a block +* +* @returns +* none +* +* @remarks +******************************************************************************* +*/ +void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t *ps_codec, + WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscale_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 qp_div, + UWORD32 u4_cntrl, + WORD32 *pi4_tmp) +{ + /* Cntrl bits for 4x4 transforms + * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path + * u4_dc_cntrl : controls if a 4x4 block is to be processed in dc path + * : dc block must contain only single dc coefficient + * u4_empty_blk_cntrl : control for 4x4 block with no coeffs, ie no dc and ac + * : ie not (ac or dc) + */ + + UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl; + + /* tmp registers for block ids */ + WORD32 u4_blk_id; + + /* Offsets for pointers */ + WORD32 i4_offset_x, i4_offset_y; + + /* Pointer to 4x4 
blocks */ + UWORD8 *pu1_cur_4x4_prd_blk, *pu1_cur_4x4_out_blk; + + /* Tmp register for pointer to dc coffs */ + WORD16 *pi2_dc_src; + + WORD16 i2_zero = 0; + + /* Increment for dc block */ + WORD32 i4_dc_inc; + + /* + * Lets do the inverse transform for dc coeffs in chroma + */ + if (u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA) + { + UWORD32 cntr, u4_dc_cntrl; + /* Do inv hadamard for u an v block */ + + ps_codec->pf_ihadamard_scaling_2x2_uv(pi2_src, pi2_src, pu2_iscale_mat, + pu2_weigh_mat, qp_div, NULL); + /* + * Update the cntrl flag + * Flag is updated as follows bits 15-11 -> u block dc bits + */ + u4_dc_cntrl = 0; + for (cntr = 0; cntr < 8; cntr++) + { + u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr)); + } + + /* Mark dc bits as 1 if corresponding ac bit is 0 */ + u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl); + /* Combine both ac and dc bits */ + u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA) + | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA); + + /* Since we populated the dc coffs, we have to read them from there */ + pi2_dc_src = pi2_src; + i4_dc_inc = 1; + } + else + { + u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA; + pi2_dc_src = &i2_zero; + i4_dc_inc = 0; + } + + /* Get the block bits */ + u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA); + u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16; + u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFF000000; + + /* The AC blocks starts from 2nd row */ + pi2_src += src_strd; + + DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); + while (u4_blk_id < 8) + { + WORD32 dc_src_offset = u4_blk_id * i4_dc_inc; + + IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); + + pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; + pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; + + ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc( + pi2_dc_src + dc_src_offset, pu1_cur_4x4_prd_blk, + pu1_cur_4x4_out_blk, pred_strd, out_strd, NULL, NULL, 0, + NULL, pi2_dc_src 
+ dc_src_offset); + /* Get next DC block to process */ + DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); + } + + /* now process ac/mixed blocks */ + DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); + while (u4_blk_id < 8) + { + WORD32 i4_src_offset = src_strd * u4_blk_id; + WORD32 dc_src_offset = i4_dc_inc * u4_blk_id; + + IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); + + pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; + pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; + + ps_codec->pf_iquant_itrans_recon_chroma_4x4(pi2_src + i4_src_offset, + pu1_cur_4x4_prd_blk, + pu1_cur_4x4_out_blk, + pred_strd, out_strd, + pu2_iscale_mat, + pu2_weigh_mat, qp_div, + (WORD16 *) pi4_tmp, + pi2_dc_src + dc_src_offset); + + DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); + } + + /* Now process empty blocks */ + DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); + while (u4_blk_id < 8) + { + IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); + + pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; + pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; + + ps_codec->pf_interleave_copy(pu1_cur_4x4_prd_blk, pu1_cur_4x4_out_blk, + pred_strd, out_strd, SIZE_4X4_BLK_VERT, + SIZE_4X4_BLK_HRZ); + + DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); + } +} + +/** +****************************************************************************** +* +* @brief This function packs residue of an i16x16 luma mb for entropy coding +* +* @par Description +* An i16 macro block contains two classes of units, dc 4x4 block and +* 4x4 ac blocks. while packing the mb, the dc block is sent first, and +* the 16 ac blocks are sent next in scan order. Each and every block is +* represented by 3 parameters (nnz, significant coefficient map and the +* residue coefficients itself). If a 4x4 unit does not have any coefficients +* then only nnz is sent. 
Inside a 4x4 block the individual coefficients are +* sent in scan order. +* +* The first byte of each block will be nnz of the block, if it is non zero, +* a 2 byte significance map is sent. This is followed by nonzero coefficients. +* This is repeated for 1 dc + 16 ac blocks. +* +* @param[in] pi2_res_mb +* pointer to residue mb +* +* @param[in, out] pv_mb_coeff_data +* buffer pointing to packed residue coefficients +* +* @param[in] u4_res_strd +* residual block stride +* +* @param[out] u1_cbp_l +* coded block pattern luma +* +* @param[in] pu1_nnz +* number of non zero coefficients in each 4x4 unit +* +* @param[out] +* Control signal for inverse transform of 16x16 blocks +* +* @return none +* +* @ remarks +* +****************************************************************************** +*/ +void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb, + void **pv_mb_coeff_data, + WORD32 i4_res_strd, + UWORD8 *u1_cbp_l, + UWORD8 *pu1_nnz, + UWORD32 *pu4_cntrl) +{ + /* pointer to packed sub block buffer space */ + tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac; + + /* no of non zero coefficients in the current sub block */ + UWORD32 u4_nnz_cnt; + + /* significant coefficient map */ + UWORD32 u4_s_map; + + /* pointer to scanning matrix */ + const UWORD8 *pu1_scan_order; + + /* number of non zeros in sub block */ + UWORD32 u4_nnz; + + /* coeff scan order */ + const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; + + /* temp var */ + UWORD32 coeff_cnt, mask, b4,u4_cntrl=0; + + /*DC and AC coeff pointers*/ + WORD16 *pi2_res_mb_ac,*pi2_res_mb_dc; + + /********************************************************/ + /* pack dc coeff data for entropy coding */ + /********************************************************/ + + pi2_res_mb_dc = pi2_res_mb; + pu1_scan_order = gu1_luma_scan_order_dc; + + u4_nnz = *pu1_nnz; + u4_cntrl = 0; + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; 
+ + if (u4_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) + { + if (pi2_res_mb_dc[pu1_scan_order[coeff_cnt]]) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_dc[pu1_scan_order[coeff_cnt]]; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + + u4_cntrl = 0x00008000;// Set DC bit in ctrl code + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + /********************************************************/ + /* pack ac coeff data for entropy coding */ + /********************************************************/ + + pu1_nnz ++; + pu1_scan_order = gu1_luma_scan_order; + pi2_res_mb += i4_res_strd; /*Move to AC block*/ + + ps_mb_coeff_data_ac = (*pv_mb_coeff_data); + + for (b4 = 0; b4 < 16; b4++) + { + ps_mb_coeff_data = (*pv_mb_coeff_data); + + u4_nnz = pu1_nnz[u1_scan_order[b4]]; + + /* Jump according to the scan order */ + pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]); + + /* + * Since this is a i16x16 block, we should not count dc coeff on indi + * vidual 4x4 blocks to nnz. But due to the implementation of 16x16 + * trans function, we add dc's nnz to u4_nnz too. 
Hence we adjust that + * here + */ + u4_nnz -= (pi2_res_mb_ac[0] != 0); + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if (u4_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) + { + if (pi2_res_mb_ac[pu1_scan_order[coeff_cnt]]) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_ac[pu1_scan_order[coeff_cnt]]; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + *u1_cbp_l = 15; + + u4_cntrl |= (1 << (31 - u1_scan_order[b4])); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + } + + if (!(*u1_cbp_l)) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_ac; + } + + /* Store the cntrl signal */ + (*pu4_cntrl) = u4_cntrl; + return; +} + +/** +****************************************************************************** +* +* @brief This function packs residue of an p16x16 luma mb for entropy coding +* +* @par Description +* A p16x16 macro block contains two classes of units 16 4x4 ac blocks. +* while packing the mb, the dc block is sent first, and +* the 16 ac blocks are sent next in scan order. Each and every block is +* represented by 3 parameters (nnz, significant coefficient map and the +* residue coefficients itself). If a 4x4 unit does not have any coefficients +* then only nnz is sent. Inside a 4x4 block the individual coefficients are +* sent in scan order. +* +* The first byte of each block will be nnz of the block, if it is non zero, +* a 2 byte significance map is sent. This is followed by nonzero coefficients. +* This is repeated for 1 dc + 16 ac blocks. 
+* +* @param[in] pi2_res_mb +* pointer to residue mb +* +* @param[in, out] pv_mb_coeff_data +* buffer pointing to packed residue coefficients +* +* @param[in] i4_res_strd +* residual block stride +* +* @param[out] u1_cbp_l +* coded block pattern luma +* +* @param[in] pu1_nnz +* number of non zero coefficients in each 4x4 unit +* +* @param[out] pu4_cntrl +* Control signal for inverse transform +* +* @return none +* +* @remarks Killing coffs not yet coded +* +****************************************************************************** +*/ +void ih264e_pack_l_mb(WORD16 *pi2_res_mb, + void **pv_mb_coeff_data, + WORD32 i4_res_strd, + UWORD8 *u1_cbp_l, + UWORD8 *pu1_nnz, + UWORD32 u4_thres_resi, + UWORD32 *pu4_cntrl) +{ + /* pointer to packed sub block buffer space */ + tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb; + + /* no of non zero coefficients in the current sub block */ + UWORD32 u4_nnz_cnt; + + /* significant coefficient map */ + UWORD32 u4_s_map; + + /* pointer to scanning matrix */ + const UWORD8 *pu1_scan_order = gu1_luma_scan_order; + + /* number of non zeros in sub block */ + UWORD32 u4_nnz; + + /* pointer to residual sub block */ + WORD16 *pi2_res_sb; + + /* coeff scan order */ + const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; + + /* coeff cost */ + const UWORD8 *pu1_coeff_cost = gu1_coeff_cost; + + /* temp var */ + UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8; + + /* temp var */ + WORD32 i4_res_val, i4_run = -1, dcac_block; + + /* When Hadamard transform is disabled, first row values are dont care, ignore them */ + pi2_res_mb += i4_res_strd; + + /* When Hadamard transform is disabled, first unit value is dont care, ignore this */ + pu1_nnz ++; + + ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data); + + /********************************************************/ + /* pack coeff data for entropy coding */ + 
/********************************************************/ + + for (b4 = 0; b4 < 16; b4++) + { + ps_mb_coeff_data = (*pv_mb_coeff_data); + + b8 = b4 >> 2; + + u4_nnz = pu1_nnz[u1_scan_order[b4]]; + + /* Jump according to the scan order */ + pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]); + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if (u4_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) + { + /* number of runs of zero before, this is used to compute coeff cost */ + i4_run++; + + i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; + + if (i4_res_val) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val; + u4_s_map |= mask; + + if (u4_thres_resi) + { + /* compute coeff cost */ + if (i4_res_val == 1 || i4_res_val == -1) + { + if (i4_run < 6) + u4_b8_coeff_cost += pu1_coeff_cost[i4_run]; + } + else + u4_b8_coeff_cost += 9; + + i4_run = -1; + } + } + + mask <<= 1; + } + + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + + /* cbp */ + *u1_cbp_l |= (1 << b8); + + /* Cntrl map for inverse transform computation + * + * If coeff_cnt is zero, it means that only nonzero was a dc coeff + * Hence we have to set the 16 - u1_scan_order[b4]) position instead + * of 31 - u1_scan_order[b4] + */ + dcac_block = (coeff_cnt == 0)?16:31; + u4_cntrl |= (1 << (dcac_block - u1_scan_order[b4])); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + /* Decide if the 8x8 unit has to be sent for entropy coding? 
*/ + if ((b4+1) % 4 == 0) + { + if ( u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) && + (*u1_cbp_l & (1 << b8)) ) + { + + + /* + * When we want to reset the full 8x8 block, we have to reset + * both the dc and ac coeff bits hence we have the symmetric + * arrangement of bits + */ + const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033}; + + /* restore cbp */ + *u1_cbp_l = (*u1_cbp_l & (~(1 << b8))); + + /* correct cntrl flag */ + u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]); + + /* correct nnz */ + pu1_nnz[u1_scan_order[b4 - 3]] = 0; + pu1_nnz[u1_scan_order[b4 - 2]] = 0; + pu1_nnz[u1_scan_order[b4 - 1]] = 0; + pu1_nnz[u1_scan_order[b4]] = 0; + + /* reset blk cost */ + u4_b8_coeff_cost = 0; + } + + if (!(*u1_cbp_l & (1 << b8))) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_b8; + } + + u4_mb_coeff_cost += u4_b8_coeff_cost; + + u4_b8_coeff_cost = 0; + i4_run = -1; + ps_mb_coeff_data_b8 = (*pv_mb_coeff_data); + } + } + + if (u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD) + && (*u1_cbp_l)) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_mb; + *u1_cbp_l = 0; + u4_cntrl = 0; + memset(pu1_nnz, 0, 16); + } + + (*pu4_cntrl) = u4_cntrl; + + return; +} + +/** +****************************************************************************** +* +* @brief This function packs residue of an i8x8 chroma mb for entropy coding +* +* @par Description +* An i8 chroma macro block contains two classes of units, dc 2x2 block and +* 4x4 ac blocks. while packing the mb, the dc block is sent first, and +* the 4 ac blocks are sent next in scan order. Each and every block is +* represented by 3 parameters (nnz, significant coefficient map and the +* residue coefficients itself). If a 4x4 unit does not have any coefficients +* then only nnz is sent. Inside a 4x4 block the individual coefficients are +* sent in scan order. 
+* +* The first byte of each block will be nnz of the block, if it is non zero, +* a 2 byte significance map is sent. This is followed by nonzero coefficients. +* This is repeated for 1 dc + 4 ac blocks. +* +* @param[in] pi2_res_mb +* pointer to residue mb +* +* @param[in, out] pv_mb_coeff_data +* buffer pointing to packed residue coefficients +* +* @param[in] u4_res_strd +* residual block stride +* +* @param[out] u1_cbp_c +* coded block pattern chroma +* +* @param[in] pu1_nnz +* number of non zero coefficients in each 4x4 unit +* +* @param[out] pu1_nnz +* Control signal for inverse transform +* +* @param[in] u4_swap_uv +* Swaps the order of U and V planes in entropy bitstream +* +* @return none +* +* @ remarks +* +****************************************************************************** +*/ +void ih264e_pack_c_mb(WORD16 *pi2_res_mb, + void **pv_mb_coeff_data, + WORD32 i4_res_strd, + UWORD8 *u1_cbp_c, + UWORD8 *pu1_nnz, + UWORD32 u4_thres_resi, + UWORD32 *pu4_cntrl, + UWORD32 u4_swap_uv) +{ + /* pointer to packed sub block buffer space */ + tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data); + tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac; + + /* nnz pointer */ + UWORD8 *pu1_nnz_ac, *pu1_nnz_dc; + + /* nnz counter */ + UWORD32 u4_nnz_cnt; + + /* significant coefficient map */ + UWORD32 u4_s_map; + + /* pointer to scanning matrix */ + const UWORD8 *pu1_scan_order; + + /* no of non zero coefficients in the current sub block */ + UWORD32 u4_nnz; + + /* pointer to residual sub block, res val */ + WORD16 *pi2_res_sb, i2_res_val; + + /* temp var */ + UWORD32 coeff_cnt, mask, b4,plane; + + /* temp var */ + UWORD32 u4_coeff_cost; + WORD32 i4_run; + + /* coeff cost */ + const UWORD8 *pu1_coeff_cost = gu1_coeff_cost; + + /* pointer to packed buffer space */ + UWORD32 *pu4_mb_coeff_data = NULL; + + /* ac coded block pattern */ + UWORD8 u1_cbp_ac; + + /* Variable to store the current bit pos in cntrl variable*/ + UWORD32 cntrl_pos = 0; + + 
/********************************************************/ + /* pack dc coeff data for entropy coding */ + /********************************************************/ + pu1_scan_order = gu1_chroma_scan_order_dc; + pi2_res_sb = pi2_res_mb; + pu1_nnz_dc = pu1_nnz; + (*pu4_cntrl) = 0; + cntrl_pos = 15; + ps_mb_coeff_data_dc = (*pv_mb_coeff_data); + + /* Color space conversion between SP_UV and SP_VU + * We always assume SP_UV for all the processing + * Hence to get proper stream output we need to swap U and V channels here + * + * For that there are two paths we need to look for + * One is the path to bitstream , these variables should have the proper input + * configured UV or VU + * For the other path the inverse transform variables should have ehat ever 0ordering the + * input had + */ + + if (u4_swap_uv) + { + pu1_nnz_dc += 5;/* Move to NNZ of V planve */ + pi2_res_sb += 4;/* Move to DC coff of V plane */ + + cntrl_pos = 14; /* Control bit for V plane */ + } + + for (plane = 0; plane < 2; plane++) + { + ps_mb_coeff_data = (*pv_mb_coeff_data); + + u4_nnz = *pu1_nnz_dc; + /* write number of non zero coefficients U/V */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if (u4_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) + { + i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; + if (i2_res_val) + { + /* write residue U/V */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map U/V */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + *u1_cbp_c = 1; + + (*pu4_cntrl) |= (1 << cntrl_pos); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + if (u4_swap_uv) + { + cntrl_pos++; /* Control bit for U plane */ + pu1_nnz_dc -= 5; /* Move to NNZ of U plane */ + pi2_res_sb -= 4; /* Move to DC coff of U plane */ + + } + else + { + cntrl_pos--; 
/* Control bit for U plane */ + pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */ + pi2_res_sb += 4; /* Move to DC coff of V plane */ + } + } + + /********************************************************/ + /* pack ac coeff data for entropy coding */ + /********************************************************/ + + pu1_scan_order = gu1_chroma_scan_order; + ps_mb_coeff_data_ac = (*pv_mb_coeff_data); + + if (u4_swap_uv) + { + pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane ,ie 1dc row+ 4 ac row */ + cntrl_pos = 27; /* The control bits are to be added for V bloc ie 31-4 th bit */ + pu1_nnz_ac = pu1_nnz + 6;/*Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */ + } + else + { + pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane ,ie 1dc row */ + cntrl_pos = 31; + pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc */ + } + + for (plane = 0; plane < 2; plane++) + { + pu4_mb_coeff_data = (*pv_mb_coeff_data); + + u4_coeff_cost = 0; + i4_run = -1; + + /* get the current cbp, so that it automatically + * gets reverted in case of zero ac values */ + u1_cbp_ac = *u1_cbp_c; + + for (b4 = 0; b4 < 4; b4++) + { + ps_mb_coeff_data = (*pv_mb_coeff_data); + + u4_nnz = *pu1_nnz_ac; + + /* + * We are scanning only ac coeffs, but the nnz is for the + * complete 4x4 block. 
Hence we have to discount the nnz contributed + * by the dc coefficient + */ + u4_nnz -= (pi2_res_sb[0]!=0); + + /* write number of non zero coefficients U/V */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if (u4_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) + { + i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; + + i4_run++; + + if (i2_res_val) + { + /* write residue U/V */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val; + u4_s_map |= mask; + + if ( u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD) ) + { + /* compute coeff cost */ + if (i2_res_val == 1 || i2_res_val == -1) + { + if (i4_run < 6) + u4_coeff_cost += pu1_coeff_cost[i4_run]; + } + else + u4_coeff_cost += 9; + + i4_run = -1; + } + } + mask <<= 1; + } + + /* write significant coeff map U/V */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + u1_cbp_ac = 2; + + (*pu4_cntrl) |= 1 << cntrl_pos; + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + pu1_nnz_ac++; + pi2_res_sb += i4_res_strd; + cntrl_pos--; + } + + /* reset block */ + if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD)) + { + pu4_mb_coeff_data[0] = 0; + pu4_mb_coeff_data[1] = 0; + pu4_mb_coeff_data[2] = 0; + pu4_mb_coeff_data[3] = 0; + (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4; + + /* Generate the control signal */ + /* Zero out the current plane's AC coefficients */ + (*pu4_cntrl) &= ((plane == u4_swap_uv) ? 
0x0FFFFFFF : 0xF0FFFFFF); + + /* Similarly do for the NNZ also */ + *(pu1_nnz_ac - 4) = 0; + *(pu1_nnz_ac - 3) = 0; + *(pu1_nnz_ac - 2) = 0; + *(pu1_nnz_ac - 1) = 0; + } + else + { + *u1_cbp_c = u1_cbp_ac; + } + + if (u4_swap_uv) + { + pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to V plane ,ie 1dc row+ 4 ac row + 1 dc row */ + cntrl_pos = 31; /* The control bits are to be added for V bloc ie 31-4 th bit */ + pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */ + + pu1_nnz_ac = pu1_nnz + 1; + } + else + pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */ + } + + /* restore the ptr basing on cbp */ + if (*u1_cbp_c == 0) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_dc; + } + else if (*u1_cbp_c == 1) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_ac; + } + + return ; +} + +/** +******************************************************************************* +* +* @brief performs luma core coding when intra mode is i16x16 +* +* @par Description: +* If the current mb is to be coded as intra of mb type i16x16, the mb is first +* predicted using one of i16x16 prediction filters, basing on the intra mode +* chosen. Then, error is computed between the input blk and the estimated blk. +* This error is transformed (hierarchical transform i.e., dct followed by hada- +* -mard), quantized. The quantized coefficients are packed in scan order for +* entropy coding. 
+* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks none +* +******************************************************************************* +*/ + +UWORD8 ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t *ps_proc) +{ + /* Codec Context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* pointer to ref macro block */ + UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma; + + /* pointer to src macro block */ + UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; + + /* pointer to prediction macro block */ + UWORD8 *pu1_pred_mb = NULL; + + /* pointer to residual macro block */ + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; + + /* strides */ + WORD32 i4_src_strd = ps_proc->i4_src_strd; + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + WORD32 i4_pred_strd = ps_proc->i4_pred_strd; + WORD32 i4_res_strd = ps_proc->i4_res_strd; + + /* intra mode */ + UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode; + + /* coded block pattern */ + UWORD8 u1_cbp_l = 0; + + /* number of non zero coeffs*/ + UWORD32 au4_nnz[5]; + UWORD8 *pu1_nnz = (UWORD8 *)au4_nnz; + + /*Cntrol signal for itrans*/ + UWORD32 u4_cntrl; + + /* quantization parameters */ + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + + /* pointer to packed mb coeff data */ + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + + /* init nnz */ + au4_nnz[0] = 0; + au4_nnz[1] = 0; + au4_nnz[2] = 0; + au4_nnz[3] = 0; + au4_nnz[4] = 0; + + if (u1_intra_mode == PLANE_I16x16) + { + pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane; + } + else + { + pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16; + } + + /********************************************************/ + /* error estimation, */ + /* transform */ + /* quantization */ + /********************************************************/ + ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb, + pu1_pred_mb, pi2_res_mb, + i4_src_strd, i4_pred_strd, + i4_res_strd, + 
ps_qp_params->pu2_scale_mat, + ps_qp_params->pu2_thres_mat, + ps_qp_params->u1_qbits, + ps_qp_params->u4_dead_zone, + pu1_nnz, ENABLE_DC_TRANSFORM); + + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + ih264e_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l, + pu1_nnz, &u4_cntrl); + + /********************************************************/ + /* ierror estimation, */ + /* itransform */ + /* iquantization */ + /********************************************************/ + /* + *if refernce frame is not to be computed + *we only need the right and bottom border 4x4 blocks to predict next intra + *blocks, hence only compute them + */ + if (!ps_proc->u4_compute_recon) + { + u4_cntrl &= 0x111F8000; + } + + if (u4_cntrl) + { + ih264e_luma_16x16_idctrans_iquant_itrans_recon( + ps_codec, pi2_res_mb, pu1_pred_mb, pu1_ref_mb, + i4_res_strd, i4_pred_strd, i4_rec_strd, + ps_qp_params->pu2_iscale_mat, + ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div, + u4_cntrl, ENABLE_DC_TRANSFORM, + ps_proc->pv_scratch_buff); + } + else + { + ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, i4_pred_strd, + i4_rec_strd, MB_SIZE, MB_SIZE, NULL, + 0); + } + + return (u1_cbp_l); +} + + +/** +******************************************************************************* +* +* @brief performs luma core coding when intra mode is i4x4 +* +* @par Description: +* If the current mb is to be coded as intra of mb type i4x4, the mb is first +* predicted using one of i4x4 prediction filters, basing on the intra mode +* chosen. Then, error is computed between the input blk and the estimated blk. +* This error is dct transformed and quantized. The quantized coefficients are +* packed in scan order for entropy coding. 
+* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks +* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order +* mentioned in h.264 specification +* +******************************************************************************* +*/ +UWORD8 ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t *ps_proc) +{ + /* Codec Context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* pointer to ref macro block */ + UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma; + + /* pointer to src macro block */ + UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; + + /* pointer to prediction macro block */ + UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; + + /* pointer to residual macro block */ + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; + + /* strides */ + WORD32 i4_src_strd = ps_proc->i4_src_strd; + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + WORD32 i4_pred_strd = ps_proc->i4_pred_strd; + + /* pointer to neighbors: left, top, top-left */ + UWORD8 *pu1_mb_a; + UWORD8 *pu1_mb_b; + UWORD8 *pu1_mb_c; + UWORD8 *pu1_mb_d; + + /* intra mode */ + UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode; + + /* neighbor availability */ + WORD32 i4_ngbr_avbl; + + /* neighbor pels for intra prediction */ + UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; + + /* coded block pattern */ + UWORD8 u1_cbp_l = 0; + + /* number of non zero coeffs*/ + UWORD8 u1_nnz; + + /* quantization parameters */ + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + + /* pointer to packed mb coeff data */ + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + + /* pointer to packed mb coeff data */ + tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8; + + /* no of non zero coefficients in the current sub block */ + UWORD32 u4_nnz_cnt; + + /* significant coefficient map */ + UWORD32 u4_s_map; + + /* pointer to scanning matrix */ + const UWORD8 *pu1_scan_order = gu1_luma_scan_order; + + /*Dummy 
variable for 4x4 trans fucntion*/ + WORD16 i2_dc_dummy; + + /* temp var */ + UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask; + + /* Process 16 4x4 lum sub-blocks of the MB in scan order */ + for (b8 = 0; b8 < 4; b8++) + { + u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3; + u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3; + + /* if in case cbp for the 8x8 block is zero, send no residue */ + ps_mb_coeff_data_b8 = *pv_mb_coeff_data; + + for (b4 = 0; b4 < 4; b4++) + { + /* index of pel in MB */ + u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2); + u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2); + + /* Initialize source and reference pointers */ + pu1_curr_mb = ps_proc->pu1_src_buf_luma + u1_pix_x + (u1_pix_y * i4_src_strd); + pu1_ref_mb = ps_proc->pu1_rec_buf_luma + u1_pix_x + (u1_pix_y * i4_rec_strd); + + /* pointer to left of ref macro block */ + pu1_mb_a = pu1_ref_mb - 1; + /* pointer to top of ref macro block */ + pu1_mb_b = pu1_ref_mb - i4_rec_strd; + /* pointer to topright of ref macro block */ + pu1_mb_c = pu1_mb_b + 4; + /* pointer to topleft macro block */ + pu1_mb_d = pu1_mb_b - 1; + + /* compute neighbor availability */ + i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; + + /* sub block intra mode */ + u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4]; + + /********************************************************/ + /* gather prediction pels from neighbors for prediction */ + /********************************************************/ + /* left pels */ + if (i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK) + { + for (i = 0; i < 4; i++) + pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_rec_strd]; + } + else + { + memset(pu1_ngbr_pels_i4, 0, 4); + } + + /* top pels */ + if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK) + { + memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); + } + else + { + memset(pu1_ngbr_pels_i4 + 5, 0, 4); + } + /* top left pels */ + if (i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK) + { + 
pu1_ngbr_pels_i4[4] = *pu1_mb_d; + } + else + { + pu1_ngbr_pels_i4[4] = 0; + } + /* top right pels */ + if (i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK) + { + memcpy(pu1_ngbr_pels_i4+8+1,pu1_mb_c,4); + } + else if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK) + { + memset(pu1_ngbr_pels_i4+8+1,pu1_ngbr_pels_i4[8],4); + } + + /********************************************************/ + /* prediction */ + /********************************************************/ + (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4, + pu1_pred_mb, 0, + i4_pred_strd, + i4_ngbr_avbl); + + /********************************************************/ + /* error estimation, */ + /* transform */ + /* quantization */ + /********************************************************/ + ps_codec->pf_resi_trans_quant_4x4(pu1_curr_mb, pu1_pred_mb, + pi2_res_mb, i4_src_strd, + i4_pred_strd, + ps_qp_params->pu2_scale_mat, + ps_qp_params->pu2_thres_mat, + ps_qp_params->u1_qbits, + ps_qp_params->u4_dead_zone, + &u1_nnz, &i2_dc_dummy); + + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + ps_mb_coeff_data = *pv_mb_coeff_data; + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz; + + if (u1_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; coeff_cnt++) + { + if (pi2_res_mb[pu1_scan_order[coeff_cnt]]) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]]; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + + /* update ptr to coeff data */ + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + + /* cbp */ + u1_cbp_l |= (1 << b8); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + 
/********************************************************/ + /* ierror estimation, */ + /* itransform */ + /* iquantization */ + /********************************************************/ + /* If the frame is not to be used for P frame reference or dumping recon + * we only will use the recon for only predicting intra Mbs + * This will need only right and bottom edge 4x4 blocks recon + * Hence we selectively enable them + */ + if (ps_proc->u4_compute_recon || (0xF888 & (1 << ((b8 << 2) + b4)))) + { + if (u1_nnz) + ps_codec->pf_iquant_itrans_recon_4x4( + pi2_res_mb, pu1_pred_mb, pu1_ref_mb, + /*No input stride,*/i4_pred_strd, + i4_rec_strd, ps_qp_params->pu2_iscale_mat, + ps_qp_params->pu2_weigh_mat, + ps_qp_params->u1_qp_div, + ps_proc->pv_scratch_buff, 0, 0); + else + ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, + i4_pred_strd, i4_rec_strd, + BLK_SIZE, BLK_SIZE, NULL, + 0); + } + + } + + /* if the 8x8 block has no residue, nothing needs to be sent to entropy */ + if (!(u1_cbp_l & (1 << b8))) + { + *pv_mb_coeff_data = ps_mb_coeff_data_b8; + } + } + + return (u1_cbp_l); +} + +/** +******************************************************************************* +* +* @brief performs luma core coding when intra mode is i4x4 +* +* @par Description: +* If the current mb is to be coded as intra of mb type i4x4, the mb is first +* predicted using one of i4x4 prediction filters, basing on the intra mode +* chosen. Then, error is computed between the input blk and the estimated blk. +* This error is dct transformed and quantized. The quantized coefficients are +* packed in scan order for entropy coding. 
+*
+* @param[in] ps_proc_ctxt
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks
+* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
+* mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t *ps_proc)
+{
+    /* Codec context; supplies the block copy kernel used for the recon copy at the end */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* intra 4x4 recon block held in the process context; copied out at the end
+     * as a 16x16 block with a source stride of MB_SIZE */
+    UWORD8 *pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4;
+
+    /* pointer to recon buffer (destination of the final copy) */
+    UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
+
+    /* residue of the 16 sub-blocks; advanced by MB_SIZE coefficients per sub-block */
+    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
+
+    /* stride of the recon buffer */
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* number of non zero coeffs, read one byte per 4x4 sub-block */
+    UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
+
+    /* coded block pattern, one bit per 8x8 quadrant */
+    UWORD8 u1_cbp_l = 0;
+
+    /* pointer to the write position of the packed mb coeff data */
+    void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+    /* packed coeff data of the current sub-block, and the write position saved
+     * at the start of the current 8x8 quadrant */
+    tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
+
+    /* no of non zero coefficients in the current sub block */
+    UWORD32 u4_nnz_cnt;
+
+    /* significant coefficient map */
+    UWORD32 u4_s_map;
+
+    /* pointer to scanning matrix */
+    const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
+
+    /* temp var */
+    UWORD32 b8, b4, coeff_cnt, mask;
+
+    /* Process 16 4x4 luma sub-blocks of the MB in scan order */
+    for (b8 = 0; b8 < 4; b8++)
+    {
+        /* remember where this 8x8 quadrant starts, so that its data can be
+         * dropped again if in case cbp for the 8x8 block is zero */
+        ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
+
+        for (b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
+        {
+            /********************************************************/
+            /*  pack coeff data for entropy coding                  */
+            /********************************************************/
+            ps_mb_coeff_data = *pv_mb_coeff_data;
+
+            /* write number of non zero coefficients (low bits of i4_sig_map_nnz) */
+            ps_mb_coeff_data->i4_sig_map_nnz = *pu1_nnz;
+
+            if (*pu1_nnz)
+            {
+                for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < *pu1_nnz; coeff_cnt++)
+                {
+                    if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
+                    {
+                        /* write residue and flag its scan position in the map */
+                        ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
+                        u4_s_map |= mask;
+                    }
+                    mask <<= 1;
+                }
+                /* write significant coeff map into the upper 16 bits */
+                ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+
+                /* update ptr to coeff data: next entry starts after the last residue */
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+
+                /* cbp: flag the 8x8 quadrant that holds this sub-block */
+                u1_cbp_l |= (1 << b8);
+            }
+            else
+            {
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+            }
+        }
+
+        /* if the 8x8 block has no residue, nothing needs to be sent to entropy:
+         * rewind the write position to the start of the quadrant */
+        if (!(u1_cbp_l & (1 << b8)))
+        {
+            *pv_mb_coeff_data = ps_mb_coeff_data_b8;
+        }
+    }
+
+    /* copy the stored 16x16 intra 4x4 recon into the recon buffer */
+    ps_codec->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4, pu1_rec_mb, MB_SIZE, i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0);
+
+    return (u1_cbp_l);
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief performs chroma core coding for intra macro blocks
+*
+* @par Description:
+* If the current MB is to be intra coded with mb type chroma I8x8, the MB is
+* first predicted using intra 8x8 prediction filters. The predicted data is
+* compared with the input for error and the error is transformed. The DC
+* coefficients of each transformed sub blocks are further transformed using
+* Hadamard transform. The resulting coefficients are quantized, packed and sent
+* for entropy coding.
+*
+* @param[in] ps_proc_ctxt
+* pointer to the current macro block context
+*
+* @returns u1_cbp_c
+* coded block pattern chroma
+*
+* @remarks
+* The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
+* mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* pointer to the chroma recon buffer (output of the inverse transform) */
+    UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma;
+
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
+
+    /* pointer to prediction macro block; chosen below from one of two buffers
+     * of the process context according to the chroma intra mode */
+    UWORD8 *pu1_pred_mb = NULL;
+
+    /* pointer to residual macro block */
+    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_res_strd = ps_proc->i4_res_strd;
+
+    /* chroma intra mode */
+    UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode;
+
+    /* coded block pattern */
+    UWORD8 u1_cbp_c = 0;
+
+    /* number of non zero coeffs; written by the forward transform, read by the packer.
+     * NOTE(review): the inter chroma path sizes this array as 10 - confirm the size needed */
+    UWORD8 au1_nnz[18] = {0};
+
+    /* quantization parameters (index 1 of ps_qp_params, the luma paths use index 0) */
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
+
+    /* Control signal for inverse transform; written by ih264e_pack_c_mb below */
+    UWORD32 u4_cntrl;
+
+    /* pointer to the write position of the packed mb coeff data */
+    void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+    /* See if we need to swap U and V planes for entropy (input is IV_YUV_420SP_VU) */
+    UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
+
+    if (PLANE_CH_I8x8 == u1_intra_mode)
+    {
+        pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane;
+    }
+    else
+    {
+        pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
+    }
+
+    /********************************************************/
+    /*  error estimation,                                   */
+    /*  transform                                           */
+    /*  quantization                                        */
+    /********************************************************/
+    ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
+                                               pu1_pred_mb, pi2_res_mb,
+                                               i4_src_strd, i4_pred_strd,
+                                               i4_res_strd,
+                                               ps_qp_params->pu2_scale_mat,
+                                               ps_qp_params->pu2_thres_mat,
+                                               ps_qp_params->u1_qbits,
+                                               ps_qp_params->u4_dead_zone,
+                                               au1_nnz);
+
+    /********************************************************/
+    /*  pack coeff data for entropy coding                  */
+    /*  (also produces u1_cbp_c and u4_cntrl)               */
+    /********************************************************/
+    ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
+                     au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
+
+    /********************************************************/
+    /*  ierror estimation,                                  */
+    /*  itransform                                          */
+    /*  iquantization
+     *  NOTE(review): unlike the inter chroma path, u4_cntrl is neither masked
+     *  by u4_compute_recon nor checked for zero before this call - confirm the
+     *  callee copes with an all-zero control word                          */
+    /********************************************************/
+    ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res_mb,
+                                                   pu1_pred_mb, pu1_ref_mb,
+                                                   i4_res_strd, i4_pred_strd,
+                                                   i4_rec_strd,
+                                                   ps_qp_params->pu2_iscale_mat,
+                                                   ps_qp_params->pu2_weigh_mat,
+                                                   ps_qp_params->u1_qp_div,
+                                                   u4_cntrl,
+                                                   ps_proc->pv_scratch_buff);
+    return (u1_cbp_c);
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when mode is inter
+*
+* @par Description:
+* If the current mb is to be coded as inter the mb is predicted based on the
+* sub mb partitions and corresponding motion vectors generated by ME. Then,
+* error is computed between the input blk and the estimated blk. This error is
+* transformed, quantized.
The quantized coefficients are packed in scan order
+* for entropy coding
+*
+* @param[in] ps_proc_ctxt
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+UWORD8 ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* pointer to the luma recon buffer (output of this function) */
+    UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
+
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+
+    /* pointer to prediction macro block */
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+    /* pointer to residual macro block */
+    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_res_strd = ps_proc->i4_res_strd;
+
+    /* coded block pattern */
+    UWORD8 u1_cbp_l = 0;
+
+    /* Control signal of itrans; set by ih264e_pack_l_mb, or to 0 when the
+     * transform is skipped */
+    UWORD32 u4_cntrl;
+
+    /* number of non zero coeffs, a byte view of ps_proc->au4_nnz */
+    UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz;
+
+    /* quantization parameters */
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+    /* pointer to the write position of the packed mb coeff data */
+    void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+    /* pseudo pred buffer; passed by address to motion compensation, which may
+     * presumably redirect it - confirm in ih264e_motion_comp_luma */
+    UWORD8 *pu1_pseudo_pred = pu1_pred_mb;
+
+    /* pseudo pred buffer stride; passed by address together with the buffer */
+    WORD32 i4_pseudo_pred_strd = i4_pred_strd;
+
+    /* init nnz: clears 5 words, i.e. 20 bytes of the byte view above */
+    ps_proc->au4_nnz[0] = 0;
+    ps_proc->au4_nnz[1] = 0;
+    ps_proc->au4_nnz[2] = 0;
+    ps_proc->au4_nnz[3] = 0;
+    ps_proc->au4_nnz[4] = 0;
+
+    /********************************************************/
+    /*  prediction                                          */
+    /********************************************************/
+    ih264e_motion_comp_luma(ps_proc, &pu1_pseudo_pred, &i4_pseudo_pred_strd);
+
+    /********************************************************/
+    /*  error estimation,                                   */
+    /*  transform                                           */
+    /*  quantization
+     *  skipped only when u4_min_sad_reached is set and u4_min_sad is 0; the
+     *  mb is then coded without residue (cbp 0, control word 0)            */
+    /********************************************************/
+    if (ps_proc->u4_min_sad_reached == 0 || ps_proc->u4_min_sad != 0)
+    {
+        ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
+                                                   pu1_pseudo_pred, pi2_res_mb,
+                                                   i4_src_strd,
+                                                   i4_pseudo_pred_strd,
+                                                   i4_res_strd,
+                                                   ps_qp_params->pu2_scale_mat,
+                                                   ps_qp_params->pu2_thres_mat,
+                                                   ps_qp_params->u1_qbits,
+                                                   ps_qp_params->u4_dead_zone,
+                                                   pu1_nnz,
+                                                   DISABLE_DC_TRANSFORM);
+
+        /********************************************************/
+        /*  pack coeff data for entropy coding                  */
+        /*  (also produces u1_cbp_l and u4_cntrl)               */
+        /********************************************************/
+        ih264e_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
+                         pu1_nnz, ps_codec->u4_thres_resi, &u4_cntrl);
+    }
+    else
+    {
+        u1_cbp_l = 0;
+        u4_cntrl = 0;
+    }
+
+    /********************************************************/
+    /*  ierror estimation,                                  */
+    /*  itransform                                          */
+    /*  iquantization                                       */
+    /********************************************************/
+
+    /* If the frame is not to be used for P frame reference or dumping recon
+     * we only will use the recon for only predicting intra Mbs
+     * This will need only right and bottom edge 4x4 blocks recon
+     * Hence we selectively enable them using control signal(including DC)
+     * 0x111F0000 keeps bits 28, 24, 20 and 19-16; with block 0 in the most
+     * significant bit these are raster blocks 3, 7, 11 and 12-15.
+     * NOTE(review): the mask clears bits 15-0 (CNTRL_FLAG_DC_MASK_LUMA) although
+     * the text above says DC is included - confirm.
+     * NOTE(review): this tests "!= 1" while the chroma inter path tests
+     * "!u4_compute_recon"; the two differ for values other than 0 and 1.
+     */
+    if (ps_proc->u4_compute_recon != 1)
+    {
+        u4_cntrl &= 0x111F0000;
+    }
+
+    if (u4_cntrl)
+    {
+        ih264e_luma_16x16_idctrans_iquant_itrans_recon(
+                        ps_codec, pi2_res_mb, pu1_pseudo_pred, pu1_rec_mb,
+                        i4_res_strd, i4_pseudo_pred_strd, i4_rec_strd,
+                        ps_qp_params->pu2_iscale_mat,
+                        ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
+                        u4_cntrl /*Cntrl*/, DISABLE_DC_TRANSFORM,
+                        ps_proc->pv_scratch_buff);
+    }
+    else
+    {
+        ps_codec->pf_inter_pred_luma_copy(pu1_pseudo_pred, pu1_rec_mb,
+                                          i4_pseudo_pred_strd, i4_rec_strd,
+                                          MB_SIZE, MB_SIZE, NULL, 0);
+    }
+
+
+    return (u1_cbp_l);
+}
+
+/**
+*******************************************************************************
+*
+* @brief performs chroma core coding
for inter macro blocks
+*
+* @par Description:
+* If the current mb is to be coded as inter predicted mb,based on the sub mb partitions
+* and corresponding motion vectors generated by ME ,prediction is done.
+* Then, error is computed between the input blk and the estimated blk.
+* This error is transformed , quantized. The quantized coefficients
+* are packed in scan order for
+* entropy coding.
+*
+* @param[in] ps_proc_ctxt
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern chroma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* pointer to the chroma recon buffer (output of this function) */
+    UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_chroma;
+
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
+
+    /* pointer to prediction macro block; presumably filled by
+     * ih264e_motion_comp_chroma below - confirm in its definition */
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+    /* pointer to residual macro block */
+    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_res_strd = ps_proc->i4_res_strd;
+
+    /* coded block pattern */
+    UWORD8 u1_cbp_c = 0;
+
+    /* Control signal for inverse transform; written by ih264e_pack_c_mb below */
+    UWORD32 u4_cntrl;
+
+    /* number of non zero coeffs; written by the forward transform, read by the packer */
+    UWORD8 au1_nnz[10] = {0};
+
+    /* quantization parameters (index 1 of ps_qp_params, the luma paths use index 0) */
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
+
+    /* pointer to the write position of the packed mb coeff data */
+    void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+    /* See if we need to swap U and V planes for entropy (input is IV_YUV_420SP_VU) */
+    UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
+
+    /********************************************************/
+    /*  prediction                                          */
+    /********************************************************/
+    ih264e_motion_comp_chroma(ps_proc);
+
+    /********************************************************/
+    /*  error estimation,                                   */
+    /*  transform                                           */
+    /*  quantization                                        */
+    /********************************************************/
+    ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
+                                               pu1_pred_mb, pi2_res_mb,
+                                               i4_src_strd, i4_pred_strd,
+                                               i4_res_strd,
+                                               ps_qp_params->pu2_scale_mat,
+                                               ps_qp_params->pu2_thres_mat,
+                                               ps_qp_params->u1_qbits,
+                                               ps_qp_params->u4_dead_zone,
+                                               au1_nnz);
+
+    /********************************************************/
+    /*  pack coeff data for entropy coding                  */
+    /*  (also produces u1_cbp_c and u4_cntrl)               */
+    /********************************************************/
+    ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
+                     au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
+
+    /********************************************************/
+    /*  ierror estimation,                                  */
+    /*  itransform                                          */
+    /*  iquantization                                       */
+    /********************************************************/
+
+    /* If the frame is not to be used for P frame reference or dumping recon
+     * we only will use the recon for only predicting intra Mbs
+     * This will need only right and bottom edge 4x4 blocks recon
+     * Hence we selectively enable them using control signal(including DC)
+     * 0x7700C000 keeps bits 30-28 and 26-24, i.e. three of the four AC flags
+     * in each of the U and V nibbles (the most significant flag of each
+     * nibble is dropped), plus bits 15-14 (0x0000C000, the value of
+     * CNTRL_FLAG_DCBLK_MASK_CHROMA).
+     */
+    if (!ps_proc->u4_compute_recon)
+    {
+        u4_cntrl &= 0x7700C000;
+    }
+
+    if (u4_cntrl)
+    {
+        ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
+                        ps_codec, pi2_res_mb, pu1_pred_mb, pu1_rec_mb,
+                        i4_res_strd, i4_pred_strd, i4_rec_strd,
+                        ps_qp_params->pu2_iscale_mat,
+                        ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
+                        u4_cntrl, ps_proc->pv_scratch_buff);
+    }
+    else
+    {
+        ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_rec_mb, i4_pred_strd,
+                                          i4_rec_strd, MB_SIZE >> 1, MB_SIZE,
+                                          NULL, 0);
+    }
+
+    return (u1_cbp_c);
+}
diff --git a/encoder/ih264e_core_coding.h b/encoder/ih264e_core_coding.h
new file mode 100755
index 0000000..1237d25
--- /dev/null
+++ b/encoder/ih264e_core_coding.h
@@ -0,0 +1,653 @@
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file +* ih264e_core_coding.h +* +* @brief +* This file contains extern declarations of core coding routines +* +* @author +* ittiam +* +* @remarks +* none +****************************************************************************** +*/ + +#ifndef IH264E_CORE_CODING_H_ +#define IH264E_CORE_CODING_H_ + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief Enable/Disable Hadamard transform of DC Coeff's +****************************************************************************** + */ +#define DISABLE_DC_TRANSFORM 0 +#define ENABLE_DC_TRANSFORM 1 + +/** +******************************************************************************* + * @brief bit masks for DC and AC control flags +******************************************************************************* + */ + +#define 
DC_COEFF_CNT_LUMA_MB 16 +#define NUM_4X4_BLKS_LUMA_MB_ROW 4 +#define NUM_LUMA4x4_BLOCKS_IN_MB 16 +#define NUM_CHROMA4x4_BLOCKS_IN_MB 8 + +#define SIZE_4X4_BLK_HRZ TRANS_SIZE_4 +#define SIZE_4X4_BLK_VERT TRANS_SIZE_4 + +#define CNTRL_FLAG_DC_MASK_LUMA 0x0000FFFF +#define CNTRL_FLAG_AC_MASK_LUMA 0xFFFF0000 + +#define CNTRL_FLAG_AC_MASK_CHROMA_U 0xF0000000 +#define CNTRL_FLAG_DC_MASK_CHROMA_U 0x0000F000 + +#define CNTRL_FLAG_AC_MASK_CHROMA_V 0x0F000000 +#define CNTRL_FLAG_DC_MASK_CHROMA_V 0x00000F00 + +#define CNTRL_FLAG_AC_MASK_CHROMA ( CNTRL_FLAG_AC_MASK_CHROMA_U | CNTRL_FLAG_AC_MASK_CHROMA_V ) +#define CNTRL_FLAG_DC_MASK_CHROMA ( CNTRL_FLAG_DC_MASK_CHROMA_U | CNTRL_FLAG_DC_MASK_CHROMA_V ) + +#define CNTRL_FLAG_DCBLK_MASK_CHROMA 0x0000C000 + +/** +******************************************************************************* + * @brief macros for transforms +******************************************************************************* + */ +#define DEQUEUE_BLKID_FROM_CONTROL( u4_cntrl, blk_lin_id) \ +{ \ + blk_lin_id = CLZ(u4_cntrl); \ + u4_cntrl &= (0x7FFFFFFF >> blk_lin_id); \ +}; + +#define IND2SUB_LUMA_MB(u4_blk_id,i4_offset_x,i4_offset_y) \ +{ \ + i4_offset_x = (u4_blk_id % 4) << 2; \ + i4_offset_y = (u4_blk_id / 4) << 2; \ +} + +#define IND2SUB_CHROMA_MB(u4_blk_id,i4_offset_x,i4_offset_y) \ +{ \ + i4_offset_x = ((u4_blk_id & 0x1 ) << 3) + (u4_blk_id > 3); \ + i4_offset_y = (u4_blk_id & 0x2) << 1; \ +} + + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* This function performs does the DCT transform then Hadamard transform +* and quantization for a macroblock when the mb mode is intra 16x16 mode +* +* @par Description: +* First cf4 is done on all 16 4x4 blocks of the 16x16 input block. 
+* Then hadamard transform is done on the DC coefficients +* Quantization is then performed on the 16x16 block, 4x4 wise +* +* @param[in] pu1_src +* Pointer to source sub-block +* +* @param[in] pu1_pred +* Pointer to prediction sub-block +* +* @param[in] pi2_out +* Pointer to residual sub-block +* The output will be in linear format +* The first 16 continuous locations will contain the values of Dc block +* After DC block and a stride 1st AC block will follow +* After one more stride next AC block will follow +* The blocks will be in raster scan order +* +* @param[in] src_strd +* Source stride +* +* @param[in] pred_strd +* Prediction stride +* +* @param[in] dst_strd +* Destination stride +* +* @param[in] pu2_scale_matrix +* The quantization matrix for 4x4 transform +* +* @param[in] pu2_threshold_matrix +* Threshold matrix +* +* @param[in] u4_qbits +* 15+QP/6 +* +* @param[in] u4_round_factor +* Round factor for quant +* +* @param[out] pu1_nnz +* Memory to store the non-zeros after transform +* The first byte will be the nnz of DC block +* From the next byte the AC nnzs will be stored in raster scan order +* +* @param u4_dc_flag +* Signals if Dc transform is to be done or not +* 1 -> Dc transform will be done +* 0 -> Dc transform will not be done +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_luma_16x16_resi_trans_dctrans_quant( + codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred, + WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd, + WORD32 dst_strd, const UWORD16 *pu2_scale_matrix, + const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits, + UWORD32 u4_round_factor, UWORD8 *pu1_nnz, UWORD32 u4_dc_flag); + +/** +******************************************************************************* +* +* @brief +* This function performs the intra 16x16 inverse transform process for H264 +* it includes inverse Dc transform, inverse quant and then inverse transform +* +* @par Description: +* +* @param[in] 
pi2_src +* Input data, 16x16 size +* First 16 mem locations will have the Dc coffs in rater scan order in linear fashion +* after a stride 1st AC clock will be present again in raster can order +* Then each AC block of the 16x16 block will follow in raster scan order +* +* @param[in] pu1_pred +* The predicted data, 16x16 size +* Block by block form +* +* @param[in] pu1_out +* Output 16x16 +* In block by block form +* +* @param[in] src_strd +* Source stride +* +* @param[in] pred_strd +* input stride for prediction buffer +* +* @param[in] out_strd +* input stride for output buffer +* +* @param[in] pu2_iscale_mat +* Inverse quantization matrix for 4x4 transform +* +* @param[in] pu2_weigh_mat +* weight matrix of 4x4 transform +* +* @param[in] qp_div +* QP/6 +* +* @param[in] pi4_tmp +* Input temporary buffer +* needs to be at least 20 in size +* +* @param[in] pu4_cntrl +* Controls the transform path +* total Last 17 bits are used +* the 16th th bit will correspond to DC block +* and 32-17 will correspond to the ac blocks in raster scan order +* bit equaling zero indicates that the entire 4x4 block is zero for DC +* For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block is nonzero +* +* @param[in] pi4_tmp +* Input temporary buffer +* needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size +* +* @returns +* none +* +* @remarks +* The all zero case must be taken care outside +* +******************************************************************************* +*/ +void ih264e_luma_16x16_idctrans_iquant_itrans_recon( + codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred, + UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd, + WORD32 out_strd, const UWORD16 *pu2_iscale_mat, + const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl, + UWORD32 u4_dc_trans_flag, WORD32 *pi4_tmp); + +/** +******************************************************************************* +* +* @brief +* This function performs does the DCT transform then 
Hadamard transform +* and quantization for a chroma macroblock +* +* @par Description: +* First cf4 is done on all 16 4x4 blocks of the 8x8input block +* Then hadamard transform is done on the DC coefficients +* Quantization is then performed on the 8x8 block, 4x4 wise +* +* @param[in] pu1_src +* Pointer to source sub-block +* The input is in interleaved format for two chroma planes +* +* @param[in] pu1_pred +* Pointer to prediction sub-block +* Prediction is in inter leaved format +* +* @param[in] pi2_out +* Pointer to residual sub-block +* The output will be in linear format +* The first 4 continuous locations will contain the values of DC block for U +* and then next 4 will contain for V. +* After DC block and a stride 1st AC block of U plane will follow +* After one more stride next AC block of V plane will follow +* The blocks will be in raster scan order +* +* After all the AC blocks of U plane AC blocks of V plane will follow in exact +* same way +* +* @param[in] src_strd +* Source stride +* +* @param[in] pred_strd +* Prediction stride +* +* @param[in] dst_strd +* Destination stride +* +* @param[in] pu2_scale_matrix +* The quantization matrix for 4x4 transform +* +* @param[in] pu2_threshold_matrix +* Threshold matrix +* +* @param[in] u4_qbits +* 15+QP/6 +* +* @param[in] u4_round_factor +* Round factor for quant +* +* @param[out] pu1_nnz +* Memory to store the non-zeros after transform +* The first byte will be the nnz od DC block for U plane +* From the next byte the AC nnzs will be storerd in raster scan order +* The fifth byte will be nnz of Dc block of V plane +* Then Ac blocks will follow +* +* @param u4_dc_flag +* Signals if Dc transform is to be done or not +* 1 -> Dc transform will be done +* 0 -> Dc transform will not be done +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_chroma_8x8_resi_trans_dctrans_quant( + codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred, + WORD16 *pi2_out, 
WORD32 src_strd, WORD32 pred_strd, + WORD32 out_strd, const UWORD16 *pu2_scale_matrix, + const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits, + UWORD32 u4_round_factor, UWORD8 *pu1_nnz_c); + +/** +******************************************************************************* +* @brief +* This function performs the inverse transform with process for chroma MB of H264 +* +* @par Description: +* Does inverse DC transform ,inverse quantization inverse transform +* +* @param[in] pi2_src +* Input data, 16x16 size +* The input is in the form of, first 4 locations will contain DC coeffs of +* U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane +* in raster scan order will follow, each block as linear array in raster scan order. +* After a stride next AC block will follow. After all AC blocks of U plane +* V plane AC blocks will follow in exact same order. +* +* @param[in] pu1_pred +* The predicted data, 8x16 size, U and V interleaved +* +* @param[in] pu1_out +* Output 8x16, U and V interleaved +* +* @param[in] src_strd +* Source stride +* +* @param[in] pred_strd +* input stride for prediction buffer +* +* @param[in] out_strd +* input stride for output buffer +* +* @param[in] pu2_iscale_mat +* Inverse quantization martix for 4x4 transform +* +* @param[in] pu2_weigh_mat +* weight matrix of 4x4 transform +* +* @param[in] qp_div +* QP/6 +* +* @param[in] pi4_tmp +* Input temporary buffer +* needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma * number of planes +* in size +* +* @param[in] pu4_cntrl +* Controls the transform path +* the 15 th bit will correspond to DC block of U plane , 14th will indicate the V plane Dc block +* 32-28 bits will indicate AC blocks of U plane in raster scan order +* 27-23 bits will indicate AC blocks of V plane in rater scan order +* The bit 1 implies that there is at least one non zero coff in a block +* +* @returns +* none +* +* @remarks 
+******************************************************************************* +*/ +void ih264e_chroma_8x8_idctrans_iquant_itrans_recon( + codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred, + UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd, + WORD32 out_strd, const UWORD16 *pu2_iscale_mat, + const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl, + WORD32 *pi4_tmp); + +/** +****************************************************************************** +* +* @brief This function packs residue of an i16x16 luma mb for entropy coding +* +* @par Description +* An i16 macro block contains two classes of units, dc 4x4 block and +* 4x4 ac blocks. while packing the mb, the dc block is sent first, and +* the 16 ac blocks are sent next in scan order. Each and every block is +* represented by 3 parameters (nnz, significant coefficient map and the +* residue coefficients itself). If a 4x4 unit does not have any coefficients +* then only nnz is sent. Inside a 4x4 block the individual coefficients are +* sent in scan order. +* +* The first byte of each block will be nnz of the block, if it is non zero, +* a 2 byte significance map is sent. This is followed by nonzero coefficients. +* This is repeated for 1 dc + 16 ac blocks. 
+* +* @param[in] pi2_res_mb +* pointer to residue mb +* +* @param[in, out] pv_mb_coeff_data +* buffer pointing to packed residue coefficients +* +* @param[in] i4_res_strd +* residual block stride +* +* @param[out] u1_cbp_l +* coded block pattern luma +* +* @param[in] pu1_nnz +* number of non zero coefficients in each 4x4 unit +* +* @param[out] pu4_cntrl +* Control signal for inverse transform of 16x16 blocks +* +* @return none +* +* @remarks +* +****************************************************************************** +*/ +void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb, void **pv_mb_coeff_data, + WORD32 i4_res_strd, UWORD8 *u1_cbp_l, UWORD8 *pu1_nnz, + UWORD32 *pu4_cntrl); + +/** +****************************************************************************** +* +* @brief This function packs residue of an i8x8 chroma mb for entropy coding +* +* @par Description +* An i8 chroma macro block contains two classes of units, dc 2x2 block and +* 4x4 ac blocks. While packing the mb, the dc block is sent first, and +* the 4 ac blocks are sent next in scan order. Each and every block is +* represented by 3 parameters (nnz, significant coefficient map and the +* residue coefficients themselves). If a 4x4 unit does not have any coefficients +* then only nnz is sent. Inside a 4x4 block the individual coefficients are +* sent in scan order. +* +* The first byte of each block will be nnz of the block, if it is non zero, +* a 2 byte significance map is sent. This is followed by nonzero coefficients. +* This is repeated for 1 dc + 4 ac blocks. 
+* +* @param[in] pi2_res_mb +* pointer to residue mb +* +* @param[in, out] pv_mb_coeff_data +* buffer pointing to packed residue coefficients +* +* @param[in] i4_res_strd +* residual block stride +* +* @param[out] u1_cbp_c +* coded block pattern chroma +* +* @param[in] pu1_nnz +* number of non zero coefficients in each 4x4 unit +* +* @param[in] u4_kill_coffs_flag +* not described in the original header; presumably requests that the +* coefficients be discarded - confirm against the definition +* +* @param[out] pu4_cntrl +* Control signal for inverse transform +* +* @param[in] u4_swap_uv +* Swaps the order of U and V planes in entropy bitstream +* +* @return none +* +* @remarks +* +****************************************************************************** +*/ +void ih264e_pack_c_mb(WORD16 *pi2_res_mb, void **pv_mb_coeff_data, + WORD32 i4_res_strd, UWORD8 *u1_cbp_c, UWORD8 *pu1_nnz, + UWORD32 u4_kill_coffs_flag, UWORD32 *pu4_cntrl, + UWORD32 u4_swap_uv); + +/** +******************************************************************************* +* +* @brief performs luma core coding when intra mode is i16x16 +* +* @par Description: +* If the current mb is to be coded as intra of mb type i16x16, the mb is first +* predicted using one of i16x16 prediction filters, based on the intra mode +* chosen. Then, error is computed between the input blk and the estimated blk. +* This error is transformed (hierarchical transform i.e., dct followed by hada- +* -mard), quantized. The quantized coefficients are packed in scan order for +* entropy coding. 
+* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks none +* +******************************************************************************* +*/ +UWORD8 ih264e_code_luma_intra_macroblock_16x16 + ( + process_ctxt_t *ps_proc + ); + +/** +******************************************************************************* +* +* @brief performs luma core coding when intra mode is i4x4 +* +* @par Description: +* If the current mb is to be coded as intra of mb type i4x4, the mb is first +* predicted using one of i4x4 prediction filters, basing on the intra mode +* chosen. Then, error is computed between the input blk and the estimated blk. +* This error is dct transformed and quantized. The quantized coefficients are +* packed in scan order for entropy coding. +* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks +* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order +* mentioned in h.264 specification +* +******************************************************************************* +*/ +UWORD8 ih264e_code_luma_intra_macroblock_4x4 + ( + process_ctxt_t *ps_proc + ); + +/** +******************************************************************************* +* +* @brief performs luma core coding when intra mode is i4x4 +* +* @par Description: +* If the current mb is to be coded as intra of mb type i4x4, the mb is first +* predicted using one of i4x4 prediction filters, basing on the intra mode +* chosen. Then, error is computed between the input blk and the estimated blk. +* This error is dct transformed and quantized. The quantized coefficients are +* packed in scan order for entropy coding. 
+* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks +* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order +* mentioned in h.264 specification +* +******************************************************************************* +*/ +UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on + ( + process_ctxt_t *ps_proc + ); + +/** +******************************************************************************* +* +* @brief performs chroma core coding for intra macro blocks +* +* @par Description: +* If the current MB is to be intra coded with mb type chroma I8x8, the MB is +* first predicted using intra 8x8 prediction filters. The predicted data is +* compared with the input for error and the error is transformed. The DC +* coefficients of each transformed sub blocks are further transformed using +* Hadamard transform. The resulting coefficients are quantized, packed and sent +* for entropy coding. +* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_c +* coded block pattern chroma +* +* @remarks +* The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order +* mentioned in h.264 specification +* +******************************************************************************* +*/ +UWORD8 ih264e_code_chroma_intra_macroblock_8x8 + ( + process_ctxt_t *ps_proc + ); + +/** +******************************************************************************* +* @brief performs luma core coding when mode is inter +* +* @par Description: +* If the current mb is to be coded as inter predicted mb,based on the sub mb +* partitions and corresponding motion vectors generated by ME, prediction is done. +* Then, error is computed between the input blk and the estimated blk. +* This error is transformed ( dct and with out hadamard), quantized. 
The +* quantized coefficients are packed in scan order for entropy coding. +* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks none +* +******************************************************************************* +*/ +UWORD8 ih264e_code_luma_inter_macroblock_16x16 + ( + process_ctxt_t *ps_proc + ); + +/** +******************************************************************************* +* @brief performs chroma core coding for inter macro blocks +* +* @par Description: +* If the current mb is to be coded as inter predicted mb, based on the sub mb +* partitions and corresponding motion vectors generated by ME, prediction is done. +* Then, error is computed between the input blk and the estimated blk. +* This error is transformed, quantized. The quantized coefficients +* are packed in scan order for entropy coding. +* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks none +* +******************************************************************************* +*/ +UWORD8 ih264e_code_chroma_inter_macroblock_8x8 + ( + process_ctxt_t *ps_proc + ); + +#endif /* IH264E_CORE_CODING_H_ */ diff --git a/encoder/ih264e_deblk.c b/encoder/ih264e_deblk.c new file mode 100755 index 0000000..8a11bdb --- /dev/null +++ b/encoder/ih264e_deblk.c @@ -0,0 +1,854 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** + ******************************************************************************* + * @file + * ih264e_deblk.c + * + * @brief + * This file contains functions that are associated with deblocking + * + * @author + * ittiam + * + * @par List of Functions: + * - ih264e_fill_bs_1mv_1ref_non_mbaff + * - ih264e_calculate_csbp + * - ih264e_compute_bs + * - ih264e_filter_top_edge + * - ih264e_filter_left_edge + * - ih264e_deblock_mb + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <string.h> +#include <assert.h> + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_macros.h" +#include "ih264_defs.h" +#include "ih264e_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" 
+#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264_trans_data.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_deblk_tables.h" +#include "ih264e_deblk.h" + + +/*****************************************************************************/ +/* Extern global definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* @brief BS Table Lookup +* input : row = mv difference flag (0 : both mv components differ by less +* than 4, 1 : at least one component differs by 4 or more), +* column = 4 bit coded pattern of the four 4x4 blk pairs of an edge +* output : four BS values packed one per byte, pattern bit 0 maps to the +* most significant byte. A set pattern bit gives BS 2, a clear +* bit gives a BS equal to the mv difference flag +* @remarks none +****************************************************************************** +*/ +static const UWORD32 gu4_bs_table[][16] = +{ + { + 0x00000000, 0x02000000, 0x00020000, 0x02020000, + 0x00000200, 0x02000200, 0x00020200, 0x02020200, + 0x00000002, 0x02000002, 0x00020002, 0x02020002, + 0x00000202, 0x02000202, 0x00020202, 0x02020202 + }, + { + 0x01010101, 0x02010101, 0x01020101, 0x02020101, + 0x01010201, 0x02010201, 0x01020201, 0x02020201, + 0x01010102, 0x02010102, 0x01020102, 0x02020102, + 0x01010202, 0x02010202, 0x01020202, 0x02020202 + } +}; + +/** +****************************************************************************** +* @brief Transpose Matrix used in BS +* input : 4 bit pattern +* output : 16 bit pattern in which input bit i is moved to bit (4 * i). +* Or-ing the look ups of the four nibbles of a 16 bit word, +* shifted left by 0, 1, 2 and 3, transposes it as a 4x4 bit matrix +* @remarks none +****************************************************************************** +*/ +static const UWORD16 ih264e_gu2_4x4_v2h_reorder[16] = +{ + 0x0000, 0x0001, 0x0010, 0x0011, + 0x0100, 0x0101, 0x0110, 0x0111, + 0x1000, 0x1001, 0x1010, 0x1011, + 0x1100, 0x1101, 0x1110, 0x1111 +}; + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief Fill BS value for all the edges of an mb +* +* @par Description: +* Fill BS value for all the
edges of an mb. The internal edges 1..3 are derived from the coded sub block +* patterns alone. The mb boundary edges (edge 0) get BS 4 when the neighbour +* is intra, otherwise they also depend on the mv difference with the neighbour +* +* @param[out] pu4_horz_bs +* Base pointer of horizontal BS table of the mb (4 words, one per edge) +* +* @param[out] pu4_vert_bs +* Base pointer of vertical BS table of the mb (4 words, one per edge) +* +* @param[in] u4_left_mb_csbp +* coded sub block pattern of left mb +* +* @param[in] u4_top_mb_csbp +* coded sub block pattern of top mb +* +* @param[in] u4_cur_mb_csbp +* coded sub block pattern of curr mb +* +* @param[in] ps_leftMvPred +* MV of left mb +* +* @param[in] ps_topMvPred +* MV of top mb +* +* @param[in] ps_curMvPred +* MV of curr mb +* +* @param[in] u1_left_intra +* is left intra +* +* @param[in] u1_top_intra +* is top intra +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +static void ih264e_fill_bs_1mv_1ref_non_mbaff(UWORD32 *pu4_horz_bs, + UWORD32 *pu4_vert_bs, + UWORD32 u4_left_mb_csbp, + UWORD32 u4_top_mb_csbp, + UWORD32 u4_cur_mb_csbp, + mv_t *ps_leftMvPred, + mv_t *ps_topMvPred, + mv_t *ps_curMvPred, + UWORD8 u1_left_intra, + UWORD8 u1_top_intra) +{ + /* motion vectors of blks p & q */ + WORD16 i16_qMv0, i16_qMv1, i16_pMv0, i16_pMv1; + + /* temp var */ + UWORD32 u4_lft_flag, u4_top_flag; + const UWORD32 *bs_map; + UWORD32 u4_reordered_vert_bs_enc, u4_temp; + + /* Coded Pattern for Horizontal Edge */ + /*-----------------------------------------------------------------------*/ + /*u4_nbr_horz_csbp=11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C|15T|14T|13T|12T */ + /*-----------------------------------------------------------------------*/ + UWORD32 u4_nbr_horz_csbp = (u4_cur_mb_csbp << 4) | (u4_top_mb_csbp >> 12); + UWORD32 u4_horz_bs_enc = u4_cur_mb_csbp | u4_nbr_horz_csbp; + + /* Coded Pattern for Vertical Edge */ + /*-----------------------------------------------------------------------*/ + /*u4_left_mb_masked_csbp = 15L|0|0|0|11L|0|0|0|7L|0|0|0|3L|0|0|0 */ + /*-----------------------------------------------------------------------*/ + UWORD32 u4_left_mb_masked_csbp = u4_left_mb_csbp & CSBP_RIGHT_BLOCK_MASK; + + /*-----------------------------------------------------------------------*/ + 
/*u4_cur_mb_masked_csbp =14C|13C|12C|x|10C|9C|8C|x|6C|5C|4C|x|2C|1C|0C|x */ + /*-----------------------------------------------------------------------*/ + UWORD32 u4_cur_mb_masked_csbp =(u4_cur_mb_csbp<<1)&(~CSBP_LEFT_BLOCK_MASK); + + /*-----------------------------------------------------------------------*/ + /*u4_nbr_vert_csbp=14C|13C|12C|15L|10C|9C|8C|11L|6C|5C|4C|7L|2C|1C|0C|3L */ + /*-----------------------------------------------------------------------*/ + UWORD32 u4_nbr_vert_csbp = (u4_cur_mb_masked_csbp) | (u4_left_mb_masked_csbp >> 3); + UWORD32 u4_vert_bs_enc = u4_cur_mb_csbp | u4_nbr_vert_csbp; + + /* BS Calculation for MB Internal Edges */ + + /* BS calculation for 1 2 3 horizontal boundary : row 0 of the table is used, i.e. no mv difference term for edges inside the mb */ + bs_map = gu4_bs_table[0]; + pu4_horz_bs[1] = bs_map[(u4_horz_bs_enc >> 4) & 0xF]; + pu4_horz_bs[2] = bs_map[(u4_horz_bs_enc >> 8) & 0xF]; + pu4_horz_bs[3] = bs_map[(u4_horz_bs_enc >> 12) & 0xF]; + + /* BS calculation for 1 2 3 vertical boundary (pu4_vert_bs[1..3]) */ + /* Do 4x4 transpose of u4_vert_bs_enc by using look up table for reorder, so that each nibble holds the four blks of one vertical edge */ + u4_reordered_vert_bs_enc = ih264e_gu2_4x4_v2h_reorder[u4_vert_bs_enc & 0xF]; + + u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 4) & 0xF]; + u4_reordered_vert_bs_enc |= (u4_temp << 1); + + u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 8) & 0xF]; + u4_reordered_vert_bs_enc |= (u4_temp << 2); + + u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 12) & 0xF]; + u4_reordered_vert_bs_enc |= (u4_temp << 3); + + pu4_vert_bs[1] = bs_map[(u4_reordered_vert_bs_enc >> 4) & 0xF]; + pu4_vert_bs[2] = bs_map[(u4_reordered_vert_bs_enc >> 8) & 0xF]; + pu4_vert_bs[3] = bs_map[(u4_reordered_vert_bs_enc >> 12) & 0xF]; + + + /* BS Calculation for MB Boundary Edges (edge 0) : BS 4 when the neighbour is intra, else the table row is picked by the mv difference flag (any mv component differing by 4 or more) */ + i16_qMv0 = ps_curMvPred->i2_mvx; + i16_qMv1 = ps_curMvPred->i2_mvy; + + if (u1_top_intra) + { + pu4_horz_bs[0] = 0x04040404; + } + else + { + i16_pMv0 = ps_topMvPred->i2_mvx; + i16_pMv1 = ps_topMvPred->i2_mvy; + + u4_top_flag = (ABS((i16_pMv0 - i16_qMv0)) >= 4 ) | + 
(ABS((i16_pMv1 - i16_qMv1)) >= 4); + + bs_map = gu4_bs_table[!!u4_top_flag]; + pu4_horz_bs[0] = bs_map[u4_horz_bs_enc & 0xF]; + } + + if (u1_left_intra) + { + pu4_vert_bs[0] = 0x04040404; + } + else + { + i16_pMv0 = ps_leftMvPred->i2_mvx; + i16_pMv1 = ps_leftMvPred->i2_mvy; + + + u4_lft_flag = (ABS((i16_pMv0 - i16_qMv0)) >= 4 ) | + (ABS((i16_pMv1 - i16_qMv1)) >= 4); + + bs_map = gu4_bs_table[!!u4_lft_flag]; + pu4_vert_bs[0] = bs_map[u4_reordered_vert_bs_enc & 0xF]; + } +} + +/** +******************************************************************************* +* +* @brief calculate coded subblock pattern from nnz +* +* @par Description: +* Views ps_proc->au4_nnz as a byte array, skips its first byte and sets +* bit i of the pattern when byte (i + 1) is non zero, for i = 0..15 +* NOTE(review): the skipped byte is presumably the nnz of the dc blk - confirm +* +* @param[in] ps_proc +* process context +* +* @returns csbp, a 16 bit pattern with one bit per nnz entry read +* +* @remarks none +* +******************************************************************************* +*/ +static UWORD32 ih264e_calculate_csbp(process_ctxt_t *ps_proc) +{ + /* number of non zeros for each tx blk, read byte wise */ + UWORD8 *pu1_curr_nnz = (UWORD8 *)ps_proc->au4_nnz; + + /* csbp */ + UWORD32 u4_csbp = 0; + + /* temp var */ + WORD32 i4_i; + + pu1_curr_nnz += 1; + + /* Creating Subblock pattern for current MB */ + /* 15C|14C|13C|12C|11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C */ + for (i4_i = 0; i4_i < 16; i4_i++ ) + { + u4_csbp |= ((!!*(pu1_curr_nnz + i4_i))<< i4_i); + } + + return u4_csbp; +} + +/** +******************************************************************************* +* +* @brief This function computes blocking strength for an mb +* +* @par Description: +* This function computes blocking strength for an mb and stores it in the +* picture level vertical and horizontal bs arrays of the deblk bs context +* +* @param[in] ps_proc +* process context +* +* @returns none +* +* @remarks In this module it is assumed that there is only a single reference +* frame and it is always the most recently used anchor frame +* +******************************************************************************* +*/ +void ih264e_compute_bs(process_ctxt_t * ps_proc) +{ + /* deblk bs context */ + bs_ctxt_t *ps_bs = 
&(ps_proc->s_deblk_ctxt.s_bs_ctxt); + + /* vertical blocking strength */ + UWORD32 *pu4_pic_vert_bs; + + /* horizontal blocking strength */ + UWORD32 *pu4_pic_horz_bs; + + /* mb indices */ + WORD32 i4_mb_x, i4_mb_y; + + /* is intra */ + WORD32 i4_intra; + + /* temp var */ + WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; + + /* init indices */ + i4_mb_x = ps_bs->i4_mb_x; + i4_mb_y = ps_bs->i4_mb_y; + + /* init pointers : 4 bs words per mb, mbs indexed in raster scan order */ + pu4_pic_vert_bs = ps_bs->pu4_pic_vert_bs + ((i4_mb_y * i4_wd_mbs) + i4_mb_x) * 4; + pu4_pic_horz_bs = ps_bs->pu4_pic_horz_bs + ((i4_mb_y * i4_wd_mbs) + i4_mb_x) * 4; + + /* is intra? */ + i4_intra = ps_proc->u4_is_intra; + + /* compute blocking strength : an intra mb gets bs 4 on edge 0 and bs 3 on edges 1..3, in both directions */ + if (i4_intra) + { + pu4_pic_vert_bs[0] = 0x04040404; + pu4_pic_vert_bs[1] = pu4_pic_vert_bs[2] = pu4_pic_vert_bs[3] = 0x03030303; + + pu4_pic_horz_bs[0] = 0x04040404; + pu4_pic_horz_bs[1] = pu4_pic_horz_bs[2] = pu4_pic_horz_bs[3] = 0x03030303; + } + else + { + /* left mb syntax info */ + mb_info_t *ps_left_mb_syntax_ele = &ps_proc->s_left_mb_syntax_ele; + + /* top mb syntax info */ + mb_info_t *ps_top_mb_syntax_ele = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x; + + /* top row motion vector info */ + enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x; + + /* csbp for curr mb */ + ps_proc->u4_csbp = ih264e_calculate_csbp(ps_proc); + + /* csbp for ngbrs : in the first column / first row the left / top ngbr entry is overwritten with zero csbp, non intra and the mv of the curr mb, so that its mv difference evaluates to zero */ + if (i4_mb_x == 0) + { + ps_left_mb_syntax_ele->u4_csbp = 0; + ps_left_mb_syntax_ele->u2_is_intra = 0; + ps_proc->s_left_mb_pu.s_l0_mv = ps_proc->ps_pu->s_l0_mv; + } + if (i4_mb_y == 0) + { + ps_top_mb_syntax_ele->u4_csbp = 0; + ps_top_mb_syntax_ele->u2_is_intra = 0; + ps_top_row_pu->s_l0_mv = ps_proc->ps_pu->s_l0_mv; + } + + ih264e_fill_bs_1mv_1ref_non_mbaff(pu4_pic_horz_bs, + pu4_pic_vert_bs, + ps_left_mb_syntax_ele->u4_csbp, + ps_top_mb_syntax_ele->u4_csbp, + ps_proc->u4_csbp, + &ps_proc->s_left_mb_pu.s_l0_mv, + &ps_top_row_pu->s_l0_mv, + &ps_proc->ps_pu->s_l0_mv, + ps_left_mb_syntax_ele->u2_is_intra, + ps_top_mb_syntax_ele->u2_is_intra); 
+ } + + return ; +} + +/** +******************************************************************************* +* +* @brief This function performs deblocking of top horizontal edge +* +* @par Description: +* This function performs deblocking of the top horizontal edge (edge 0) of an +* mb, for luma and chroma. The filter parameters are derived from the average +* of the qps of the top mb and the curr mb +* +* @param[in] ps_codec +* pointer to codec context +* +* @param[in] ps_proc +* pointer to proc context +* +* @param[in] pu1_mb_qp +* pointer to the qp of the curr mb; the qp of the top mb is read +* i4_wd_mbs entries before it +* +* @param[in] pu1_cur_pic_luma +* pointer to recon buffer luma +* +* @param[in] pu1_cur_pic_chroma +* pointer to recon buffer chroma +* +* @param[in] pu4_pic_horz_bs +* pointer to horizontal blocking strength +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +static void ih264e_filter_top_edge(codec_t *ps_codec, + process_ctxt_t *ps_proc, + UWORD8 *pu1_mb_qp, + UWORD8 *pu1_cur_pic_luma, + UWORD8 *pu1_cur_pic_chroma, + UWORD32 *pu4_pic_horz_bs) +{ + /* strd */ + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + + /* deblk params */ + UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma, u4_qp_p, u4_qp_q; + UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma; + + /* collect qp of top (p) & curr (q) mb */ + u4_qp_p = pu1_mb_qp[-ps_proc->i4_wd_mbs]; + u4_qp_q = pu1_mb_qp[0]; + + /********/ + /* luma */ + /********/ + u4_qp_luma = (u4_qp_p + u4_qp_q + 1) >> 1; + + /* filter offset A and filter offset B have to be received from slice header */ + /* TODO : for now lets set these offsets as zero */ + + + u4_idx_A_luma = MIN(51, u4_qp_luma + 0); + u4_idx_B_luma = MIN(51, u4_qp_luma + 0); + + /* alpha, beta computation */ + u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma]; + u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma]; + + /**********/ + /* chroma */ + /**********/ + u4_qp_chroma = (gu1_qpc_fqpi[u4_qp_p] + gu1_qpc_fqpi[u4_qp_q] + 1) >> 1; + + /* filter offset A and filter offset B have to be received from slice
header */ + /* TODO : for now lets set these offsets as zero */ + + + u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0); + u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0); + + /* alpha, beta computation */ + u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma]; + u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma]; + + /* deblk edge */ + /* top horizontal edge : the strong filter is used only when all four bs values of the edge are 4 */ + if (pu4_pic_horz_bs[0] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma, i4_rec_strd, u4_alpha_luma, u4_beta_luma); + ps_codec->pf_deblk_chroma_horz_bs4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma); + } + else + { + /* normal filter : the four bs values are passed packed in one word */ + ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma, i4_rec_strd, u4_alpha_luma, + u4_beta_luma, pu4_pic_horz_bs[0], + gu1_ih264_clip_table[u4_idx_A_luma]); + + ps_codec->pf_deblk_chroma_horz_bslt4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma, + u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_horz_bs[0], + gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]); + } +} + +/** +******************************************************************************* +* +* @brief This function performs deblocking of left vertical edge +* +* @par Description: +* This function performs deblocking of the left vertical edge (edge 0) of an +* mb, for luma and chroma. The filter parameters are derived from the average +* of the qps of the left mb and the curr mb +* +* @param[in] ps_codec +* pointer to codec context +* +* @param[in] ps_proc +* pointer to proc context +* +* @param[in] pu1_mb_qp +* pointer to the qp of the curr mb; the qp of the left mb is read one +* entry before it +* +* @param[in] pu1_cur_pic_luma +* pointer to recon buffer luma +* +* @param[in] pu1_cur_pic_chroma +* pointer to recon buffer chroma +* +* @param[in] pu4_pic_vert_bs +* pointer to vertical blocking strength +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +static void ih264e_filter_left_edge(codec_t *ps_codec, + process_ctxt_t *ps_proc, + UWORD8 *pu1_mb_qp, + UWORD8 
*pu1_cur_pic_luma, + UWORD8 *pu1_cur_pic_chroma, + UWORD32 *pu4_pic_vert_bs) +{ + /* strd */ + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + + /* deblk params */ + UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma, u4_qp_p, u4_qp_q; + UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma; + + /* collect qp of left (p) & curr (q) mb */ + u4_qp_p = pu1_mb_qp[-1]; + u4_qp_q = pu1_mb_qp[0]; + + /********/ + /* luma */ + /********/ + u4_qp_luma = (u4_qp_p + u4_qp_q + 1) >> 1; + + /* filter offset A and filter offset B have to be received from slice header */ + /* TODO : for now lets set these offsets as zero */ + + + u4_idx_A_luma = MIN(51, u4_qp_luma + 0); + u4_idx_B_luma = MIN(51, u4_qp_luma + 0); + + /* alpha, beta computation from the rounded average qp of the two mbs */ + u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma]; + u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma]; + + /**********/ + /* chroma */ + /**********/ + u4_qp_chroma = (gu1_qpc_fqpi[u4_qp_p] + gu1_qpc_fqpi[u4_qp_q] + 1) >> 1; + + /* filter offset A and filter offset B have to be received from slice header */ + /* TODO : for now lets set these offsets as zero */ + + + u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0); + u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0); + + /* alpha, beta computation from the rounded average of the two mapped chroma qps */ + u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma]; + u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma]; + + /* deblk edge : the strong filter is used only when all four bs values of the edge are 4 */ + if (pu4_pic_vert_bs[0] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma, i4_rec_strd, u4_alpha_luma, u4_beta_luma); + ps_codec->pf_deblk_chroma_vert_bs4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma); + } + else + { + /* normal filter : the four bs values are passed packed in one word */ + ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma, i4_rec_strd, + u4_alpha_luma, u4_beta_luma, + pu4_pic_vert_bs[0], + gu1_ih264_clip_table[u4_idx_A_luma]); + + ps_codec->pf_deblk_chroma_vert_bslt4(pu1_cur_pic_chroma, 
i4_rec_strd, u4_alpha_chroma, + u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_vert_bs[0], + gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]); + } +} + +/** +******************************************************************************* +* +* @brief This function performs deblocking on an mb +* +* @par Description: +* This function performs deblocking on an mb. The four vertical edges are +* filtered first, then the four horizontal edges. Edge 0 is filtered only +* when the corresponding neighbour mb is available (inside the picture and, +* in slice mode, in the same slice). Luma is filtered on every edge, chroma +* only on edge 0 and edge 2 +* +* @param[in] ps_proc +* process context corresponding to the job +* +* @param[in] ps_deblk +* pointer to deblock context +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_deblock_mb(process_ctxt_t *ps_proc, deblk_ctxt_t * ps_deblk) +{ + /* codec ctxt */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* ngbr availability */ + UWORD8 u1_mb_a, u1_mb_b; + + /* mb indices */ + WORD32 i4_mb_x = ps_deblk->i4_mb_x, i4_mb_y = ps_deblk->i4_mb_y; + + /* pic qp ptr */ + UWORD8 *pu1_pic_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp; + + /* vertical blocking strength */ + UWORD32 *pu4_pic_vert_bs = ps_deblk->s_bs_ctxt.pu4_pic_vert_bs; + + /* horizontal blocking strength */ + UWORD32 *pu4_pic_horz_bs = ps_deblk->s_bs_ctxt.pu4_pic_horz_bs; + + /* src buffers luma */ + UWORD8 *pu1_cur_pic_luma = ps_deblk->pu1_cur_pic_luma; + + /* src buffers chroma */ + UWORD8 *pu1_cur_pic_chroma = ps_deblk->pu1_cur_pic_chroma; + + /* strd */ + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + + /* deblk params */ + UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma; + UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma; + + /* temp var : raster scan index of the mb in the picture */ + UWORD32 push_ptr = (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x; + + /* derive neighbor availability */ + /* In slice mode the edges of mbs that lie on the slice boundary are not deblocked */ + /* deblocking filter idc '2' */ + if (ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_NONE) + { + /* slice index */ + UWORD8 *pu1_slice_idx 
= ps_deblk->pu1_slice_idx; + + pu1_slice_idx += (i4_mb_y * ps_proc->i4_wd_mbs); + /* left macroblock availability : inside the picture and same slice index as curr mb */ + u1_mb_a = (i4_mb_x == 0 || + (pu1_slice_idx[i4_mb_x - 1 ] != pu1_slice_idx[i4_mb_x]))? 0 : 1; + /* top macroblock availability : inside the picture and same slice index as curr mb */ + u1_mb_b = (i4_mb_y == 0 || + (pu1_slice_idx[i4_mb_x-ps_proc->i4_wd_mbs] != pu1_slice_idx[i4_mb_x]))? 0 : 1; + } + else + { + /* left macroblock availability */ + u1_mb_a = (i4_mb_x == 0)? 0 : 1; + /* top macroblock availability */ + u1_mb_b = (i4_mb_y == 0)? 0 : 1; + } + + pu1_pic_qp += push_ptr; + pu4_pic_vert_bs += push_ptr * 4; + pu4_pic_horz_bs += push_ptr * 4; + + /********/ + /* luma */ + /********/ + u4_qp_luma = pu1_pic_qp[0]; + + /* filter offset A and filter offset B have to be received from slice header */ + /* TODO : for now lets set these offsets as zero */ + + + u4_idx_A_luma = MIN(51, u4_qp_luma + 0); + u4_idx_B_luma = MIN(51, u4_qp_luma + 0); + + /* alpha, beta computation (internal edges use the qp of the curr mb only) */ + u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma]; + u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma]; + + /**********/ + /* chroma */ + /**********/ + u4_qp_chroma = gu1_qpc_fqpi[u4_qp_luma]; + + /* filter offset A and filter offset B have to be received from slice header */ + /* TODO : for now lets set these offsets as zero */ + + + u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0); + u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0); + + /* alpha, beta computation */ + u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma]; + u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma]; + + /* Deblock vertical edges */ + /* left vertical edge 0 - allowed to be deblocked ? (only when the left mb is available) 
*/ + if (u1_mb_a) + { + ih264e_filter_left_edge(ps_codec, ps_proc, pu1_pic_qp, pu1_cur_pic_luma, pu1_cur_pic_chroma, pu4_pic_vert_bs); + } + + /* vertical edge 1 (luma only) */ + if (pu4_pic_vert_bs[1] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 4, i4_rec_strd, u4_alpha_luma, u4_beta_luma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 4, i4_rec_strd, + u4_alpha_luma, u4_beta_luma, + pu4_pic_vert_bs[1], + gu1_ih264_clip_table[u4_idx_A_luma]); + } + + /* vertical edge 2 (luma and chroma; NOTE(review): the chroma offset of 8 bytes presumably reflects interleaved UV storage - confirm) */ + if (pu4_pic_vert_bs[2] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 8, i4_rec_strd, u4_alpha_luma, u4_beta_luma); + ps_codec->pf_deblk_chroma_vert_bs4(pu1_cur_pic_chroma + 8, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 8, i4_rec_strd, u4_alpha_luma, + u4_beta_luma, pu4_pic_vert_bs[2], + gu1_ih264_clip_table[u4_idx_A_luma]); + + ps_codec->pf_deblk_chroma_vert_bslt4(pu1_cur_pic_chroma + 8, i4_rec_strd, u4_alpha_chroma, + u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_vert_bs[2], + gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]); + } + + /* vertical edge 3 (luma only) */ + if (pu4_pic_vert_bs[3] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 12, i4_rec_strd, u4_alpha_luma, u4_beta_luma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 12, i4_rec_strd, u4_alpha_luma, + u4_beta_luma, pu4_pic_vert_bs[3], + gu1_ih264_clip_table[u4_idx_A_luma]); + } + + /* Deblock Horizontal edges */ + /* Horizontal edge 0 (only when the top mb is available) */ + if (u1_mb_b) + { + ih264e_filter_top_edge(ps_codec, ps_proc, pu1_pic_qp, pu1_cur_pic_luma, pu1_cur_pic_chroma, pu4_pic_horz_bs); + } + + /* horizontal edge 1 (luma only) */ + if (pu4_pic_horz_bs[1] == 0x04040404) + { + /* strong 
filter */ + ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, u4_beta_luma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, + u4_beta_luma, pu4_pic_horz_bs[1], + gu1_ih264_clip_table[u4_idx_A_luma]); + } + + /* horizontal edge 2 (luma row 8 and chroma row 4) */ + if (pu4_pic_horz_bs[2] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 8 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, u4_beta_luma); + ps_codec->pf_deblk_chroma_horz_bs4(pu1_cur_pic_chroma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 8 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, + u4_beta_luma, pu4_pic_horz_bs[2], + gu1_ih264_clip_table[u4_idx_A_luma]); + + ps_codec->pf_deblk_chroma_horz_bslt4(pu1_cur_pic_chroma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_chroma, + u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_horz_bs[2], + gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]); + } + + /* horizontal edge 3 (luma only) */ + if (pu4_pic_horz_bs[3] == 0x04040404) + { + /* strong filter */ + ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 12 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, u4_beta_luma); + } + else + { + /* normal filter */ + ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 12 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, + u4_beta_luma, pu4_pic_horz_bs[3], + gu1_ih264_clip_table[u4_idx_A_luma]); + } + + return ; +} diff --git a/encoder/ih264e_deblk.h b/encoder/ih264e_deblk.h new file mode 100755 index 0000000..9b3b67b --- /dev/null +++ b/encoder/ih264e_deblk.h @@ -0,0 +1,99 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file +* ih264e_deblk.h +* +* @brief +* This file contains extern declarations of deblocking routines +* +* @author +* ittiam +* +* @remarks +* none +****************************************************************************** +*/ + +#ifndef IH264E_DEBLK_H_ +#define IH264E_DEBLK_H_ + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief masks to extract csbp +****************************************************************************** + */ +#define CSBP_LEFT_BLOCK_MASK 0x1111 +#define CSBP_RIGHT_BLOCK_MASK 0x8888 + + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief This function computes blocking strength for an mb +* +* @par Description: +* This function computes blocking strength for an mb +* +* @param[in] ps_proc +* process context +* +* @returns 
none +* +* @remarks In this module it is assumed that there is only a single reference +* frame and that it is always the most recently used anchor frame +* +******************************************************************************* +*/ +void ih264e_compute_bs(process_ctxt_t * ps_proc); + +/** +******************************************************************************* +* +* @brief This function performs deblocking on an mb +* +* @par Description: +* This function performs deblocking on an mb +* +* @param[in] ps_proc +* process context corresponding to the job +* +* @param[in] ps_deblk +* pointer to deblock context +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_deblock_mb(process_ctxt_t *ps_proc, deblk_ctxt_t * ps_deblk); + +#endif /* IH264E_DEBLK_H_ */ diff --git a/encoder/ih264e_debug.h b/encoder/ih264e_debug.h new file mode 100755 index 0000000..5cb0434 --- /dev/null +++ b/encoder/ih264e_debug.h @@ -0,0 +1,65 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file +* ih264e_debug.h +* +* @brief +* This file contains extern declarations of routines that could be helpful +* for debugging purposes. +* +* @author +* ittiam +* +* @remarks +* none +****************************************************************************** +*/ + +#ifndef IH264E_DEBUG_H_ +#define IH264E_DEBUG_H_ + +#if DEBUG_RC +/* DEBUG_RC enabled: each hook forwards to the matching ih264e_debug_* routine */ +#define DEBUG_DUMP_QP(pic_cnt, qp, num_cores) \ + ih264e_debug_dump_qp(pic_cnt, qp, num_cores); + +#define DEBUG_DUMP_RC(ps_rc) ih264e_debug_print_rc(ps_rc); + +#define DEBUG_DUMP_COST_SAD_PU(ps_proc) ih264e_debug_dump_cost_sad_pu(ps_proc); + +#define DEBUG_DUMP_INP_TO_RC_POST_ENC(ps_frame_info, pic_cnt, num_cores) \ + ih264e_debug_dump_inp_to_post_enc(ps_frame_info, pic_cnt, num_cores); + +#else +/* DEBUG_RC disabled: hooks expand to a no-op statement; a bare "(void);" is a cast with no operand and does not compile */ +#define DEBUG_DUMP_QP(pic_cnt, qp, num_cores) ((void)0); + +#define DEBUG_DUMP_RC(ps_rc) ((void)0); + +#define DEBUG_DUMP_COST_SAD_PU(ps_proc) ((void)0); + +#define DEBUG_DUMP_INP_TO_RC_POST_ENC(ps_frame_info, pic_cnt, num_cores) ((void)0); + +#endif + +#endif /* IH264E_DEBUG_H_ */ diff --git a/encoder/ih264e_defs.h b/encoder/ih264e_defs.h new file mode 100755 index 0000000..76929ef --- /dev/null +++ b/encoder/ih264e_defs.h @@ -0,0 +1,538 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264e_defs.h +* +* @brief +* Definitions used in the encoder +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264E_DEFS_H_ +#define IH264E_DEFS_H_ + + +/*****************************************************************************/ +/* Width and height restrictions */ +/*****************************************************************************/ +/** + * Minimum width supported by codec + */ +#define MIN_WD 16 + +/** + * Maximum width supported by codec + */ + +#define MAX_WD 1920 + +/** + * Minimum height supported by codec + */ +#define MIN_HT 16 + +/** + * Maximum height supported by codec + */ + +#define MAX_HT 1920 + +/*****************************************************************************/ +/* Padding sizes */ +/*****************************************************************************/ +/** + * Padding used for top of the frame + */ +#define PAD_TOP 32 + +/** + * Padding used for bottom of the frame + */ +#define PAD_BOT 32 + +/** + * Padding used at left of the frame + */ +#define PAD_LEFT 32 + +/** + * Padding used at right of the frame + */ +#define PAD_RIGHT 32 +/** + * Padding for width + */ +#define PAD_WD (PAD_LEFT + PAD_RIGHT) +/** + * Padding for height + */ +#define PAD_HT (PAD_TOP + PAD_BOT) + +/* + * buffer width and height for half pel buffers + */ +#define HP_BUFF_WD 24 +#define HP_BUFF_HT 18 + +/*****************************************************************************/ +/* Number of frame restrictions */ +/*****************************************************************************/ +/** + * Maximum number of reference buffers in DPB manager + */ +#define MAX_REF_CNT 32 
+ +/*****************************************************************************/ +/* Num cores releated defs */ +/*****************************************************************************/ +/** + * Maximum number of cores + */ +#define MAX_NUM_CORES 8 + +/** + * Maximum number of threads for pixel processing + */ +#define MAX_PROCESS_THREADS MAX_NUM_CORES + +/** + * Maximum process context sets + * Used to stagger encoding of MAX_CTXT_SETS in parallel + */ +#define MAX_CTXT_SETS 2 +/** + * Maximum number of contexts + * Kept as twice the number of threads, to make it easier to initialize the contexts + * from master thread + */ +#define MAX_PROCESS_CTXT MAX_NUM_CORES * MAX_CTXT_SETS + +/*****************************************************************************/ +/* Profile and level restrictions */ +/*****************************************************************************/ +/** + * Max level supported by the codec + */ +#define MAX_LEVEL IH264_LEVEL_51 + +/** + * Min level supported by the codec + */ +#define MIN_LEVEL IH264_LEVEL_10 + +/** + * Maximum number of slice headers that are held in memory simultaneously + * For single core implementation only 1 slice header is enough. + * But for multi-core parsing thread needs to ensure that slice headers are + * stored till the last CB in a slice is decoded. 
+ * Parsing thread has to wait till last CB of a slice is consumed before reusing + * overwriting the slice header + * MAX_SLICE_HDR_CNT is assumed to be a power of 2 + */ + +#define LOG2_MAX_SLICE_HDR_CNT 8 +#define MAX_SLICE_HDR_CNT (1 << LOG2_MAX_SLICE_HDR_CNT) + +/* Generic declarations */ +#define DEFAULT_MAX_LEVEL 40 +#define DEFAULT_RECON_ENABLE 0 +#define DEFAULT_RC IVE_RC_STORAGE +#define DEFAULT_MAX_FRAMERATE 120000 +#define DEFAULT_MAX_BITRATE 20000000 +#define DEFAULT_MAX_SRCH_RANGE_X 256 +#define DEFAULT_MAX_SRCH_RANGE_Y 256 +#define DEFAULT_SLICE_PARAM 256 +#define DEFAULT_SRC_FRAME_RATE 30000 +#define DEFAULT_TGT_FRAME_RATE 30000 +#define DEFAULT_BITRATE 6000000 +#define DEFAULT_QP_MIN 10 +#define DEFAULT_QP_MAX 51 +#define DEFAULT_I_QP 25 +#define DEFAULT_P_QP 28 +#define DEFAULT_B_QP 28 +#define DEFAULT_AIR_MODE IVE_AIR_MODE_NONE +#define DEFAULT_AIR_REFRESH_PERIOD 30 +#define DEFAULT_VBV_DELAY 1000 +#define DEFAULT_VBV_SIZE 16800000 /* level 3.1 */ +#define DEFAULT_NUM_CORES 1 +#define DEFAULT_ME_SPEED_PRESET 100 +#define DEFAULT_HPEL 1 +#define DEFAULT_QPEL 1 +#define DEFAULT_I4 1 +#define DEFAULT_I8 0 +#define DEFAULT_I16 1 +#define DEFAULT_ENABLE_FAST_SAD 0 +#define DEFAULT_ENABLE_SATQD 1 +#define DEFAULT_MIN_SAD_ENABLE 0 +#define DEFAULT_MIN_SAD_DISABLE -1 +#define DEFAULT_SRCH_RNG_X 64 +#define DEFAULT_SRCH_RNG_Y 48 +#define DEFAULT_I_INTERVAL 30 +#define DEFAULT_IDR_INTERVAL 1000 +#define DEFAULT_B_FRAMES 0 +#define DEFAULT_DISABLE_DEBLK_LEVEL 0 +#define DEFAULT_PROFILE IV_PROFILE_BASE +#define DEFAULT_MIN_INTRA_FRAME_RATE 1 +#define DEFAULT_MAX_INTRA_FRAME_RATE 2147483647 +#define DEFAULT_MIN_BUFFER_DELAY 30 +#define DEFAULT_MAX_BUFFER_DELAY 20000 +#define DEFAULT_STRIDE 0 +#define DEFAULT_ENC_SPEED_PRESET IVE_USER_DEFINED +#define DEFAULT_PRE_ENC_ME 0 +#define DEFAULT_PRE_ENC_IPE 0 + +/** Maximum number of entries in input buffer list */ +#define MAX_INP_BUF_LIST_ENTRIES 32 + +/** Maximum number of entries in output buffer list */ +#define 
MAX_OUT_BUF_LIST_ENTRIES 32 + +/** Maximum number of entries in recon buffer list used within the encoder */ +#define MAX_REC_LIST_ENTRIES 16 + +/** Number of buffers created to hold half-pel planes for every reference buffer */ + #define HPEL_PLANES_CNT 1 + +/** + ***************************************************************************** + * Macro to compute total size required to hold one set of scaling matrices + ***************************************************************************** + */ +#define SCALING_MAT_SIZE(m_scaling_mat_size) \ +{ \ + m_scaling_mat_size = 6 * TRANS_SIZE_4 * TRANS_SIZE_4; \ + m_scaling_mat_size += 6 * TRANS_SIZE_8 * TRANS_SIZE_8; \ + m_scaling_mat_size += 6 * TRANS_SIZE_16 * TRANS_SIZE_16; \ + m_scaling_mat_size += 2 * TRANS_SIZE_32 * TRANS_SIZE_32; \ +} + +/** + ****************************************************************************** + * @brief Macros to get raster scan position of a block[8x8] / sub block[4x4]; X is bit 0 of the index, Y is the index shifted right by one + ****************************************************************************** + */ +#define GET_BLK_RASTER_POS_X(x) (((x) & 0x01)) +#define GET_BLK_RASTER_POS_Y(y) (((y) >> 1)) +#define GET_SUB_BLK_RASTER_POS_X(x) (((x) & 0x01)) +#define GET_SUB_BLK_RASTER_POS_Y(y) (((y) >> 1)) + +#define NUM_RC_MEMTABS 17 + +/** + *************************************************************************** + * Enum to hold various mem records being requested + **************************************************************************** + */ +enum +{ + /** + * Codec Object at API level + */ + MEM_REC_IV_OBJ, + + /** + * Codec context + */ + MEM_REC_CODEC, + + /** + * entropy context + */ + MEM_REC_ENTROPY, + + /** + * Buffer to hold coeff data + */ + MEM_REC_MB_COEFF_DATA, + + /** + * Buffer to hold coeff data + */ + MEM_REC_MB_HEADER_DATA, + + /** + * Motion vector bank + */ + MEM_REC_MVBANK, + + /** + * Motion vector bits + */ + MEM_REC_MVBITS, + + /** + * Holds mem records passed to the codec.
+ */ + MEM_REC_BACKUP, + + /** + * Holds SPS + */ + MEM_REC_SPS, + + /** + * Holds PPS + */ + MEM_REC_PPS, + + /** + * Holds Slice Headers + */ + MEM_REC_SLICE_HDR, + + /** + * Contains map indicating slice index per MB basis + */ + MEM_REC_SLICE_MAP, + + /** + * Holds thread handles + */ + MEM_REC_THREAD_HANDLE, + + /** + * Holds control call mutex + */ + MEM_REC_CTL_MUTEX, + + /** + * Holds entropy call mutex + */ + MEM_REC_ENTROPY_MUTEX, + + /** + * Holds memory for Process JOB Queue + */ + MEM_REC_PROC_JOBQ, + + /** + * Holds memory for Entropy JOB Queue + */ + MEM_REC_ENTROPY_JOBQ, + + /** + * Contains status map indicating processing status per MB basis + */ + MEM_REC_PROC_MAP, + + /** + * Contains status map indicating deblocking status per MB basis + */ + MEM_REC_DBLK_MAP, + + /* + * Contains AIR map and mask + */ + MEM_REC_AIR_MAP, + + /** + * Contains status map indicating ME status per MB basis + */ + MEM_REC_ME_MAP, + + /** + * Holds dpb manager context + */ + MEM_REC_DPB_MGR, + + /** + * Holds intermediate buffers needed during processing stage + * Memory for process contexts is allocated in this memtab + */ + MEM_REC_PROC_SCRATCH, + + /** + * Holds buffers for vert_bs, horz_bs and QP (all frame level) + */ + MEM_REC_QUANT_PARAM, + + /** + * Holds top row syntax information + */ + MEM_REC_TOP_ROW_SYN_INFO, + + /** + * Holds buffers for vert_bs, horz_bs and QP (all frame level) + */ + MEM_REC_BS_QP, + + /** + * Holds input buffer manager context + */ + MEM_REC_INP_PIC, + + /** + * Holds output buffer manager context + */ + MEM_REC_OUT, + + /** + * Holds picture buffer manager context and array of pic_buf_ts + * Also holds reference picture buffers in non-shared mode + */ + MEM_REC_REF_PIC, + + /* + * Mem record for color space conversion + */ + MEM_REC_CSC, + + /** + * NMB info struct + */ + MEM_REC_MB_INFO_NMB, + + /** + * Rate control of memory records. + */ + MEM_REC_RC, + + /** + * Place holder to compute number of memory records. 
+ */ + MEM_REC_CNT = MEM_REC_RC + NUM_RC_MEMTABS, + + /* + * Do not add anything below + */ +}; + +#define DISABLE_DEBLOCK_INTERVAL 8 + +/** + **************************************************************************** + * Disable deblock levels + * Level 0 enables deblocking completely and level 4 disables completely + * Other levels are intermediate values to control deblocking level + **************************************************************************** + */ +enum +{ + /** + * Enable deblocking completely + */ + DISABLE_DEBLK_LEVEL_0, + + /** + * Disable only within MB edges - Not supported currently + */ + DISABLE_DEBLK_LEVEL_1, + + /** + * Enable deblocking once in DEBLOCK_INTERVAL number of pictures + * and for I slices + */ + DISABLE_DEBLK_LEVEL_2, + + /** + * Enable deblocking only for I slices + */ + DISABLE_DEBLK_LEVEL_3, + + /** + * Disable deblocking completely + */ + DISABLE_DEBLK_LEVEL_4 +}; + +/** + **************************************************************************** + * Number of buffers for I/O based on format + **************************************************************************** + */ + +/** Minimum number of input buffers */ +#define MIN_INP_BUFS 2 + +/** Minimum number of output buffers */ +#define MIN_OUT_BUFS 1 + +/** Minimum number of components in bitstream buffer */ +#define MIN_BITS_BUFS_COMP 1 + +/** Minimum number of components in raw buffer */ +#define MIN_RAW_BUFS_420_COMP 3 +#define MIN_RAW_BUFS_422ILE_COMP 1 +#define MIN_RAW_BUFS_RGB565_COMP 1 +#define MIN_RAW_BUFS_RGBA8888_COMP 1 +#define MIN_RAW_BUFS_420SP_COMP 2 + +#define MAX_NMB 120 + +/** Maximum number of active config paramter sets */ +#define MAX_ACTIVE_CONFIG_PARAMS 32 + +/** +****************************************************************************** + * @brief Thresholds for luma & chroma to determine if the 8x8 subblock needs + * to be encoded or skipped +****************************************************************************** +*/ +#define 
LUMA_SUB_BLOCK_SKIP_THRESHOLD 4 +#define LUMA_BLOCK_SKIP_THRESHOLD 5 +#define CHROMA_BLOCK_SKIP_THRESHOLD 4 + +/** +****************************************************************************** + * @brief defines the first byte of a NAL unit + * forbidden zero bit - nal_ref_idc - nal_unit_type +****************************************************************************** +*/ +/* [0 - 11 - 00111] */ +#define NAL_SPS_FIRST_BYTE 0x67 + +/* [0 - 11 - 01000] */ +#define NAL_PPS_FIRST_BYTE 0x68 + +/* [0 - 11 - 00001] */ +#define NAL_SLICE_FIRST_BYTE 0x61 + +/* [0 - 00 - 00001] */ +#define NAL_NON_REF_SLICE_FIRST_BYTE 0x01 + +/* [0 - 11 - 00101] */ +#define NAL_IDR_SLICE_FIRST_BYTE 0x65 + +/* [0 - 00 - 01100] */ +#define NAL_FILLER_FIRST_BYTE 0x0C + +/* [0 - 00 - 00110] */ +#define NAL_SEI_FIRST_BYTE 0x06 + +#define H264_ALLOC_INTER_FRM_INTV 1 + +#define H264_MPEG_QP_MAP 191 + +#define MPEG2_QP_ELEM (H264_MPEG_QP_MAP + 1) +#define H264_QP_ELEM (MAX_H264_QP + 1) + +#define H264_INIT_QUANT_I 26 +#define H264_INIT_QUANT_P 34 + +#endif /*IH264E_DEFS_H_*/ diff --git a/encoder/ih264e_encode.c b/encoder/ih264e_encode.c new file mode 100755 index 0000000..ffc6fb7 --- /dev/null +++ b/encoder/ih264e_encode.c @@ -0,0 +1,580 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file +* ih264e_encode.c +* +* @brief +* This file contains functions for encoding the input yuv frame in synchronous +* api mode +* +* @author +* ittiam +* +* List of Functions +* - ih264e_join_threads() +* - ih264e_wait_for_thread() +* - ih264e_encode() +* +****************************************************************************** +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +/* User Include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264e.h" +#include "ithread.h" +#include "ih264_defs.h" +#include "ih264_macros.h" +#include "ih264_debug.h" +#include "ih264_structs.h" +#include "ih264_platform_macros.h" +#include "ih264_error.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_list.h" +#include "ih264e_error.h" +#include "ih264e_defs.h" +#include "ih264_padding.h" +#include "ih264e_bitstream.h" +#include "irc_mem_req_and_acq.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_time_stamp.h" +#include "ih264e_structs.h" +#include "ih264e_master.h" +#include
"ih264e_process.h" +#include "ih264_buf_mgr.h" +#include "ih264_dpb_mgr.h" +#include "ih264e_utils.h" +#include "ih264e_fmt_conv.h" +#include "ih264e_config.h" +#include "ih264e_statistics.h" +#include "ih264e_trace.h" +#include "ih264e_debug.h" +#ifdef LOGO_EN +#include "ih264e_ittiam_logo.h" +#endif + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief +* This function joins all the spawned threads after successful completion of +* their tasks +* +* @par Description +* Joins every slot below i4_proc_thread_cnt whose ai4_process_thread_created flag is set, clears the flag, then resets the count to 0 +* @param[in] ps_codec +* pointer to codec context +* +* @returns none +* +****************************************************************************** +*/ +void ih264e_join_threads(codec_t *ps_codec) +{ + /* temp var */ + WORD32 i = 0; + WORD32 ret = 0; + + /* join every spawned thread; slots whose created flag is clear are skipped */ + while (i < ps_codec->i4_proc_thread_cnt) + { + if (ps_codec->ai4_process_thread_created[i]) + { + ret = ithread_join(ps_codec->apv_proc_thread_handle[i], NULL); + if (ret != 0) + { + printf("pthread Join Failed"); + assert(0); + } + ps_codec->ai4_process_thread_created[i] = 0; + } + i++; /* always advance: incrementing only inside the if spins forever on a clear flag */ + } + + ps_codec->i4_proc_thread_cnt = 0; +} + +/** +****************************************************************************** +* +* @brief This function puts the current thread to sleep for a duration +* of sleep_us +* +* @par Description +* ithread_yield() method causes the calling thread to yield execution to another +* thread that is ready to run on the current processor. The operating system +* selects the thread to yield to. ithread_usleep blocks the current thread for +* the specified number of milliseconds. In other words, yield just says, +* end my timeslice prematurely, look around for other threads to run. If there +* is nothing better than me, continue.
Sleep says I don't want to run for x +* milliseconds. Even if no other thread wants to run, don't make me run. +* +* @param[in] sleep_us +* thread sleep duration, forwarded unchanged to ithread_usleep() +* +* @returns IH264E_SUCCESS unconditionally +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_wait_for_thread(UWORD32 sleep_us) +{ + /* first give up the remainder of the current timeslice */ + ithread_yield(); + + /* then block this thread for sleep_us */ + ithread_usleep(sleep_us); + + return IH264E_SUCCESS; +} + +/** +****************************************************************************** +* +* @brief +* Encodes in synchronous api mode +* +* @par Description +* This routine processes input yuv, encodes it and outputs bitstream and recon +* +* @param[in] ps_codec_obj +* Pointer to codec object at API level +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @returns Status +* +****************************************************************************** +*/ +WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) +{ + /* error status */ + IH264E_ERROR_T error_status = IH264E_SUCCESS; + + /* codec ctxt */ + codec_t *ps_codec = (codec_t *)ps_codec_obj->pv_codec_handle; + + /* input frame to encode */ + ih264e_video_encode_ip_t *ps_video_encode_ip = pv_api_ip; + + /* output buffer to write stream */ + ih264e_video_encode_op_t *ps_video_encode_op = pv_api_op; + + /* i/o structures */ + inp_buf_t s_inp_buf; + out_buf_t s_out_buf; + + /* temp var */ + WORD32 ctxt_sel = 0, i; + + /********************************************************************/ + /* BEGIN INIT */ + /********************************************************************/ + /* reset output structure */ + ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS; + ps_video_encode_op->s_ive_op.output_present = 0; + ps_video_encode_op->s_ive_op.dump_recon = 0; + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME; + + /* copy
input info. to internal structure */ + s_inp_buf.s_raw_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf; + s_inp_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low; + s_inp_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high; + s_inp_buf.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last; + s_inp_buf.pv_mb_info = ps_video_encode_ip->s_ive_ip.pv_mb_info; + s_inp_buf.u4_mb_info_type = ps_video_encode_ip->s_ive_ip.u4_mb_info_type; + s_inp_buf.pv_pic_info = ps_video_encode_ip->s_ive_ip.pv_pic_info; + s_inp_buf.u4_pic_info_type = ps_video_encode_ip->s_ive_ip.u4_pic_info_type; + + /* copy output info. to internal structure */ + s_out_buf.s_bits_buf = ps_video_encode_ip->s_ive_ip.s_out_buf; + s_out_buf.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last; + s_out_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low; + s_out_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high; + + /* api call cnt */ + ps_codec->i4_encode_api_call_cnt += 1; + + /* curr pic cnt */ + ps_codec->i4_pic_cnt += 1; + + /* codec context selector */ + ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1; + + /* reset status flags */ + ps_codec->ai4_pic_cnt[ctxt_sel] = -1; + ps_codec->s_rate_control.post_encode_skip[ctxt_sel] = 0; + ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = 0; + + /* pass output buffer to codec */ + ps_codec->as_out_buf[ctxt_sel] = s_out_buf; + + /* initialize codec ctxt with default params for the first encode api call */ + if (ps_codec->i4_encode_api_call_cnt == 0) + { + ih264e_codec_init(ps_codec); + } + + /* parse configuration params */ + for (i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++) + { + cfg_params_t *ps_cfg = &ps_codec->as_cfg[i]; + + if (1 == ps_cfg->u4_is_valid) + { + if ( ((ps_cfg->u4_timestamp_high == s_inp_buf.u4_timestamp_high) && + (ps_cfg->u4_timestamp_low == s_inp_buf.u4_timestamp_low)) || + ((WORD32)ps_cfg->u4_timestamp_high == -1) || + ((WORD32)ps_cfg->u4_timestamp_low == -1) ) + { + 
error_status |= ih264e_codec_update_config(ps_codec, ps_cfg); + SET_ERROR_ON_RETURN(error_status, + IVE_UNSUPPORTEDPARAM, + ps_video_encode_op->s_ive_op.u4_error_code, + IV_FAIL); + + ps_cfg->u4_is_valid = 0; + } + } + } + + /****************************************************************** + * INSERT LOGO + *****************************************************************/ +#ifdef LOGO_EN + if (s_inp_buf.s_raw_buf.apv_bufs[0] != NULL && + ps_codec->i4_header_mode != 1) + { + ih264e_insert_logo(s_inp_buf.s_raw_buf.apv_bufs[0], + s_inp_buf.s_raw_buf.apv_bufs[1], + s_inp_buf.s_raw_buf.apv_bufs[2], + s_inp_buf.s_raw_buf.au4_strd[0], + 0, + 0, + ps_codec->s_cfg.e_inp_color_fmt, + ps_codec->s_cfg.u4_disp_wd, + ps_codec->s_cfg.u4_disp_ht); + } +#endif /*LOGO_EN*/ + + if (ps_codec->i4_encode_api_call_cnt == 0) + { + /********************************************************************/ + /* number of mv/ref bank buffers used by the codec, */ + /* 1 to handle curr frame */ + /* 1 to store information of ref frame */ + /* 1 more additional because of the codec employs 2 ctxt sets */ + /* to assist asynchronous API */ + /********************************************************************/ + + /* initialize mv bank buffer manager */ + error_status |= ih264e_mv_buf_mgr_add_bufs(ps_codec); + SET_ERROR_ON_RETURN(error_status, + IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, + IV_FAIL); + + /* initialize ref bank buffer manager */ + error_status |= ih264e_pic_buf_mgr_add_bufs(ps_codec); + SET_ERROR_ON_RETURN(error_status, + IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, + IV_FAIL); + + /* for the first frame, generate header when not requested explicitly */ + if (ps_codec->i4_header_mode == 0 && + ps_codec->u4_header_generated == 0) + { + ps_codec->i4_gen_header = 1; + } + } + + /* generate header and return when encoder is operated in header mode */ + if (ps_codec->i4_header_mode == 1) + { + /* whenever the header is generated, this implies a 
start of sequence + * and a sequence needs to be started with IDR + */ + ps_codec->force_curr_frame_type = IV_IDR_FRAME; + + /* generate header */ + error_status |= ih264e_generate_sps_pps(ps_codec); + + /* api call cnt */ + ps_codec->i4_encode_api_call_cnt --; + + /* curr pic cnt */ + ps_codec->i4_pic_cnt --; + + /* header mode tag is not sticky */ + ps_codec->i4_header_mode = 0; + + /* send the input to app */ + ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf; + + /* send the output to app */ + ps_video_encode_op->s_ive_op.output_present = 1; + ps_video_encode_op->s_ive_op.dump_recon = 0; + ps_video_encode_op->s_ive_op.s_out_buf = ps_codec->as_out_buf[ctxt_sel].s_bits_buf; + + /* error status */ + SET_ERROR_ON_RETURN(error_status, + IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, + IV_FAIL); + + /* indicates that header has been generated previously */ + ps_codec->u4_header_generated = 1; + + return IV_SUCCESS; + } + + + if (s_inp_buf.s_raw_buf.apv_bufs[0] != NULL) + { + /* array giving pic cnt that is being processed in curr context set */ + ps_codec->ai4_pic_cnt[ctxt_sel] = ps_codec->i4_pic_cnt; + + /* initialize all relevant process ctxts */ + error_status |= ih264e_pic_init(ps_codec, &s_inp_buf); + SET_ERROR_ON_RETURN(error_status, + IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, + IV_FAIL); + + if (ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 0) + { + /* proc ctxt base idx */ + WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS; + + /* proc ctxt */ + process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select]; + + WORD32 ret = 0; + + /* number of addl. 
threads to be created */ + WORD32 num_thread_cnt = ps_codec->s_cfg.u4_num_cores - 1; + + for (i = 0; i < num_thread_cnt; i++) + { + ret = ithread_create(ps_codec->apv_proc_thread_handle[i], + NULL, + (void*)ih264e_process_thread, + &ps_codec->as_process[i + 1]); + if (ret != 0) + { + printf("pthread Create Failed"); + assert(0); + } + + ps_codec->ai4_process_thread_created[i] = 1; + + ps_codec->i4_proc_thread_cnt++; + } + + + /* launch job */ + ih264e_process_thread(ps_proc); + + /* Join threads at the end of encoding a frame */ + ih264e_join_threads(ps_codec); + + ih264_list_reset(ps_codec->pv_proc_jobq); + + ih264_list_reset(ps_codec->pv_entropy_jobq); + } + } + + if (-1 != ps_codec->ai4_pic_cnt[ctxt_sel]) + { + /* proc ctxt base idx */ + WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS; + + /* proc ctxt */ + process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select]; + + /* receive output back from codec */ + s_out_buf = ps_codec->as_out_buf[ctxt_sel]; + + /* send the output to app */ + ps_video_encode_op->s_ive_op.output_present = 1; + ps_video_encode_op->s_ive_op.dump_recon = 1; + ps_video_encode_op->s_ive_op.s_out_buf = s_out_buf.s_bits_buf; + ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS; + + /* receive input back from codec */ + s_inp_buf = ps_proc->s_inp_buf; + + /* send the input to app */ + ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf; + + if (ps_codec->s_cfg.u4_enable_recon && + ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 0) + { + /* error status */ + IH264_ERROR_T ret = IH264_SUCCESS; + + /* recon buffer */ + rec_buf_t *ps_rec_buf = &ps_codec->as_rec_buf[ctxt_sel]; + + ps_video_encode_op->s_ive_op.s_recon_buf = ps_video_encode_ip->s_ive_ip.s_recon_buf; + + /* copy/convert the recon buffer and return */ + ih264e_fmt_conv(ps_codec, &ps_rec_buf->s_pic_buf, + ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[0], + ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[1], + 
ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[2], + ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[0], + ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[1], + 0, + ps_codec->s_cfg.u4_disp_ht); + + ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_rec_buf->s_pic_buf.i4_buf_id, BUF_MGR_IO); + if (IH264_SUCCESS != ret) + { + SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret, + IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, + IV_FAIL); + } + } + + /* release buffers from ref list */ + if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel] == 1) + { + /* pic info */ + pic_buf_t *ps_cur_pic; + + /* mv info */ + mv_buf_t *ps_cur_mv_buf; + + /* error status */ + IH264_ERROR_T ret = IH264_SUCCESS; + + /* Decrement coded pic count */ + ps_codec->i4_coded_pic_cnt--; + + /* loop through to get the min pic cnt among the list of pics stored in ref list */ + /* since the skipped frame may not be on reference list, we may not have an MV bank + * hence free only if we have allocated */ + for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++) + { + if (ps_codec->i4_pic_cnt == ps_codec->as_ref_set[i].i4_pic_cnt) + { + ps_codec->as_ref_set[i].i4_pic_cnt = -1; + ps_codec->as_ref_set[i].i4_poc = -1; + + ps_cur_pic = ps_codec->as_ref_set[i].ps_pic_buf; + + ps_cur_mv_buf = ps_codec->as_ref_set[i].ps_mv_buf; + + /* release this frame from reference list */ + ret = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_REF); + SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret, + IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, + IV_FAIL); + + ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_REF); + SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret, + IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, + IV_FAIL); + break; + } + } + } + + if ((ps_codec->s_rate_control.post_encode_skip[ctxt_sel] == 1) || + (ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 1)) + { + ps_video_encode_op->s_ive_op.dump_recon = 0; + 
} + else + { + /* set output pic type */ + if (ps_codec->i4_slice_type == PSLICE) + { + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_P_FRAME; + } + else if (ps_codec->i4_slice_type == ISLICE && ps_codec->u4_is_idr != 1) + { + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_I_FRAME; + } + else + { + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_IDR_FRAME; + } + } + + /* loop through to get the error status */ + for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++) + { + error_status |= ps_codec->as_process[ctxt_sel + i].i4_error_code; + } + SET_ERROR_ON_RETURN(error_status, + IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, + IV_FAIL); + } + + if (1 == s_inp_buf.u4_is_last) + { + ps_video_encode_op->s_ive_op.output_present = 0; + ps_video_encode_op->s_ive_op.dump_recon = 0; + } + + return IV_SUCCESS; +} diff --git a/encoder/ih264e_encode_header.c b/encoder/ih264e_encode_header.c new file mode 100755 index 0000000..67e5409 --- /dev/null +++ b/encoder/ih264e_encode_header.c @@ -0,0 +1,1187 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_encode_header.c +* +* @brief +* This file contains function definitions related to header encoding. +* +* @author +* ittiam +* +* @par List of Functions: +* - ih264e_generate_nal_unit_header() +* - ih264e_generate_sps() +* - ih264e_generate_pps() +* - ih264e_generate_slice_header() +* - ih264e_get_level() +* - ih264e_populate_sps() +* - ih264e_populate_pps() +* - ih264e_populate_slice_header() +* - ih264e_add_filler_nal_unit() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +/* User Include Files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264e.h" +#include "ithread.h" +#include "ih264e_config.h" +#include "ih264e_trace.h" +#include "ih264_typedefs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ih264_debug.h" +#include "ih264_defs.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264e_defs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_encode_header.h" +#include "ih264_common_tables.h" +#include "ih264_macros.h" + + 
+/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief Generate nal unit header in the stream as per section 7.4.1 +* +* @par Description +* Inserts Nal unit header syntax as per section 7.4.1 +* +* @param[inout] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] nal_unit_type +* nal type to be inserted +* +* @param[in] nal_ref_idc +* nal ref idc to be inserted +* +* @return success or failure error code +* +****************************************************************************** +*/ +static WORD32 ih264e_generate_nal_unit_header(bitstrm_t *ps_bitstrm, + WORD32 nal_unit_type, + WORD32 nal_ref_idc) +{ + WORD32 return_status = IH264E_SUCCESS; + + /* sanity checks */ + ASSERT((nal_unit_type > 0) && (nal_unit_type < 32)); + + /* forbidden_zero_bit + nal_ref_idc + nal_unit_type */ + PUT_BITS(ps_bitstrm, + ((nal_ref_idc << 5) + nal_unit_type), + (1+2+5), /*1 forbidden zero bit + 2 nal_ref_idc + 5 nal_unit_type */ + return_status, + "nal_unit_header"); + + return(return_status); +} + +/** +****************************************************************************** +* +* @brief Generates SPS (Sequence Parameter Set) +* +* @par Description +* This function generates Sequence Parameter Set header as per the spec +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] ps_sps +* pointer to structure containing SPS data +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 ih264e_generate_sps(bitstrm_t *ps_bitstrm, sps_t *ps_sps) +{ + WORD32 return_status = IH264E_SUCCESS; + WORD32 i; + WORD8 i1_nal_unit_type = 7; + WORD8 i1_nal_ref_idc = 3; + + /* Insert Start Code */ + return_status |= 
ih264e_put_nal_start_code_prefix(ps_bitstrm, 1); + + /* Insert Nal Unit Header */ + return_status |= ih264e_generate_nal_unit_header(ps_bitstrm, i1_nal_unit_type, i1_nal_ref_idc); + + /* profile_idc */ + PUT_BITS(ps_bitstrm, ps_sps->u1_profile_idc, 8, return_status, "profile_idc"); + + /* constrained_set_flags */ + PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set0_flag, 1, return_status, "constrained_set0_flag"); + PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set1_flag, 1, return_status, "constrained_set1_flag"); + PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set2_flag, 1, return_status, "constrained_set2_flag"); + PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set3_flag, 1, return_status, "constrained_set3_flag"); + + /* reserved_zero_four_bits */ + PUT_BITS(ps_bitstrm, 0, 4, return_status, "reserved_zero_four_bits"); + + /* level_idc */ + PUT_BITS(ps_bitstrm, ps_sps->u1_level_idc, 8, return_status, "level_idc"); + + /* seq_parameter_set_id */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_sps_id, return_status, "seq_parameter_set_id"); + + if (ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH) + { + /* chroma_format_idc */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_chroma_format_idc, return_status, "chroma_format_idc"); + + if (ps_sps->u1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) + { + /* i1_residual_colour_transform_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_residual_colour_transform_flag, 1, return_status, "i1_residual_colour_transform_flag"); + } + + /* bit_depth_luma_minus8 */ + PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_bit_depth_luma - 8), return_status, "bit_depth_luma_minus8"); + + /* bit_depth_chroma_minus8 */ + PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_bit_depth_chroma - 8), return_status, "bit_depth_chroma_minus8"); + + /* qpprime_y_zero_transform_bypass_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_qpprime_y_zero_transform_bypass_flag, 1, return_status, "qpprime_y_zero_transform_bypass_flag"); + + /* seq_scaling_matrix_present_flag */ + PUT_BITS(ps_bitstrm, 
ps_sps->i1_seq_scaling_matrix_present_flag, 1, return_status, "seq_scaling_matrix_present_flag"); + + /* seq_scaling_list */ + if (ps_sps->i1_seq_scaling_matrix_present_flag) + { + /* TODO_LATER: Will be enabled once scaling list support is added */ + } + } + + /* log2_max_frame_num_minus4 */ + PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_log2_max_frame_num - 4), return_status, "log2_max_frame_num_minus4"); + + /* pic_order_cnt_type */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_pic_order_cnt_type, return_status, "pic_order_cnt_type"); + + if (ps_sps->i1_pic_order_cnt_type == 0) + { + /* log2_max_pic_order_cnt_lsb_minus4 */ + PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_log2_max_pic_order_cnt_lsb - 4), return_status, "log2_max_pic_order_cnt_lsb_minus4"); + } + else if (ps_sps->i1_pic_order_cnt_type == 1) + { + /* delta_pic_order_always_zero_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_delta_pic_order_always_zero_flag, 1, return_status, "delta_pic_order_always_zero_flag"); + + /* offset_for_non_ref_pic */ + PUT_BITS_SEV(ps_bitstrm, ps_sps->i4_offset_for_non_ref_pic, return_status, "offset_for_non_ref_pic"); + + /* offset_for_top_to_bottom_field */ + PUT_BITS_SEV(ps_bitstrm, ps_sps->i4_offset_for_top_to_bottom_field, return_status, "offset_for_top_to_bottom_field"); + + /* num_ref_frames_in_pic_order_cnt_cycle */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_num_ref_frames_in_pic_order_cnt_cycle, return_status, "num_ref_frames_in_pic_order_cnt_cycle"); + + /* Offset for ref frame */ + for (i=0; i<ps_sps->u1_num_ref_frames_in_pic_order_cnt_cycle; i++) + { + /* offset_for_ref_frame */ + PUT_BITS_SEV(ps_bitstrm, ps_sps->ai4_offset_for_ref_frame[i], return_status, "offset_for_ref_frame"); + } + } + + /* num_ref_frames */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_max_num_ref_frames, return_status, "num_ref_frames"); + + /* gaps_in_frame_num_value_allowed_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_gaps_in_frame_num_value_allowed_flag, 1, return_status, "gaps_in_frame_num_value_allowed_flag"); + + /* 
pic_width_in_mbs_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_width_in_mbs_minus1, return_status, "pic_width_in_mbs_minus1"); + + /* pic_height_in_map_units_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_height_in_map_units_minus1, return_status, "pic_height_in_map_units_minus1"); + + /* frame_mbs_only_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_frame_mbs_only_flag, 1, return_status, "frame_mbs_only_flag"); + + if (!ps_sps->i1_frame_mbs_only_flag) + { + /* mb_adaptive_frame_field_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_mb_adaptive_frame_field_flag, 1, return_status, "mb_adaptive_frame_field_flag"); + } + + /* direct_8x8_inference_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_direct_8x8_inference_flag, 1, return_status, "direct_8x8_inference_flag"); + + /* frame_cropping_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_frame_cropping_flag, 1, return_status, "frame_cropping_flag"); + + if (ps_sps->i1_frame_cropping_flag) + { + /* frame_crop_left_offset */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_left_offset, return_status, "frame_crop_left_offset"); + + /* frame_crop_right_offset */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_right_offset, return_status, "frame_crop_right_offset"); + + /* frame_crop_top_offset */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_top_offset, return_status, "frame_crop_top_offset"); + + /* frame_crop_bottom_offset */ + PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_bottom_offset, return_status, "frame_crop_bottom_offset"); + } + + /* vui_parameters_present_flag */ + PUT_BITS(ps_bitstrm, ps_sps->i1_vui_parameters_present_flag, 1, return_status, "vui_parameters_present_flag"); + + if (ps_sps->i1_vui_parameters_present_flag) + { + /* Add vui parameters to the bitstream */; + } + + /* rbsp trailing bits */ + return_status |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); + + return return_status; +} + +/** +****************************************************************************** +* +* @brief Generates PPS (Picture Parameter 
Set) +* +* @par Description +* Generate Picture Parameter Set as per Section 7.3.2.2 +* +* @param[inout] ps_bitstrm +* pointer to bitstream context (handle); the PPS NAL unit is appended to it +* +* @param[in] ps_pps +* pointer to structure containing PPS data +* +* @param[in] ps_sps +* pointer to structure containing SPS data; its u1_profile_idc decides +* whether the High profile fields at the end of the PPS are written +* +* @return success or failure error code (status of all the bitstream writes, +* OR-ed together) +* +****************************************************************************** +*/ +WORD32 ih264e_generate_pps(bitstrm_t *ps_bitstrm, pps_t *ps_pps, sps_t *ps_sps) +{ + WORD32 return_status = IH264E_SUCCESS; + + /* Insert the NAL start code */ + return_status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1); + + /* Insert Nal Unit Header (the whole header byte is taken from NAL_PPS_FIRST_BYTE) */ + PUT_BITS(ps_bitstrm, NAL_PPS_FIRST_BYTE, 8, return_status, "pps_header"); + + /* pic_parameter_set_id */ + PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_pps_id, return_status, "pic_parameter_set_id"); + + /* seq_parameter_set_id */ + PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_sps_id, return_status, "seq_parameter_set_id"); + + /* Entropy coding : 0-VLC; 1 - CABAC */ + PUT_BITS(ps_bitstrm, ps_pps->u1_entropy_coding_mode_flag, 1, return_status, "Entropy coding : 0-VLC; 1 - CABAC"); + + /* Pic order present flag */ + PUT_BITS(ps_bitstrm, ps_pps->u1_pic_order_present_flag, 1, return_status, "Pic order present flag"); + + /* Number of slice groups, coded as num_slice_groups_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_num_slice_groups - 1, return_status, "Number of slice groups"); + + if (ps_pps->u1_num_slice_groups > 1) + { + /* TODO_LATER: Currently the number of slice groups minus 1 is 0. 
+ * If this is not the case, we have to add Slice group map type to the bit stream*/ + } + + /* num_ref_idx_l0_default_active_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_ref_idx_l0_default_active - 1, return_status, "num_ref_idx_l0_default_active_minus1"); + + /* num_ref_idx_l1_default_active_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_ref_idx_l1_default_active - 1, return_status, "num_ref_idx_l1_default_active_minus1"); + + /* weighted_pred_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_weighted_pred_flag, 1, return_status, "weighted_pred_flag"); + + /* weighted_bipred_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_weighted_bipred_idc, 2, return_status, "weighted_bipred_idc"); + + /* pic_init_qp_minus26 */ + PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_init_qp - 26, return_status, "pic_init_qp_minus26"); + + /* pic_init_qs_minus26 */ + PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_init_qs - 26, return_status, "pic_init_qs_minus26"); + + /* chroma_qp_index_offset */ + PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_chroma_qp_index_offset, return_status, "chroma_qp_index_offset"); + + /* deblocking_filter_control_present_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_deblocking_filter_control_present_flag, 1, return_status, "deblocking_filter_control_present_flag"); + + /* constrained_intra_pred_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_constrained_intra_pred_flag, 1, return_status, "constrained_intra_pred_flag"); + + /*redundant_pic_cnt_present_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_redundant_pic_cnt_present_flag, 1, return_status, "redundant_pic_cnt_present_flag"); + + if (ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH) + { + /* transform_8x8_mode_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_transform_8x8_mode_flag, 1, return_status, "transform_8x8_mode_flag"); + + /* pic_scaling_matrix_present_flag */ + PUT_BITS(ps_bitstrm, ps_pps->i1_pic_scaling_matrix_present_flag, 1, return_status, "pic_scaling_matrix_present_flag"); + + if(ps_pps->i1_pic_scaling_matrix_present_flag) + { + /* 
TODO_LATER: Will be enabled once scaling list support is added */ + } + + /* Second chroma QP offset */ + PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_second_chroma_qp_index_offset, return_status, "Second chroma QP offset"); + } + + return_status |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); + + return return_status; +} + +/** +****************************************************************************** +* +* @brief Generates Slice Header +* +* @par Description +* Generate Slice Header as per Section 7.3.3 (slice header syntax). +* Writes the NAL start code and NAL unit header first, then the slice +* header fields. +* +* @param[inout] ps_bitstrm +* pointer to bitstream context for generating slice header +* +* @param[in] ps_slice_hdr +* pointer to slice header params +* +* @param[in] ps_pps +* pointer to pps params referred by slice +* +* @param[in] ps_sps +* pointer to sps params referred by slice +* +* @return success or failure error code (status of all the bitstream writes, +* OR-ed together) +* +****************************************************************************** +*/ +WORD32 ih264e_generate_slice_header(bitstrm_t *ps_bitstrm, + slice_header_t *ps_slice_hdr, + pps_t *ps_pps, + sps_t *ps_sps) +{ + + WORD32 return_status = IH264E_SUCCESS; + + /* Insert start code */ + return_status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1); + + /* Insert Nal Unit Header */ + return_status |= ih264e_generate_nal_unit_header(ps_bitstrm, ps_slice_hdr->i1_nal_unit_type, ps_slice_hdr->i1_nal_unit_idc); + + /* first_mb_in_slice */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u2_first_mb_in_slice, return_status, "first_mb_in_slice"); + + /* slice_type */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_slice_type, return_status, "slice_type"); + + /* pic_parameter_set_id */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_pps_id, return_status, "pic_parameter_set_id"); + + /* frame_num, fixed length of log2_max_frame_num bits */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->i4_frame_num, ps_sps->i1_log2_max_frame_num, 
return_status, "frame_num"); + + if (!ps_sps->i1_frame_mbs_only_flag) + { + /* field_pic_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_field_pic_flag, 1, return_status, "field_pic_flag"); + + if(ps_slice_hdr->i1_field_pic_flag) + { + /* bottom_field_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_bottom_field_flag, 1, return_status, "bottom_field_flag"); + } + } + + if (ps_slice_hdr->i1_nal_unit_type == 5) + { + /* u2_idr_pic_id */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u2_idr_pic_id, return_status, "u2_idr_pic_id"); + } + + if (ps_sps->i1_pic_order_cnt_type == 0) + { + /* pic_order_cnt_lsb */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->i4_pic_order_cnt_lsb, ps_sps->i1_log2_max_pic_order_cnt_lsb, return_status, "pic_order_cnt_lsb"); + + if(ps_pps->u1_pic_order_present_flag && !ps_slice_hdr->i1_field_pic_flag) + { + /* delta_pic_order_cnt_bottom */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i4_delta_pic_order_cnt_bottom, return_status, "delta_pic_order_cnt_bottom"); + } + } + + if (ps_sps->i1_pic_order_cnt_type == 1 && !ps_sps->i1_delta_pic_order_always_zero_flag) + { + /* delta_pic_order_cnt[0] */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->ai4_delta_pic_order_cnt[0], return_status, "delta_pic_order_cnt[0]"); + + if (ps_pps->u1_pic_order_present_flag && !ps_slice_hdr->i1_field_pic_flag) + { + /* delta_pic_order_cnt[1] */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->ai4_delta_pic_order_cnt[1], return_status, "delta_pic_order_cnt[1]"); + } + } + + if (ps_pps->i1_redundant_pic_cnt_present_flag) + { + /* redundant_pic_cnt */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_redundant_pic_cnt, return_status, "redundant_pic_cnt"); + } + + if (ps_slice_hdr->u1_slice_type == BSLICE) + { + /* direct_spatial_mv_pred_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_direct_spatial_mv_pred_flag, 1, return_status, "direct_spatial_mv_pred_flag"); + } + + if (ps_slice_hdr->u1_slice_type == PSLICE || ps_slice_hdr->u1_slice_type == SPSLICE || ps_slice_hdr->u1_slice_type == BSLICE) + { + /* 
num_ref_idx_active_override_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_num_ref_idx_active_override_flag, 1, return_status, "num_ref_idx_active_override_flag"); + + if (ps_slice_hdr->u1_num_ref_idx_active_override_flag) + { + /* num_ref_idx_l0_active_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l0_active - 1, return_status, "num_ref_idx_l0_active_minus1"); + } + if (ps_slice_hdr->u1_slice_type == BSLICE) + { + /* num_ref_idx_l1_active_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l1_active - 1, return_status, "num_ref_idx_l1_active_minus1"); + } + } + + /* ref_idx_reordering */ + /* TODO: ref_idx_reordering */ + if ((ps_slice_hdr->u1_slice_type != ISLICE) && (ps_slice_hdr->u1_slice_type != SISLICE)) + { + /* ref_pic_list_reordering_flag_l0 */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_ref_idx_reordering_flag_l0, 1, return_status, "ref_pic_list_reordering_flag_l0"); + + if (ps_slice_hdr->u1_ref_idx_reordering_flag_l0) + { + + } + } + + if ((ps_pps->i1_weighted_pred_flag && + (ps_slice_hdr->u1_slice_type == PSLICE || ps_slice_hdr->u1_slice_type == SPSLICE)) || + (ps_slice_hdr->u1_weighted_bipred_idc == 1 && ps_slice_hdr->u1_slice_type == BSLICE)) + { + /* TODO_LATER: Currently there is no support for weighted prediction. 
+ This needs to be updated when the support is added */ + } + + if (ps_slice_hdr->i1_nal_unit_idc != 0) + { + if (ps_slice_hdr->i1_nal_unit_type == 5) + { + /* no_output_of_prior_pics_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_no_output_of_prior_pics_flag , 1, return_status, "no_output_of_prior_pics_flag "); + + /* long_term_reference_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_long_term_reference_flag , 1, return_status, "long_term_reference_flag "); + } + else + { + /* adaptive_ref_pic_marking_mode_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag , 1, return_status, "adaptive_ref_pic_marking_mode_flag "); + + if (ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag) + { + /* TODO: if the reference picture marking mode is adaptive + add these fields in the bit-stream */ + } + } + } + + if (ps_slice_hdr->u1_entropy_coding_mode_flag && ps_slice_hdr->u1_slice_type != ISLICE && + ps_slice_hdr->u1_slice_type != SISLICE) + { + /* cabac_init_idc */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_cabac_init_idc, return_status, "cabac_init_idc"); + } + + /* slice_qp_delta */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_qp - ps_pps->i1_pic_init_qp, return_status, "slice_qp_delta"); + + if (ps_slice_hdr->u1_slice_type == SPSLICE || ps_slice_hdr->u1_slice_type == SISLICE) + { + if (ps_slice_hdr->u1_slice_type == SPSLICE) + { + /* sp_for_switch_flag */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_sp_for_switch_flag , 1, return_status, "sp_for_switch_flag"); + } + /* slice_qs_delta */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->u1_slice_qs - ps_pps->i1_pic_init_qs, return_status, "slice_qs_delta"); + } + + if (ps_pps->i1_deblocking_filter_control_present_flag) + { + /* disable_deblocking_filter_idc */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_disable_deblocking_filter_idc, return_status, "disable_deblocking_filter_idc"); + + if(ps_slice_hdr->u1_disable_deblocking_filter_idc != 1) + { + /* slice_alpha_c0_offset_div2 */ + 
PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_alpha_c0_offset_div2, return_status, "slice_alpha_c0_offset_div2"); + + /* slice_beta_offset_div2 */ + PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_beta_offset_div2, return_status, "slice_beta_offset_div2"); + } + } + + if (ps_slice_hdr->u1_num_slice_groups_minus1 > 0 && + ps_pps->u1_slice_group_map_type >= 3 && + ps_pps->u1_slice_group_map_type <= 5) + { + /* slice_group_change_cycle */ + /* TODO_LATER: Currently the number of slice groups minus 1 is 0. + * If this is not the case, we have to add Slice group map type to the bit stream */ + } + + return return_status; +} + + + +/** +****************************************************************************** +* +* @brief Populates sps structure +* +* @par Description +* Populates sps structure for its use in header generation +* +* @param[in] ps_codec +* pointer to encoder context +* +* @param[out] ps_sps +* pointer to sps params that needs to be populated +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps) +{ + /* active config parameters */ + cfg_params_t *ps_cfg = &(ps_codec->s_cfg); + +// /* level */ +// IH264_LEVEL_T level_idc; + + /* error_status */ + IH264E_ERROR_T i4_err_code = IH264E_FAIL; + + /* profile */ + /* + * Baseline profile supports, 8 bits per sample, 4:2:0 format, CAVLC. + * B frames are not allowed. Further, Flexible mb ordering, Redundant slices, Arbitrary slice ordering are supported. + * The constrained baseline profile is baseline profile minus ASO, FMO and redundant slices. + * To the constrained baseline profile if we add support for B slices, support for encoding interlaced frames, + * support for weighted prediction and introduce CABAC entropy coding then we have Main Profile. 
+ */ + if ((ps_cfg->u4_num_b_frames) || (ps_cfg->e_content_type != IV_PROGRESSIVE) || + (ps_cfg->u4_entropy_coding_mode == CABAC) || (ps_cfg->u4_weighted_prediction)) + { + ps_sps->u1_profile_idc = IH264_PROFILE_MAIN; + } + else + { + ps_sps->u1_profile_idc = IH264_PROFILE_BASELINE; + } + + /* level */ + ps_sps->u1_level_idc = ps_cfg->u4_max_level; +// i4_err_code = ih264e_get_level(ps_cfg, &level_idc); +// if (i4_err_code == IH264E_SUCCESS) +// { +// ps_sps->u1_level_idc = level_idc; +// +// } +// else +// { +// return i4_err_code; +// } + + /* constrained flags */ + /* + * baseline profile automatically implies set 0 flag + */ + ps_sps->u1_constraint_set0_flag = (ps_sps->u1_profile_idc == IH264_PROFILE_BASELINE); + /* + * main profile automatically implies set 1 flag + * Although the encoder says it supports Baseline profile it actually supports constrained + * baseline profile as ASO, FMO and redundant slices are not supported + */ + ps_sps->u1_constraint_set1_flag = (ps_sps->u1_profile_idc <= IH264_PROFILE_MAIN); + /* + * extended profile is not supported + */ + ps_sps->u1_constraint_set2_flag = 0x00; + /* + * level 1b or level 11 + * For the Baseline and Main profiles selected above, level 1b has no level_idc + * value of its own: it is signalled as level_idc 11 together with + * constraint_set3_flag = 1. 
With the flag at 0 the stream would claim level 1.1 + */ + if (ps_sps->u1_level_idc == IH264_LEVEL_1B) + { + ps_sps->u1_constraint_set3_flag = 1; + ps_sps->u1_level_idc = IH264_LEVEL_11; + } + else + { + ps_sps->u1_constraint_set3_flag = 0; + } + + /* active sps id */ + ps_sps->u1_sps_id = ps_codec->i4_sps_id; + + if (ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH) + { + /* chroma format idc */ + ps_sps->u1_chroma_format_idc = CHROMA_FMT_IDC_YUV420; + + /* residual_colour_transform_flag */ + ps_sps->i1_residual_colour_transform_flag = 0; + + /* luma bit depth 8 */ + ps_sps->i1_bit_depth_luma = 8; + + /* chroma bit depth 8 */ + ps_sps->i1_bit_depth_chroma = 8; + + /* qpprime_y_zero_transform_bypass_flag */ + ps_sps->i1_qpprime_y_zero_transform_bypass_flag = 0; + + /* seq_scaling_matrix_present_flag */ + ps_sps->i1_seq_scaling_matrix_present_flag = 0; + + if 
(ps_sps->i1_seq_scaling_matrix_present_flag) + { + /* TODO_LATER: Will be enabled once scaling list support is added */ + } + } + + /* log2_max_frame_num_minus4 */ + ps_sps->i1_log2_max_frame_num = 16; + + /* pic_order_cnt_type */ + ps_sps->i1_pic_order_cnt_type = 2; + + if(ps_cfg->u4_enable_alt_ref) + ps_sps->i1_pic_order_cnt_type = 0; + + /* log2_max_pic_order_cnt_lsb_minus4 */ + ps_sps->i1_log2_max_pic_order_cnt_lsb = 8; + + /* TODO : add support for other poc types */ + if (ps_sps->i1_pic_order_cnt_type == 0) + { + + } + else if (ps_sps->i1_pic_order_cnt_type == 1) + { + + } + + /* num_ref_frames */ + /* FIXME : Fix this hard coding */ + ps_sps->u1_max_num_ref_frames = 1; + + /* gaps_in_frame_num_value_allowed_flag */ + ps_sps->i1_gaps_in_frame_num_value_allowed_flag = 0; + + /* pic width in mb - 1 */ + ps_sps->i2_pic_width_in_mbs_minus1 = ps_cfg->i4_wd_mbs - 1; + + /* pic height in mb - 1 */ + ps_sps->i2_pic_height_in_map_units_minus1 = ps_cfg->i4_ht_mbs - 1;; + + /* frame_mbs_only_flag, no support for interlace encoding */ + ps_sps->i1_frame_mbs_only_flag = 1; + + /* mb_adaptive_frame_field_flag */ + if (ps_sps->i1_frame_mbs_only_flag == 0) + { + ps_sps->i1_mb_adaptive_frame_field_flag = 0; + } + + /* direct_8x8_inference_flag */ + ps_sps->i1_direct_8x8_inference_flag = 0; + + /* cropping params */ + /*NOTE : Cropping values depend on the chroma format + * For our case ,decoder interprets the cropping values as 2*num pixels + * Hence the difference in the disp width and width must be halved before sending + * to get the expected results + */ + ps_sps->i1_frame_cropping_flag = 0; + ps_sps->i2_frame_crop_left_offset = 0; + ps_sps->i2_frame_crop_right_offset = (ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd)>>1; + ps_sps->i2_frame_crop_top_offset = 0; + ps_sps->i2_frame_crop_bottom_offset = (ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht)>>1; + + if (ps_sps->i2_frame_crop_left_offset || + ps_sps->i2_frame_crop_right_offset || + 
ps_sps->i2_frame_crop_top_offset || + ps_sps->i2_frame_crop_bottom_offset) + { + ps_sps->i1_frame_cropping_flag = 1; + } + + /* vui params */ + ps_sps->i1_vui_parameters_present_flag = 0; + + if (ps_sps->i1_vui_parameters_present_flag) + { + /* populate vui params */ + } + + return i4_err_code; +} + +/** +****************************************************************************** +* +* @brief Populates pps structure +* +* @par Description +* Populates pps structure for its use in header generation +* +* @param[in] ps_codec +* pointer to encoder context +* +* @param[out] ps_pps +* pointer to pps params that needs to be populated +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_populate_pps(codec_t *ps_codec, pps_t *ps_pps) +{ + /* active config parameters */ + cfg_params_t *ps_cfg = &(ps_codec->s_cfg); + + /* seq_parameter_set_id */ + ps_pps->u1_sps_id = ps_codec->i4_sps_id; + + /* pic_parameter_set_id */ + ps_pps->u1_pps_id = ps_codec->i4_pps_id; + + /* entropy_coding_mode */ + ps_pps->u1_entropy_coding_mode_flag = ps_cfg->u4_entropy_coding_mode; + + /* pic_order_present_flag is unset for POC type 2 */ + ps_pps->u1_pic_order_present_flag = 0; + + /* Currently number of slice groups supported are 1 */ + ps_pps->u1_num_slice_groups = 1; + + if (ps_pps->u1_num_slice_groups - 1) + { + /* TODO_LATER: Currently the number of slice groups minus 1 is 0. + * If this is not the case, we have to add Slice group map type to the bit stream*/ + } + + /* number of reference frames for list 0 */ + /* FIXME : fix this hard coded value */ + ps_pps->i1_num_ref_idx_l0_default_active = 1; + + /* number of reference frames for list 1 */ + ps_pps->i1_num_ref_idx_l1_default_active = 1; + + /* weighted prediction for now is disabled */ + ps_pps->i1_weighted_pred_flag = 0; + ps_pps->i1_weighted_bipred_idc = 0; + + /* The intent is to not signal qp from pps. 
Rather send the same in slice headers */ + ps_pps->i1_pic_init_qp = 0; + + /* The intent is to not signal qp from pps. Rather send the same in slice headers */ + ps_pps->i1_pic_init_qs = 0; + + /* The intent is to not signal qp from pps. Rather send the same in slice headers */ + ps_pps->i1_chroma_qp_index_offset = 0; + + /* deblocking filter flags present in slice header */ + ps_pps->i1_deblocking_filter_control_present_flag = 1; + + /* constrained intra prediction */ + ps_pps->i1_constrained_intra_pred_flag = ps_cfg->u4_constrained_intra_pred; + + /* sending redundant slices is not supported for now */ + ps_pps->i1_redundant_pic_cnt_present_flag = 0; + + ps_pps->u1_slice_group_map_type = 0; + return IH264E_SUCCESS; +} + +/** +****************************************************************************** +* +* @brief Populates slice header structure +* +* @par Description +* Populates slice header structure for its use in header generation +* +* @param[in] ps_proc +* pointer to proc context +* +* @param[out] ps_slice_hdr +* pointer to slice header structure that needs to be populated +* +* @param[in] ps_pps +* pointer to pps params structure referred by the slice +* +* @param[in] ps_sps +* pointer to sps params referred by the pps +* +* @return success or failure error code +* +****************************************************************************** +*/ +WORD32 ih264e_populate_slice_header(process_ctxt_t *ps_proc, + slice_header_t *ps_slice_hdr, + pps_t *ps_pps, + sps_t *ps_sps) +{ + /* entropy context */ + entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; + + codec_t *ps_codec = ps_proc->ps_codec; + + if (ps_proc->ps_codec->u4_is_curr_frm_ref) + { + ps_slice_hdr->i1_nal_unit_idc = 3; + } + else + { + ps_slice_hdr->i1_nal_unit_idc = 0; + } + + /* start mb address */ + ps_slice_hdr->u2_first_mb_in_slice = ps_entropy->i4_mb_start_add; + + /* slice type */ + ps_slice_hdr->u1_slice_type = ps_proc->i4_slice_type; + + /* pic_parameter_set_id */ + 
ps_slice_hdr->u1_pps_id = ps_pps->u1_pps_id; + + /* Separate color plane flag is 0, + * hence the syntax element color_plane_id not included */ + + /* frame num */ + ps_slice_hdr->i4_frame_num = ps_proc->i4_frame_num; + + /* frame_mbs_only_flag, no support for interlace encoding */ + if (!ps_sps->i1_frame_mbs_only_flag) + { + ps_slice_hdr->i1_field_pic_flag = 0; + + if (ps_slice_hdr->i1_field_pic_flag) + { + ps_slice_hdr->i1_bottom_field_flag = 0; + } + } + + /* idr pic id */ + if (ps_proc->u4_is_idr) + { + ps_slice_hdr->u2_idr_pic_id = ps_proc->u4_idr_pic_id; + ps_slice_hdr->i1_nal_unit_type = 5; + } + else + { + ps_slice_hdr->i1_nal_unit_type = 1; + } + + if (ps_sps->i1_pic_order_cnt_type == 0) + { + + WORD32 val; + val = ps_codec->i4_coded_pic_cnt; + val %= (1 << ps_sps->i1_log2_max_pic_order_cnt_lsb); + ps_slice_hdr->i4_pic_order_cnt_lsb = val; + } + else if (ps_sps->i1_pic_order_cnt_type == 1) + { + + } + + if(0 == ps_slice_hdr->u2_first_mb_in_slice) + ps_codec->i4_coded_pic_cnt++; + + /* + * redundant slices are not currently supported. 
+ * Hence the syntax element redundant slice cnt is not initialized + */ + if (ps_pps->i1_redundant_pic_cnt_present_flag) + { + + } + + /* direct spatial mv pred flag */ + if (ps_proc->i4_slice_type == BSLICE) + { + + } + + if (ps_proc->i4_slice_type == PSLICE || ps_proc->i4_slice_type == SPSLICE || ps_proc->i4_slice_type == BSLICE) + { + /* num_ref_idx_active_override_flag */ + ps_slice_hdr->u1_num_ref_idx_active_override_flag = 0; + + if (ps_slice_hdr->u1_num_ref_idx_active_override_flag) + { + /* num_ref_idx_l0_active_minus1 */ + + if (ps_proc->i4_slice_type == BSLICE) + { + /* num_ref_idx_l1_active_minus1 */ + + } + } + } + + /* ref_idx_reordering */ + /* TODO: ref_idx_reordering */ + if ((ps_proc->i4_slice_type != ISLICE) && (ps_proc->i4_slice_type != SISLICE)) + { + /* ref_pic_list_reordering_flag_l0 */ + ps_slice_hdr->u1_ref_idx_reordering_flag_l0 = 0; + + if (ps_slice_hdr->u1_ref_idx_reordering_flag_l0) + { + + } + } + + if ((ps_pps->i1_weighted_pred_flag && + (ps_proc->i4_slice_type == PSLICE || ps_proc->i4_slice_type == SPSLICE)) || + (ps_slice_hdr->u1_weighted_bipred_idc == 1 && ps_proc->i4_slice_type == BSLICE)) + { + /* TODO_LATER: Currently there is no support for weighted prediction. 
+ This needs to be updated when the support is added */ + } + + if (ps_slice_hdr->i1_nal_unit_idc != 0) + { + if (ps_slice_hdr->i1_nal_unit_type == 5) + { + /* no_output_of_prior_pics_flag */ + ps_slice_hdr->u1_no_output_of_prior_pics_flag = 0; + + /* long_term_reference_flag */ + ps_slice_hdr->u1_long_term_reference_flag = 0; + } + else + { + /* adaptive_ref_pic_marking_mode_flag */ + ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag = 0; + + if (ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag) + { + /* TODO: if the reference picture marking mode is adaptive + add these fields in the bit-stream */ + } + } + } + + /* entropy coding mode flag */ + ps_slice_hdr->u1_entropy_coding_mode_flag = ps_entropy->u1_entropy_coding_mode_flag; + + if (ps_slice_hdr->u1_entropy_coding_mode_flag && ps_proc->i4_slice_type != ISLICE && + ps_proc->i4_slice_type != SISLICE) + { + /* cabac_init_idc */ + } + + /* slice qp */ + ps_slice_hdr->i1_slice_qp = ps_proc->u4_frame_qp; + + if (ps_proc->i4_slice_type == SPSLICE || ps_proc->i4_slice_type == SISLICE) + { + if (ps_proc->i4_slice_type == SPSLICE) + { + /* sp_for_switch_flag */ + } + /* slice_qs_delta */ + } + + if (ps_pps->i1_deblocking_filter_control_present_flag) + { + /* disable_deblocking_filter_idc */ + ps_slice_hdr->u1_disable_deblocking_filter_idc = ps_proc->u4_disable_deblock_level; + + if (ps_slice_hdr->u1_disable_deblocking_filter_idc != 1) + { + /* slice_alpha_c0_offset_div2 */ + ps_slice_hdr->i1_slice_alpha_c0_offset_div2 = 0; + + /* slice_beta_offset_div2 */ + ps_slice_hdr->i1_slice_beta_offset_div2 = 0; + } + } + ps_slice_hdr->u1_num_slice_groups_minus1 = 0; + if(ps_slice_hdr->u1_num_slice_groups_minus1 > 0 && + ps_pps->u1_slice_group_map_type >= 3 && + ps_pps->u1_slice_group_map_type <= 5) + { + /* slice_group_change_cycle */ + /* TODO_LATER: Currently the number of slice groups minus 1 is 0. 
+ * If this is not the case, we have to add Slice group map type to the bit stream */ + } + + return IH264E_SUCCESS; +} + +/** +****************************************************************************** +* +* @brief inserts FILLER Nal Unit. +* +* @par Description +* In constant bit rate rc mode, when the bits generated by the codec is +* underflowing the target bit rate, the encoder library inserts filler nal unit. +* +* @param[in] ps_bitstrm +* pointer to bitstream context (handle) +* +* @param[in] insert_fill_bytes +* Number of fill bytes to be inserted +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_add_filler_nal_unit(bitstrm_t *ps_bitstrm, + WORD32 insert_fill_bytes) +{ + WORD32 i4_num_words_to_fill, i4_words_filled; + + IH264E_ERROR_T return_status = IH264E_SUCCESS; + + /* Insert the NAL start code */ + return_status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1); + + if (ps_bitstrm->u4_strm_buf_offset + insert_fill_bytes >= ps_bitstrm->u4_max_strm_size) + { + return (IH264E_BITSTREAM_BUFFER_OVERFLOW); + } + + /* Insert Nal Unit Header */ + PUT_BITS(ps_bitstrm, NAL_FILLER_FIRST_BYTE, 8, return_status, "filler_header"); + + PUT_BITS(ps_bitstrm, 0xFFFFFF, 24, return_status, "fill bytes"); + + /* Initializing Variables */ + i4_words_filled = 1; + + /****************************************************/ + /* Flooring the number of bytes for be stuffed to */ + /* WORD unit */ + /****************************************************/ + i4_num_words_to_fill = (insert_fill_bytes >> 2); + + /****************************************************/ + /* Reducing already 4 bytes filled. 
In case stuffing*/ + /* is <= 4 bytes, we are actually not stuffing */ + /* anything */ + /****************************************************/ + i4_num_words_to_fill -= i4_words_filled; + + while (i4_num_words_to_fill > 0) + { + /* Insert Nal Unit Header */ + PUT_BITS(ps_bitstrm, 0xFFFFFFFF, 32, return_status, "fill bytes"); + + i4_num_words_to_fill-- ; + } + + return_status |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); + + return return_status; +} + diff --git a/encoder/ih264e_encode_header.h b/encoder/ih264e_encode_header.h new file mode 100755 index 0000000..acae5b6 --- /dev/null +++ b/encoder/ih264e_encode_header.h @@ -0,0 +1,278 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
/**
******************************************************************************
 *  @brief      Macro to put a code with specified number of bits into the
 *              bitstream
 *
 *  @remarks    Traces the syntax element through ENTROPY_TRACE and ORs the
 *              return value of ih264e_put_bits() into ret_val. Expands to a
 *              single statement (terminate the invocation with ';'), so it
 *              is safe as the body of an unbraced if / else.
******************************************************************************
 */
#define PUT_BITS(ps_bitstrm, code_val, code_len, ret_val, syntax_string) \
    do \
    { \
        ENTROPY_TRACE(syntax_string, code_val); \
        (ret_val) |= ih264e_put_bits((ps_bitstrm), (code_val), (code_len)); \
    } while (0)

/**
******************************************************************************
 *  @brief      Macro to put a code into the bitstream using 0th order
 *              exponential Golomb encoding for unsigned numbers
 *
 *  @remarks    Traces the syntax element through ENTROPY_TRACE and ORs the
 *              return value of ih264e_put_uev() into ret_val. Expands to a
 *              single statement (terminate the invocation with ';').
******************************************************************************
 */
#define PUT_BITS_UEV(ps_bitstrm, code_val, ret_val, syntax_string) \
    do \
    { \
        ENTROPY_TRACE(syntax_string, code_val); \
        (ret_val) |= ih264e_put_uev((ps_bitstrm), (code_val)); \
    } while (0)

/**
******************************************************************************
 *  @brief      Macro to put a code into the bitstream using 0th order
 *              exponential Golomb encoding for signed numbers
 *
 *  @remarks    Traces the syntax element through ENTROPY_TRACE and ORs the
 *              return value of ih264e_put_sev() into ret_val. Expands to a
 *              single statement (terminate the invocation with ';').
******************************************************************************
 */
#define PUT_BITS_SEV(ps_bitstrm, code_val, ret_val, syntax_string) \
    do \
    { \
        ENTROPY_TRACE(syntax_string, code_val); \
        (ret_val) |= ih264e_put_sev((ps_bitstrm), (code_val)); \
    } while (0)
/*****************************************************************************/
/* Extern Function Declarations                                              */
/*****************************************************************************/

/**
******************************************************************************
*
* @brief Generates SPS (Sequence Parameter Set)
*
* @par Description
*  This function generates Sequence Parameter Set header as per the spec
*
* @param[in] ps_bitstrm
*  pointer to bitstream context (handle)
*
* @param[in] ps_sps
*  pointer to structure containing SPS data
*
* @return success or failure error code
*
******************************************************************************
*/
WORD32 ih264e_generate_sps
    (
        bitstrm_t   *ps_bitstrm,
        sps_t       *ps_sps
    );

/**
******************************************************************************
*
* @brief Generates PPS (Picture Parameter Set)
*
* @par Description
*  Generate Picture Parameter Set as per Section 7.3.2.2
*
* @param[in] ps_bitstrm
*  pointer to bitstream context (handle)
*
* @param[in] ps_pps
*  pointer to structure containing PPS data
*
* @param[in] ps_sps
*  pointer to structure containing SPS data (presumably the sps referred to
*  by this pps - confirm against the definition)
*
* @return success or failure error code
*
******************************************************************************
*/
WORD32 ih264e_generate_pps
    (
        bitstrm_t   *ps_bitstrm,
        pps_t       *ps_pps,
        sps_t       *ps_sps
    );

/**
******************************************************************************
*
* @brief Generates Slice Header
*
* @par Description
*  Generate Slice Header as per the slice header syntax of the spec
*  (NOTE(review): this comment used to cite Section 7.3.5.1; the slice header
*  syntax appears to be Section 7.3.3 of H.264 - confirm)
*
* @param[inout] ps_bitstrm
*  pointer to bitstream context for generating slice header
*
* @param[in] ps_slice_hdr
*  pointer to slice header params
*
* @param[in] ps_pps
*  pointer to pps params referred by slice
*
* @param[in] ps_sps
*  pointer to sps params referred by slice
*
* @return success or failure error code
*
******************************************************************************
*/
WORD32 ih264e_generate_slice_header
    (
        bitstrm_t       *ps_bitstrm,
        slice_header_t  *ps_slice_hdr,
        pps_t           *ps_pps,
        sps_t           *ps_sps
    );

/**
******************************************************************************
*
* @brief Populates sps structure
*
* @par Description
*  Populates sps structure for its use in header generation
*
* @param[in] ps_codec
*  pointer to encoder context
*
* @param[out] ps_sps
*  pointer to sps params that needs to be populated
*
* @return success or failure error code
*
******************************************************************************
*/
IH264E_ERROR_T ih264e_populate_sps
    (
        codec_t *ps_codec,
        sps_t   *ps_sps
    );

/**
******************************************************************************
*
* @brief Populates pps structure
*
* @par Description
*  Populates pps structure for its use in header generation
*
* @param[in] ps_codec
*  pointer to encoder context
*
* @param[out] ps_pps
*  pointer to pps params that needs to be populated
*
* @return success or failure error code
*
******************************************************************************
*/
IH264E_ERROR_T ih264e_populate_pps
    (
        codec_t *ps_codec,
        pps_t   *ps_pps
    );


/**
******************************************************************************
*
* @brief Populates slice header structure
*
* @par Description
*  Populates slice header structure for its use in header generation
*
* @param[in] ps_proc
*  pointer to proc context
*
* @param[out] ps_slice_hdr
*  pointer to slice header structure that needs to be populated
*
* @param[in] ps_pps
*  pointer to pps params structure referred by the slice
*
* @param[in] ps_sps
*  pointer to sps params referred by the pps
*
* @return success or failure error code
*
******************************************************************************
*/
WORD32 ih264e_populate_slice_header
    (
        process_ctxt_t  *ps_proc,
        slice_header_t  *ps_slice_hdr,
        pps_t           *ps_pps,
        sps_t           *ps_sps
    );


/**
******************************************************************************
*
* @brief inserts FILLER Nal Unit.
*
* @par Description
*  In constant bit rate rc mode, when the bits generated by the codec is
*  underflowing the target bit rate, the encoder library inserts filler nal unit.
*
* @param[in] ps_bitstrm
*  pointer to bitstream context (handle)
*
* @param[in] insert_fill_bytes
*  Number of fill bytes to be inserted
*
* @return success or failure error code
*
******************************************************************************
*/
IH264E_ERROR_T ih264e_add_filler_nal_unit
    (
        bitstrm_t   *ps_bitstrm,
        WORD32      insert_fill_bytes
    );


#endif //IH264E_ENCODE_HEADER_H_
/**
******************************************************************************
* @brief Records an error and returns from the enclosing function
*
* @par Description
*  When error differs from IH264E_SUCCESS, out_status is set to
*  ((1 << severity) | error) and the enclosing function returns ret_code.
*  Otherwise nothing happens.
*
* @remarks
*  - Expands to a single statement (terminate the invocation with ';'), so it
*    is safe as the body of an unbraced if / else.
*  - error is evaluated twice when it signals a failure; pass an expression
*    without side effects.
******************************************************************************
*/
#define SET_ERROR_ON_RETURN(error, severity, out_status, ret_code) \
    do \
    { \
        if ((error) != IH264E_SUCCESS) \
        { \
            (out_status) = ((1 << (severity)) | (error)); \
            return (ret_code); \
        } \
    } while (0)
/**
******************************************************************************
 *  @brief   Extended error code for each error in H264 encoder
 *
 *  Bitstream errors are numbered from IH264E_BITSTREAM_ERROR_START (0x80),
 *  codec errors from IH264E_CODEC_ERROR_START (0x90).
******************************************************************************
 */
typedef enum
{
    /* NOTE: the ive error codes ends at 0x80 */
    IVE_ERR_CODEC_EXTENSIONS                        = 0x80,

    /* bit stream error start */
    IH264E_BITSTREAM_ERROR_START                    = IVE_ERR_CODEC_EXTENSIONS,

    /* codec error start */
    IH264E_CODEC_ERROR_START                        = IH264E_BITSTREAM_ERROR_START + 0x10,

    /** no error */
    IH264E_SUCCESS                                  = 0,

    /** bitstream init failure, buffer ptr not aligned to WORD (32bits)     */
    IH264E_BITSTREAM_BUFPTR_ALIGN_FAIL              = IH264E_BITSTREAM_ERROR_START + 0x01,

    /** bitstream init failure, buf size not multiple of WORD size (32bits) */
    IH264E_BITSTREAM_BUFSIZE_ALIGN_FAIL             = IH264E_BITSTREAM_ERROR_START + 0x02,

    /** bitstream runtime failure, buf size limit exceeded during encode    */
    IH264E_BITSTREAM_BUFFER_OVERFLOW                = IH264E_BITSTREAM_ERROR_START + 0x03,

    /** width not set within supported limit */
    IH264E_WIDTH_NOT_SUPPORTED                      = IH264E_CODEC_ERROR_START + 0x01,

    /** height not set within supported limit */
    IH264E_HEIGHT_NOT_SUPPORTED                     = IH264E_CODEC_ERROR_START + 0x02,

    /** unsupported number of reference pictures passed as an argument */
    IH264E_NUM_REF_UNSUPPORTED                      = IH264E_CODEC_ERROR_START + 0x03,

    /** unsupported number of reorder frames passed as an argument */
    IH264E_NUM_REORDER_UNSUPPORTED                  = IH264E_CODEC_ERROR_START + 0x04,

    /** codec level not supported */
    IH264E_CODEC_LEVEL_NOT_SUPPORTED                = IH264E_CODEC_ERROR_START + 0x05,

    /** input chroma format not supported */
    IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED        = IH264E_CODEC_ERROR_START + 0x06,

    /** recon chroma format not supported */
    IH264E_RECON_CHROMA_FORMAT_NOT_SUPPORTED        = IH264E_CODEC_ERROR_START + 0x07,

    /** rate control option configured is not supported */
    IH264E_RATE_CONTROL_MODE_NOT_SUPPORTED          = IH264E_CODEC_ERROR_START + 0x08,

    /** frame rate configured is not supported */
    IH264E_FRAME_RATE_NOT_SUPPORTED                 = IH264E_CODEC_ERROR_START + 0x09,

    /** bit rate configured is not supported */
    IH264E_BITRATE_NOT_SUPPORTED                    = IH264E_CODEC_ERROR_START + 0x0A,

    /** number of B frames configured is not supported */
    IH264E_BFRAMES_NOT_SUPPORTED                    = IH264E_CODEC_ERROR_START + 0x0B,

    /** content type not supported */
    IH264E_CONTENT_TYPE_NOT_SUPPORTED               = IH264E_CODEC_ERROR_START + 0x0C,

    /** unsupported horizontal search range */
    IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED    = IH264E_CODEC_ERROR_START + 0x0D,

    /** unsupported vertical search range */
    IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED      = IH264E_CODEC_ERROR_START + 0x0E,

    /** unsupported slice type input */
    IH264E_SLICE_TYPE_INPUT_INVALID                 = IH264E_CODEC_ERROR_START + 0x0F,

    /** unsupported architecture type */
    IH264E_ARCH_TYPE_NOT_SUPPORTED                  = IH264E_CODEC_ERROR_START + 0x10,

    /** unsupported soc type */
    IH264E_SOC_TYPE_NOT_SUPPORTED                   = IH264E_CODEC_ERROR_START + 0x11,

    /** target frame rate exceeds source frame rate */
    IH264E_TGT_FRAME_RATE_EXCEEDS_SRC_FRAME_RATE    = IH264E_CODEC_ERROR_START + 0x12,

    /** invalid force frame input */
    IH264E_INVALID_FORCE_FRAME_INPUT                = IH264E_CODEC_ERROR_START + 0x13,

    /** invalid me speed preset */
    IH264E_INVALID_ME_SPEED_PRESET                  = IH264E_CODEC_ERROR_START + 0x14,

    /** invalid encoder speed preset */
    IH264E_INVALID_ENC_SPEED_PRESET                 = IH264E_CODEC_ERROR_START + 0x15,

    /** invalid deblocking param */
    IH264E_INVALID_DEBLOCKING_TYPE_INPUT            = IH264E_CODEC_ERROR_START + 0x16,

    /** invalid max qp */
    IH264E_INVALID_MAX_FRAME_QP                     = IH264E_CODEC_ERROR_START + 0x17,

    /** invalid min qp */
    IH264E_INVALID_MIN_FRAME_QP                     = IH264E_CODEC_ERROR_START + 0x18,

    /** invalid init qp */
    IH264E_INVALID_INIT_QP                          = IH264E_CODEC_ERROR_START + 0x19,

    /** version buffer size is insufficient */
    IH264E_CXA_VERS_BUF_INSUFFICIENT                = IH264E_CODEC_ERROR_START + 0x1A,

    /** init not done */
    IH264E_INIT_NOT_DONE                            = IH264E_CODEC_ERROR_START + 0x1B,

    /** invalid air (refresh type) mode input */
    IH264E_INVALID_AIR_MODE                         = IH264E_CODEC_ERROR_START + 0x1C,

    /** invalid air refresh period */
    IH264E_INVALID_AIR_REFRESH_PERIOD               = IH264E_CODEC_ERROR_START + 0x1D,

    /** insufficient memory allocated for MV Bank */
    IH264E_INSUFFICIENT_MEM_MVBANK                  = IH264E_CODEC_ERROR_START + 0x1E,

    /** insufficient memory allocated for picture buffers */
    IH264E_INSUFFICIENT_MEM_PICBUF                  = IH264E_CODEC_ERROR_START + 0x1F,

    /** buffer manager error */
    IH264E_BUF_MGR_ERROR                            = IH264E_CODEC_ERROR_START + 0x20,

    /** no free MV Bank buffer available to store current pic */
    IH264E_NO_FREE_MVBANK                           = IH264E_CODEC_ERROR_START + 0x21,

    /** no free picture buffer available to store current pic */
    IH264E_NO_FREE_PICBUF                           = IH264E_CODEC_ERROR_START + 0x22,

    /** invalid encoder operation mode */
    IH264E_INVALID_ENC_OPERATION_MODE               = IH264E_CODEC_ERROR_START + 0x23,

    /** invalid half pel option */
    IH264E_INVALID_HALFPEL_OPTION                   = IH264E_CODEC_ERROR_START + 0x24,

    /** invalid quarter pel option */
    IH264E_INVALID_QPEL_OPTION                      = IH264E_CODEC_ERROR_START + 0x25,

    /** invalid fast sad option */
    IH264E_INVALID_FAST_SAD_OPTION                  = IH264E_CODEC_ERROR_START + 0x26,

    /** invalid intra 4x4 option */
    IH264E_INVALID_INTRA4x4_OPTION                  = IH264E_CODEC_ERROR_START + 0x27,

    /** invalid intra frame interval */
    IH264E_INVALID_INTRA_FRAME_INTERVAL             = IH264E_CODEC_ERROR_START + 0x28,

    /** invalid idr frame interval */
    IH264E_INVALID_IDR_FRAME_INTERVAL               = IH264E_CODEC_ERROR_START + 0x29,

    /** invalid buffer delay */
    IH264E_INVALID_BUFFER_DELAY                     = IH264E_CODEC_ERROR_START + 0x2A,

    /** invalid num cores */
    IH264E_INVALID_NUM_CORES                        = IH264E_CODEC_ERROR_START + 0x2B,

    /** profile not supported */
    IH264E_PROFILE_NOT_SUPPORTED                    = IH264E_CODEC_ERROR_START + 0x2C,

    /** invalid slice parameter input */
    IH264E_SLICE_PARAM_INPUT_INVALID                = IH264E_CODEC_ERROR_START + 0x2D,

    /** invalid alt ref option */
    IH264E_INVALID_ALT_REF_OPTION                   = IH264E_CODEC_ERROR_START + 0x2E,

    /** no free picture buffer available to store recon pic */
    IH264E_NO_FREE_RECONBUF                         = IH264E_CODEC_ERROR_START + 0x2F,

    /** generic failure code.
     *  NOTE(review): the original comment says this entry makes the enum
     *  32 bits wide; a value of -1 only forces a signed representation -
     *  confirm whether a full-width sentinel was intended */
    IH264E_FAIL                                     = -1,

}IH264E_ERROR_T;
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_fmt_conv.c +* +* @brief +* Contains functions for format conversion or frame copy of output buffer +* +* @author +* ittiam +* +* @par List of Functions: +* - ih264e_fmt_conv_420sp_to_rgb565() +* - ih264e_fmt_conv_420sp_to_rgba8888() +* - ih264e_fmt_conv_420sp_to_420sp() +* - ih264e_fmt_conv_420sp_to_420sp_swap_uv() +* - ih264e_fmt_conv_420sp_to_420p() +* - ih264e_fmt_conv_420p_to_420sp() +* - ih264e_fmt_conv_422i_to_420sp() +* - ih264e_fmt_conv() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264e.h" +#include "ithread.h" +#include "ih264_defs.h" +#include "ih264_debug.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_error.h" +#include "ih264_buf_mgr.h" +#include "ih264e_defs.h" +#include "ih264e_error.h" +#include 
"ih264e_bitstream.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_fmt_conv.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +void ih264e_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD16 *pu2_rgb_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_strd, + WORD32 is_u_first) +{ + WORD16 i2_r, i2_g, i2_b; + UWORD32 u4_r, u4_g, u4_b; + WORD16 i2_i, i2_j; + UWORD8 *pu1_y_src_nxt; + UWORD16 *pu2_rgb_dst_NextRow; + + UWORD8 *pu1_u_src, *pu1_v_src; + + if (is_u_first) + { + pu1_u_src = (UWORD8 *) pu1_uv_src; + pu1_v_src = (UWORD8 *) pu1_uv_src + 1; + } + else + { + pu1_u_src = (UWORD8 *) pu1_uv_src + 1; + pu1_v_src = (UWORD8 *) pu1_uv_src; + } + + pu1_y_src_nxt = pu1_y_src + src_y_strd; + pu2_rgb_dst_NextRow = pu2_rgb_dst + dst_strd; + + for (i2_i = 0; i2_i < (ht >> 1); i2_i++) + { + for (i2_j = (wd >> 1); i2_j > 0; i2_j--) + { + i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13); + i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) + >> 13; + i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13; + + pu1_u_src += 2; + pu1_v_src += 2; + /* pixel 0 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src + i2_b); + u4_b >>= 3; + /* G */ + u4_g = CLIP_U8(*pu1_y_src + i2_g); + u4_g >>= 2; + /* R */ + u4_r = CLIP_U8(*pu1_y_src + i2_r); + u4_r >>= 3; + + pu1_y_src++; + *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b); + + /* pixel 1 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src + i2_b); + u4_b >>= 3; + /* G */ + u4_g = CLIP_U8(*pu1_y_src + i2_g); + u4_g >>= 2; + /* R */ + u4_r = CLIP_U8(*pu1_y_src + i2_r); + u4_r >>= 3; + + pu1_y_src++; + *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b); + + /* pixel 2 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); + u4_b >>= 3; + /* G */ + 
u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); + u4_g >>= 2; + /* R */ + u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); + u4_r >>= 3; + + pu1_y_src_nxt++; + *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b); + + /* pixel 3 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); + u4_b >>= 3; + /* G */ + u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); + u4_g >>= 2; + /* R */ + u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); + u4_r >>= 3; + + pu1_y_src_nxt++; + *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b); + + } + + pu1_u_src = pu1_u_src + src_uv_strd - wd; + pu1_v_src = pu1_v_src + src_uv_strd - wd; + + pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd; + pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd; + + pu2_rgb_dst = pu2_rgb_dst_NextRow - wd + dst_strd; + pu2_rgb_dst_NextRow = pu2_rgb_dst_NextRow + (dst_strd << 1) - wd; + } + +} + +void ih264e_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD32 *pu4_rgba_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_strd, + WORD32 is_u_first) +{ + WORD16 i2_r, i2_g, i2_b; + UWORD32 u4_r, u4_g, u4_b; + WORD16 i2_i, i2_j; + UWORD8 *pu1_y_src_nxt; + UWORD32 *pu4_rgba_dst_NextRow; + UWORD8 *pu1_u_src, *pu1_v_src; + + if (is_u_first) + { + pu1_u_src = (UWORD8 *) pu1_uv_src; + pu1_v_src = (UWORD8 *) pu1_uv_src + 1; + } + else + { + pu1_u_src = (UWORD8 *) pu1_uv_src + 1; + pu1_v_src = (UWORD8 *) pu1_uv_src; + } + + pu1_y_src_nxt = pu1_y_src + src_y_strd; + + pu4_rgba_dst_NextRow = pu4_rgba_dst + dst_strd; + + for (i2_i = 0; i2_i < (ht >> 1); i2_i++) + { + for (i2_j = (wd >> 1); i2_j > 0; i2_j--) + { + i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13); + i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) + >> 13; + i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13; + + pu1_u_src += 2; + pu1_v_src += 2; + /* pixel 0 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src + i2_b); + /* G */ + u4_g = CLIP_U8(*pu1_y_src + i2_g); + /* R */ + u4_r = CLIP_U8(*pu1_y_src + i2_r); + + pu1_y_src++; 
+ *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); + + /* pixel 1 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src + i2_b); + /* G */ + u4_g = CLIP_U8(*pu1_y_src + i2_g); + /* R */ + u4_r = CLIP_U8(*pu1_y_src + i2_r); + + pu1_y_src++; + *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); + + /* pixel 2 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); + /* G */ + u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); + /* R */ + u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); + + pu1_y_src_nxt++; + *pu4_rgba_dst_NextRow++ = + ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); + + /* pixel 3 */ + /* B */ + u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b); + /* G */ + u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g); + /* R */ + u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r); + + pu1_y_src_nxt++; + *pu4_rgba_dst_NextRow++ = + ((u4_r << 16) | (u4_g << 8) | (u4_b << 0)); + + } + + pu1_u_src = pu1_u_src + src_uv_strd - wd; + pu1_v_src = pu1_v_src + src_uv_strd - wd; + + pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd; + pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd; + + pu4_rgba_dst = pu4_rgba_dst_NextRow - wd + dst_strd; + pu4_rgba_dst_NextRow = pu4_rgba_dst_NextRow + (dst_strd << 1) - wd; + } + +} + +/** +******************************************************************************* +* +* @brief Function used for copying a 420SP buffer +* +* @par Description +* Function used for copying a 420SP buffer +* +* @param[in] pu1_y_src +* Input Y pointer +* +* @param[in] pu1_uv_src +* Input UV pointer (UV is interleaved either in UV or VU format) +* +* @param[in] pu1_y_dst +* Output Y pointer +* +* @param[in] pu1_uv_dst +* Output UV pointer (UV is interleaved in the same format as that of input) +* +* @param[in] wd +* Width +* +* @param[in] ht +* Height +* +* @param[in] src_y_strd +* Input Y Stride +* +* @param[in] src_uv_strd +* Input UV stride +* +* @param[in] dst_y_strd +* Output Y stride +* +* @param[in] dst_uv_strd +* Output UV stride +* +* @returns None +* +* @remarks In case there is a need to perform 
partial frame copy then +* by passing appropriate source and destination pointers and appropriate +* values for wd and ht it can be done +* +******************************************************************************* +*/ +void ih264e_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_uv_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd) +{ + UWORD8 *pu1_src, *pu1_dst; + WORD32 num_rows, num_cols, src_strd, dst_strd; + WORD32 i; + + /* copy luma */ + pu1_src = (UWORD8 *) pu1_y_src; + pu1_dst = (UWORD8 *) pu1_y_dst; + + num_rows = ht; + num_cols = wd; + + src_strd = src_y_strd; + dst_strd = dst_y_strd; + + for (i = 0; i < num_rows; i++) + { + memcpy(pu1_dst, pu1_src, num_cols); + pu1_dst += dst_strd; + pu1_src += src_strd; + } + + /* copy U and V */ + pu1_src = (UWORD8 *) pu1_uv_src; + pu1_dst = (UWORD8 *) pu1_uv_dst; + + num_rows = ht >> 1; + num_cols = wd; + + src_strd = src_uv_strd; + dst_strd = dst_uv_strd; + + for (i = 0; i < num_rows; i++) + { + memcpy(pu1_dst, pu1_src, num_cols); + pu1_dst += dst_strd; + pu1_src += src_strd; + } + return; +} + + +void ih264e_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_uv_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd) +{ + UWORD8 *pu1_src, *pu1_dst; + WORD32 num_rows, num_cols, src_strd, dst_strd; + WORD32 i; + + /* copy luma */ + pu1_src = (UWORD8 *) pu1_y_src; + pu1_dst = (UWORD8 *) pu1_y_dst; + + num_rows = ht; + num_cols = wd; + + src_strd = src_y_strd; + dst_strd = dst_y_strd; + + for (i = 0; i < num_rows; i++) + { + memcpy(pu1_dst, pu1_src, num_cols); + pu1_dst += dst_strd; + pu1_src += src_strd; + } + + /* copy U and V */ + pu1_src = (UWORD8 *) pu1_uv_src; + pu1_dst = (UWORD8 *) pu1_uv_dst; + + num_rows = ht >> 1; + num_cols = wd; + + src_strd = src_uv_strd; + dst_strd = 
dst_uv_strd; + + for (i = 0; i < num_rows; i++) + { + WORD32 j; + for (j = 0; j < num_cols; j += 2) + { + pu1_dst[j + 0] = pu1_src[j + 1]; + pu1_dst[j + 1] = pu1_src[j + 0]; + } + pu1_dst += dst_strd; + pu1_src += src_strd; + } + return; +} + +void ih264e_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, + UWORD8 *pu1_v_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd, + WORD32 is_u_first, + WORD32 disable_luma_copy) +{ + UWORD8 *pu1_src, *pu1_dst; + UWORD8 *pu1_u_src, *pu1_v_src; + WORD32 num_rows, num_cols, src_strd, dst_strd; + WORD32 i, j; + + if (0 == disable_luma_copy) + { + /* copy luma */ + pu1_src = (UWORD8 *) pu1_y_src; + pu1_dst = (UWORD8 *) pu1_y_dst; + + num_rows = ht; + num_cols = wd; + + src_strd = src_y_strd; + dst_strd = dst_y_strd; + + for (i = 0; i < num_rows; i++) + { + memcpy(pu1_dst, pu1_src, num_cols); + pu1_dst += dst_strd; + pu1_src += src_strd; + } + } + /* de-interleave U and V and copy to destination */ + if (is_u_first) + { + pu1_u_src = (UWORD8 *) pu1_uv_src; + pu1_v_src = (UWORD8 *) pu1_uv_src + 1; + } + else + { + pu1_u_src = (UWORD8 *) pu1_uv_src + 1; + pu1_v_src = (UWORD8 *) pu1_uv_src; + } + + num_rows = ht >> 1; + num_cols = wd >> 1; + + src_strd = src_uv_strd; + dst_strd = dst_uv_strd; + + for (i = 0; i < num_rows; i++) + { + for (j = 0; j < num_cols; j++) + { + pu1_u_dst[j] = pu1_u_src[j * 2]; + pu1_v_dst[j] = pu1_v_src[j * 2]; + } + + pu1_u_dst += dst_strd; + pu1_v_dst += dst_strd; + pu1_u_src += src_strd; + pu1_v_src += src_strd; + } + return; +} + +/** +******************************************************************************* +* +* @brief Function used to perform color space conversion from 420P to 420SP +* +* @par Description +* Function used to perform color space conversion from 420P to 420SP +* +* @param[in] pu1_y_src +* Input Y pointer +* +* @param[in] pu1_u_src +* Input U pointer +* +* 
@param[in] pu1_v_dst +* Input V pointer +* +* @param[in] pu1_y_dst +* Output Y pointer +* +* @param[in] pu1_uv_dst +* Output UV pointer +* +* @param[in] u4_width +* Width +* +* @param[in] u4_height +* Height +* +* @param[in] src_y_strd +* Input Y Stride +* +* @param[in] src_u_strd +* Input U stride +* +* @param[in] src_v_strd +* Input V stride +* +* @param[in] dst_y_strd +* Output Y stride +* +* @param[in] dst_uv_strd +* Output UV stride +* +* @param[in] convert_uv_only +* Flag to indicate if only UV copy needs to be done +* +* @returns none +* +* @remarks In case there is a need to perform partial frame copy then +* by passion appropriate source and destination pointers and appropriate +* values for wd and ht it can be done +* +******************************************************************************* +*/ +void ih264e_fmt_conv_420p_to_420sp(UWORD8 *pu1_y_src, + UWORD8 *pu1_u_src, + UWORD8 *pu1_v_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_uv_dst, + UWORD16 u2_height, + UWORD16 u2_width, + UWORD16 src_y_strd, + UWORD16 src_u_strd, + UWORD16 src_v_strd, + UWORD16 dst_y_strd, + UWORD16 dst_uv_strd, + UWORD32 convert_uv_only) +{ + UWORD8 *pu1_src, *pu1_dst; + UWORD8 *pu1_src_u, *pu1_src_v; + UWORD16 i; + UWORD32 u2_width_uv; + UWORD32 dest_inc_Y = 0, dest_inc_UV = 0; + + dest_inc_UV = dst_uv_strd; + + if (0 == convert_uv_only) + { + + /* Copy Y buffer */ + pu1_dst = (UWORD8 *) pu1_y_dst; + pu1_src = (UWORD8 *) pu1_y_src; + + dest_inc_Y = dst_y_strd; + + for (i = 0; i < u2_height; i++) + { + memcpy((void *) pu1_dst, (void *) pu1_src, u2_width); + pu1_dst += dest_inc_Y; + pu1_src += src_y_strd; + } + } + + /* Interleave Cb and Cr buffers */ + pu1_src_u = pu1_u_src; + pu1_src_v = pu1_v_src; + pu1_dst = pu1_uv_dst; + + u2_height = (u2_height + 1) >> 1; + u2_width_uv = (u2_width + 1) >> 1; + for (i = 0; i < u2_height; i++) + { + UWORD32 j; + for (j = 0; j < u2_width_uv; j++) + { + *pu1_dst++ = *pu1_src_u++; + *pu1_dst++ = *pu1_src_v++; + } + + pu1_dst += dest_inc_UV - 
u2_width; + pu1_src_u += src_u_strd - u2_width_uv; + pu1_src_v += src_v_strd - u2_width_uv; + } +} + +/** +******************************************************************************* +* +* @brief Function used to convert 422 interleaved to 420sp +* +* @par Description +* Function used to convert 422 interleaved to 420sp +* +* @param[in] pu1_y_buf +* Output Y pointer +* +* @param[in] pu1_u_buf +* Output u pointer +* +* @param[in[ pu1_v_buf +* Output V pointer +* +* @param[in] pu1_422i_buf +* Input 422i pointer +* +* @param[in] u4_y_width +* Width of Y component +* +* @param[in] u4_y_height +* Height of Y component +* +* @param[in] u4_y_stride +* Stride of pu1_y_buf +* +* @param[in] u4_u_stride +* Stride of pu1_u_buf +* +* @param[in] u4_v_stride +* Stride of pu1_v_buf +* +* @param[in] u4_422i_stride +* Stride of pu1_422i_buf +* +* @returns None +* +* @remarks For conversion +* pu1_v_buf = pu1_u_buf+1 +* u4_u_stride = u4_v_stride +* +* The extra parameters are for maintaining API with assembly function +* +******************************************************************************* +*/ +void ih264e_fmt_conv_422i_to_420sp(UWORD8 *pu1_y_buf, + UWORD8 *pu1_u_buf, + UWORD8 *pu1_v_buf, + UWORD8 *pu1_422i_buf, + WORD32 u4_y_width, + WORD32 u4_y_height, + WORD32 u4_y_stride, + WORD32 u4_u_stride, + WORD32 u4_v_stride, + WORD32 u4_422i_stride) +{ + WORD32 row, col; + UWORD8 *row_even_422 = pu1_422i_buf; + UWORD8 *row_odd_422 = row_even_422 + (u4_422i_stride << 1); + UWORD8 *row_even_luma = pu1_y_buf; + /* Since at the end of loop, we have row_even_luma += (luma_width << 1), + * it should be same here right? 
*/ + UWORD8 *row_odd_luma = row_even_luma + u4_y_stride; + UWORD8 *row_cb = pu1_u_buf; + UWORD8 *row_cr = pu1_v_buf; + + for (row = 0; row < u4_y_height; row = row + 2) + { + for (col = 0; col < (u4_y_width << 1); col = col + 4) + { + UWORD8 cb_even = row_even_422[col]; + UWORD8 cr_even = row_even_422[col + 2]; + + row_cb[col >> 1] = cb_even; + row_cr[col >> 1] = cr_even; + + row_even_luma[col >> 1] = row_even_422[col + 1]; + row_even_luma[(col >> 1) + 1] = row_even_422[col + 3]; + + row_odd_luma[col >> 1] = row_odd_422[col + 1]; + row_odd_luma[(col >> 1) + 1] = row_odd_422[col + 3]; + } + + row_even_422 += (u4_422i_stride << 2); + row_odd_422 += (u4_422i_stride << 2); + + row_even_luma += (u4_y_stride << 1); + row_odd_luma += (u4_y_stride << 1); + + row_cb += u4_u_stride; + row_cr += u4_v_stride; + } +} + +/** +******************************************************************************* +* +* @brief Function used from format conversion or frame copy +* +* @par Description +* Function used from copying or converting a reference frame to display buffer +* in non shared mode +* +* @param[in] pu1_y_dst +* Output Y pointer +* +* @param[in] pu1_u_dst +* Output U/UV pointer ( UV is interleaved in the same format as that of input) +* +* @param[in] pu1_v_dst +* Output V pointer ( used in 420P output case) +* +* @param[in] u4_dst_y_strd +* Stride of destination Y buffer +* +* @param[in] u4_dst_u_strd +* Stride of destination U/V buffer +* +* @param[in] blocking +* To indicate whether format conversion should wait till frame is reconstructed +* and then return after complete copy is done. To be set to 1 when called at the +* end of frame processing and set to 0 when called between frame processing modules +* in order to utilize available MCPS +* +* @returns error status +* +* @remarks +* Assumes that the stride of U and V buffers are same. 
+* This is correct in most cases +* If a case comes where this is not true we need to modify the fmt conversion +* functions called inside also +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_fmt_conv(codec_t *ps_codec, + pic_buf_t *ps_pic, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, + UWORD8 *pu1_v_dst, + UWORD32 u4_dst_y_strd, + UWORD32 u4_dst_uv_strd, + WORD32 cur_row, + WORD32 num_rows) +{ + IH264E_ERROR_T ret = IH264E_SUCCESS; + UWORD8 *pu1_y_src, *pu1_uv_src; + UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp; + UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp; + UWORD16 *pu2_rgb_dst_tmp; + UWORD32 *pu4_rgb_dst_tmp; + WORD32 is_u_first; + UWORD8 *pu1_luma; + UWORD8 *pu1_chroma; + WORD32 dst_stride, wd; + + + if (0 == num_rows) + return ret; + + pu1_luma = ps_pic->pu1_luma; + pu1_chroma = ps_pic->pu1_chroma; + + + dst_stride = ps_codec->s_cfg.u4_wd; + wd = ps_codec->s_cfg.u4_disp_wd; + is_u_first = (IV_YUV_420SP_UV == ps_codec->e_codec_color_format) ? 
1 : 0; + + /* In case of 420P output luma copy is disabled for shared mode */ + { + pu1_y_src = pu1_luma + cur_row * ps_codec->i4_rec_strd; + pu1_uv_src = pu1_chroma + (cur_row / 2) * ps_codec->i4_rec_strd; + + pu2_rgb_dst_tmp = (UWORD16 *) pu1_y_dst; + pu2_rgb_dst_tmp += cur_row * dst_stride; + pu4_rgb_dst_tmp = (UWORD32 *) pu1_y_dst; + pu4_rgb_dst_tmp += cur_row * dst_stride; + + pu1_y_dst_tmp = pu1_y_dst + cur_row * u4_dst_y_strd; + pu1_uv_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd; + pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd; + pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * u4_dst_uv_strd; + + /* If the call is non-blocking and there are no rows to be copied then return */ + /* In non-shared mode, reference buffers are in 420SP UV format, + * if output also is in 420SP_UV, then just copy + * if output is in 420SP_VU then swap UV values + */ + if ((IV_YUV_420SP_UV == ps_codec->s_cfg.e_recon_color_fmt) || + (IV_YUV_420SP_VU == ps_codec->s_cfg.e_recon_color_fmt)) + { + ih264e_fmt_conv_420sp_to_420sp(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp, + pu1_uv_dst_tmp, wd, num_rows, + ps_codec->i4_rec_strd, + ps_codec->i4_rec_strd, u4_dst_y_strd, + u4_dst_uv_strd); + } + else if (IV_YUV_420P == ps_codec->s_cfg.e_recon_color_fmt) + { + ih264e_fmt_conv_420sp_to_420p(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp, + pu1_u_dst_tmp, pu1_v_dst_tmp, wd, + num_rows, ps_codec->i4_rec_strd, + ps_codec->i4_rec_strd, u4_dst_y_strd, + u4_dst_uv_strd, is_u_first, 0); + } + } + return(ret); +} + diff --git a/encoder/ih264e_fmt_conv.h b/encoder/ih264e_fmt_conv.h new file mode 100755 index 0000000..6b33bf0 --- /dev/null +++ b/encoder/ih264e_fmt_conv.h @@ -0,0 +1,142 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_fmt_conv.h +* +* @brief +* The file contains extern declarations of color space conversion routines +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264E_FMT_CONV_H_ +#define IH264E_FMT_CONV_H_ + +#define COEFF1 13073 +#define COEFF2 -3207 +#define COEFF3 -6664 +#define COEFF4 16530 + +IH264E_ERROR_T ih264e_fmt_conv(codec_t *ps_codec, + pic_buf_t *ps_pic, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, + UWORD8 *pu1_v_dst, + UWORD32 u4_dst_y_strd, + UWORD32 u4_dst_uv_strd, + WORD32 cur_row, + WORD32 num_rows); + +typedef void ih264e_fmt_conv_420sp_to_rgba8888_ft(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD32 *pu4_rgba_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_strd, + WORD32 is_u_first); + +typedef void ih264e_fmt_conv_420sp_to_rgb565_ft(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD16 *pu2_rgb_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_strd, + WORD32 is_u_first); + +typedef void ih264e_fmt_conv_420sp_to_420sp_ft(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_uv_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 
dst_uv_strd); + +typedef void ih264e_fmt_conv_420sp_to_420p_ft(UWORD8 *pu1_y_src, + UWORD8 *pu1_uv_src, + UWORD8 *pu1_y_dst, + UWORD8 *pu1_u_dst, + UWORD8 *pu1_v_dst, + WORD32 wd, + WORD32 ht, + WORD32 src_y_strd, + WORD32 src_uv_strd, + WORD32 dst_y_strd, + WORD32 dst_uv_strd, + WORD32 is_u_first, + WORD32 disable_luma_copy); + +typedef void ih264e_fmt_conv_420p_to_420sp_ft(UWORD8 *pu1_y_src, UWORD8 *pu1_u_src, UWORD8 *pu1_v_src, + UWORD8 *pu1_y_dst, UWORD8 *pu1_uv_dst, + UWORD16 u2_height, UWORD16 u2_width, UWORD16 src_y_strd, + UWORD16 src_u_strd, UWORD16 src_v_strd, + UWORD16 dst_y_strd, UWORD16 dst_uv_strd, + UWORD32 convert_uv_only); + +typedef void ih264e_fmt_conv_422i_to_420sp_ft(UWORD8 *pu1_y_buf,UWORD8 *pu1_u_buf,UWORD8 *pu1_v_buf, + UWORD8 *pu1_422i_buf, + WORD32 u4_y_width,WORD32 u4_y_height, + WORD32 u4_y_stride,WORD32 u4_u_stride,WORD32 u4_v_stride, + WORD32 u4_422i_stride); + + +/* C function declarations */ +ih264e_fmt_conv_420sp_to_rgba8888_ft ih264e_fmt_conv_420sp_to_rgba8888; +ih264e_fmt_conv_420sp_to_rgb565_ft ih264e_fmt_conv_420sp_to_rgb565; +ih264e_fmt_conv_420sp_to_420sp_ft ih264e_fmt_conv_420sp_to_420sp; +ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p; +ih264e_fmt_conv_420p_to_420sp_ft ih264e_fmt_conv_420p_to_420sp; +ih264e_fmt_conv_422i_to_420sp_ft ih264e_fmt_conv_422i_to_420sp; + +/* A9Q function declarations */ +ih264e_fmt_conv_420sp_to_rgba8888_ft ih264e_fmt_conv_420sp_to_rgba8888_a9q; +ih264e_fmt_conv_420sp_to_420sp_ft ih264e_fmt_conv_420sp_to_420sp_a9q; +ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_a9q; +ih264e_fmt_conv_420p_to_420sp_ft ih264e_fmt_conv_420p_to_420sp_a9q; +ih264e_fmt_conv_422i_to_420sp_ft ih264e_fmt_conv_422i_to_420sp_a9q; + + +/* A9A function declarations */ +ih264e_fmt_conv_420sp_to_rgba8888_ft ih264e_fmt_conv_420sp_to_rgba8888_a9a; +ih264e_fmt_conv_420sp_to_420sp_ft ih264e_fmt_conv_420sp_to_420sp_a9a; +ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_a9a; + +/* SSSe31 
function declarations */ +ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_ssse31; + +/* SSE4 function declarations */ +ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_sse42; + +#endif /* IH264E_FMT_CONV_H_ */ diff --git a/encoder/ih264e_function_selector_generic.c b/encoder/ih264e_function_selector_generic.c new file mode 100755 index 0000000..65f943a --- /dev/null +++ b/encoder/ih264e_function_selector_generic.c @@ -0,0 +1,259 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_function_selector_generic.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* ittiam +* +* @par List of Functions: +* - ih264e_init_function_ptr_generic +* +* @remarks +* None +* +******************************************************************************* +*/ + + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + + +/* System Include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264e_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_platform_macros.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264e_defs.h" +#include "ih264e_structs.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264e_core_coding.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_cavlc.h" +#include "ih264_padding.h" +#include "ih264e_intra_modes_eval.h" +#include "ih264_mem_fns.h" +#include "ih264e_fmt_conv.h" +#include "ih264e_half_pel.h" + + 
+/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr_generic(codec_t *ps_codec) +{ + WORD32 i = 0; + + /* curr proc ctxt */ + process_ctxt_t *ps_proc = NULL; + me_ctxt_t *ps_me_ctxt = NULL; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 16x16 */ + ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert; + ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz; + ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc; + ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 4x4 */ + ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert; + ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz; + ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc; + ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl; + ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr; + ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r; + ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d; + ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l; + ps_codec->apf_intra_pred_4_l[8] = 
ih264_intra_pred_luma_4x4_mode_horz_u; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 8x8 (same slot order as the 16x16 and 4x4 tables: 0 vert, 1 horz, 2 dc) */ + ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert; + ps_codec->apf_intra_pred_8_l[1] = ih264_intra_pred_luma_8x8_mode_horz; + ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc; + ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl; + ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr; + ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r; + ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d; + ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l; + ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u; + + /* Init function pointers for intra pred leaf level functions chroma + * Intra 8x8 */ + ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc; + ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz; + ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert; + ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane; + + /* Init luma forward transform fn ptr */ + ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8; + ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4; + ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4; + ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4; + ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv; + + /* Init inverse transform fn ptr */ + ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8; + ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4; + ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc; + ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4; + ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc; + + ps_codec->pf_ihadamard_scaling_4x4 = 
ih264_ihadamard_scaling_4x4; + ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv; + ps_codec->pf_interleave_copy = ih264_interleave_copy; + + /* Init fn ptr luma core coding */ + ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16; + ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4; + ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16; + + /* Init fn ptr chroma core coding */ + ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8; + ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8; + + /* Init fn ptr luma deblocking */ + ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4; + ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4; + ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4; + ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4; + + /* Init fn ptr chroma deblocking */ + ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4; + ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4; + ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4; + ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4; + + /* write mb syntax layer */ + ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb; + ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb; + + /* Padding Functions */ + ps_codec->pf_pad_top = ih264_pad_top; + ps_codec->pf_pad_bottom = ih264_pad_bottom; + ps_codec->pf_pad_left_luma = ih264_pad_left_luma; + ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma; + ps_codec->pf_pad_right_luma = ih264_pad_right_luma; + ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma; + + /* Inter pred leaf level functions */ + ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy; + ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz; + ps_codec->pf_inter_pred_luma_vert = 
ih264_inter_pred_luma_vert; + ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear; + ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma; + + /* sad me level functions */ + ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16; + ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast; + ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8; + + /* memory handling operations */ + ps_codec->pf_mem_cpy = ih264_memcpy; + ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8; + ps_codec->pf_mem_set = ih264_memset; + ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8; + + /* sad me level functions */ + for (i = 0; i < (MAX_PROCESS_CTXT); i++) + { + ps_proc = &ps_codec->as_process[i]; + + ps_me_ctxt = &ps_proc->s_me_ctxt; + ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16; + ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast; + ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8; + ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog; + ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog; + ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog; + ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16; + ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter; + } + + /* intra mode eval -encoder level function */ + ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes; + ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes; + ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes; + + /* csc */ + ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp; + ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp; + + /* Halp pel generation function - encoder level*/ + ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz; + ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = 
ih264e_sixtap_filter_2dvh_vert; + + return; +} diff --git a/encoder/ih264e_globals.c b/encoder/ih264e_globals.c new file mode 100755 index 0000000..e2b46a4 --- /dev/null +++ b/encoder/ih264e_globals.c @@ -0,0 +1,261 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264e_globals.c +* +* @brief +* Contains definitions of global variables used across the encoder +* +* @author +* ittiam +* +* @par List of functions +* +* +* @remarks +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "ih264e_defs.h" +#include "ih264e_globals.h" + +/*****************************************************************************/ +/* Extern global definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* @brief Lambda for varying quantizer scales that would be used to +* compute the RD cost while deciding on the MB modes. +* input : qp +* output : lambda +* @remarks lambda = 0.85 * pow(2, (qp - 12)/3), when SSD is used as metric +* for computing distortion (Bit rate estimation for cost function of H.264/ +* AVC by Mohd Golam Sarwer et al.) If the use of distortion metric is SAD +* rather than SSD in the stage of encoding, consider sqrt(lambda) simply to +* adjust lambda for the lack of squaring operation in the error computation +* (from rate distortion optimization for video compression by Sullivan). 
+****************************************************************************** +*/ +const UWORD16 gu2_qp_lambda[52]= +{ + 0, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 3, 3, 3, + 4, 4, 5, 5, 6, 7, 7, 8, + 9, 10, 12, 13, 15, 17, 19, 21, + 23, 26, 30, 33, 37, 42, 47, 53, + 59, 66, 74, 83, +}; + +/** +****************************************************************************** +* @brief Lambda for varying quantizer scales that would be used to +* compute the RD cost while deciding on the MB modes. +* input : qp +* output : lambda +* @remarks lambda = pow(2, (qp - 12)/6) +* NOTE(review): every entry for qp < 12 is 0, including qp 7..11 where the +* expression above would round to 1 - confirm these zeros are intended. +****************************************************************************** +*/ +const UWORD8 gu1_qp0[52]= +{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 4, + 4, 4, 5, 6, 6, 7, 8, 9, + 10, 11, 13, 14, 16, 18, 20, 23, + 25, 29, 32, 36, 40, 45, 51, 57, + 64, 72, 81, 91, +}; + +/** +****************************************************************************** +* @brief unsigned exp. Golomb codelengths to assign cost to a coefficient of +* mb types. +* input : Integer (0..31) +* output : codelength, equal to 2 * floor(log2(input + 1)) + 1 for every entry +* @remarks Refer sec. 
9-1 in h264 specification +****************************************************************************** +*/ +const UWORD8 u1_uev_codelength[32] = +{ + 1, 3, 3, 5, 5, 5, 5, 7, + 7, 7, 7, 7, 7, 7, 7, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 11, +}; + + +/** +****************************************************************************** +* @brief Look up table to assign cost to a coefficient of a residual block +* based on its surrounding coefficients +* input : Numbers of T1's +* output : coeff_cost +* @remarks Refer Section 2.3 Elimination of single coefficients in inter +* macroblocks in document JVT-O079 +****************************************************************************** +*/ +const UWORD8 gu1_coeff_cost[6] = +{ + 3, 2, 2, 1, 1, 1 +}; + +/** +****************************************************************************** +* @brief Indices map to raster scan for luma 4x4 block +* input : scan index +* output : scan location +* @remarks Zig-zag order of a 4x4 block stored with a row stride of 4 entries +* (0, 1, 4, 8, 5, ...) +****************************************************************************** +*/ +const UWORD8 gu1_luma_scan_order[16] = +{ + 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +}; + +/** +****************************************************************************** +* @brief Indices map to raster scan for chroma AC block +* input : scan index +* output : scan location +* @remarks Same entries as gu1_luma_scan_order without the leading position 0, +* hence 15 entries +****************************************************************************** +*/ +const UWORD8 gu1_chroma_scan_order[15] = +{ + 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +}; + +/** +****************************************************************************** +* @brief Indices map to raster scan for luma 4x4 dc block +* input : scan index +* output : scan location +* @remarks Holds the same 16 entries as gu1_luma_scan_order 
+****************************************************************************** +*/ +const UWORD8 gu1_luma_scan_order_dc[16] = +{ + 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +}; + +/** 
+****************************************************************************** +* @brief Indices map to raster scan for chroma 2x2 dc block +* input : scan index +* output : scan location +* @remarks None +****************************************************************************** +*/ +const UWORD8 gu1_chroma_scan_order_dc[4] = +{ + 0, 1, 2, 3 +}; + +/** +****************************************************************************** +* @brief choice of motion vectors to be used during mv prediction +* input : formatted reference idx comparison metric +* output : mv prediction has to be median or a simple straight forward +* selection from neighbors. +* @remarks If only one of the candidate blocks has a reference frame equal to + the current block then use the same block as the final predictor. A simple + look up table to assist this mv prediction condition. + Indices 1, 2 and 4 (exactly one bit set) map to 0, 1 and 2; every other + index maps to -1. +****************************************************************************** +*/ +const WORD8 gi1_mv_pred_condition[8] = +{ + -1, 0, 1, -1, 2, -1, -1, -1 +}; + +/** +****************************************************************************** +* @brief maps the h264 quantizer to the mpeg2 quantizer scale +* input : h264 qp +* output : equivalent mpeg 2 qp +* @remarks mpeg2qscale = 2 ^ [((h264qp - 12) / 6) + 1] +* NOTE(review): the stored entries equal round(2 ^ ((h264qp - 4) / 6)), i.e. +* about 2.5 at h264qp = 12, not the value 2 given by the expression above. +* Confirm which one is intended before regenerating the table. 
+****************************************************************************** +*/ +const UWORD8 gau1_h264_to_mpeg2_qmap[H264_QP_ELEM] = +{ + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 4, + 4, 4, 5, 6, 6, 7, 8, 9, + 10, 11, 13, 14, 16, 18, 20, 23, + 25, 29, 32, 36, 40, 45, 51, 57, + 64, 72, 81, 91, 102, 114, 128, 144, + 161, 181, 203, 228, +}; + +/** +****************************************************************************** +* @brief maps the mpeg2 quantizer to the h264 quantizer scale +* input : mpeg2 qp +* output : equivalent h264qp +* @remarks MPEG-2 dequantization: (2*QFij + k)*Wij*qscale/32 +* k = 0 (for intra) k = sign(QFij) +* H.264 dequantization: (QFij*R(QP%6,i,j))>>(6 - QP/6) 
+* +* Excluding the portion of R(QP%6,i,j) that is due to +* the DCT scale factors, the 6 entries after dividing by 64 (2^6) +* correspond to dequant values of +* 2.5, 2.8125, 3.125, 3.5625, 3.9375, 4.4375. +* (a=0.5 b=sqrt(2/5) - refer to JVT-B038.doc) +* +* Assuming that h264Qp=12 corresponds to MPEG2 qscale of 2 +* (the actual mapping seems to be to MPEG2 qscale of 2.5), +* and the fact that the effective h264 quantizer changes by +* a factor of 2 for every 6 steps, the following mapping is +* obtained: +* h264qp = 6*(log2(mpeg2qscale/2)) + 12. +* +* Note that the quant matrix entry assumed for the above +* equality is 16. Hence when the mpeg2 quant matrix entries +* are all 16, this lookup can be used as is (which is the +* default inter quant matrix in mpeg-2). +* +* NOTE(review): the stored entries appear to follow +* round(6*log2(mpeg2qscale) + 4) (qscale 1 -> 4, 2 -> 10, 4 -> 16, +* 16 -> 28), which is the 2.5 anchoring mentioned above rather than +* the expression given - confirm. Entry 0 is stored as 0. +****************************************************************************** +*/ +const UWORD8 gau1_mpeg2_to_h264_qmap[MPEG2_QP_ELEM] = +{ + 0, 4, 10, 14, 16, 18, 20, 21, 22, 23, 24, 25, 26, 26, 27, 27, + 28, 29, 29, 29, 30, 30, 31, 31, 32, 32, 32, 33, 33, 33, 33, 34, + 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, + 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, +}; + diff --git a/encoder/ih264e_globals.h 
b/encoder/ih264e_globals.h new file mode 100755 index 0000000..4c3de23 --- /dev/null +++ b/encoder/ih264e_globals.h @@ -0,0 +1,192 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The 
Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_globals.h +* +* @brief +* Contains declarations of global variables for H264 encoder +* +* @author +* Ittiam +* +* @remarks +* +******************************************************************************* +*/ + +#ifndef IH264E_GLOBALS_H_ +#define IH264E_GLOBALS_H_ + + +/*****************************************************************************/ +/* Extern global declarations */ +/*****************************************************************************/ + +/** +****************************************************************************** +* @brief Computes the lamda for varying quantizer scales that would be used to +* compute the RD cost while deciding on the MB modes. +* input : qp +* output : lambda +* @remarks lambda = 0.85 * pow(2, (qp - 12)/3), when SSD is used as metric +* for computing distortion (Bit rate estimation for cost function of H.264/ +* AVC by Mohd Golam Sarwer et. al.) 
If the use of distortion metric is SAD +* rather than SSD in the stage of encoding, consider sqrt(lambda) simply to +* adjust lambda for the lack of squaring operation in the error computation +* (from rate distortion optimization for video compression by sullivan). +****************************************************************************** +*/ +extern const UWORD16 gu2_qp_lambda[52]; + +/** +****************************************************************************** +* @brief Computes the lambda for varying quantizer scales that would be used to +* compute the RD cost while deciding on the MB modes. +* input : qp +* output : lambda +* @remarks lambda = pow(2, (qp - 12)/6). When Lagrangian multiplier is disabled +* the same constant is used across mode decision and mv decisions. +****************************************************************************** +*/ +extern const UWORD8 gu1_qp0[52]; + +/** +****************************************************************************** +* @brief unsigned exp. Golomb codelengths to assign cost to a coefficient of +* mb types. +* input : Integer +* output : codelength +* @remarks Refer sec. 
9-1 in h264 specification +****************************************************************************** +*/ +extern const UWORD8 u1_uev_codelength[32]; + +/** +****************************************************************************** +* @brief Look up table to assign cost to a coefficient of a residual block +* based on its surrounding coefficients +* input : Numbers of T1's +* output : coeff_cost +* @remarks Refer Section 2.3 Elimination of single coefficients in inter +* macroblocks in document JVT-O079 +****************************************************************************** +*/ +extern const UWORD8 gu1_coeff_cost[6]; + +/** +****************************************************************************** +* @brief Indices map to raster scan for luma 4x4 block +* input : scan index +* output : scan location +* @remarks The scan order assumes the stride to access the next row is 16 +* NOTE(review): the table defined in ih264e_globals.c holds 0, 1, 4, 8, 5, ... +* which corresponds to a row stride of 4 - this remark looks stale, confirm. +****************************************************************************** +*/ +extern const UWORD8 gu1_luma_scan_order[16]; + +/** +****************************************************************************** +* @brief Indices map to raster scan for chroma AC block +* input : scan index +* output : scan location +* @remarks The scan order assumes the stride to access the next row is 32 +* NOTE(review): the table defined in ih264e_globals.c holds 1, 4, 8, 5, ... +* which corresponds to a row stride of 4 - this remark looks stale, confirm. 
+****************************************************************************** +*/ +extern const UWORD8 gu1_chroma_scan_order[15]; + +/** +****************************************************************************** +* @brief Indices map to raster scan for luma 4x4 dc block +* input : scan index +* output : scan location +* @remarks The scan order assumes the stride to access the next row is 16 +* NOTE(review): the table defined in ih264e_globals.c holds 0, 1, 4, 8, 5, ... +* which corresponds to a row stride of 4 - this remark looks stale, confirm. +****************************************************************************** +*/ +extern const UWORD8 gu1_luma_scan_order_dc[16]; + +/** +****************************************************************************** +* @brief Indices map to raster scan for chroma 2x2 dc block +* input : 
scan index +* output : scan location +* @remarks The scan order assumes the stride to access the next row is 16 +****************************************************************************** +*/ +extern const UWORD8 gu1_chroma_scan_order_dc[4]; + + +/** +****************************************************************************** +* @brief choice of motion vectors to be used during mv prediction +* input : formatted reference idx comparison metric +* output : mv prediction has to be median or a simple straight forward +* selection from neighbors. +* @remarks If only one of the candidate blocks has a reference frame equal to + the current block then use the same block as the final predictor. A simple + look up table to assist this mv prediction condition +****************************************************************************** +*/ +extern const WORD8 gi1_mv_pred_condition[8]; + + +/** +****************************************************************************** +* @brief maps the h264 quantizer to the mpeg2 quantizer scale +* input : h264 qp +* output : equivalent mpeg 2 qp +* @remarks mpeg2qscale = 2 ^ [((h264qp - 12) / 6) + 1] +****************************************************************************** +*/ +extern const UWORD8 gau1_h264_to_mpeg2_qmap[H264_QP_ELEM]; + +/** +****************************************************************************** +* @brief maps the mpeg2 quantizer to the h264 quantizer scale +* input : mpeg2 qp +* output : equivalent h264 qp +* @remarks MPEG-2 dequantization: (2*QFij + k)*Wij*qscale/32 +* k = 0 (for intra) k = sign(QFij) +* H.264 dequantization: (QFij*R(QP%6,i,j))>>(6 - QP/6) +* +* Excluding the portion of R(QP%6,i,j) that is due to +* the DCT scale factors, the 6 entries after dividing by 64 (2^6) +* correspond to dequant values of +* 2.5, 2.8125, 3.125, 3.5625, 3.9375, 4.4375. 
+* (a=0.5 b=sqrt(2/5) - refer to JVT-B038.doc) +* +* Assuming that h264Qp=12 corresponds to MPEG2 qscale of 2 +* (the actual mapping seems to be to MPEG2 qscale of 2.5), +* and the fact that the effective h264 quantizer changes by +* a factor of 2 for every 6 steps, the following mapping is +* obtained: +* h264qp = 6*(log2(mpeg2qscale/2)) + 12. +* +* Note that the quant matrix entry assumed for the above +* equality is 16. Hence when the mpeg2 quant matrix entries +* are all 16, this lookup can be used as is (which is the +* default inter quant matrix in mpeg-2). +****************************************************************************** +*/ +extern const UWORD8 gau1_mpeg2_to_h264_qmap[MPEG2_QP_ELEM]; + + +#endif /* IH264E_GLOBALS_H_ */ diff --git a/encoder/ih264e_half_pel.c b/encoder/ih264e_half_pel.c new file mode 100755 index 0000000..cb475a1 --- /dev/null +++ b/encoder/ih264e_half_pel.c @@ -0,0 +1,226 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_half_pel.c +* +* @brief +* This file contains functions that are used for computing subpixel planes +* +* @author +* ittiam +* +* @par List of Functions: +* - ih264e_sixtapfilter_horz +* - ih264e_sixtap_filter_2dvh_vert +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <assert.h> +#include <limits.h> + +/* User include files */ +#include "ih264_typedefs.h" +#include "ithread.h" +#include "ih264_platform_macros.h" +#include "ih264_defs.h" +#include "ih264e_half_pel.h" +#include "ih264_macros.h" +#include "ih264e_half_pel.h" +#include "ih264e_debug.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* Interprediction luma filter for horizontal input (Filter run for width = 17 +* and height =16) +* +* @par Description: +* Applies a 6 tap horizontal filter .The output is clipped to 8 bits +* sec 8.4.2.2.1 titled "Luma sample interpolation process" +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @returns +* +* @remarks +* None +* 
+*******************************************************************************
+*/
+void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
+                              UWORD8 *pu1_dst,
+                              WORD32 src_strd,
+                              WORD32 dst_strd)
+{
+    /*
+     * Width and height of the interpolated plane. They are kept signed on
+     * purpose: mixing an unsigned width into "stride - width" makes the whole
+     * expression unsigned, so a stride smaller than the width (or a negative
+     * stride) would wrap and move the row pointers far forward on 64 bit
+     * targets instead of stepping them back.
+     */
+    WORD32 i4_wd = HP_PL_WD;
+    WORD32 i4_ht = MB_SIZE;
+    WORD32 i4_row, i4_col;
+
+    /* the six tap window of output column x covers source columns x-2 .. x+3 */
+    pu1_src -= 2;
+
+    for (i4_row = 0; i4_row < i4_ht; i4_row++)
+    {
+        for (i4_col = 0; i4_col < i4_wd; i4_col++)
+        {
+            /* first of the six source pels contributing to this output */
+            UWORD8 *pu1_win = pu1_src + i4_col;
+            WORD16 i16_temp;
+
+            /* the taps are applied symmetrically: outer, middle, inner pair */
+            i16_temp = ih264_g_six_tap[0] * (pu1_win[0] + pu1_win[5])
+                     + ih264_g_six_tap[1] * (pu1_win[1] + pu1_win[4])
+                     + ih264_g_six_tap[2] * (pu1_win[2] + pu1_win[3]);
+
+            /* round and scale back by 32 */
+            i16_temp = (i16_temp + 16) >> 5;
+
+            pu1_dst[i4_col] = CLIP_U8(i16_temp);
+        }
+
+        /* row pointers are never advanced inside the row, so one stride each */
+        pu1_src += src_strd;
+        pu1_dst += dst_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function implements a two stage cascaded six tap filter. It applies
+* the six tap filter in the vertical direction on the predictor values,
+* followed by applying the same filter in the horizontal direction on the
+* output of the first stage. The six tap filtering operation is described in
+* sec 8.4.2.2.1 titled "Luma sample interpolation process" (Filter run for
+* width = 17 and height = 17)
+*
+* @par Description:
+* The function interpolates the predictors first in the vertical direction and
+* then in the horizontal direction to output the (1/2,1/2). The output of the
+* first stage of the filter is stored in the buffer pointed to by
+* pi4_pred (only in C) in WORD32 precision.
+* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[out] pu1_dst1 +* UWORD8 pointer to the destination (Horizontal filtered output) +* +* @param[out] pu1_dst2 +* UWORD8 pointer to the destination (output after applying vertical filter to +* the intermediate horizontal output) +* +* @param[in] src_strd +* integer source stride + +* @param[in] dst_strd +* integer destination stride of pu1_dst +* +* @param[in] pi4_pred +* Pointer to 16bit intermediate buffer (used only in c) +* +* @param[in] i4_pred_strd +* integer destination stride of pi16_pred1 +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ +void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src, + UWORD8 *pu1_dst1, + UWORD8 *pu1_dst2, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 *pi4_pred, + WORD32 i4_pred_strd) +{ + WORD32 row, col; + WORD32 tmp; + WORD32 *pi4_pred_temp = pi4_pred; + WORD32 ht = HP_PL_HT, wd = HP_PL_WD; + + for (row = 0; row < ht; row++) + { + for (col = -2; col < wd + 3; col++) + { + tmp = ih264_g_six_tap[0] * (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd]) + + ih264_g_six_tap[1] * (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd]) + + ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1 * src_strd]); + + pi4_pred_temp[col] = tmp; + } + + pu1_src += src_strd; + pi4_pred_temp += i4_pred_strd; + } + + for (row = 0; row < ht; row++) + { + for (col = 0; col < wd; col++) + { + tmp = (pi4_pred[col - 2] + pi4_pred[col + 3]) + + ih264_g_six_tap[1] * (pi4_pred[col - 1] + pi4_pred[col + 2]) + + ih264_g_six_tap[2] * (pi4_pred[col] + pi4_pred[col + 1]); + + tmp = (tmp + 512) >> 10; + + pu1_dst2[col] = CLIP_U8(tmp); + pu1_dst1[col] = CLIP_U8((pi4_pred[col] + 16) >> 5); + } + pi4_pred += i4_pred_strd; + pu1_dst2 += dst_strd; + pu1_dst1 += dst_strd; + } +} + diff --git a/encoder/ih264e_half_pel.h b/encoder/ih264e_half_pel.h new file mode 100755 index 0000000..92bd37f --- /dev/null +++ 
b/encoder/ih264e_half_pel.h @@ -0,0 +1,162 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** + ******************************************************************************* + * @file + * ih264e_half_pel.h + * + * @brief + * Contains extern declarations of subpel functions used by the encoder + * + * @author + * ittiam + * + * @remarks + * none + * + ******************************************************************************* + */ + +#ifndef IH264E_HALF_PEL_H_ +#define IH264E_HALF_PEL_H_ + +/*****************************************************************************/ +/* Global constants */ +/*****************************************************************************/ +/* + * Dimensions of subpel plane buffers + */ +#define HP_PL_WD MB_SIZE + 1 +#define HP_PL_HT MB_SIZE + 1 + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* Interprediction luma filter for horizontal input (Filter run for 
width = 17 +* and height =16) +* +* @par Description: +* Applies a 6 tap horizontal filter .The output is clipped to 8 bits +* sec 8.4.2.2.1 titled "Luma sample interpolation process" +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ +typedef void ih264e_sixtapfilter_horz_ft(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd); + +ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz; + +/* arm assembly */ +ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz_a9q; +ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz_av8; + +/* x86 intrinsics*/ +ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz_ssse3; + +/** +******************************************************************************* +* +* @brief +* This function implements a two stage cascaded six tap filter. It applies +* the six tap filter in the vertical direction on the predictor values, +* followed by applying the same filter in the horizontal direction on the +* output of the first stage. The six tap filtering operation is described in +* sec 8.4.2.2.1 titled "Luma sample interpolation process" (Filter run for +* width = 17 and height = 17) +* +* @par Description: +* The function interpolates the predictors first in the vertical direction and +* then in the horizontal direction to output the (1/2,1/2). The output of the +* first stage of the filter is stored in the buffer pointed to by +* pi4_pred (only in C) in WORD32 precision. 
+* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[out] pu1_dst1 +* UWORD8 pointer to the destination (first stage output; the C implementation +* stores the vertically filtered value here) +* +* @param[out] pu1_dst2 +* UWORD8 pointer to the destination (second stage output; the C implementation +* applies the horizontal filter to the intermediate vertical output) +* +* @param[in] src_strd +* integer source stride + +* @param[in] dst_strd +* integer destination stride of pu1_dst1 and pu1_dst2 +* +* @param[in] pi4_pred +* Pointer to WORD32 intermediate buffer (used only in c) +* +* @param[in] i4_pred_strd +* integer stride of pi4_pred +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ +typedef void ih264e_sixtap_filter_2dvh_vert_ft(UWORD8 *pu1_src, + UWORD8 *pu1_dst1, + UWORD8 *pu1_dst2, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 *pi4_pred, + WORD32 i4_pred_strd); + +ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert; + +/* assembly */ +ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert_a9q; + +ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert_av8; + +/* x86 intrinsics */ +ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert_ssse3; + +#endif /* IH264E_HALF_PEL_H_ */ diff --git a/encoder/ih264e_intra_modes_eval.c b/encoder/ih264e_intra_modes_eval.c new file mode 100755 index 0000000..b41d717 --- /dev/null +++ b/encoder/ih264e_intra_modes_eval.c @@ -0,0 +1,2296 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_intra_modes_eval.c +* +* @brief +* This file contains definitions of routines that perform rate distortion +* analysis on a macroblock if they are to be coded as intra. +* +* @author +* ittiam +* +* @par List of Functions: +* - ih264e_derive_neighbor_availability_of_mbs() +* - ih264e_derive_ngbr_avbl_of_mb_partitions() +* - ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff() +* - ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff() +* - ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff() +* - ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton() +* - ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff() +* - ih264e_evaluate_intra16x16_modes() +* - ih264e_evaluate_intra4x4_modes() +* - ih264e_evaluate_intra_chroma_modes() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <assert.h> + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "ih264e_defs.h" +#include "iv2.h" 
+#include "ive2.h" +#include "ih264_debug.h" +#include "ih264_defs.h" +#include "ih264_macros.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_structs.h" +#include "ih264_common_tables.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ime_distortion_metrics.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_structs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_intra_modes_eval.h" +#include "ih264e_globals.h" +#include "ime_platform_macros.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief +* derivation process for macroblock availability +* +* @par Description +* Calculates the availability of the left, top, topright and topleft macroblocks. 
+* +* @param[in] ps_proc_ctxt +* pointer to proc context (handle) +* +* @remarks Based on section 6.4.5 in H264 spec +* +* @return none +* +****************************************************************************** +*/ +void ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t *ps_proc) +{ + UWORD8 *pu1_slice_idx_curr = ps_proc->pu1_slice_idx; + UWORD8 *pu1_slice_idx_b; + UWORD8 *pu1_slice_idx_a; + UWORD8 *pu1_slice_idx_c; + UWORD8 *pu1_slice_idx_d; + block_neighbors_t *ps_ngbr_avbl; + WORD32 i4_mb_x, i4_mb_y; + WORD32 i4_wd_mbs; + + i4_mb_x = ps_proc->i4_mb_x; + i4_mb_y = ps_proc->i4_mb_y; + + i4_wd_mbs = ps_proc->i4_wd_mbs; + + pu1_slice_idx_curr += (i4_mb_y * i4_wd_mbs) + i4_mb_x; + pu1_slice_idx_a = pu1_slice_idx_curr - 1; + pu1_slice_idx_b = pu1_slice_idx_curr - i4_wd_mbs; + pu1_slice_idx_c = pu1_slice_idx_b + 1; + pu1_slice_idx_d = pu1_slice_idx_b - 1; + ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + + /**********************************************************************/ + /* The macroblock is marked as available, unless one of the following */ + /* conditions is true in which case the macroblock shall be marked as */ + /* not available. */ + /* 1. mbAddr < 0 */ + /* 2 mbAddr > CurrMbAddr */ + /* 3. the macroblock with address mbAddr belongs to a different slice */ + /* than the macroblock with address CurrMbAddr */ + /**********************************************************************/ + + /* left macroblock availability */ + if (i4_mb_x == 0) + { /* macroblocks along first column */ + ps_ngbr_avbl->u1_mb_a = 0; + } + else + { /* macroblocks belong to same slice? */ + if (*pu1_slice_idx_a != *pu1_slice_idx_curr) + ps_ngbr_avbl->u1_mb_a = 0; + else + ps_ngbr_avbl->u1_mb_a = 1; + } + + /* top macroblock availability */ + if (i4_mb_y == 0) + { /* macroblocks along first row */ + ps_ngbr_avbl->u1_mb_b = 0; + } + else + { /* macroblocks belong to same slice? 
*/ + if (*pu1_slice_idx_b != *pu1_slice_idx_curr) + ps_ngbr_avbl->u1_mb_b = 0; + else + ps_ngbr_avbl->u1_mb_b = 1; + } + + /* top right macroblock availability */ + if (i4_mb_x == i4_wd_mbs-1 || i4_mb_y == 0) + { /* macroblocks along last column */ + ps_ngbr_avbl->u1_mb_c = 0; + } + else + { /* macroblocks belong to same slice? */ + if (*pu1_slice_idx_c != *pu1_slice_idx_curr) + ps_ngbr_avbl->u1_mb_c = 0; + else + ps_ngbr_avbl->u1_mb_c = 1; + } + + /* top left macroblock availability */ + if (i4_mb_x == 0 || i4_mb_y == 0) + { /* macroblocks along first column */ + ps_ngbr_avbl->u1_mb_d = 0; + } + else + { /* macroblocks belong to same slice? */ + if (*pu1_slice_idx_d != *pu1_slice_idx_curr) + ps_ngbr_avbl->u1_mb_d = 0; + else + ps_ngbr_avbl->u1_mb_d = 1; + } +} + +/** +****************************************************************************** +* +* @brief +* derivation process for subblock/partition availability +* +* @par Description +* Calculates the availability of the left, top, topright and topleft subblock +* or partitions. +* +* @param[in] ps_proc_ctxt +* pointer to macroblock context (handle) +* +* @param[in] i1_pel_pos_x +* column position of the pel wrt the current block +* +* @param[in] i1_pel_pos_y +* row position of the pel in wrt current block +* +* @remarks Assumptions: before calling this function it is assumed that +* the neighbor availability of the current macroblock is already derived. 
+*   Based on table 6-3 of H264 specification
+*
+* @return      availability status (yes or no)
+*
+******************************************************************************
+*/
+UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl,
+                                                WORD8 i1_pel_pos_x,
+                                                WORD8 i1_pel_pos_y)
+{
+    /**********************************************************************/
+    /* Coordinates 0..15 address a column / row inside the current mb.    */
+    /* A negative coordinate addresses the column to the left / the row   */
+    /* above, a coordinate of 16 or more the column to the right / the    */
+    /* row below, i.e. a neighboring mb.                                  */
+    /**********************************************************************/
+
+    if (i1_pel_pos_y < 0)
+    {   /* row(-1): top left, top or top right mb */
+        if (i1_pel_pos_x < 0)
+            return ps_ngbr_avbl->u1_mb_d;
+
+        if (i1_pel_pos_x < 16)
+            return ps_ngbr_avbl->u1_mb_b;
+
+        return ps_ngbr_avbl->u1_mb_c;
+    }
+
+    if (i1_pel_pos_y >= 16)
+    {   /* rows(+16): bottom left, bottom and bottom right are never available */
+        return 0;
+    }
+
+    /* rows 0..15 of the current mb */
+    if (i1_pel_pos_x < 0)
+    {   /* column(-1): left mb */
+        return ps_ngbr_avbl->u1_mb_a;
+    }
+
+    if (i1_pel_pos_x < 16)
+    {
+        /* inside the current mb. The availability really depends on the position */
+        /* of the partition inside the mb; it is reported as 1 in all cases here  */
+        /* and has to be corrected elsewhere.                                     */
+        return 1;
+    }
+
+    /* column(+16): right mb is never available */
+    return 0;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 16x16 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible intra 16x16 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]   ps_proc
+*  pointer to process context (handle)
+*
+* @remarks
+*  Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
+*  the SAD and cost are one and the same.
+*
+* @return     none
+*
+******************************************************************************
+*/
+
+void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* SAD (distortion metric) of the 16x16 macroblock: current candidate and best so far */
+    WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX;
+
+    /* lambda */
+    UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+    /* cost = distortion + lambda*rate */
+    WORD32 i4_mb_cost= INT_MAX, i4_mb_cost_least = INT_MAX;
+
+    /* intra mode */
+    UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16;
+
+    /* neighbor pels for intra prediction */
+    UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels;
+
+    /* neighbor availability */
+    WORD32 i4_ngbr_avbl;
+
+    /* pointer to src macro block and to the reconstructed macro block (source of neighbor pels) */
+    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
+
+    /* pointer to prediction macro block (a separate buffer is used for the PLANE mode) */
+    UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16;
+    UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* pointer to neighbors left, top, topleft (all taken from the recon buffer) */
+    UWORD8 *pu1_mb_a = pu1_ref_mb - 1;
+    UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd;
+    UWORD8 *pu1_mb_d = pu1_mb_b - 1;
+
+    /* valid intra modes map */
+    UWORD32 u4_valid_intra_modes;
+
+    /* lut for valid intra modes: indexed by i4_ngbr_avbl (bit0 left, bit1 topleft, bit2 top).
+     * Each entry is a bitmask in which bit (1 << mode) is set for every mode that may be
+     * evaluated with that neighbor combination. */
+    const UWORD8 u1_valid_intra_modes[8] = {4, 6, 12, 14, 5, 7, 13, 15};
+
+    /* temp vars: loop index, selector into apf_compute_sad_16x16, offset into u1_uev_codelength */
+    UWORD32 i, u4_enable_fast_sad = 0, offset = 0;
+
+    /* init temp var: P slices use an offset of 5 into the mb type code length table and may
+     * enable the fast SAD variant configured for motion estimation.
+     * NOTE(review): the offset presumably skips the inter mb types - confirm against the mb_type table */
+    if (ps_proc->i4_slice_type == PSLICE)
+    {
+        offset = 5;
+        u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
+    }
+
+    /* locating neighbors that are available for prediction */
+    /* TODO : update the neighbor availability information basing on constrained intra pred information */
+    /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines
+     * basing on neighbors available and hence evade the computation of neighbor availability totally. */
+    /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
+    i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1);
+    ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl;
+
+    /* gather prediction pels from the neighbors, if particular set is not available
+     * it is set to zero.
+     * Layout of pu1_ngbr_pels_i16: [0..15] left column stored bottom to top,
+     * [16] topleft pel, [17..32] top row stored left to right */
+    /* left pels */
+    if (ps_proc->ps_ngbr_avbl->u1_mb_a)
+    {
+        for(i = 0; i < 16; i++)
+            pu1_ngbr_pels_i16[16-1-i] = pu1_mb_a[i * i4_rec_strd];
+    }
+    else
+    {
+        ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16,0,MB_SIZE);
+    }
+    /* top pels */
+    if (ps_proc->ps_ngbr_avbl->u1_mb_b)
+    {
+        ps_codec->pf_mem_cpy_mul8(pu1_ngbr_pels_i16+16+1,pu1_mb_b,16);
+        /*for(i = 0; i < 16; i++)
+            pu1_ngbr_pels_i16[16+1+i] = pu1_mb_b[i];*/
+    }
+    else
+    {
+        ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16+16+1,0,MB_SIZE);
+    }
+    /* topleft pels */
+    if (ps_proc->ps_ngbr_avbl->u1_mb_d)
+        pu1_ngbr_pels_i16[16] = *pu1_mb_d;
+    else
+        pu1_ngbr_pels_i16[16] = 0;
+
+    /* set valid intra modes for evaluation (the disabled code below is the explicit form of the lut lookup) */
+//    u4_valid_intra_modes = 15;
+////    ih264e_filter_intra16x16modes(pu1_mb_curr, i4_src_strd, &u4_valid_intra_modes);
+//    if (!ps_proc->ps_ngbr_avbl->u1_mb_a)
+//        u4_valid_intra_modes &= ~(1 << HORZ_I16x16);
+//    if (!ps_proc->ps_ngbr_avbl->u1_mb_b)
+//        u4_valid_intra_modes &= ~(1 << VERT_I16x16);
+////    if (!ps_proc->ps_ngbr_avbl->u1_mb_a || !ps_proc->ps_ngbr_avbl->u1_mb_b || !ps_proc->ps_ngbr_avbl->u1_mb_d)
+//    if (i4_ngbr_avbl != 7)
+//        u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
+
+    u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
+
+    if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST) /* the FAST preset never evaluates PLANE */
+        u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
+
+    /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16; the best of these modes and its
+     * distortion are returned through u4_intra_mode and i4_mb_distortion_least */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16,
+                                                  i4_src_strd, i4_pred_strd,
+                                                  i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least,
+                                                  u4_valid_intra_modes);
+
+    /* cost = distortion + lambda*rate (the rate term is only added after the mode decision) */
+    i4_mb_cost_least = i4_mb_distortion_least;
+
+    /* PLANE is evaluated separately: only when bit 3 of the mode map is set and either
+     * the preset is not FASTEST or the slice is an I slice */
+    if (( (u4_valid_intra_modes >> 3) & 1) != 0 && (ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST ||
+                    ps_proc->i4_slice_type == ISLICE))
+    {
+        /* intra prediction for PLANE mode*/
+        (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl);
+
+        /* evaluate distortion between the actual blk and the estimated blk for the given mode.
+         * NOTE(review): i4_mb_cost_least is presumably an early exit threshold for the SAD - confirm */
+        ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, &i4_mb_distortion);
+
+        /* cost = distortion + lambda*rate */
+        i4_mb_cost = i4_mb_distortion;
+
+        /* update the least cost information if necessary */
+        if(i4_mb_cost < i4_mb_distortion_least) /* equals i4_mb_cost_least at this point */
+        {
+            u4_intra_mode = PLANE_I16x16;
+
+            i4_mb_cost_least = i4_mb_cost;
+            i4_mb_distortion_least = i4_mb_distortion;
+        }
+    }
+
+    u4_best_intra_16x16_mode = u4_intra_mode;
+
+    DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode);
+
+    ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode;
+
+    /* cost = distortion + lambda*rate, rate being the code length looked up for the chosen mode */
+    i4_mb_cost_least = i4_mb_distortion_least + u4_lambda*u1_uev_codelength[offset + u4_best_intra_16x16_mode];
+
+
+    /* update the type of the mb if necessary (only when I16x16 beats the best cost recorded so far) */
+    if (i4_mb_cost_least < ps_proc->i4_mb_cost)
+    {
+        ps_proc->i4_mb_cost = i4_mb_cost_least;
+        ps_proc->i4_mb_distortion = i4_mb_distortion_least;
+        ps_proc->u4_mb_type = I16x16;
+    }
+
+    return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all
the possible intra 8x8 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]    ps_proc_ctxt
+*  pointer to proc ctxt
+*
+* @remarks Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: TODO: This function needs to be tested
+*
+*  @return      none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* SAD (distortion metric) of an 8x8 partition: current mode, best mode, sum over the mb */
+    WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
+
+    /* lambda */
+    UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+    /* cost = distortion + lambda*rate; the mb total starts at one lambda.
+     * NOTE(review): presumably the cost of signalling the mb type - confirm */
+    WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda;
+
+    /* cost of signalling the partition mode: one bit when the evaluated mode equals the
+     * estimated mode, four bits otherwise (see the cost computation in the mode loop) */
+    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
+
+    /* intra mode */
+    UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode;
+
+    /* neighbor pels for intra prediction */
+    UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels;
+
+    /* pointer to curr partition */
+    UWORD8 *pu1_mb_curr;
+
+    /* pointer to prediction macro block */
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* neighbors left, top, top left (no top right pointer is kept in this routine) */
+    UWORD8 *pu1_mb_a;
+    UWORD8 *pu1_mb_b;
+    UWORD8 *pu1_mb_d;
+
+    /* neighbor availability: packed mask and per neighbor flags of the current partition */
+    WORD32 i4_ngbr_avbl;
+    block_neighbors_t s_ngbr_avbl;
+
+    /* temp vars: partition index (raster order) and its pel offset inside the mb */
+    UWORD32 b8, u4_pix_x, u4_pix_y;
+
+    /* ngbr mb syntax information; the top row mode store is addressed with 16 entries per mb */
+    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
+    mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+
+    /* valid intra modes map */
+    UWORD32 u4_valid_intra_modes;
+
+    for(b8 = 0; b8 < 4; b8++)
+    {
+        u4_pix_x = (b8 & 0x01) << 3;
+        u4_pix_y = (b8 >> 1) << 3;
+
+        pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
+        /* when rdopt is off, we use the input as reference for constructing prediction buffer */
+        /* as opposed to using the recon pels. (open loop intra prediction) */
+        pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
+        pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
+        pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
+
+        /* locating neighbors that are available for prediction */
+        /* TODO : update the neighbor availability information basing on constrained intra pred information */
+        /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
+        /* basing on neighbors available and hence evade the computation of neighbor availability totally.
+         * NOTE(review): the coordinates below are UWORD32 expressions; "0 - 1" reaches the callee
+         * through its WORD8 parameters and is presumably seen there as -1 - confirm */
+        s_ngbr_avbl.u1_mb_a = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */
+        s_ngbr_avbl.u1_mb_b = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */
+        s_ngbr_avbl.u1_mb_c = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */
+        s_ngbr_avbl.u1_mb_d = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */
+
+        /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK.
+         * NOTE(review): bit 4 repeats the left availability; presumably the flag the 8x8
+         * reference filtering expects for the pels below the left column - confirm */
+        i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) +
+                        (s_ngbr_avbl.u1_mb_a << 4);
+        /* if top partition is available and top right is not available for intra prediction, then */
+        /* padd top right samples using top sample and make top right also available */
+        /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
+        ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl;
+
+
+        ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8,
+                                                     i4_src_strd, i4_ngbr_avbl);
+
+        i4_partition_cost_least = INT_MAX;
+        /* set valid intra modes for evaluation: start with all nine modes (bits 0..8) and clear
+         * those whose neighbors are missing.
+         * NOTE(review): the I4x4 enumerators are used for the 8x8 mask; presumably they are
+         * numerically equal to the I8x8 ones - confirm */
+        u4_valid_intra_modes = 0x1ff;
+
+        if (!s_ngbr_avbl.u1_mb_b)
+        {
+            u4_valid_intra_modes &= ~(1 << VERT_I4x4);
+            u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4);
+            u4_valid_intra_modes &= ~(1 << VERT_L_I4x4);
+        }
+        if (!s_ngbr_avbl.u1_mb_a)
+        {
+            u4_valid_intra_modes &= ~(1 << HORZ_I4x4);
+            u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4);
+        }
+        if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d)
+        {
+            u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4);
+            u4_valid_intra_modes &= ~(1 << VERT_R_I4x4);
+            u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4);
+        }
+
+        /* estimate the intra 8x8 mode for the current partition (for evaluating cost):
+         * DC when left or top is unavailable, else the smaller of the left and top modes */
+        if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
+        {
+            u4_estimated_intra_8x8_mode = DC_I8x8;
+        }
+        else
+        {
+            UWORD32 u4_left_intra_8x8_mode = DC_I8x8; /* stays DC when the left mb is neither I8x8 nor I4x4 */
+            UWORD32 u4_top_intra_8x8_mode = DC_I8x8; /* stays DC when the top mb is neither I8x8 nor I4x4 */
+
+            if (u4_pix_x == 0)
+            {   /* left neighbor lies in the left mb */
+                if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
+                {
+                    u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[b8+1];
+                }
+                else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
+                {
+                    u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[(b8+1)*4+2];
+                }
+            }
+            else
+            {   /* left neighbor is the previous partition of this mb */
+                u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-1];
+            }
+
+            if (u4_pix_y == 0)
+            {   /* top neighbor lies in the top mb */
+                if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
+                {
+                    u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8+2];
+                }
+                else if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
+                {
+                    u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8+2)*4+2];
+                }
+            }
+            else
+            {   /* top neighbor is the partition one row up in this mb */
+                u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-2];
+            }
+
+            u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode);
+        }
+
+        /* perform intra mode 8x8 evaluation: the mode map is consumed LSB first, so
+         * u4_intra_mode always equals the bit position that is being tested */
+        for (u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; u4_intra_mode++, u4_valid_intra_modes >>= 1)
+        {
+            if ( (u4_valid_intra_modes & 1) == 0)
+                continue;
+
+            /* intra prediction */
+            (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, i4_pred_strd, i4_ngbr_avbl);
+
+            /* evaluate distortion between the actual blk and the estimated blk for the given mode.
+             * NOTE(review): i4_partition_cost_least is presumably an early exit threshold - confirm */
+            ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_partition_cost_least, &i4_partition_distortion);
+
+            i4_partition_cost = i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)?u4_cost_one_bit:u4_cost_four_bits);
+
+            /* update the least cost information if necessary */
+            if (i4_partition_cost < i4_partition_cost_least)
+            {
+                i4_partition_cost_least = i4_partition_cost;
+                i4_partition_distortion_least = i4_partition_distortion;
+                u4_best_intra_8x8_mode = u4_intra_mode;
+            }
+        }
+        /* macroblock distortion */
+        i4_total_cost += i4_partition_cost_least;
+        i4_total_distortion += i4_partition_distortion_least;
+        /* mb partition mode (also read back above when estimating the modes of later partitions) */
+        ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode;
+
+    }
+
+    /* update the type of the mb if necessary (only when I8x8 beats the best cost recorded so far) */
+    if (i4_total_cost < ps_proc->i4_mb_cost)
+    {
+        ps_proc->i4_mb_cost = i4_total_cost;
+        ps_proc->i4_mb_distortion = i4_total_distortion;
+        ps_proc->u4_mb_type = I8x8;
+    }
+
+    return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 4x4 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible intra 4x4 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]    ps_proc
+*  pointer to proc ctxt
+*
+* @remarks
+*  Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+*  24*lambda is added to the SAD before comparison with the best SAD for
+*  inter prediction.
This is an empirical value to prevent using too many intra
+*  blocks.
+*
+* @return      none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* SAD(distortion metric) of an 4x4 block: best mode of the current block, sum over the mb */
+    WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
+
+    /* lambda */
+    UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+    /* cost = distortion + lambda*rate; the mb total starts at (24 + 1) * lambda
+     * (the function header cites JVT-O079 for the 24*lambda intra 4x4 bias) */
+    WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
+
+    /* cost of signalling the block mode: one bit when the best mode equals the estimated
+     * mode, four bits otherwise (used below to recover the distortion from the cost) */
+    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
+
+    /* intra mode */
+    UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
+
+    /* neighbor pels for intra prediction */
+    UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
+
+    /* pointer to curr partition */
+    UWORD8 *pu1_mb_curr;
+
+    /* pointer to prediction macro block */
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* neighbors left, top, top right, top left */
+    UWORD8 *pu1_mb_a;
+    UWORD8 *pu1_mb_b;
+    UWORD8 *pu1_mb_c;
+    UWORD8 *pu1_mb_d;
+
+    /* neighbor availability: packed mask and per neighbor flags of the current 4x4 block */
+    WORD32 i4_ngbr_avbl;
+    block_neighbors_t s_ngbr_avbl;
+
+    /* temp vars: b8/b4 walk the 8x8 quadrants and the 4x4 blocks inside each quadrant;
+     * u4_blk_* / u4_pix_* are the pel offsets of the quadrant / block inside the mb */
+    UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
+
+    /* scan order inside 4x4 block: maps the raster index of a 4x4 block (row * 4 + column)
+     * to its position in the (b8 << 2) + b4 ordering used by the mode arrays */
+    const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+    /* ngbr sub mb modes; the top row mode store is addressed with 16 entries per mb */
+    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
+    mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+
+    /* valid intra modes map: u2_valid_modes is indexed by the low three availability bits
+     * (bit0 left, bit1 topleft, bit2 top) and yields the mode mask handed to the evaluation
+     * routine. NOTE(review): presumably bit k enables mode k - confirm against the mode enum */
+    UWORD32 u4_valid_intra_modes;
+    UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
+
+    /* mb level availability mask selects the row of per 4x4 block availability masks */
+    i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_c << 3);
+    memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
+
+    for (b8 = 0; b8 < 4; b8++)
+    {
+        u4_blk_x = (b8 & 0x01) << 3;
+        u4_blk_y = (b8 >> 1) << 3;
+        for (b4 = 0; b4 < 4; b4++)
+        {
+            u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
+            u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
+
+            pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
+            /* when rdopt is off, we use the input as reference for constructing prediction buffer */
+            /* as opposed to using the recon pels. (open loop intra prediction) */
+            pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
+            pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
+            pu1_mb_c = pu1_mb_b + 4; /* pointer to top right macro block */
+            pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
+
+            /* locating neighbors that are available for prediction */
+            /* TODO : update the neighbor availability information basing on constrained intra pred information */
+            /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
+            /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
+
+            i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
+            s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
+            s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
+            s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
+            s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
+            /* set valid intra modes for evaluation */
+            u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
+
+            /* if top partition is available and top right is not available for intra prediction, then */
+            /* padd top right samples using top sample and make top right also available */
+            /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
+
+            /* gather prediction pels from the neighbors.
+             * Layout of pu1_ngbr_pels_i4: [0..3] left column stored bottom to top, [4] topleft pel,
+             * [5..8] top row, [9..12] top right row. Unavailable left / top / topleft pels are zeroed */
+            if (s_ngbr_avbl.u1_mb_a)
+            {
+                for(i = 0; i < 4; i++)
+                    pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_src_strd];
+            }
+            else
+            {
+                memset(pu1_ngbr_pels_i4, 0, 4);
+            }
+
+            if (s_ngbr_avbl.u1_mb_b)
+            {
+                memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
+            }
+            else
+            {
+                memset(pu1_ngbr_pels_i4 + 5, 0, 4);
+            }
+
+            if (s_ngbr_avbl.u1_mb_d)
+                pu1_ngbr_pels_i4[4] = *pu1_mb_d;
+            else
+                pu1_ngbr_pels_i4[4] = 0;
+
+            if (s_ngbr_avbl.u1_mb_c)
+            {
+                memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
+            }
+            else if (s_ngbr_avbl.u1_mb_b)
+            {   /* top right missing but top present: replicate the last top pel */
+                memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
+                s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; /* local flag only; i4_ngbr_avbl is not updated */
+            }
+
+            i4_partition_cost_least = INT_MAX;
+
+            /* predict the intra 4x4 mode for the current partition (for evaluating cost):
+             * DC when left or top is unavailable, else the smaller of the left and top modes */
+            if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
+            {
+                u4_estimated_intra_4x4_mode = DC_I4x4;
+            }
+            else
+            {
+                UWORD32 u4_left_intra_4x4_mode = DC_I4x4; /* stays DC when the left mb is neither I4x4 nor I8x8 */
+                UWORD32 u4_top_intra_4x4_mode = DC_I4x4; /* stays DC when the top mb is neither I4x4 nor I8x8 */
+
+                if (u4_pix_x == 0)
+                {   /* left neighbor lies in the left mb: raster column 3 of the same block row */
+                    if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
+                    {
+                        u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
+                    }
+                    else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
+                    {
+                        u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
+                    }
+                }
+                else
+                {   /* left neighbor is inside this mb: raster index minus one */
+                    u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
+                }
+
+                if (u4_pix_y == 0)
+                {   /* top neighbor lies in the top mb: raster row 3 (index 12 + column) */
+                    if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
+                    {
+                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
+                    }
+                    else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
+                    {
+                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
+                    }
+                }
+                else
+                {   /* top neighbor is inside this mb: raster index minus four */
+                    u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
+                }
+
+                u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
+            }
+
+            ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
+
+            /* mode evaluation and prediction: the best mode and its cost are returned through
+             * u4_best_intra_4x4_mode and i4_partition_cost_least */
+            ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
+                                                         pu1_ngbr_pels_i4,
+                                                         pu1_pred_mb, i4_src_strd,
+                                                         i4_pred_strd, i4_ngbr_avbl,
+                                                         &u4_best_intra_4x4_mode,
+                                                         &i4_partition_cost_least,
+                                                         u4_valid_intra_modes,
+                                                         u4_lambda,
+                                                         u4_estimated_intra_4x4_mode);
+
+
+            /* distortion = cost minus the mode signalling cost */
+            i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) ? u4_cost_one_bit : u4_cost_four_bits);
+
+            DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
+            /* macroblock distortion */
+            i4_total_distortion += i4_partition_distortion_least;
+            i4_total_cost += i4_partition_cost_least;
+            /* mb partition mode (also read back above when estimating the modes of later blocks) */
+            ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
+        }
+    }
+
+    /* update the type of the mb if necessary (only when I4x4 beats the best cost recorded so far) */
+    if (i4_total_cost < ps_proc->i4_mb_cost)
+    {
+        ps_proc->i4_mb_cost = i4_total_cost;
+        ps_proc->i4_mb_distortion = i4_total_distortion;
+        ps_proc->u4_mb_type = I4x4;
+    }
+
+    return ;
+}
+
+/**
+******************************************************************************
+*
+* @brief evaluate best intra 4x4 mode (rate distortion opt on)
+*
+* @par Description
+*  This function evaluates all the possible intra 4x4 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]    ps_proc
+*  pointer to proc ctxt
+*
+* @remarks
+*  Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+*  24*lambda is added to the SAD before comparison with the best SAD for
+*  inter prediction.
This is an empirical value to prevent using too many intra +* blocks. +* +* @return none +* +****************************************************************************** +*/ +void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t *ps_proc) +{ + /* Codec Context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* SAD(distortion metric) of an 4x4 block */ + WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; + + /* lambda */ + UWORD32 u4_lambda = ps_proc->u4_lambda; + + /* cost = distortion + lambda*rate */ + WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda; + + /* cost due to mbtype */ + UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; + + /* intra mode */ + UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode; + + /* neighbor pels for intra prediction */ + UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; + + /* pointer to curr partition */ + UWORD8 *pu1_mb_curr; + UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top; + UWORD8 *pu1_ref_mb_intra_4x4; + + /* pointer to residual macro block */ + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4; + + /* pointer to prediction macro block */ + UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; + + /* strides */ + WORD32 i4_src_strd = ps_proc->i4_src_strd; + WORD32 i4_pred_strd = ps_proc->i4_pred_strd; + WORD32 i4_ref_strd_left, i4_ref_strd_top; + + /* neighbors left, top, top right, top left */ + UWORD8 *pu1_mb_a; + UWORD8 *pu1_mb_b; + UWORD8 *pu1_mb_c; + UWORD8 *pu1_mb_d; + + /* number of non zero coeffs*/ + UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4; + + /* quantization parameters */ + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + + /* neighbor availability */ + WORD32 i4_ngbr_avbl; + block_neighbors_t s_ngbr_avbl; + + /* temp vars */ + UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y; + + /* scan order inside 4x4 block */ + const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 
11, 14, 15}; + + /* ngbr sub mb modes */ + UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4); + mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; + + /* valid intra modes map */ + UWORD32 u4_valid_intra_modes; + UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511}; + + /* Dummy variable for 4x4 trans function */ + WORD16 i2_dc_dummy; + + /* compute ngbr availability for sub blks */ + i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_c << 3); + memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); + + for(b8 = 0; b8 < 4; b8++) + { + u4_blk_x = (b8 & 0x01) << 3; + u4_blk_y = (b8 >> 1) << 3; + for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE) + { + u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2); + u4_pix_y = u4_blk_y + ((b4 >> 1) << 2); + + pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd); + pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd); + if (u4_pix_x == 0) + { + i4_ref_strd_left = ps_proc->i4_rec_strd; + pu1_mb_ref_left = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_left); + } + else + { + i4_ref_strd_left = i4_pred_strd; + pu1_mb_ref_left = pu1_ref_mb_intra_4x4; + } + if (u4_pix_y == 0) + { + i4_ref_strd_top = ps_proc->i4_rec_strd; + pu1_mb_ref_top = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_top); + } + else + { + i4_ref_strd_top = i4_pred_strd; + pu1_mb_ref_top = pu1_ref_mb_intra_4x4; + } + + pu1_mb_a = pu1_mb_ref_left - 1; /* pointer to left macro block */ + pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top macro block */ + pu1_mb_c = pu1_mb_b + 4; /* pointer to top right macro block */ + if (u4_pix_y == 0) + pu1_mb_d = pu1_mb_b - 1; + else + pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left 
macro block */ + + /* locating neighbors that are available for prediction */ + /* TODO : update the neighbor availability information basing on constrained intra pred information */ + /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */ + /* basing on neighbors available and hence evade the computation of neighbor availability totally. */ + + i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; + s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1); + s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1; + s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2; + s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3; + /* set valid intra modes for evaluation */ + u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7]; + + /* if top partition is available and top right is not available for intra prediction, then */ + /* padd top right samples using top sample and make top right also available */ + /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */ + + /* gather prediction pels from the neighbors */ + if (s_ngbr_avbl.u1_mb_a) + { + for(i = 0; i < 4; i++) + pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_ref_strd_left]; + } + else + { + memset(pu1_ngbr_pels_i4,0,4); + } + if(s_ngbr_avbl.u1_mb_b) + { + memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); + } + else + { + memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4); + } + if (s_ngbr_avbl.u1_mb_d) + pu1_ngbr_pels_i4[4] = *pu1_mb_d; + else + pu1_ngbr_pels_i4[4] = 0; + if (s_ngbr_avbl.u1_mb_c) + { + memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4); + } + else if (s_ngbr_avbl.u1_mb_b) + { + memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); + s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; + } + + i4_partition_cost_least = INT_MAX; + + /* predict the intra 4x4 mode for the current partition (for evaluating cost) */ + if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) + { + 
u4_estimated_intra_4x4_mode = DC_I4x4; + } + else + { + UWORD32 u4_left_intra_4x4_mode = DC_I4x4; + UWORD32 u4_top_intra_4x4_mode = DC_I4x4; + + if (u4_pix_x == 0) + { + if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4) + { + u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]]; + } + else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8) + { + u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1]; + } + } + else + { + u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]]; + } + + if (u4_pix_y == 0) + { + if (ps_top_mb_syn_ele->u2_mb_type == I4x4) + { + u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]]; + } + else if (ps_top_mb_syn_ele->u2_mb_type == I8x8) + { + u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2]; + } + } + else + { + u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]]; + } + + u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode); + } + + ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode; + + /*mode evaluation and prediction*/ + ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr, + pu1_ngbr_pels_i4, + pu1_pred_mb, i4_src_strd, + i4_pred_strd, i4_ngbr_avbl, + &u4_best_intra_4x4_mode, + &i4_partition_cost_least, + u4_valid_intra_modes, + u4_lambda, + u4_estimated_intra_4x4_mode); + + + i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)?u4_cost_one_bit:u4_cost_four_bits); + + DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode); + + /* macroblock distortion */ + i4_total_distortion += i4_partition_distortion_least; + i4_total_cost += i4_partition_cost_least; + + /* mb partition mode */ + ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode; + + + 
/********************************************************/ + /* error estimation, */ + /* transform */ + /* quantization */ + /********************************************************/ + ps_codec->pf_resi_trans_quant_4x4(pu1_mb_curr, pu1_pred_mb, + pi2_res_mb, i4_src_strd, + i4_pred_strd, + /* No op stride, this implies a buff of length 1x16 */ + ps_qp_params->pu2_scale_mat, + ps_qp_params->pu2_thres_mat, + ps_qp_params->u1_qbits, + ps_qp_params->u4_dead_zone, + pu1_nnz, &i2_dc_dummy); + + /********************************************************/ + /* ierror estimation, */ + /* itransform */ + /* iquantization */ + /********************************************************/ + ps_codec->pf_iquant_itrans_recon_4x4(pi2_res_mb, pu1_pred_mb, + pu1_ref_mb_intra_4x4, + i4_pred_strd, i4_pred_strd, + ps_qp_params->pu2_iscale_mat, + ps_qp_params->pu2_weigh_mat, + ps_qp_params->u1_qp_div, + ps_proc->pv_scratch_buff, 0, + NULL); + } + } + + /* update the type of the mb if necessary */ + if (i4_total_cost < ps_proc->i4_mb_cost) + { + ps_proc->i4_mb_cost = i4_total_cost; + ps_proc->i4_mb_distortion = i4_total_distortion; + ps_proc->u4_mb_type = I4x4; + } + + return ; +} + +/** +****************************************************************************** +* +* @brief +* evaluate best chroma intra 8x8 mode (rate distortion opt off) +* +* @par Description +* This function gathers the left, top and top-left neighbor pels of the +* chroma block from the reconstruction buffer, evaluates the chroma intra 8x8 +* modes that the neighbor availability allows and records the mode with the +* least distortion. 
+* +* @param[in,out] ps_proc +* pointer to process context (handle); on return its u1_c_i8_mode field +* holds the selected chroma mode and i4_chroma_neighbor_avail_8x8_mb the +* neighbor availability mask computed here +* +* @remarks +* For chroma best intra pred mode is calculated based only on SAD +* +* @returns none +* +****************************************************************************** +*/ + +void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) +{ + /* Codec Context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* SAD(distortion metric) of an 8x8 block */ + WORD32 i4_mb_distortion, i4_chroma_mb_distortion; + + /* intra mode */ + UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8; + + /* neighbor pels for intra prediction */ + UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels; + + /* pointer to curr macro block */ + UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma; + UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma; + + /* pointer to prediction macro block */ + UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma; + UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane; + + /* strides */ + WORD32 i4_src_strd_c = ps_proc->i4_src_strd; + WORD32 i4_pred_strd = ps_proc->i4_pred_strd; + WORD32 i4_rec_strd_c = ps_proc->i4_rec_strd; + + /* neighbors left, top, top left (left and top left sit 2 bytes before the current position) */ + UWORD8 *pu1_mb_a = pu1_ref_mb - 2; + UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c; + UWORD8 *pu1_mb_d = pu1_mb_b - 2; + + /* neighbor availability: the table below is indexed by the availability mask (a + (d << 1) + (b << 2)) and yields a bitmask of the chroma modes to evaluate; bit 3 is tested further down as PLANE */ + const UWORD8 u1_valid_intra_modes[8] = {1, 3, 9, 11, 5, 7, 13, 15,}; + WORD32 i4_ngbr_avbl; + + /* valid intra modes map */ + UWORD32 u4_valid_intra_modes; + + /* temp var */ + UWORD8 i; + + /* locating neighbors that are available for prediction */ + /* TODO : update 
the neighbor availability information based on constrained intra pred information */ + /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines + * based on the neighbors available, and hence avoid the computation of neighbor availability altogether. 
*/ + /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */ + i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1); + ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl; + + /* gather prediction pels from the neighbors */ + /* left pels: 8 rows of 2 bytes each, stored last row first in entries 0..15 (row 0 lands in entries 14 and 15) */ + if (ps_proc->ps_ngbr_avbl->u1_mb_a) + { + for (i = 0; i < 16; i += 2) + { + pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c]; + pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1]; + } + } + else + { + ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE); + } + + /* top pels: 16 bytes copied to entries 18 onwards */ + if (ps_proc->ps_ngbr_avbl->u1_mb_b) + { + ps_codec->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16); + } + else + { + ps_codec->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE); + } + + /* top left pels: entries 16 and 17; note that they are left untouched (not cleared) when the top left neighbor is unavailable */ + if (ps_proc->ps_ngbr_avbl->u1_mb_d) + { + pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d; + pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1); + } + + u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl]; + + if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST) + u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8); /* the fast preset never evaluates the PLANE mode */ + + i4_chroma_mb_distortion = INT_MAX; + + /* perform intra mode chroma 8x8 evaluation */ + /* intra prediction: the callee is reached through a function pointer; it receives INT_MAX as the distortion to beat */ + ps_codec->pf_ih264e_evaluate_intra_chroma_modes(pu1_curr_mb, + pu1_ngbr_pels_c_i8x8, + pu1_pred_mb, + i4_src_strd_c, + i4_pred_strd, + i4_ngbr_avbl, + &u4_best_chroma_intra_8x8_mode, + &i4_chroma_mb_distortion, + u4_valid_intra_modes); + + if (u4_valid_intra_modes & 
8)/* if Chroma PLANE is valid: it is predicted into its own buffer (pu1_pred_mb_plane) and compared separately */ + { + (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0, i4_pred_strd, i4_ngbr_avbl); + + /* evaluate distortion(sad) */ + ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd, i4_chroma_mb_distortion, &i4_mb_distortion); + + /* update the least distortion information 
if necessary */ + if(i4_mb_distortion < i4_chroma_mb_distortion) + { + i4_chroma_mb_distortion = i4_mb_distortion; + u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8; + } + } + + DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion, u4_best_chroma_intra_8x8_mode); + + ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode; + + return ; +} + + +/** +****************************************************************************** +* +* @brief +* Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the +* prediction. +* +* @par Description +* This function evaluates the first three 16x16 modes, computes the +* corresponding SADs and, if the best of them beats the SAD passed in by the +* caller, writes the prediction of the best mode to the destination buffer. +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_ngbr_pels_i16 +* UWORD8 pointer to neighbouring pels; entries 0..15 hold the left column +* in reverse order (entry 15 - i is used for row i) and entries 17..32 hold +* the top row +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination; written only when a lower SAD than +* the incoming *pu4_sadmin is found +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels (tested with LEFT_MB_AVAILABLE_MASK +* and TOP_MB_AVAILABLE_MASK) +* +* @param[out] u4_intra_mode +* Pointer to the variable in which best mode is returned; written only +* when a lower SAD than the incoming *pu4_sadmin is found +* +* @param[in,out] pu4_sadmin +* Pointer to the SAD to beat on entry; replaced by the minimum SAD when +* that minimum is strictly lower +* +* @param[in] u4_valid_intra_modes +* Says what all modes are valid: bit 0 enables VERT, bit 1 HORZ, bit 2 DC +* +* @returns none +* +****************************************************************************** +*/ +void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src, + 
UWORD8 *pu1_ngbr_pels_i16, + UWORD8 *pu1_dst, + UWORD32 src_strd, + UWORD32 dst_strd, + WORD32 u4_n_avblty, + UWORD32 *u4_intra_mode, + WORD32 *pu4_sadmin, + UWORD32 u4_valid_intra_modes) +{ + UWORD8 *pu1_neighbour; + UWORD8 *pu1_src_temp = pu1_src; + UWORD8 left = 0, top = 0; + WORD32 u4_dcval = 0; + WORD32 i, j; + WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX, + i4_min_sad = INT_MAX; + UWORD8 val; + + left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); + top = (u4_n_avblty & 
TOP_MB_AVAILABLE_MASK) >> 2; + + /* left available: accumulate the HORZ SAD and the left column's share of the DC sum */ + if (left) + { + i4_sad_horz = 0; + + for (i = 0; i < 16; i++) + { + val = pu1_ngbr_pels_i16[15 - i]; + + u4_dcval += val; + + for (j = 0; j < 16; j++) + { + i4_sad_horz += ABS(val - pu1_src_temp[j]); + } + + pu1_src_temp += src_strd; + } + u4_dcval += 8; /* rounding term for the 16 left pels */ + } + + pu1_src_temp = pu1_src; + /* top available: accumulate the VERT SAD and the top row's share of the DC sum */ + if (top) + { + i4_sad_vert = 0; + + for (i = 0; i < 16; i++) + { + u4_dcval += pu1_ngbr_pels_i16[17 + i]; + + for (j = 0; j < 16; j++) + { + i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]); + } + pu1_src_temp += src_strd; + + } + u4_dcval += 8; /* rounding term for the 16 top pels */ + } + + u4_dcval = (u4_dcval) >> (3 + left + top); /* assuming left and top are 0 or 1: (sum + 8) >> 4 with one side, (sum + 16) >> 5 with both */ + + pu1_src_temp = pu1_src; + + /* none available: u4_dcval is still 0 at this point, so this sets it to 128 */ + u4_dcval += (left == 0) * (top == 0) * 128; + + i4_sad_dc = 0; /* the DC SAD is always computed; it is discarded below if DC is not a valid mode */ + + for (i = 0; i < 16; i++) + { + for (j = 0; j < 16; j++) + { + i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]); + } + pu1_src_temp += src_strd; + } + + if ((u4_valid_intra_modes & 04) == 0)/* If DC is disabled */ + i4_sad_dc = INT_MAX; + + if ((u4_valid_intra_modes & 01) == 0)/* If VERT is disabled */ + i4_sad_vert = INT_MAX; + + if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled */ + i4_sad_horz = INT_MAX; + + i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert); + + /* Finding Minimum sad and doing corresponding prediction; nothing is written unless the minimum beats *pu4_sadmin, and equal SADs resolve in the order VERT, HORZ, DC */ + if (i4_min_sad < *pu4_sadmin) + { + *pu4_sadmin = i4_min_sad; + if (i4_min_sad == i4_sad_vert) + { + *u4_intra_mode = 
VERT_I16x16; + pu1_neighbour = pu1_ngbr_pels_i16 + 17; + for (j = 0; j < 16; j++) + { + memcpy(pu1_dst, pu1_neighbour, MB_SIZE); + pu1_dst += dst_strd; + } + } + else if (i4_min_sad == i4_sad_horz) + { + *u4_intra_mode = HORZ_I16x16; + for (j = 0; j < 16; j++) + { + val = pu1_ngbr_pels_i16[15 - j]; + memset(pu1_dst, val, MB_SIZE); + pu1_dst += dst_strd; + } + } + else + { + *u4_intra_mode = DC_I16x16; + for (j = 0; j < 16; j++) + { + memset(pu1_dst, u4_dcval, MB_SIZE); + pu1_dst += dst_strd; + } + } + } + return; +} + +/** 
+****************************************************************************** +* +* @brief +* Evaluate best intra 4x4 mode and perform prediction. +* +* @par Description +* This function evaluates the enabled 4x4 modes, computes for each one the +* SAD and a cost (SAD + lambda when the mode equals the predicted mode, +* SAD + 4 * lambda otherwise) and writes the prediction of the least-cost mode +* to the destination buffer. +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in,out] pu1_ngbr_pels +* UWORD8 pointer to neighbouring pels; entries 0..3 are used as the left +* column (entry 3 predicts row 0), entries 5..8 as the top row. When any mode +* beyond VERT, HORZ and DC is enabled, entries 13 and 14 are overwritten with +* entry 12, so the buffer must hold at least 15 bytes +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels (used for the DC value only) +* +* @param[out] u4_intra_mode +* Pointer to the variable in which best mode is returned +* +* @param[out] pu4_sadmin +* Pointer to the variable in which minimum cost is returned; it is always +* overwritten, the incoming value is not consulted +* +* @param[in] u4_valid_intra_modes +* Says what all modes are valid: the bits 1, 2, 4, 8, 16, 32, 64, 128 and +* 256 enable VERT, HORZ, DC, DIAG_DL, DIAG_DR, VERT_R, HORZ_D, VERT_L and +* HORZ_U respectively +* +* @param[in] u4_lambda +* Lambda value for computing cost from SAD +* +* @param[in] u4_predictd_mode +* Predicted mode for cost computation +* +* @returns none +* +****************************************************************************** +*/ +void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src, + UWORD8 *pu1_ngbr_pels, + UWORD8 *pu1_dst, + UWORD32 src_strd, + UWORD32 dst_strd, + WORD32 u4_n_avblty, + UWORD32 *u4_intra_mode, + WORD32 *pu4_sadmin, + UWORD32 
u4_valid_intra_modes, + UWORD32 u4_lambda, + UWORD32 u4_predictd_mode) +{ + UWORD8 *pu1_src_temp = pu1_src; + UWORD8 *pu1_pred = pu1_ngbr_pels; + UWORD8 left = 0, top = 0; + UWORD8 u1_pred_val = 0; + UWORD8 u1_pred_vals[4] = {0}; + UWORD8 *pu1_pred_val = NULL; + /* To store FILT121 operated values*/ + UWORD8 u1_pred_vals_diag_121[15] = {0}; + /* To store FILT11 operated values*/ + UWORD8 u1_pred_vals_diag_11[15] = {0}; + UWORD8 u1_pred_vals_vert_r[8] = {0}; + UWORD8 u1_pred_vals_horz_d[10] = {0}; + UWORD8 u1_pred_vals_horz_u[10] = {0}; + WORD32 u4_dcval = 0; + WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, 
INT_MAX, INT_MAX, + INT_MAX, INT_MAX, INT_MAX, INT_MAX}; + + WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, + INT_MAX, INT_MAX, INT_MAX, INT_MAX}; + WORD32 i, i4_min_cost = INT_MAX; + + left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); + top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; + + /* Computing SAD (USADA8 is defined elsewhere; presumably it accumulates the SAD of one 4-byte row into its third argument - TODO confirm) */ + + /* VERT mode valid: every row is predicted from the top pels starting at entry 5 */ + if (u4_valid_intra_modes & 1) + { + pu1_pred = pu1_ngbr_pels + 5; + i4_sad[VERT_I4x4] = 0; + i4_cost[VERT_I4x4] = 0; + + USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); + + i4_cost[VERT_I4x4] = i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? 
+ u4_lambda : 4 * u4_lambda); + } + + /* HORZ mode valid: row r is predicted from the single left pel at entry 3 - r */ + if (u4_valid_intra_modes & 2) + { + i4_sad[HORZ_I4x4] = 0; + i4_cost[HORZ_I4x4] =0; + pu1_src_temp = pu1_src; + + u1_pred_val = pu1_ngbr_pels[3]; + + i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) + + ABS(pu1_src_temp[1] - u1_pred_val) + + ABS(pu1_src_temp[2] - u1_pred_val) + + ABS(pu1_src_temp[3] - u1_pred_val); + pu1_src_temp += src_strd; + + u1_pred_val = pu1_ngbr_pels[2]; + + i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) + + ABS(pu1_src_temp[1] - u1_pred_val) + + ABS(pu1_src_temp[2] - u1_pred_val) + + ABS(pu1_src_temp[3] - u1_pred_val); + pu1_src_temp += src_strd; + + u1_pred_val = pu1_ngbr_pels[1]; + + i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) + + ABS(pu1_src_temp[1] - u1_pred_val) + + ABS(pu1_src_temp[2] - u1_pred_val) + + ABS(pu1_src_temp[3] - u1_pred_val); + pu1_src_temp += src_strd; + + u1_pred_val = pu1_ngbr_pels[0]; + + i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) + + ABS(pu1_src_temp[1] - u1_pred_val) + + ABS(pu1_src_temp[2] - u1_pred_val) + + ABS(pu1_src_temp[3] - u1_pred_val); + + i4_cost[HORZ_I4x4] = 
i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? + u4_lambda : 4 * u4_lambda); + } + + /* DC mode valid: each available side contributes its 4 pels plus a rounding term of 2 */ + if (u4_valid_intra_modes & 4) + { + i4_sad[DC_I4x4] = 0; + i4_cost[DC_I4x4] = 0; + pu1_src_temp = pu1_src; + + if (left) + u4_dcval = pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2] + + pu1_ngbr_pels[3] + 2; + if (top) + u4_dcval += pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7] + + pu1_ngbr_pels[8] + 2; + + u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128; + + /* u4_dcval is 128 when the sum stayed 0, i.e. neither side contributed; the value is replicated into a 4-byte row predictor */ + memset(u1_pred_vals, u4_dcval, 4); + USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); + pu1_src_temp += src_strd; + + i4_cost[DC_I4x4] = i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? 
+ u4_lambda : 4 * u4_lambda); + } + + /* if modes other than VERT, HORZ and DC are valid; entries 13 and 14 of the caller's neighbor buffer are overwritten with entry 12 so that the filters below can read up to index 14 */ + if (u4_valid_intra_modes > 7) + { + pu1_pred = pu1_ngbr_pels; + pu1_pred[13] = pu1_pred[14] = pu1_pred[12]; + + /* Performing FILT121 and FILT11 operation for all neighbour values: output i is computed from neighbor entries i, i + 1 (and i + 2 for FILT121) */ + for (i = 0; i < 13; i++) + { + u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]); + u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]); + + pu1_pred++; + } + + if (u4_valid_intra_modes & 8)/* DIAG_DL: row r reads 4 FILT121 values starting at offset 5 + r */ + { + i4_sad[DIAG_DL_I4x4] = 0; + i4_cost[DIAG_DL_I4x4] = 0; + pu1_src_temp = pu1_src; + pu1_pred_val = u1_pred_vals_diag_121 + 5; + + USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]); + pu1_src_temp += src_strd; + 
i4_cost[DIAG_DL_I4x4] = i4_sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ? + u4_lambda : 4 * u4_lambda); + } + + if (u4_valid_intra_modes & 16)/* DIAG_DR: row r reads 4 FILT121 values starting at offset 3 - r */ + { + i4_sad[DIAG_DR_I4x4] = 0; + i4_cost[DIAG_DR_I4x4] = 0; + pu1_src_temp = pu1_src; + pu1_pred_val = u1_pred_vals_diag_121 + 3; + + USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]); + pu1_src_temp += src_strd; + i4_cost[DIAG_DR_I4x4] = i4_sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ? + u4_lambda : 4 * u4_lambda); + + } + + if (u4_valid_intra_modes & 32)/* VERT_R mode valid: rows 0 and 1 read the filtered arrays directly, rows 2 and 3 are assembled in u1_pred_vals_vert_r (offsets 0 and 4) */ + { + i4_sad[VERT_R_I4x4] = 0; + + pu1_src_temp = pu1_src; + u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2]; + memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3); + u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1]; + memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3); + + pu1_pred_val = u1_pred_vals_diag_11 + 4; + USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]); + pu1_pred_val = u1_pred_vals_diag_121 + 3; + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4), + i4_sad[VERT_R_I4x4]); + + i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ? 
+ u4_lambda : 4 * u4_lambda); + } + + if (u4_valid_intra_modes & 64)/* HORZ_D mode valid: u1_pred_vals_horz_d interleaves FILT11 and FILT121 values; rows read it at offsets 6, 4, 2 and 0 */ + { + i4_sad[HORZ_D_I4x4] = 0; + + pu1_src_temp = pu1_src; + u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3]; + memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3); + u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0]; + u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0]; + u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1]; + u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1]; + u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2]; + u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2]; + + pu1_pred_val = u1_pred_vals_horz_d; + USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]); + + i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ? + u4_lambda : 4 * u4_lambda); + } + + if (u4_valid_intra_modes & 128)/* VERT_L mode valid: rows alternate between the FILT11 and FILT121 arrays at offsets 5, 5, 6 and 6 */ + { + i4_sad[VERT_L_I4x4] = 0; + pu1_src_temp = pu1_src; + pu1_pred_val = u1_pred_vals_diag_11 + 5; + USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); + pu1_src_temp += src_strd; + pu1_pred_val = u1_pred_vals_diag_121 + 5; + USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); + pu1_src_temp += src_strd; + pu1_pred_val = u1_pred_vals_diag_11 + 6; + USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); + pu1_src_temp += src_strd; + pu1_pred_val = u1_pred_vals_diag_121 + 6; + USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); + + i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ? 
+ u4_lambda : 4 * u4_lambda); + } + + if (u4_valid_intra_modes & 256)/* HORZ_U mode valid: entries 6..9 of u1_pred_vals_horz_u replicate pu1_ngbr_pels[0]; rows read the array at offsets 0, 2, 4 and 6 */ + { + i4_sad[HORZ_U_I4x4] = 0; + pu1_src_temp = pu1_src; + u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2]; + u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1]; + u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1]; + u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0]; + u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0]; + u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]); + + memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4); + + pu1_pred_val = u1_pred_vals_horz_u; + USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]); + pu1_src_temp += src_strd; + USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]); + + i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ? 
+ u4_lambda : 4 * u4_lambda); + } + + i4_min_cost = MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]), + MIN3(i4_cost[3], i4_cost[4], i4_cost[5]), + MIN3(i4_cost[6], i4_cost[7], i4_cost[8])); + + } + else + { + /* Only first three modes valid */ + i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]); + } + + *pu4_sadmin = i4_min_cost; /* written unconditionally; the chain below predicts with the lowest-indexed mode whose cost equals the minimum */ + + if (i4_min_cost == i4_cost[0]) + { + *u4_intra_mode = VERT_I4x4; + pu1_pred_val = pu1_ngbr_pels + 5; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val), 4); + } + else if (i4_min_cost == i4_cost[1]) + { + *u4_intra_mode = HORZ_I4x4; + memset(pu1_dst, pu1_ngbr_pels[3], 4); + pu1_dst += dst_strd; + memset(pu1_dst, pu1_ngbr_pels[2], 4); + pu1_dst += dst_strd; + memset(pu1_dst, pu1_ngbr_pels[1], 4); + pu1_dst += dst_strd; + memset(pu1_dst, pu1_ngbr_pels[0], 4); + } + else if (i4_min_cost == i4_cost[2]) + { + *u4_intra_mode = DC_I4x4; + memset(pu1_dst, u4_dcval, 4); + pu1_dst += dst_strd; + memset(pu1_dst, u4_dcval, 4); + pu1_dst += dst_strd; + memset(pu1_dst, u4_dcval, 4); + pu1_dst += dst_strd; + memset(pu1_dst, u4_dcval, 4); + } + + else if (i4_min_cost == i4_cost[3]) + { + *u4_intra_mode = DIAG_DL_I4x4; + pu1_pred_val = u1_pred_vals_diag_121 + 5; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 1), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 2), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 3), 4); + } + else if (i4_min_cost == i4_cost[4]) + { + *u4_intra_mode = DIAG_DR_I4x4; + pu1_pred_val = u1_pred_vals_diag_121 + 3; + + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val - 1), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val - 2), 4); + pu1_dst 
+= dst_strd; + memcpy(pu1_dst, (pu1_pred_val - 3), 4); + } + + else if (i4_min_cost == i4_cost[5]) + 
{ + *u4_intra_mode = VERT_R_I4x4; + pu1_pred_val = u1_pred_vals_diag_11 + 4; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + pu1_pred_val = u1_pred_vals_diag_121 + 3; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (u1_pred_vals_vert_r), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4); + } + else if (i4_min_cost == i4_cost[6]) + { + *u4_intra_mode = HORZ_D_I4x4; + pu1_pred_val = u1_pred_vals_horz_d; + memcpy(pu1_dst, (pu1_pred_val + 6), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 4), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 2), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + } + else if (i4_min_cost == i4_cost[7]) + { + *u4_intra_mode = VERT_L_I4x4; + pu1_pred_val = u1_pred_vals_diag_11 + 5; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + pu1_pred_val = u1_pred_vals_diag_121 + 5; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + pu1_pred_val = u1_pred_vals_diag_11 + 6; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + pu1_pred_val = u1_pred_vals_diag_121 + 6; + memcpy(pu1_dst, (pu1_pred_val), 4); + } + else if (i4_min_cost == i4_cost[8]) + { + *u4_intra_mode = HORZ_U_I4x4; + pu1_pred_val = u1_pred_vals_horz_u; + memcpy(pu1_dst, (pu1_pred_val), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 2), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 4), 4); + pu1_dst += dst_strd; + memcpy(pu1_dst, (pu1_pred_val + 6), 4); + pu1_dst += dst_strd; + } + + return; +} + +/** +****************************************************************************** +* +* @brief: +* Evaluate best intra chroma mode (among VERT, HORZ and DC ) and do the prediction. 
+* +* @par Description +* This function evaluates the first three intra chroma modes, computes the +* corresponding SADs and, if the best of them beats the SAD passed in by the +* caller, writes the prediction of the best mode to the destination buffer. 
+* +* @param[in] pu1_src +* UWORD8 pointer to the source; 8 rows of 16 bytes are read, with even +* bytes compared against the U predictor and odd bytes against the V predictor +* +* @param[in] pu1_ngbr_pels +* UWORD8 pointer to neighbouring pels; entries 0..15 hold the left column +* as (U, V) byte pairs in reverse row order (row i uses entries 15 - 2i - 1 +* and 15 - 2i) and entries 18..33 hold the top row as 8 (U, V) byte pairs +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination; written only when a lower SAD than +* the incoming *pu4_sadmin is found +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels (tested with LEFT_MB_AVAILABLE_MASK +* and TOP_MB_AVAILABLE_MASK) +* +* @param[out] u4_intra_mode +* Pointer to the variable in which best mode is returned; written only +* when a lower SAD than the incoming *pu4_sadmin is found +* +* @param[in,out] pu4_sadmin +* Pointer to the SAD to beat on entry; replaced by the minimum SAD when +* that minimum is strictly lower +* +* @param[in] u4_valid_intra_modes +* Says what all modes are valid: bit 0 enables DC, bit 1 HORZ, bit 2 VERT +* +* @return none +* +****************************************************************************** +*/ +void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src, + UWORD8 *pu1_ngbr_pels, + UWORD8 *pu1_dst, + UWORD32 src_strd, + UWORD32 dst_strd, + WORD32 u4_n_avblty, + UWORD32 *u4_intra_mode, + WORD32 *pu4_sadmin, + UWORD32 u4_valid_intra_modes) +{ + UWORD8 *pu1_neighbour; + UWORD8 *pu1_src_temp = pu1_src; + UWORD8 left = 0, top = 0; + WORD32 u4_dcval_u_l[2] = { 0, 0 }, /*sum left neighbours for 'U' ,two separate sets - sum of first four from top,and sum of four values from bottom */ + u4_dcval_u_t[2] = { 0, 0 }; /*sum top neighbours for 'U', split into the first four and the last four columns*/ + + WORD32 u4_dcval_v_l[2] = { 0, 0 }, /*sum left neighbours for 
'V'*/ + u4_dcval_v_t[2] = { 0, 0 }; /*sum top neighbours for 'V'*/ + + WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, + i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX; + UWORD8 val_u, val_v; + + WORD32 u4_dc_val[2][2][2];/* ----------- + | | | Chroma can have four + | 00 | 01 | separate dc value... 
+ ----------- u4_dc_val corresponds to this dc values + | | | with u4_dc_val[2][2][U] and u4_dc_val[2][2][V] + | 10 | 11 | + ----------- */ + left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); + top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; + + /*Evaluating HORZ; the same pass accumulates the left-column DC sums*/ + if (left)/* If left available*/ + { + i4_sad_horz = 0; + + for (i = 0; i < 8; i++) + { + val_v = pu1_ngbr_pels[15 - 2 * i]; + val_u = pu1_ngbr_pels[15 - 2 * i - 1]; + row = i / 4; + u4_dcval_u_l[row] += val_u; + u4_dcval_v_l[row] += val_v; + for (j = 0; j < 8; j++) + { + i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for HORZ mode*/ + i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]); + } + + pu1_src_temp += src_strd; + } + u4_dcval_u_l[0] += 2; /* rounding term for each group of four pels */ + u4_dcval_u_l[1] += 2; + u4_dcval_v_l[0] += 2; + u4_dcval_v_l[1] += 2; + } + + /*Evaluating VERT; the outer index i serves both as the top-pel pair index for the DC sums and as the source row for the SAD*/ + pu1_src_temp = pu1_src; + if (top) /* top available*/ + { + i4_sad_vert = 0; + + for (i = 0; i < 8; i++) + { + col = i / 4; + + val_u = pu1_ngbr_pels[18 + i * 2]; + val_v = pu1_ngbr_pels[18 + i * 2 + 1]; + u4_dcval_u_t[col] += val_u; + u4_dcval_v_t[col] += val_v; + + for (j = 0; j < 16; j++) + { + i4_sad_vert += ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]);/* Finding SAD for VERT mode*/ + } + pu1_src_temp += src_strd; + + } + u4_dcval_u_t[0] += 2; /* rounding term for each group of four pels */ + u4_dcval_u_t[1] += 2; + u4_dcval_v_t[0] += 2; + u4_dcval_v_t[1] += 2; + } + + /* computing DC value*/ + /* Equation 8-128 in spec; the 00 and 11 quadrants combine the left and top sums */ + u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top); + u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top); + u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top); + u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + 
u4_dcval_v_t[1]) >> (1 + left + top); + + if (top) + { + /* Equation 8-132 in spec; the 01 quadrant uses the top sum when top is available, otherwise the left sum */ + u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top); + u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top); + } + else + { + u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left); + u4_dc_val[0][1][1] = 
(u4_dcval_v_l[0]) >> (1 + left); + } + + if (left) /* the 10 quadrant uses the left sum when left is available, otherwise the top sum */ + { + u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left); + u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left); + } + else + { + u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top); + u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top); + } + + if (!(left || top)) + { + /*none available: every quadrant falls back to 128*/ + u4_dc_val[0][0][0] = u4_dc_val[0][0][1] = + u4_dc_val[0][1][0] = u4_dc_val[0][1][1] = + u4_dc_val[1][0][0] = u4_dc_val[1][0][1] = + u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128; + } + + /* Evaluating DC; the SAD is always computed and discarded below if DC is not a valid mode */ + pu1_src_temp = pu1_src; + i4_sad_dc = 0; + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + col = j / 4; + row = i / 4; + val_u = u4_dc_val[row][col][0]; + val_v = u4_dc_val[row][col][1]; + + i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for DC mode*/ + i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]); + } + pu1_src_temp += src_strd; + } + + if ((u4_valid_intra_modes & 01) == 0)/* If DC is disabled*/ + i4_sad_dc = INT_MAX; + if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled*/ + i4_sad_horz = INT_MAX; + if ((u4_valid_intra_modes & 04) == 0)/* If VERT is disabled*/ + i4_sad_vert = INT_MAX; + + i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert); + + /* Finding Minimum sad and doing corresponding prediction; nothing is written unless the minimum beats *pu4_sadmin, and equal SADs resolve in the order DC, HORZ, VERT */ + if (i4_min_sad < *pu4_sadmin) + { + *pu4_sadmin = i4_min_sad; + + if (i4_min_sad == i4_sad_dc) + { + *u4_intra_mode = DC_CH_I8x8; + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + col = j / 4; + row = i / 4; + + pu1_dst[2 * j] = u4_dc_val[row][col][0]; + pu1_dst[2 * j + 1] = u4_dc_val[row][col][1]; + } + pu1_dst += dst_strd; + } + } + else if (i4_min_sad == i4_sad_horz) + { + 
*u4_intra_mode = HORZ_CH_I8x8; + for (j = 0; j < 8; j++) + { + val_v = pu1_ngbr_pels[15 - 2 * j]; + val_u = pu1_ngbr_pels[15 - 2 * j - 1]; + + for (i = 0; i < 8; i++) + { + pu1_dst[2 * i] = val_u; + pu1_dst[2 * i + 1] = val_v; + + } + pu1_dst += dst_strd; + } + } + else + { + *u4_intra_mode = 
VERT_CH_I8x8; + pu1_neighbour = pu1_ngbr_pels + 18; + for (j = 0; j < 8; j++) + { + memcpy(pu1_dst, pu1_neighbour, MB_SIZE); + pu1_dst += dst_strd; + } + } + } + + return; +} diff --git a/encoder/ih264e_intra_modes_eval.h b/encoder/ih264e_intra_modes_eval.h new file mode 100755 index 0000000..c8402e5 --- /dev/null +++ b/encoder/ih264e_intra_modes_eval.h @@ -0,0 +1,418 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_intra_modes_eval.h +* +* @brief +* This file contains declarations of routines that perform rate distortion +* analysis on a macroblock if coded as intra. 
+* +* @author +* ittiam +* +* @remarks +* none +* +******************************************************************************* +*/ + +#ifndef IH264E_INTRA_MODES_EVAL_H_ +#define IH264E_INTRA_MODES_EVAL_H_ + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief +* derivation process for macroblock availability +* +* @par Description +* Calculates the availability of the left, top, topright and topleft macroblocks. +* +* @param[in] ps_proc_ctxt +* pointer to proc context (handle) +* +* @remarks Based on section 6.4.5 in H264 spec +* +* @return none +* +****************************************************************************** +*/ +void ih264e_derive_nghbr_avbl_of_mbs + ( + process_ctxt_t *ps_proc_ctxt + ); + +/** +****************************************************************************** +* +* @brief +* derivation process for subblock/partition availability +* +* @par Description +* Calculates the availability of the left, top, topright and topleft subblock +* or partitions. +* +* @param[in] s_ngbr_avbl +* pointer to the neighbor availability info (block_neighbors_t) +* +* @param[in] i1_pel_pos_x +* column position of the pel wrt the current block +* +* @param[in] i1_pel_pos_y +* row position of the pel wrt the current block +* +* @remarks Assumptions: before calling this function it is assumed that +* the neighbor availability of the current macroblock is already derived. 
+* Based on table 6-3 of H264 specification +* +* @return availability status (yes or no) +* +****************************************************************************** +*/ +UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions + ( + block_neighbors_t *s_ngbr_avbl, + WORD8 i1_pel_pos_x, + WORD8 i1_pel_pos_y + ); + +/** +****************************************************************************** +* +* @brief +* evaluate best intra 16x16 mode (rate distortion opt off) +* +* @par Description +* This function evaluates all the possible intra 16x16 modes and finds the mode +* that best represents the macro-block (least distortion) and occupies fewer +* bits in the bit-stream. +* +* @param[in] ps_proc_ctxt +* pointer to process context (handle) +* +* @remarks +* Ideally the cost of encoding a macroblock is calculated as +* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the +* input block and the reconstructed block and rate is the number of bits taken +* to place the macroblock in the bit-stream. In this routine the rate does not +* exactly point to the total number of bits it takes, rather it points to header +* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits +* and residual bits fall in to texture bits the number of bits taken to encoding +* mbtype is considered as rate, we compute cost. Further we will approximate +* the distortion as the deviation b/w input and the predicted block as opposed +* to input and reconstructed block. +* +* NOTE: As per the Document JVT-O079, for intra 16x16 macroblock, +* the SAD and cost are one and the same. 
+* +* @return none +* +****************************************************************************** +*/ +void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff + ( + process_ctxt_t *ps_proc_ctxt + ); + +/** +****************************************************************************** +* +* @brief +* evaluate best intra 8x8 mode (rate distortion opt off) +* +* @par Description +* This function evaluates all the possible intra 8x8 modes and finds the mode +* that best represents the macro-block (least distortion) and occupies fewer +* bits in the bit-stream. +* +* @param[in] ps_proc_ctxt +* pointer to proc ctxt +* +* @remarks Ideally the cost of encoding a macroblock is calculated as +* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the +* input block and the reconstructed block and rate is the number of bits taken +* to place the macroblock in the bit-stream. In this routine the rate does not +* exactly point to the total number of bits it takes, rather it points to header +* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits +* and residual bits fall in to texture bits the number of bits taken to encoding +* mbtype is considered as rate, we compute cost. Further we will approximate +* the distortion as the deviation b/w input and the predicted block as opposed +* to input and reconstructed block. +* +* NOTE: TODO: This function needs to be tested +* +* @return none +* +****************************************************************************** +*/ +void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff + ( + process_ctxt_t *ps_proc_ctxt + ); + +/** +****************************************************************************** +* +* @brief +* evaluate best intra 4x4 mode (rate distortion opt on) +* +* @par Description +* This function evaluates all the possible intra 4x4 modes and finds the mode +* that best represents the macro-block (least distortion) and occupies fewer +* bits in the bit-stream. 
+* +* @param[in] ps_proc_ctxt +* pointer to proc ctxt +* +* @remarks +* Ideally the cost of encoding a macroblock is calculated as +* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the +* input block and the reconstructed block and rate is the number of bits taken +* to place the macroblock in the bit-stream. In this routine the rate does not +* exactly point to the total number of bits it takes, rather it points to header +* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits +* and residual bits fall in to texture bits the number of bits taken to encoding +* mbtype is considered as rate, we compute cost. Further we will approximate +* the distortion as the deviation b/w input and the predicted block as opposed +* to input and reconstructed block. +* +* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, +* 24*lambda is added to the SAD before comparison with the best SAD for +* inter prediction. This is an empirical value to prevent using too many intra +* blocks. +* +* @return none +* +****************************************************************************** +*/ +void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton + ( + process_ctxt_t *ps_proc_ctxt + ); + +/** +****************************************************************************** +* +* @brief +* evaluate best intra 4x4 mode (rate distortion opt off) +* +* @par Description +* This function evaluates all the possible intra 4x4 modes and finds the mode +* that best represents the macro-block (least distortion) and occupies fewer +* bits in the bit-stream. +* +* @param[in] ps_proc_ctxt +* pointer to proc ctxt +* +* @remarks +* Ideally the cost of encoding a macroblock is calculated as +* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the +* input block and the reconstructed block and rate is the number of bits taken +* to place the macroblock in the bit-stream. 
In this routine the rate does not +* exactly point to the total number of bits it takes, rather it points to header +* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits +* and residual bits fall in to texture bits the number of bits taken to encoding +* mbtype is considered as rate, we compute cost. Further we will approximate +* the distortion as the deviation b/w input and the predicted block as opposed +* to input and reconstructed block. +* +* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, +* 24*lambda is added to the SAD before comparison with the best SAD for +* inter prediction. This is an empirical value to prevent using too many intra +* blocks. +* +* @return none +* +****************************************************************************** +*/ +void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff + ( + process_ctxt_t *ps_proc_ctxt + ); + +/** +****************************************************************************** +* +* @brief +* evaluate best chroma intra 8x8 mode (rate distortion opt off) +* +* @par Description +* This function evaluates all the possible chroma intra 8x8 modes and finds +* the mode that best represents the macroblock (least distortion) and occupies +* fewer bits in the bitstream. +* +* @param[in] ps_proc_ctxt +* pointer to macroblock context (handle) +* +* @remarks +* For chroma best intra pred mode is calculated based only on SAD +* +* @returns none +* +****************************************************************************** +*/ +void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff + ( + process_ctxt_t *ps_proc_ctxt + ); + + +/** +****************************************************************************** +* +* @brief +* Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the +* prediction. 
+* +* @par Description +* This function evaluates the first three 16x16 modes, computes the corresponding sad +* and returns the buffer predicted with the best mode. +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_ngbr_pels_i16 +* UWORD8 pointer to neighbouring pels +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels +* +* @param[out] u4_intra_mode +* Pointer to the variable in which best mode is returned +* +* @param[in,out] pu4_sadmin +* Pointer to the variable in which minimum sad is returned +* +* @param[in] u4_valid_intra_modes +* Says what all modes are valid +* +* @returns none +* +****************************************************************************** +*/ +typedef void ih264e_evaluate_intra_modes_ft(UWORD8 *pu1_src, + UWORD8 *pu1_ngbr_pels_i16, + UWORD8 *pu1_dst, + UWORD32 src_strd, + UWORD32 dst_strd, + WORD32 u4_n_avblty, + UWORD32 *u4_intra_mode, + WORD32 *pu4_sadmin, + UWORD32 u4_valid_intra_modes); + +ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes; +ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes; + +/* assembly */ +ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_a9q; +ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_a9q; + +ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_av8; +ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_av8; + +/* x86 intrinsics */ +ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_ssse3; +ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_ssse3; + +/** +****************************************************************************** +* +* @brief +* Evaluate best intra 4x4 mode and perform prediction. 
+* +* @par Description +* This function evaluates the 4x4 modes, computes the corresponding sad +* and returns the buffer predicted with the best mode. +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_ngbr_pels +* UWORD8 pointer to neighbouring pels +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels +* +* @param[out] u4_intra_mode +* Pointer to the variable in which best mode is returned +* +* @param[out] pu4_sadmin +* Pointer to the variable in which minimum cost is returned +* +* @param[in] u4_valid_intra_modes +* Says what all modes are valid +* +* @param[in] u4_lambda +* Lambda value for computing cost from SAD +* +* @param[in] u4_predictd_mode +* Predicted mode for cost computation +* +* @returns none +* +****************************************************************************** +*/ +typedef void ih264e_evaluate_intra_4x4_modes_ft(UWORD8 *pu1_src, + UWORD8 *pu1_ngbr_pels, + UWORD8 *pu1_dst, + UWORD32 src_strd, + UWORD32 dst_strd, + WORD32 u4_n_avblty, + UWORD32 *u4_intra_mode, + WORD32 *pu4_sadmin, + UWORD32 u4_valid_intra_modes, + UWORD32 u4_lambda, + UWORD32 u4_predictd_mode); + +ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes; + +/* x86 intrinsics */ +ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_ssse3; + +/* assembly */ +ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_a9q; +ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_av8; + +#endif /* IH264E_INTRA_MODES_EVAL_H_ */ diff --git a/encoder/ih264e_list.h b/encoder/ih264e_list.h new file mode 100755 index 0000000..782c007 --- /dev/null +++ b/encoder/ih264e_list.h @@ -0,0 +1,42 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_list.h +* +* @brief +* The file contains declarations of functions for encoder queue management +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264E_LIST_H_ +#define IH264E_LIST_H_ + + +#endif /* IH264E_LIST_H_ */ diff --git a/encoder/ih264e_master.h b/encoder/ih264e_master.h new file mode 100755 index 0000000..6c7505a --- /dev/null +++ b/encoder/ih264e_master.h @@ -0,0 +1,132 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_master.h +* +* @brief +* Contains declarations of functions used by master thread +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264E_MASTER_H_ +#define IH264E_MASTER_H_ + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief +* This function joins all the spawned threads after successful completion of +* their tasks +* +* @par Description +* +* @param[in] ps_codec +* pointer to codec context +* +* @returns none +* +****************************************************************************** +*/ +void ih264e_join_threads(codec_t *ps_codec); + +/** +****************************************************************************** +* +* @brief This function puts the current thread to sleep for a duration +* of sleep_us +* +* @par Description +* ithread_yield() method causes the calling thread to yield execution to another +* thread that is ready to run on the current processor. The operating system +* selects the thread to yield to. ithread_usleep blocks the current thread for +* the specified number of microseconds. In other words, yield just says, +* end my timeslice prematurely, look around for other threads to run. If there +* is nothing better than me, continue. Sleep says I don't want to run for x +* microseconds. 
Even if no other thread wants to run, don't make me run. +* +* @param[in] sleep_us +* thread sleep duration +* +* @returns error_status +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_wait_for_thread(UWORD32 sleep_us); + +/** +****************************************************************************** +* +* @brief +* Encodes in synchronous api mode +* +* @par Description +* This routine processes input yuv, encodes it and outputs bitstream and recon +* +* @param[in] ps_codec_obj +* Pointer to codec object at API level +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @returns Status +* +****************************************************************************** +*/ +WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op); + +/** +******************************************************************************* +* +* @brief update encoder configuration parameters +* +* @par Description: +* updates encoder configuration parameters from the given config set. +* Initialize/reinitialize codec parameters according to new configurations. +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_cfg +* Pointer to config param set +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec, cfg_params_t *ps_cfg); + +#endif /* IH264E_MASTER_H_ */ diff --git a/encoder/ih264e_mc.c b/encoder/ih264e_mc.c new file mode 100755 index 0000000..2dd0974 --- /dev/null +++ b/encoder/ih264e_mc.c @@ -0,0 +1,320 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_mc.c +* +* @brief +* Contains definition of functions for motion compensation +* +* @author +* ittiam +* +* @par List of Functions: +* - ih264e_motion_comp_luma() +* - ih264e_motion_comp_chroma() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> + +/* User include files */ +#include "ih264_typedefs.h" +#include "ih264_defs.h" +#include "iv2.h" +#include "ive2.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_structs.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264e_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" 
+#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_mc.h" +#include "ih264e_half_pel.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief +* performs motion compensation for a luma mb for the given mv. +* +* @par Description +* This routine performs motion compensation of an inter mb. When the inter +* mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer +* to pred buffer. In this case the function returns pointer and stride of the +* ref. buffer and this info is used in place of pred buffer else where. +* In other cases, the pred buffer is populated via copy / filtering + copy +* (q pel cases) and returned. +* +* @param[in] ps_proc +* pointer to current proc ctxt +* +* @param[out] pu1_pseudo_pred +* pseudo prediction buffer +* +* @param[out] u4_pseudo_pred_strd +* pseudo pred buffer stride +* +* @return none +* +* @remarks Assumes half pel buffers for the entire frame are populated. 
+*
+******************************************************************************
+*/
+void ih264e_motion_comp_luma(process_ctxt_t *ps_proc,
+                             UWORD8 **pu1_pseudo_pred,
+                             WORD32 *pi4_pseudo_pred_strd)
+{
+    /*
+     * For every partition one of two sources is selected:
+     *   [0] the reference frame, displaced by the integer part of the mv
+     *   [1] ps_proc->pu1_best_subpel_buf
+     * Source [1] is used whenever the half pel bit (bit 1) of either mv
+     * component is set; the quarter pel bit (bit 0) does not influence the
+     * choice. With a single partition the selected pointer and stride are
+     * handed back through the output arguments and nothing is copied,
+     * otherwise the partition is copied into ps_proc->pu1_pred_mb.
+     */
+
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* candidate sources and their strides */
+    UWORD8 *apu1_src[2];
+    WORD32 ai4_src_strd[2];
+
+    /* pred buffer stride */
+    const WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* partition idx */
+    UWORD32 u4_prtn;
+
+    ai4_src_strd[0] = ps_proc->i4_rec_strd;
+
+    for (u4_prtn = 0; u4_prtn < ps_proc->u4_num_sub_partitions; u4_prtn++)
+    {
+        /* motion vector, size and position of the current partition */
+        enc_pu_t *ps_pu = &ps_proc->ps_pu[u4_prtn];
+
+        /* mv components in quarter pel units */
+        WORD32 i4_mvx = ps_pu->s_l0_mv.i2_mvx;
+        WORD32 i4_mvy = ps_pu->s_l0_mv.i2_mvy;
+
+        /* 1 when either component carries a half pel offset, else 0 */
+        UWORD32 u4_src_idx = (((i4_mvx | i4_mvy) & 0x2) != 0);
+
+        /* destination and size, needed only when a copy is made */
+        UWORD8 *pu1_pred;
+        UWORD32 wd, ht;
+
+        /* full pel source: reference frame moved by the integer mv */
+        apu1_src[0] = ps_proc->pu1_ref_buf_luma
+                        + ((i4_mvy >> 2) * ai4_src_strd[0]) + (i4_mvx >> 2);
+
+        /* sub pel source */
+        apu1_src[1] = ps_proc->pu1_best_subpel_buf;
+        ai4_src_strd[1] = ps_proc->u4_bst_spel_buf_strd;
+
+        /********************************************************************/
+        /* A lone partition is not copied: the chosen source buffer and its */
+        /* stride are returned and stand in for the pred buffer.            */
+        /********************************************************************/
+        if (ps_proc->u4_num_sub_partitions == 1)
+        {
+            *pu1_pseudo_pred = apu1_src[u4_src_idx];
+            *pi4_pseudo_pred_strd = ai4_src_strd[u4_src_idx];
+            continue;
+        }
+
+        /* partition size in pels */
+        wd = (ps_pu->b4_wd + 1) << 2;
+        ht = (ps_pu->b4_ht + 1) << 2;
+
+        /* partition position inside the pred mb (positions are in 4 pel units) */
+        pu1_pred = ps_proc->pu1_pred_mb
+                        + 4 * ps_pu->b4_pos_y * i4_pred_strd + 4 * ps_pu->b4_pos_x;
+
+        ps_codec->pf_inter_pred_luma_copy(apu1_src[u4_src_idx], pu1_pred,
+                                          ai4_src_strd[u4_src_idx], i4_pred_strd,
+                                          ht, wd, NULL, 0);
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* performs motion compensation for chroma mb
+*
+* @par Description
+* Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
+* according to the motion vectors given
+*
+* @param[in] ps_proc
+* pointer to current proc ctxt
+*
+* @return none
+*
+* @remarks Assumes half pel and quarter pel buffers for the entire frame are
+* populated.
+******************************************************************************
+*/
+void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc)
+{
+    /*
+     * Each partition is predicted by pf_inter_pred_chroma from the chroma
+     * reference buffer. The luma mv is split into a whole chroma pel part
+     * (mv >> 3) that moves the source pointer and a fractional part
+     * (mv & 7) that is passed on as dx / dy.
+     */
+
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* stride of the reference buffer */
+    const WORD32 i4_ref_strd = ps_proc->i4_rec_strd;
+
+    /* stride of the pred buffer */
+    const WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* partition idx */
+    UWORD32 u4_prtn;
+
+    for (u4_prtn = 0; u4_prtn < ps_proc->u4_num_sub_partitions; u4_prtn++)
+    {
+        /* motion vector, size and position of the current partition */
+        enc_pu_t *ps_pu = &ps_proc->ps_pu[u4_prtn];
+
+        /* mv components as stored for luma */
+        WORD32 i4_mvx = ps_pu->s_l0_mv.i2_mvx;
+        WORD32 i4_mvy = ps_pu->s_l0_mv.i2_mvy;
+
+        /* fractional displacement: the three low order bits of the mv */
+        UWORD8 u1_dx = i4_mvx & 0x7;
+        UWORD8 u1_dy = i4_mvy & 0x7;
+
+        /* width and height of the chroma partition */
+        UWORD32 wd = (ps_pu->b4_wd + 1) << 1;
+        UWORD32 ht = (ps_pu->b4_ht + 1) << 1;
+
+        /* source: whole pel displacement, two bytes per horizontal step */
+        UWORD8 *pu1_ref = ps_proc->pu1_ref_buf_chroma
+                        + ((i4_mvy >> 3) * i4_ref_strd) + ((i4_mvx >> 3) << 1);
+
+        /*
+         * NOTE(review): the row factor (4) and byte factor (2) look swapped
+         * for a half height block with two bytes per pel - presumably never
+         * exercised while partitions sit at position (0, 0); confirm against
+         * the pred buffer layout before enabling smaller partitions.
+         */
+        UWORD8 *pu1_pred = ps_proc->pu1_pred_mb
+                        + 4 * ps_pu->b4_pos_y * i4_pred_strd + 2 * ps_pu->b4_pos_x;
+
+        ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd, i4_pred_strd,
+                                       u1_dx, u1_dy, ht, wd);
+    }
+}
diff --git a/encoder/ih264e_mc.h b/encoder/ih264e_mc.h
new file mode 100755
index 0000000..965e1d1
--- /dev/null
+++ b/encoder/ih264e_mc.h
@@ -0,0 +1,104 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_mc.h
+*
+* @brief
+* This file contains declarations of routines that perform motion compensation
+* of luma and chroma macroblocks.
+* +* @author +* ittiam +* +* @remarks +* none +* +******************************************************************************* +*/ + +#ifndef IH264E_MC_H_ +#define IH264E_MC_H_ + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief +* performs motion compensation for a luma mb for the given mv. +* +* @par Description +* This routine performs motion compensation of an inter mb. When the inter +* mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer +* to pred buffer. In this case the function returns pointer and stride of the +* ref. buffer and this info is used in place of pred buffer else where. +* In other cases, the pred buffer is populated via copy / filtering + copy +* (q pel cases) and returned. +* +* @param[in] ps_proc +* pointer to current proc ctxt +* +* @param[out] pu1_pseudo_pred +* pseudo prediction buffer +* +* @param[out] u4_pseudo_pred_strd +* pseudo pred buffer stride +* +* @return none +* +* @remarks Assumes half pel buffers for the entire frame are populated. +* +****************************************************************************** +*/ +void ih264e_motion_comp_luma(process_ctxt_t *ps_proc, + UWORD8 **pu1_pseudo_pred, + WORD32 *pi4_pseudo_pred_strd); + +/** +****************************************************************************** +* +* @brief +* performs motion compensation for chroma mb +* +* @par Description +* Copies a MB of data from the reference buffer (Full pel, half pel or q pel) +* according to the motion vectors given +* +* @param[in] ps_proc +* pointer to current proc ctxt +* +* @return none +* +* @remarks Assumes half pel and quarter pel buffers for the entire frame are +* populated. 
+****************************************************************************** +*/ +void ih264e_motion_comp_chroma + ( + process_ctxt_t *ps_proc + ); + + +#endif // IH264E_MC_H_ diff --git a/encoder/ih264e_me.c b/encoder/ih264e_me.c new file mode 100755 index 0000000..9e8d7a3 --- /dev/null +++ b/encoder/ih264e_me.c @@ -0,0 +1,1153 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** + ******************************************************************************* + * @file + * ih264e_me.c + * + * @brief + * Contains definition of functions for motion estimation + * + * @author + * ittiam + * + * @par List of Functions: + * - ih264e_init_mv_bits() + * - ih264e_skip_analysis_chroma() + * - ih264e_skip_analysis_luma() + * - ih264e_analyse_skip() + * - ih264e_get_search_candidates() + * - ih264e_find_skip_motion_vector() + * - ih264e_get_mv_predictor() + * - ih264e_mv_pred() + * - ih264e_mv_pred_me() + * - ih264e_init_me() + * - ih264e_compute_me() + * - ih264e_compute_me_nmb() + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <assert.h> +#include <limits.h> + +/* User include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ithread.h" +#include "ih264_platform_macros.h" +#include "ih264_defs.h" +#include "ime_defs.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264e_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_globals.h" +#include "ih264_macros.h" +#include "ih264e_me.h" +#include "ime.h" +#include "ime_distortion_metrics.h" +#include "ih264_debug.h" +#include "ithread.h" +#include "ih264e_intra_modes_eval.h" +#include 
"ih264e_core_coding.h" +#include "ih264e_mc.h" +#include "ih264e_debug.h" +#include "ih264e_half_pel.h" +#include "ime_statistics.h" +#include "ih264e_platform_macros.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* This function populates the length of the codewords for motion vectors in the +* range (-search range, search range) in pixels +* +* @param[in,out] ps_me_ctxt +* Pointer to me ctxt; the table ps_me_ctxt->pu1_mv_bits is filled in. Entries +* are written at index i and at index -i, so pu1_mv_bits must point into the +* interior of its storage. +* +* @returns none +* +* @remarks The length of the code words are derived from signed exponential +* Golomb codes. +* +******************************************************************************* +*/ +void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt) +{ + /* temp var */ + WORD32 i, codesize = 3, diff, limit; + UWORD32 u4_code_num, u4_range; + UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max; + + /* max srch range */ + diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y); + /* sub pel: scale by 4 */ + diff <<= 2; + /* delta mv: double the range again */ + diff <<= 1; + + /* codeNum for positive integer = 2x-1 : Table9-3 */ + u4_code_num = (diff << 1); + + /* GETRANGE (macro defined elsewhere) derives u4_range from u4_code_num; u4_range bounds the largest codeword length handled below */ + GETRANGE(u4_range, u4_code_num); + + limit = 2*u4_range - 1; + + /* init mv bits: a zero mv costs a single bit */ + ps_me_ctxt->pu1_mv_bits[0] = 1; + + /* codeword lengths grow in steps of 2 (3, 5, 7, ...) while below limit */ + while (codesize < limit) + { + u4_uev_min = (1 << (codesize >> 1)); + u4_uev_max = 2*u4_uev_min - 1; + + u4_sev_min = u4_uev_min >> 1; + u4_sev_max = u4_uev_max >> 1; + + DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize); + + for (i = u4_sev_min; i <= (WORD32)u4_sev_max; i++) + { + /* the same length is stored for the positive and the negative value */ ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize; + } + + codesize += 2; + } +} + +/** 
+******************************************************************************* +* +* @brief Determines the valid candidates for which the initial search shall happen. +* The best of these candidates is used to center the diamond pixel search. +* +* @par Description: The function sends the skip, (0,0), left, top and top-right +* neighbouring MBs MVs. The left, top and top-right MBs MVs are used because +* these are the same MVs that are used to form the MV predictor. This initial MV +* search candidates need not take care of slice boundaries and hence neighbor +* availability checks are not made here. +* +* @param[in] ps_left_mb_pu +* pointer to left mb motion vector info +* +* @param[in] ps_top_mb_pu +* pointer to top & top right mb motion vector info +* +* @param[in] ps_top_left_mb_pu +* pointer to top left mb motion vector info +* +* @param[out] ps_skip_mv +* pointer to skip motion vectors for the curr mb +* +* @param[in] i4_mb_x +* mb index x +* +* @param[in] i4_mb_y +* mb index y +* +* @param[in] i4_wd_mbs +* pic width in mbs +* +* @param[in] ps_motionEst +* pointer to me context +* +* @returns The list of MVs to be used of priming the full pel search and the +* number of such MVs +* +* @remarks +* Assumptions : 1. Assumes Single reference frame +* 2. 
Assumes Only partition of size 16x16 +* +******************************************************************************* +*/ +static void ih264e_get_search_candidates(process_ctxt_t *ps_proc, + me_ctxt_t *ps_me_ctxt) +{ + /* Inputs are ps_proc and ps_me_ctxt only: neighbour mvs are read from the ME copies kept in ps_proc, and each neighbour is used only when its flag in ps_ngbr_avbl is set */ /* curr mb indices */ + WORD32 i4_mb_x = ps_proc->i4_mb_x; + + /* left mb motion vector */ + mv_t *ps_left_mv; + + /* top mb motion vector */ + mv_t *ps_top_mv; + + /* top left mb motion vector */ + mv_t *ps_top_left_mv; + + /* top right mb motion vector */ + mv_t *ps_top_right_mv; + + /* skip mv */ + mv_t *ps_skip_mv = ps_proc->ps_skip_mv; + + /* mb part info */ + mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part; + + /* number of search candidates collected so far */ + UWORD32 u4_num_candidates = 0; + + /* candidate mv after rounding and clipping */ + WORD32 mvx, mvy; + + /* ngbr availability */ + block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + + /* srch range limits used to clip every candidate: w/e for x, n/s for y */ + WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n; + WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s; + WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e; + WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w; + + ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_l0_mv; + ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_l0_mv; + ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_l0_mv; + ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_l0_mv; + + /************************************************************/ + /* Taking the Zero motion vector as one of the candidates */ + /************************************************************/ + ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = 0; + ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = 0; + + u4_num_candidates++; + + /************************************************************/ + /* Taking the Left MV Predictor as one of the candidates */ + /************************************************************/ + if (ps_ngbr_avbl->u1_mb_a) + { + /* add 2, then shift right by 2: rounds the stored mv to a quarter of its scale (presumably quarter pel to full pel; confirm) */ mvx = (ps_left_mv->i2_mvx + 2) >> 2; + mvy = (ps_left_mv->i2_mvy + 2) >> 2; + + mvx = 
CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); + mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy); + + ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx; + ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy; + + u4_num_candidates ++; + } + /*else + { + ps_me_ctxt->as_mv_init_search[LEFT_CAND].i2_mvx = 0; + ps_me_ctxt->as_mv_init_search[LEFT_CAND].i2_mvy = 0; + }*/ + + /************************************************************/ + /* Taking the Top MV Predictor as one of the candidates */ + /************************************************************/ + if (ps_ngbr_avbl->u1_mb_b) + { + mvx = (ps_top_mv->i2_mvx + 2) >> 2; + mvy = (ps_top_mv->i2_mvy + 2) >> 2; + + mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); + mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy); + + ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx; + ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy; + + u4_num_candidates ++; + + /************************************************************/ + /* Taking the TopRt MV Predictor as one of the candidates */ + /************************************************************/ + if (ps_ngbr_avbl->u1_mb_c) + { + mvx = (ps_top_right_mv->i2_mvx + 2) >> 2; + mvy = (ps_top_right_mv->i2_mvy + 2)>> 2; + + mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); + mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy); + + ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx; + ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy; + + u4_num_candidates ++; + } + /************************************************************/ + /* TopRt not available: take the TopLt MV Predictor instead */ + /************************************************************/ + else if (ps_ngbr_avbl->u1_mb_d) + { + mvx = (ps_top_left_mv->i2_mvx + 2) >> 2; + mvy = (ps_top_left_mv->i2_mvy + 2) >> 2; + + mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); + mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy); + + 
ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx; + ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy; + + u4_num_candidates ++; + } + /*else + { + ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvx = 0; + ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvy = 0; + }*/ + } + /*else + { + ps_me_ctxt->as_mv_init_search[TOP_CAND].i2_mvx = 0; + ps_me_ctxt->as_mv_init_search[TOP_CAND].i2_mvy = 0; + + ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvx = 0; + ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvy = 0; + }*/ + + + /********************************************************************/ + /* MV Prediction */ + /********************************************************************/ + /* the predictor is left in ps_proc->ps_pred_mv and mirrored into the mb part ctxt */ ih264e_mv_pred_me(ps_proc); + + ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv->i2_mvx; + ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv->i2_mvy; + + /************************************************************/ + /* Get the skip motion vector */ + /************************************************************/ + /* second argument 1 selects the ME copies of the neighbour PUs; the result is written to ps_proc->ps_skip_mv */ ih264e_find_skip_motion_vector(ps_proc, 1); + + /************************************************************/ + /* Taking the Skip motion vector as one of the candidates */ + /************************************************************/ + mvx = (ps_skip_mv->i2_mvx + 2) >> 2; + mvy = (ps_skip_mv->i2_mvy + 2) >> 2; + + mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); + mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy); + + ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx; + ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy; + + u4_num_candidates++; + + /* at most 5 candidates: zero, left, top, top right or top left, skip */ ASSERT(u4_num_candidates <= 5); + + ps_me_ctxt->u4_num_candidates = u4_num_candidates; +} + +/** +******************************************************************************* +* +* @brief The function gives the skip motion vector +* +* @par Description: +* The function gives the skip motion vector +* +* @param[in] ps_left_mb_pu +* pointer to left mb motion vector info +* +* 
@param[in] ps_top_row_pu +* pointer to top & top right mb motion vector info +* +* @param[out] ps_pred_mv +* pointer to candidate predictors for the current block +* +* @returns The x & y components of the MV predictor. +* +* @remarks The code implements the logic as described in sec 8.4.1.1 in H264 +* specification. +* +******************************************************************************* +*/ +void ih264e_find_skip_motion_vector(process_ctxt_t *ps_proc, UWORD32 u4_for_me) +{ + /* Actual interface: reads the neighbour PUs, the availability flags and ps_pred_mv from ps_proc, and writes the result to the mv that ps_proc->ps_skip_mv points to; nothing is returned */ /* left mb pu (ref idx and mv) */ + enc_pu_t *ps_left_mb_pu ; + + /* top mb pu (ref idx and mv) */ + enc_pu_t *ps_top_mb_pu ; + + /* skip mv */ + mv_t *ps_skip_mv = ps_proc->ps_skip_mv; + + /* u4_for_me == 1 selects the ME copies of the neighbour PUs, any other value selects the regular ones */ if (u4_for_me == 1) + { + ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME; + ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x; + } + else + { + ps_left_mb_pu = &ps_proc->s_left_mb_pu ; + ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x; + } + + /* skip mv is (0, 0) when left or top is unavailable, or when either of them has ref idx 0 together with a zero mv (the OR of the three fields is 0 only if all are 0) */ if ( (!ps_proc->ps_ngbr_avbl->u1_mb_a) || + (!ps_proc->ps_ngbr_avbl->u1_mb_b) || + ((ps_left_mb_pu->i1_l0_ref_idx | ps_left_mb_pu->s_l0_mv.i2_mvx | ps_left_mb_pu->s_l0_mv.i2_mvy) == 0) || + ((ps_top_mb_pu->i1_l0_ref_idx | ps_top_mb_pu->s_l0_mv.i2_mvx | ps_top_mb_pu->s_l0_mv.i2_mvy) == 0) ) + { + ps_skip_mv->i2_mvx = 0; + ps_skip_mv->i2_mvy = 0; + } + else + { + /* otherwise reuse the predictor already stored in ps_proc->ps_pred_mv; it is not recomputed here */ ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv->i2_mvx; + ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv->i2_mvy; + } +} + +/** +******************************************************************************* +* +* @brief motion vector predictor +* +* @par Description: +* The routine calculates the motion vector predictor for a given block, +* given the candidate MV predictors. +* +* @param[in] ps_left_mb_pu +* pointer to left mb motion vector info +* +* @param[in] ps_top_row_pu +* pointer to top & top right mb motion vector info +* +* @param[out] ps_pred_mv +* pointer to candidate predictors for the current block +* +* @returns The x & y components of the MV predictor. 
+* +* @remarks The code implements the logic as described in sec 8.4.1.3 in H264 +* specification. +* Assumptions : 1. Assumes Single reference frame +* 2. Assumes Only partition of size 16x16 +* +******************************************************************************* +*/ +void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu, + enc_pu_t *ps_top_row_pu, + mv_t *ps_pred_mv) +{ + /* ps_top_row_pu[0] is the top neighbour and ps_top_row_pu[1] the top right one; the result is written to ps_pred_mv and nothing is returned */ /* curr frame ref idx */ + /* we are assuming that we are operating on single reference frame + * hence the ref idx is insignificant during mv prediction. + */ + WORD32 u4_ref_idx = 0; + + /* pred_algo: 0 left, 1 top, 2 top right, 3 median (default); a, b, c are 0 when the left, top, top right ref idx equals u4_ref_idx and -1 otherwise */ + WORD32 pred_algo = 3, a, b, c; + + /* If only one of the candidate blocks has a reference frame equal to + * the current block then use the same block as the final predictor */ + a = (ps_left_mb_pu->i1_l0_ref_idx == u4_ref_idx)? 0:-1; + b = (ps_top_row_pu[0].i1_l0_ref_idx == u4_ref_idx)? 0:-1; + c = (ps_top_row_pu[1].i1_l0_ref_idx == u4_ref_idx)? 0:-1; + + if (a == 0 && b == -1 && c == -1) + pred_algo = 0; /* LEFT */ + else if (a == -1 && b == 0 && c == -1) + pred_algo = 1; /* TOP */ + else if (a == -1 && b == -1 && c == 0) + pred_algo = 2; /* TOP RIGHT */ + + switch (pred_algo) + { + case 0: + /* left */ + ps_pred_mv->i2_mvx = ps_left_mb_pu->s_l0_mv.i2_mvx; + ps_pred_mv->i2_mvy = ps_left_mb_pu->s_l0_mv.i2_mvy; + break; + case 1: + /* top */ + ps_pred_mv->i2_mvx = ps_top_row_pu[0].s_l0_mv.i2_mvx; + ps_pred_mv->i2_mvy = ps_top_row_pu[0].s_l0_mv.i2_mvy; + break; + case 2: + /* top right */ + ps_pred_mv->i2_mvx = ps_top_row_pu[1].s_l0_mv.i2_mvx; + ps_pred_mv->i2_mvy = ps_top_row_pu[1].s_l0_mv.i2_mvy; + break; + case 3: + /* median of left, top and top right, taken separately for x and y; MEDIAN is defined elsewhere and appears to store into its last argument (confirm) */ + MEDIAN(ps_left_mb_pu->s_l0_mv.i2_mvx, + ps_top_row_pu[0].s_l0_mv.i2_mvx, + ps_top_row_pu[1].s_l0_mv.i2_mvx, + ps_pred_mv->i2_mvx); + MEDIAN(ps_left_mb_pu->s_l0_mv.i2_mvy, + ps_top_row_pu[0].s_l0_mv.i2_mvy, + ps_top_row_pu[1].s_l0_mv.i2_mvy, + ps_pred_mv->i2_mvy); + + break; + default: + /* not reachable: pred_algo is only ever set to 0, 1, 2 or 3 */ break; + } +} + +/** 
+******************************************************************************* +* +* @brief This function performs MV prediction +* +* @par Description: +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns none +* +* @remarks none +* This function will update the MB availability since intra inter decision +* should be done before the call +* +******************************************************************************* +*/ +void ih264e_mv_pred(process_ctxt_t *ps_proc) +{ + + /* left mb motion vector */ + enc_pu_t *ps_left_mb_pu ; + + /* top left mb motion vector */ + enc_pu_t *ps_top_left_mb_pu ; + + /* top row motion vector info */ + enc_pu_t *ps_top_row_pu; + + /* predicted motion vector (output of this function) */ + mv_t *ps_pred_mv = ps_proc->ps_pred_mv; + + /* zero mv */ + mv_t zero_mv = {0, 0}; + + /* mb neighbor availability */ + block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + + /* mb syntax elements of neighbors */ + mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; + mb_info_t *ps_top_left_syn; + UWORD32 u4_left_is_intra; + + ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele); + u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra; + ps_left_mb_pu = &ps_proc->s_left_mb_pu; + ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu; + ps_top_row_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x); + + /* Before performing mv prediction prepare the ngbr information and + * reset motion vectors basing on their availability. Note that the left pu + * and the top row pu entries held in ps_proc are overwritten in place. */ + if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1) ) + { + /* left mb unavailable or intra: ref idx -1 and zero mv */ + ps_left_mb_pu->i1_l0_ref_idx = -1; + ps_left_mb_pu->s_l0_mv = zero_mv; + } + if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra) + { + /* top mb unavailable or intra: ref idx -1 and zero mv */ + ps_top_row_pu[0].i1_l0_ref_idx = -1; + ps_top_row_pu[0].s_l0_mv = zero_mv; + } + if (!ps_ngbr_avbl->u1_mb_c) + { + /* top right mv - When top right partition is not available for + * prediction if top left is available use it for prediction else + * set the mv 
information to -1 and (0, 0) + * */ + if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra) + { + ps_top_row_pu[1].i1_l0_ref_idx = -1; + ps_top_row_pu[1].s_l0_mv = zero_mv; + } + else + { + ps_top_row_pu[1].i1_l0_ref_idx = ps_top_left_mb_pu->i1_l0_ref_idx; + ps_top_row_pu[1].s_l0_mv = ps_top_left_mb_pu->s_l0_mv; + } + } + else if (ps_top_syn[1].u2_is_intra) + { + /* top right is available but intra: ref idx -1 and zero mv */ ps_top_row_pu[1].i1_l0_ref_idx = -1; + ps_top_row_pu[1].s_l0_mv = zero_mv; + } + + ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, ps_pred_mv); +} + +/** +******************************************************************************* +* +* @brief This function approximates Pred. MV +* +* @par Description: +* Prepares the left, top and top right neighbour PUs from the ME copies kept in +* ps_proc (s_left_mb_pu_ME, s_top_left_mb_pu_ME, ps_top_row_pu_ME) and calls +* ih264e_get_mv_predictor; the result is written to ps_proc->ps_pred_mv. +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns none +* +* @remarks +* Motion estimation happens at nmb level. For cost calculations, mv is +* approximated using this function. Only neighbour availability is checked here +* (no intra checks). The top and top right entries are edited in a local copy, +* while the left PU (s_left_mb_pu_ME) is reset in place when unavailable. +* +******************************************************************************* +*/ +void ih264e_mv_pred_me(process_ctxt_t *ps_proc) +{ + /* left mb motion vector */ + enc_pu_t *ps_left_mb_pu ; + + /* top left mb motion vector */ + enc_pu_t *ps_top_left_mb_pu ; + + /* top row motion vector info */ + enc_pu_t *ps_top_row_pu; + + /* local copy of the top ([0]) and top right ([1]) pu, so the row held in ps_proc is not modified */ enc_pu_t s_top_row_pu[2]; + + /* predicted motion vector */ + mv_t *ps_pred_mv = ps_proc->ps_pred_mv; + + /* zero mv */ + mv_t zero_mv = {0, 0}; + + /* mb neighbor availability */ + block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + + ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME; + ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME; + ps_top_row_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x); + + s_top_row_pu[0] = ps_top_row_pu[0]; + s_top_row_pu[1] = ps_top_row_pu[1]; + + /* Before performing mv prediction prepare the ngbr information and + * reset motion vectors basing on their availability */ + if (!ps_ngbr_avbl->u1_mb_a ) + { + /* left mv */ + ps_left_mb_pu->i1_l0_ref_idx = -1; + ps_left_mb_pu->s_l0_mv = zero_mv; + } + if 
(!ps_ngbr_avbl->u1_mb_b ) + { + /* top mv */ + s_top_row_pu[0].i1_l0_ref_idx = -1; + s_top_row_pu[0].s_l0_mv = zero_mv; + } + if (!ps_ngbr_avbl->u1_mb_c) + { + /* top right mv - When top right partition is not available for + * prediction if top left is available use it for prediction else + * set the mv information to -1 and (0, 0) + * */ + if (!ps_ngbr_avbl->u1_mb_d) + { + s_top_row_pu[1].i1_l0_ref_idx = -1; + s_top_row_pu[1].s_l0_mv = zero_mv; + } + else + { + s_top_row_pu[1].i1_l0_ref_idx = ps_top_left_mb_pu->i1_l0_ref_idx; + s_top_row_pu[1].s_l0_mv = ps_top_left_mb_pu->s_l0_mv; + } + } + + ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]), ps_pred_mv); +} + +/** +******************************************************************************* +* +* @brief This function initializes me ctxt +* +* @par Description: +* Before dispatching the current job to me thread, the me context associated +* with the job is initialized. +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns none +* +* @remarks +* Only the luma source pointer, the luma reference pointer and the lagrange +* parameter of ps_proc->s_me_ctxt are written. u1_mb_qp of the me ctxt is read +* here but not set here. +* +******************************************************************************* +*/ +void ih264e_init_me(process_ctxt_t *ps_proc) +{ + /* me ctxt */ + me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt; + + /* src ptr */ + ps_me_ctxt->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma; + + /* ref ptr */ + ps_me_ctxt->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma; + + /* lagrange param: looked up in gu1_qp0 with the qp stored in the me ctxt */ + ps_me_ctxt->u4_lambda_motion = gu1_qp0[ps_me_ctxt->u1_mb_qp]; +} + +/** +******************************************************************************* +* +* @brief This function performs motion estimation for the current mb +* +* @par Description: +* The current mb is compared with a list of mb's in the reference frame for +* least cost. The mb that offers least cost is chosen as predicted mb and the +* displacement of the predicted mb from index location of the current mb is +* signaled as mv. 
The list of the mb's that are chosen in the reference frame +* are dependent on the speed of the ME configured. +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns motion vector of the pred mb, sad, cost. +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_compute_me(process_ctxt_t *ps_proc) +{ + /* me ctxt */ + me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt; + + /* codec context */ + codec_t *ps_codec = ps_proc->ps_codec; + +// /* mb syntax elements of neighbors */ +// mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; +// mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME); + + /* mb part info */ + mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part; + mb_part_ctxt skip_mb_part_info; + + /* temp var */ + WORD32 rows_above, rows_below, columns_left, columns_right,u4_use_stat_sad; + + /* Motion vectors in full-pel units */ + WORD16 mv_x, mv_y; + + /* recon stride */ + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + + /* source buffer for half pel generation functions */ + UWORD8 *pu1_hpel_src; + + /* quantization parameters */ + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + + /* SAD thresholds, taken from the quantization parameters */ + ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh; + + /* Best sub pel buffer and its stride (destination of the copy near the end) */ + UWORD8 *pu1_best_subpel_buf = ps_proc->pu1_best_subpel_buf; + UWORD32 u4_bst_spel_strd = ps_proc->u4_bst_spel_buf_strd; + + /* During evaluation for motion vectors do not search through padded regions */ + /* Obtain number of rows and columns that are effective for me evaluation; rows_above and columns_left include one extra MB_SIZE */ + rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE; + rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE; + columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE; + columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE; + + /* init srch range */ + /* NOTE : For now, the search range is limited to half of DEFAULT_MAX_SRCH_RANGE_X (and of DEFAULT_MAX_SRCH_RANGE_Y) + * 
on all sides. + */ +// ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, ps_me_ctxt->ai2_srch_boundaries[0]); +// ps_me_ctxt->i4_srch_range_e = MIN(columns_right, ps_me_ctxt->ai2_srch_boundaries[0]); +// ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, ps_me_ctxt->ai2_srch_boundaries[1]); +// ps_me_ctxt->i4_srch_range_s = MIN(rows_below, ps_me_ctxt->ai2_srch_boundaries[1]); + + ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1); + ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1); + ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1); + ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1); + + /* this is to facilitate fast sub pel computation with minimal loads: the range shrinks by one pel on every side when half pel search is enabled */ + if (ps_me_ctxt->u4_enable_hpel) + { + ps_me_ctxt->i4_srch_range_w += 1; + ps_me_ctxt->i4_srch_range_e -= 1; + ps_me_ctxt->i4_srch_range_n += 1; + ps_me_ctxt->i4_srch_range_s -= 1; + } + + /* Initialize the min sad option */ + ps_me_ctxt->u4_min_sad_reached = 0; /* Not yet found min sad */ + ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad; + + /************************************************************/ + /* Get the seed motion vector candidates */ + /************************************************************/ + ih264e_get_search_candidates(ps_proc, ps_me_ctxt); + + /************************************************************/ + /* Init the MB part ctxt structure */ + /************************************************************/ + ps_mb_part->s_mv_curr.i2_mvx = 0; + ps_mb_part->s_mv_curr.i2_mvy = 0; + ps_mb_part->i4_mb_cost = INT_MAX; + ps_mb_part->i4_mb_distortion = INT_MAX; + + /* With NMB changes this logic will not work as we cannot exit NME in between*/ + /********************************************************************/ + /* Analyse skip */ + /********************************************************************/ +// if (ps_proc->ps_codec->s_cfg.u4_enable_satqd == 0 +// && 
u4_frame_level_me == 0) +// { +// if ( (ps_proc->ps_ngbr_avbl->u1_mb_a && (ps_me_ctxt->u4_left_is_skip == 1)) || +// (ps_proc->ps_ngbr_avbl->u1_mb_b && ps_top_syn->u2_mb_type == PSKIP) || +// (ps_proc->ps_ngbr_avbl->u1_mb_d && ps_top_left_syn->u2_mb_type == PSKIP) ) +// { +// if ( 0 == ih264e_analyse_skip(ps_proc, ps_me_ctxt) ) +// { +// return; +// } +// } +// } + + /********************************************************************/ + /* compute skip cost */ + /********************************************************************/ + /* See if we need to use modified sad */ + u4_use_stat_sad = (ps_proc->ps_codec->s_cfg.u4_enable_satqd == 1); + + /* init the cost of skip MB */ + skip_mb_part_info.i4_mb_cost = INT_MAX; + ime_compute_skip_cost(ps_me_ctxt, ps_proc->ps_skip_mv, &skip_mb_part_info, u4_use_stat_sad); + + + /* u4_min_sad_reached was cleared above; the searches below run only if it is still 0 (presumably ime_compute_skip_cost may set it; confirm) */ if (ps_me_ctxt->u4_min_sad_reached == 0) + { + /************************************************************/ + /* Evaluate search candidates for initial mv pt. */ + /************************************************************/ + ime_evaluate_init_srchposn_16x16(ps_me_ctxt); + + /********************************************************************/ + /* full pel motion estimation */ + /********************************************************************/ + ime_full_pel_motion_estimation_16x16(ps_me_ctxt); + + DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2), + (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2)); + + DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1); + /********************************************************************/ + /* sub pel motion estimation */ + /********************************************************************/ + if (ps_me_ctxt->u4_enable_hpel) + { + /* motion vectors in terms of full pel values */ + mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2; + mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2; + + /* moving src pointer to the converged motion vector location */ + pu1_hpel_src = 
ps_me_ctxt->pu1_ref_buf_luma + mv_x + (mv_y * i4_rec_strd); + + ps_me_ctxt->pu1_half_x = ps_proc->pu1_half_x; + ps_me_ctxt->pu1_half_y = ps_proc->pu1_half_y; + ps_me_ctxt->pu1_half_xy = ps_proc->pu1_half_xy; + ps_me_ctxt->u4_hp_buf_strd = HP_BUFF_WD; + + /* half pel search is done for both sides of full pel, + * hence half_x of width x height = 17x16 is created + * starting from left half_x of converged full pel */ + pu1_hpel_src -= 1; + + /* computing half_x */ + ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src, + ps_proc->pu1_half_x, + i4_rec_strd, + ps_me_ctxt->u4_hp_buf_strd); + + /* + * Halfpel search is done for both sides of full pel, + * hence half_y of width x height = 16x17 is created + * starting from top half_y of converged full pel + * for half_xy top_left is required + * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1 + */ + + pu1_hpel_src -= i4_rec_strd; + + /* computing half_y and half_xy */ + ps_codec->pf_ih264e_sixtap_filter_2dvh_vert( + pu1_hpel_src, ps_proc->pu1_half_y, + ps_proc->pu1_half_xy, i4_rec_strd, + ps_me_ctxt->u4_hp_buf_strd, ps_proc->ai16_pred1 + 3, + ps_me_ctxt->u4_hp_buf_strd); + + ime_sub_pel_motion_estimation_16x16(ps_me_ctxt); + } + } + + { + + /* if skip gives a better cost than other search, copy the cost, distortion and mv of the skip candidate */ + if (skip_mb_part_info.i4_mb_cost < ps_mb_part->i4_mb_cost) + { + ps_mb_part->i4_mb_cost = skip_mb_part_info.i4_mb_cost; + ps_mb_part->i4_mb_distortion = skip_mb_part_info.i4_mb_distortion; + ps_mb_part->s_mv_curr.i2_mvx = skip_mb_part_info.s_mv_curr.i2_mvx; + ps_mb_part->s_mv_curr.i2_mvy = skip_mb_part_info.s_mv_curr.i2_mvy; + } + else + { + /* + * If the current MB has a sub pel component, + * we need to copy that to the best subpel buffer + */ + if (ps_me_ctxt->u4_enable_hpel && ps_mb_part->pu1_best_hpel_buf) + { + ps_codec->pf_inter_pred_luma_copy(ps_mb_part->pu1_best_hpel_buf, + pu1_best_subpel_buf, + ps_me_ctxt->u4_hp_buf_strd, + u4_bst_spel_strd, MB_SIZE, + MB_SIZE, NULL, 
0); + } + } + } + + DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 0); + + /* update the cost, distortion and type of the mb only when ME beats the cost already stored for it */ + if (ps_me_ctxt->s_mb_part.i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost) + { + /* mb cost */ + ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->s_mb_part.i4_mb_cost; + + /* mb distortion */ + ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->s_mb_part.i4_mb_distortion; + + /* mb type */ + ps_proc->ps_cur_mb->u4_mb_type = P16x16; + } + + /* the PU fields below are written unconditionally, whether or not the mb type was updated above */ /* number of partitions */ + ps_proc->u4_num_sub_partitions = 1; + *(ps_proc->pu4_mb_pu_cnt) = 1; + + /* position in-terms of PU */ + ps_proc->ps_pu->b4_pos_x = 0; + ps_proc->ps_pu->b4_pos_y = 0; + + /* PU size: 3 for width and height (presumably size in units of 4 minus 1 for a 16x16 partition; confirm) */ + ps_proc->ps_pu->b4_wd = 3; + ps_proc->ps_pu->b4_ht = 3; + + /* ref idx */ + ps_proc->ps_pu->i1_l0_ref_idx = 0; + + /* motion vector L0 */ + ps_proc->ps_pu->s_l0_mv.i2_mvx = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx; + ps_proc->ps_pu->s_l0_mv.i2_mvy = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy; + + /* Update min sad conditions */ + if (ps_me_ctxt->u4_min_sad_reached == 1) + { + ps_proc->ps_cur_mb->u4_min_sad_reached = 1; + ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad; + } +} + +/** +******************************************************************************* +* +* @brief This function performs motion estimation for the current NMB +* +* @par Description: +* Intializes input and output pointers required by the function ih264e_compute_me +* and calls the function ih264e_compute_me in a loop to process NMBs. 
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] u4_nmb_count
+*  Number of consecutive macroblocks, starting at ps_proc->i4_mb_x, to process
+*
+* @returns none
+*
+* @remarks On return ps_proc->ps_pu, i4_mb_x, pu4_mb_pu_cnt and the src/rec/ref
+*  luma and chroma buffer pointers hold the same values they had at entry
+*
+*******************************************************************************
+*/
+void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
+{
+    /* pic pu: PU pointer at entry, restored after the loop */
+    enc_pu_t *ps_pu_begin = ps_proc->ps_pu;
+
+    /* ME map: start of the ME-done flags of the current mb row (i4_mb_y) */
+    UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
+
+    /* temp var: index of the mb inside the current group of n mbs */
+    UWORD32 u4_i;
+
+    ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
+    ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->s_left_mb_syntax_ele.u2_mb_type == PSKIP);
+
+    for (u4_i = 0; u4_i < u4_nmb_count; u4_i++)
+    {
+        /* Wait for ME map: only needed when a row above the current one exists */
+        if (ps_proc->i4_mb_y > 0)
+        {
+            /* Wait for top right ME to be done.
+             * pu1_me_map_tp_rw is the start of the flags of row (i4_mb_y - 1).
+             * NOTE(review): i4_mb_x is itself incremented at the end of every
+             * iteration, so idx below advances by 2 per mb
+             * (entry_x + 2 * u4_i + 1) before it is clamped to the last mb of
+             * the row - confirm that this is intended */
+            UWORD8 *pu1_me_map_tp_rw = ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
+
+            while (1)
+            {
+                volatile UWORD8 *pu1_buf;
+                WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
+
+                idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
+                pu1_buf = pu1_me_map_tp_rw + idx;
+                if(*pu1_buf)
+                    break;
+                ithread_yield();
+            }
+        }
+
+        ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].s_skip_mv);
+        ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
+        ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].s_pred_mv);
+
+        ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
+
+        ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad;
+        ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
+
+        ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
+        ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
+
+        /* Set the best subpel buf to the correct mb so that the buffer can be copied */
+        ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
+        ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
+
+        /* Set the min sad conditions.
+         * NOTE(review): this overwrites the u4_min_sad value taken from
+         * ps_proc->u4_min_sad a few statements above; only the codec level
+         * value survives - confirm which source is intended */
+        ps_proc->ps_cur_mb->u4_min_sad = ps_proc->ps_codec->u4_min_sad;
+        ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
+
+        /* Derive neighbor availability for the current macroblock */
+        ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
+
+        /* init me */
+        ih264e_init_me(ps_proc);
+
+        ih264e_compute_me(ps_proc);
+
+        /* update top and left structs: the top mb entries at column i4_mb_x
+         * become the top-left entries and the PU just produced becomes the
+         * left PU for the next mb */
+        {
+            mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+            mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
+            enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
+            enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
+            enc_pu_t *ps_top_mv = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
+
+            *ps_top_left_syn = *ps_top_syn;
+
+            *ps_top_left_mb_pu = *ps_top_mv;
+            *ps_left_mb_pu = *ps_proc->ps_pu;
+        }
+
+        ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
+
+        /* Copy the min sad reached info.
+         * NOTE(review): ps_cur_mb was pointed at ps_nmb_info[u4_i] above, so
+         * these are self assignments unless one of the calls in between
+         * retargets ps_cur_mb - confirm */
+        ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
+        ps_proc->ps_nmb_info[u4_i].u4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
+
+        /*
+         * To make sure that the MV map is properly sync to the
+         * cache we need to do a DDB.
+         * DATA_SYNC() is issued before the done flag of this mb is stored;
+         * the row below polls this flag in its own wait loop.
+         * NOTE(review): DATA_SYNC is presumably a memory barrier - confirm
+         * against its definition
+         */
+        {
+            DATA_SYNC();
+
+            pu1_me_map[ps_proc->i4_mb_x] = 1;
+        }
+        ps_proc->i4_mb_x++;
+
+        ps_proc->s_me_ctxt.u4_left_is_intra = 0;
+        ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type == PSKIP);
+
+        /* update buffers pointers: advance by one mb */
+        ps_proc->pu1_src_buf_luma += MB_SIZE;
+        ps_proc->pu1_rec_buf_luma += MB_SIZE;
+        ps_proc->pu1_ref_buf_luma += MB_SIZE;
+
+        /*
+         * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+         * the stride per MB is MB_SIZE
+         */
+        ps_proc->pu1_src_buf_chroma += MB_SIZE;
+        ps_proc->pu1_rec_buf_chroma += MB_SIZE;
+        ps_proc->pu1_ref_buf_chroma += MB_SIZE;
+
+        ps_proc->pu4_mb_pu_cnt += 1;
+    }
+
+
+    ps_proc->ps_pu = ps_pu_begin;
+    ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
+
+    /* update buffers pointers: rewind by the u4_nmb_count mbs advanced above */
+    ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count;
+    ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count;
+    ps_proc->pu1_ref_buf_luma -= MB_SIZE * u4_nmb_count;
+
+    /*
+     * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+     * the stride per MB is MB_SIZE
+     */
+    ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count;
+    ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count;
+    ps_proc->pu1_ref_buf_chroma -= MB_SIZE * u4_nmb_count;
+
+    ps_proc->pu4_mb_pu_cnt -= u4_nmb_count;
+}
diff --git a/encoder/ih264e_me.h b/encoder/ih264e_me.h
new file mode 100755
index 0000000..c4834a1
--- /dev/null
+++ b/encoder/ih264e_me.h
@@ -0,0 +1,278 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ *  ih264e_me.h
+ *
+ * @brief
+ *  Contains the MEDIAN helper macro and the declarations of the motion
+ *  estimation functions of the H264 encoder
+ *
+ * @author
+ *  ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264E_ME_H_
+#define IH264E_ME_H_
+
+/*****************************************************************************/
+/* Function Macros                                                           */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief      compute median of 3 elements (a, b, c) and store the output
+ *  in to result. This is used for mv prediction
+ *
+ *  @remarks    Expands to a single statement (do { } while (0)), so it can be
+ *  used as the body of an unbraced if / else; terminate the invocation with
+ *  a semicolon. Every argument is parenthesized in the expansion. Arguments
+ *  are evaluated more than once, so they must not have side effects.
+******************************************************************************
+ */
+
+#define MEDIAN(a, b, c, result)                         \
+    do {                                                \
+        if ((a) > (b)) {                                \
+            if ((b) > (c))                              \
+                (result) = (b);                         \
+            else {                                      \
+                if ((a) > (c))                          \
+                    (result) = (c);                     \
+                else                                    \
+                    (result) = (a);                     \
+            }                                           \
+        }                                               \
+        else {                                          \
+            if ((c) > (b))                              \
+                (result) = (b);                         \
+            else {                                      \
+                if ((c) > (a))                          \
+                    (result) = (c);                     \
+                else                                    \
+                    (result) = (a);                     \
+            }                                           \
+        }                                               \
+    } while (0)
+
+
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function populates the length of the codewords for motion vectors in the
+*  range (-search range, search range) in pixels
+*
+* @param[in] ps_me
+*  Pointer to me ctxt
+*
+* @param[out] pu1_mv_bits
+*  length of the codeword for all mv's
+*  (NOTE(review): the function is declared with ps_me as its only parameter;
+*  presumably this table is reached through ps_me - confirm)
+*
+* @remarks The length of the code words are derived from signed exponential
+* Golomb codes.
+*
+*******************************************************************************
+*/
+void ih264e_init_mv_bits
+    (
+        me_ctxt_t *ps_me
+    );
+
+/**
+*******************************************************************************
+*
+* @brief The function gives the skip motion vector
+*
+* @par Description:
+*  The function gives the skip motion vector
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] u4_for_me
+*  Flag supplied by the caller (NOTE(review): presumably selects the variant
+*  of the derivation used during motion estimation - confirm against the
+*  definition)
+*
+* @returns none. The function is declared void (NOTE(review): the skip mv is
+*  presumably delivered through ps_proc - confirm against the definition)
+*
+* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+*   specification.
+*
+*******************************************************************************
+*/
+void ih264e_find_skip_motion_vector
+    (
+        process_ctxt_t *ps_proc,
+        UWORD32 u4_for_me
+    );
+
+/**
+*******************************************************************************
+*
+* @brief motion vector predictor
+*
+* @par Description:
+*  The routine calculates the motion vector predictor for a given block,
+*  given the candidate MV predictors.
+*
+* @param[in] ps_left_mb_pu
+*  pointer to left mb motion vector info
+*
+* @param[in] ps_top_row_pu
+*  pointer to top & top right mb motion vector info
+*
+* @param[out] ps_pred_mv
+*  pointer to candidate predictors for the current block
+*
+* @returns none. The function is declared void; the x & y components of the
+*  MV predictor are delivered through ps_pred_mv
+*
+* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
+*   specification.
+*   Assumptions : 1. Assumes Single reference frame
+*                 2. Assumes Only partition of size 16x16
+*
+*******************************************************************************
+*/
+void ih264e_get_mv_predictor
+    (
+        enc_pu_t *ps_left_mb_pu,
+        enc_pu_t *ps_top_row_pu,
+        mv_t *ps_pred_mv
+    );
+
+/**
+*******************************************************************************
+*
+* @brief This function computes the best motion vector for the current mb
+*
+* @par Description:
+*  This function currently does nothing except set motion vectors from external
+*  source
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_compute_me
+    (
+        process_ctxt_t *ps_proc
+    );
+
+/**
+*******************************************************************************
+*
+* @brief This function initializes me ctxt
+*
+* @par Description:
+*  Before dispatching the current job to me thread, the me context associated
+*  with the job is initialized.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_me(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current NMB
+*
+* @par Description:
+*  Initializes input and output pointers required by the function ih264e_compute_me
+*  and calls the function ih264e_compute_me in a loop to process NMBs.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] u4_nmb_count
+*  Number of macroblocks to process in this call
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_compute_me_nmb
+    (
+        process_ctxt_t *ps_proc,
+        UWORD32 u4_nmb_count
+    );
+
+/**
+*******************************************************************************
+*
+* @brief This function performs MV prediction
+*
+* @par Description:
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  none
+*
+* @remarks none
+*  This function will update the MB availability since intra inter decision
+*  should be done before the call
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred
+    (
+        process_ctxt_t *ps_proc
+    );
+
+/**
+*******************************************************************************
+*
+* @brief This function approximates Pred. MV
+*
+* @par Description:
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  none
+*
+* @remarks none
+*  Motion estimation happens at nmb level. For cost calculations, mv is
+*  approximated using this function
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred_me
+    (
+        process_ctxt_t *ps_proc
+    );
+
+#endif /* IH264E_ME_H_ */
diff --git a/encoder/ih264e_modify_frm_rate.c b/encoder/ih264e_modify_frm_rate.c
new file mode 100755
index 0000000..bc0e873
--- /dev/null
+++ b/encoder/ih264e_modify_frm_rate.c
@@ -0,0 +1,240 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_modify_frm_rate.c +* +* @brief +* Functions used to modify frame rate +* +* @author +* ittiam +* +* @par List of Functions: +* - ih264e_pd_frm_rate_get_init_free_memtab() +* - ih264e_init_pd_frm_rate() +* - ih264e_update_pd_frm_rate() +* - ih264e_get_pd_avg_frm_rate() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "irc_datatypes.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ih264e_defs.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_rc_mem_interface.h" +#include "ih264e_time_stamp.h" +#include 
"ih264e_modify_frm_rate.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to init pd frame rate memtab
+*
+* @par Description
+*  Function to init pull down frame rate memtab. For GET_NUM_MEMTAB and
+*  FILL_MEMTAB the handle is first pointed at a function-static dummy state.
+*  For every function type other than GET_NUM_MEMTAB, memtab entry 0 is filled
+*  with the size/alignment/usage of pd_frm_rate_t and handed, together with the
+*  handle, to use_or_fill_base()
+*
+* @param[in] pps_pd_frm_rate
+*  pull down frame rate context
+*
+* @param[in] ps_memtab
+*  Handle to memtab (not accessed when e_func_type is GET_NUM_MEMTAB)
+*
+* @param[in] e_func_type
+*  Function type (get memtab/ update memtab)
+*
+* @returns Number of memtabs used (always 1)
+*
+* @remarks None
+*
+*******************************************************************************
+*/
+WORD32 ih264e_pd_frm_rate_get_init_free_memtab(pd_frm_rate_handle *pps_pd_frm_rate,
+                                               itt_memtab_t *ps_memtab,
+                                               ITT_FUNC_TYPE_E e_func_type)
+{
+    WORD32 i4_mem_tab_idx = 0;
+    static pd_frm_rate_t s_temp_pd_frm_rate_t;
+
+    /* Hack for al alloc, during which we dont have any state memory.
+      Dereferencing can cause issues */
+    if (e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+        (*pps_pd_frm_rate) = &s_temp_pd_frm_rate_t;
+
+    /* for src rate control state structure */
+    if (e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(pd_frm_rate_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**) pps_pd_frm_rate, e_func_type);
+    }
+    i4_mem_tab_idx++;
+
+    return (i4_mem_tab_idx);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Number of usable slots of u4_cur_frm_rate[] for a given input rate
+*
+* @par Description
+*  One slot is used per frame of a one second window, i.e.
+*  u4_input_frm_rate / 1000. The value is clamped to [1, MAX_NUM_FRAME] so that
+*  the sample buffer is never indexed out of bounds and the averaging divisor
+*  can never stay at zero
+*
+* @param[in] u4_input_frm_rate
+*  Input frame rate in frame per 1000sec
+*
+* @returns window length in frames, in the range [1, MAX_NUM_FRAME]
+*
+*******************************************************************************
+*/
+static UWORD32 ih264e_pd_frm_rate_window(UWORD32 u4_input_frm_rate)
+{
+    UWORD32 u4_window = u4_input_frm_rate / 1000;
+
+    if (u4_window < 1)
+    {
+        u4_window = 1;
+    }
+    if (u4_window > MAX_NUM_FRAME)
+    {
+        u4_window = MAX_NUM_FRAME;
+    }
+
+    return u4_window;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Initializes the pull down frame rate state structure based on input
+*  frame rate
+*
+* @par Description
+*  Initializes the pull down frame rate state structure based on input frame rate.
+*  Every slot of the one second window is preset to the input frame rate and
+*  the write index and the encoded frame count are reset to zero
+*
+* @param[in] ps_pd_frm_rate
+*  Pull down frame rate context
+*
+* @param[in] u4_input_frm_rate
+*  Input frame rate in frame per 1000sec
+*
+* @returns none
+*
+* @remarks The window is limited to MAX_NUM_FRAME slots; rates above
+*  MAX_NUM_FRAME frames per second no longer write past u4_cur_frm_rate[]
+*
+*******************************************************************************
+*/
+void ih264e_init_pd_frm_rate(pd_frm_rate_t *ps_pd_frm_rate,
+                             UWORD32 u4_input_frm_rate)
+{
+    UWORD32 u4_i;
+    UWORD32 u4_window = ih264e_pd_frm_rate_window(u4_input_frm_rate);
+
+    ps_pd_frm_rate->u4_input_frm_rate = u4_input_frm_rate;
+
+    for (u4_i = 0; u4_i < u4_window; u4_i++)
+    {
+        ps_pd_frm_rate->u4_cur_frm_rate[u4_i] = u4_input_frm_rate;
+    }
+
+    ps_pd_frm_rate->u4_frm_num = 0;
+
+    ps_pd_frm_rate->u4_tot_frm_encoded = 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update pull down frame rate
+*
+* @par Description
+*  For each frame a run time frame rate value is sent based on whether a frame
+*  is skipped or not. If it is skipped for pull down then the current frame
+*  rate for the pull down period is signaled as 4/5th of the original frame
+*  rate. Thus when this is averaged the frame rate gradually switches from the
+*  input frame rate to 4/5th of input frame rate as and when more 3:2 pull
+*  down patterns are detected
+*
+* @param[in] ps_pd_frm_rate
+*  Pull down frame rate context
+*
+* @param[in] u4_cur_frm_rate
+*  Frame rate of the current frame, stored in the next slot of the window
+*
+* @returns none
+*
+* @remarks The slot index wraps at the window length and the encoded frame
+*  count saturates at the window length
+*
+*******************************************************************************
+*/
+void ih264e_update_pd_frm_rate(pd_frm_rate_t *ps_pd_frm_rate,
+                               UWORD32 u4_cur_frm_rate)
+{
+    UWORD32 u4_window = ih264e_pd_frm_rate_window(ps_pd_frm_rate->u4_input_frm_rate);
+
+    /* Never write outside the window, even if the index is stale */
+    if (ps_pd_frm_rate->u4_frm_num >= u4_window)
+    {
+        ps_pd_frm_rate->u4_frm_num = 0;
+    }
+
+    ps_pd_frm_rate->u4_cur_frm_rate[ps_pd_frm_rate->u4_frm_num] = u4_cur_frm_rate;
+
+    ps_pd_frm_rate->u4_frm_num++;
+
+    /* Increment the number of valid samples until the window is full */
+    if (ps_pd_frm_rate->u4_tot_frm_encoded < u4_window)
+    {
+        ps_pd_frm_rate->u4_tot_frm_encoded++;
+    }
+
+    /* Reset frm_num to zero at the end of the window */
+    if (ps_pd_frm_rate->u4_frm_num >= u4_window)
+    {
+        ps_pd_frm_rate->u4_frm_num = 0;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief returns average frame rate in 1 sec duration
+*
+* @par Description
+*  Averages the last N frame in period(1 sec) and then gives that
+*  as the current frames frame rate. Thus this averages out the sudden
+*  variation in frame rate
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frame rate context
+*
+* @returns average frame rate; the input frame rate when no frame has been
+*  recorded yet (this avoids a division by zero)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+UWORD32 ih264e_get_pd_avg_frm_rate(pd_frm_rate_t *ps_pd_frm_rate)
+{
+    UWORD32 u4_i;
+    UWORD32 u4_sum = 0;
+    UWORD32 u4_cnt = ps_pd_frm_rate->u4_tot_frm_encoded;
+
+    /* Never read beyond the sample buffer */
+    if (u4_cnt > MAX_NUM_FRAME)
+    {
+        u4_cnt = MAX_NUM_FRAME;
+    }
+
+    /* Nothing recorded yet: every slot was preset to the input frame rate */
+    if (u4_cnt == 0)
+    {
+        return ps_pd_frm_rate->u4_input_frm_rate;
+    }
+
+    for (u4_i = 0; u4_i < u4_cnt; u4_i++)
+    {
+        u4_sum += ps_pd_frm_rate->u4_cur_frm_rate[u4_i];
+    }
+
+    return u4_sum / u4_cnt;
+}
diff --git a/encoder/ih264e_modify_frm_rate.h b/encoder/ih264e_modify_frm_rate.h
new file mode 100755
index 0000000..c301e2c
--- /dev/null
+++ b/encoder/ih264e_modify_frm_rate.h
@@ -0,0 +1,182 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_modify_frm_rate.h
+*
+* @brief
+*  Functions declarations used to modify frame rate
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_MODIFY_FRM_RATE_H_
+#define IH264E_MODIFY_FRM_RATE_H_
+
+/*****************************************************************************/
+/* Constant Definitions                                                      */
+/*****************************************************************************/
+
+#define MAX_NUM_FRAME 120 /* number of entries of pd_frm_rate_t::u4_cur_frm_rate */
+
+
+/*****************************************************************************/
+/* Structures                                                                */
+/*****************************************************************************/
+typedef struct pd_frm_rate_t
+{
+    /*
+     * The input frame rate set in the encoder (frames per 1000 sec)
+     */
+    UWORD32 u4_input_frm_rate;
+
+    /*
+     * Frame rate of current frame due to pull down.
+     * One sample per frame, MAX_NUM_FRAME entries in total
+     */
+    UWORD32 u4_cur_frm_rate[MAX_NUM_FRAME];
+
+    /*
+     * current frame num in the above buffer
+     * (index of the entry that the next update writes)
+     */
+    UWORD32 u4_frm_num;
+
+    /*
+     * Total number of frames encoded.
+     * if greater than input frame rate stays at input frame rate
+     * (also used as the number of samples that are averaged)
+     */
+    UWORD32 u4_tot_frm_encoded;
+
+}pd_frm_rate_t;
+
+typedef struct pd_frm_rate_t *pd_frm_rate_handle;
+
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to init pd frame rate memtab
+*
+* @par Description
+*  Function to init pull down frame rate memtab
+*
+* @param[in] pps_pd_frm_rate
+*  pull down frame rate context
+*
+* @param[in] ps_memtab
+*  Handle to memtab
+*
+* @param[in] e_func_type
+*  Function type (get memtab/ update memtab)
+*
+* @returns Number of memtabs used
+*
+* @remarks None
+*
+*******************************************************************************
+*/
+WORD32 ih264e_pd_frm_rate_get_init_free_memtab(pd_frm_rate_handle *pps_pd_frm_rate,
+                                               itt_memtab_t *ps_memtab,
+                                               ITT_FUNC_TYPE_E e_func_type);
+/**
+*******************************************************************************
+*
+* @brief Initializes the pull down frame rate state structure based on input
+*  frame rate
+*
+* @par Description
+*  Initializes the pull down frame rate state structure based on input frame rate
+*
+* @param[in] ps_pd_frm_rate
+*  Pull down frame rate context
+*
+* @param[in] u4_input_frm_rate
+*  Input frame rate in frame per 1000sec
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_pd_frm_rate(pd_frm_rate_handle ps_pd_frm_rate,
+                             UWORD32 u4_input_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update pull down frame rate
+*
+* @par Description
+*  For each frame a run time frame rate value is sent based on whether a frame
+*  is skipped or not. If it is skipped for pull down then the current frame
+*  rate for the pull down period is signaled as 4/5th of the original frame
+*  rate. Thus when this is averaged the frame rate gradually switches from the
+*  input frame rate to 4/5th of input frame rate as and when more 3:2 pull
+*  down patterns are detected
+*
+* @param[in] ps_pd_frm_rate
+*  Pull down frame rate context
+*
+* @param[in] u4_cur_frm_rate
+*  Frame rate to record for the current frame
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_pd_frm_rate(pd_frm_rate_handle ps_pd_frm_rate,
+                               UWORD32 u4_cur_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief returns average frame rate in 1 sec duration
+*
+* @par Description
+*  Averages the last N frame in period(1 sec) and then gives that
+*  as the current frames frame rate. Thus this averages out the sudden
+*  variation in frame rate
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frame rate context
+*
+* @returns average frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+UWORD32 ih264e_get_pd_avg_frm_rate(pd_frm_rate_handle ps_pd_frm_rate);
+
+#endif /* IH264E_MODIFY_FRM_RATE_H_ */
diff --git a/encoder/ih264e_process.c b/encoder/ih264e_process.c
new file mode 100755
index 0000000..9a468e9
--- /dev/null
+++ b/encoder/ih264e_process.c
@@ -0,0 +1,2369 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_process.c +* +* @brief +* Contains functions for codec thread +* +* @author +* Harish +* +* @par List of Functions: +* - ih264e_generate_sps_pps() +* - ih264e_init_entropy_ctxt() +* - ih264e_entropy() +* - ih264e_pack_header_data() +* - ih264e_update_proc_ctxt() +* - ih264e_init_proc_ctxt() +* - ih264e_pad_recon_buffer() +* - ih264e_dblk_pad_hpel_processing_n_mbs() +* - ih264e_process() +* - ih264e_set_rc_pic_params() +* - ih264e_update_rc_post_enc() +* - ih264e_process_thread() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <assert.h> + +/* User include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_debug.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include 
"ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_platform_macros.h" +#include "ih264_macros.h" +#include "ih264_error.h" +#include "ih264_buf_mgr.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ih264_structs.h" +#include "ih264_common_tables.h" +#include "ih264_list.h" +#include "ih264e_defs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_process.h" +#include "ithread.h" +#include "ih264e_intra_modes_eval.h" +#include "ih264e_encode_header.h" +#include "ih264e_globals.h" +#include "ih264e_config.h" +#include "ih264e_trace.h" +#include "ih264e_statistics.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_cavlc.h" +#include "ih264e_deblk.h" +#include "ih264e_me.h" +#include "ih264e_debug.h" +#include "ih264e_process.h" +#include "ih264e_master.h" +#include "ih264e_utils.h" +#include "irc_mem_req_and_acq.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "irc_rate_control_api.h" +#include "ih264e_platform_macros.h" +#include "ih264_padding.h" +#include "ime_statistics.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief This function generates sps, pps set on request +* +* @par Description +* When the encoder is set in header generation mode, the following function +* is called. This generates sps and pps headers and returns the control back +* to caller. 
+*
+* @param[in] ps_codec
+*  pointer to codec context
+*
+* @return error status accumulated (OR-ed) over the sps and the pps generation
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
+{
+    /* ping-pong selector: parity of the encode api call count */
+    WORD32 i4_set_sel = ps_codec->i4_encode_api_call_cnt & 1;
+
+    /* entropy context of the first process context of the selected set */
+    entropy_ctxt_t *ps_ent_ctxt = &ps_codec->as_process[i4_set_sel * MAX_PROCESS_THREADS].s_entropy;
+
+    /* bitstream that receives the headers */
+    bitstrm_t *ps_strm = ps_ent_ctxt->ps_bitstrm;
+
+    /* output buffer of the selected set */
+    out_buf_t *ps_dst_buf = &ps_codec->as_out_buf[i4_set_sel];
+
+    /* parameter sets, resolved once the ids have been wrapped */
+    sps_t *ps_cur_sps;
+    pps_t *ps_cur_pps;
+
+    /* attach the output buffer to the bitstream */
+    ih264e_bitstrm_init(ps_strm, ps_dst_buf->s_bits_buf.pv_buf, ps_dst_buf->s_bits_buf.u4_bufsize);
+
+    /* wrap the ids into their tables; the ids themselves are not advanced here */
+    ps_codec->i4_pps_id %= MAX_PPS_CNT;
+    ps_codec->i4_sps_id %= MAX_SPS_CNT;
+
+    /* fill the sps first, then the pps */
+    ps_cur_sps = &ps_codec->ps_sps_base[ps_codec->i4_sps_id];
+    ih264e_populate_sps(ps_codec, ps_cur_sps);
+
+    ps_cur_pps = &ps_codec->ps_pps_base[ps_codec->i4_pps_id];
+    ih264e_populate_pps(ps_codec, ps_cur_pps);
+
+    /* write both headers, collecting their status in the entropy context */
+    ps_ent_ctxt->i4_error_code = IH264E_SUCCESS;
+    ps_ent_ctxt->i4_error_code |= ih264e_generate_sps(ps_strm, ps_cur_sps);
+    ps_ent_ctxt->i4_error_code |= ih264e_generate_pps(ps_strm, ps_cur_pps, ps_cur_sps);
+
+    /* report the number of bytes produced */
+    ps_dst_buf->s_bits_buf.u4_bytes = ps_strm->u4_strm_buf_offset;
+
+    return ps_ent_ctxt->i4_error_code;
+}
+
+/**
+*******************************************************************************
+*
+* @brief initialize entropy context.
+*
+* @par Description:
+*  Prepares the entropy context of a job before entropy coding is invoked:
+*  derives the start and end mb addresses from the mb position and mb count,
+*  looks up the slice index of the first mb, raises the start of frame flag
+*  when the first mb address is 0 and, for a job that begins at the first mb
+*  of a row, points the packed mb coefficient / header data at that row.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns error status (always IH264E_SUCCESS)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
+{
+    /* entropy context that is being prepared */
+    entropy_ctxt_t *ps_ent_ctxt = &ps_proc->s_entropy;
+
+    /* codec context, source of the per row data sizes */
+    codec_t *ps_enc = ps_proc->ps_codec;
+
+    /* raster address of the first mb and of the mb following the last one */
+    ps_ent_ctxt->i4_mb_start_add = ps_ent_ctxt->i4_mb_y * ps_ent_ctxt->i4_wd_mbs + ps_ent_ctxt->i4_mb_x;
+    ps_ent_ctxt->i4_mb_end_add = ps_ent_ctxt->i4_mb_start_add + ps_ent_ctxt->i4_mb_cnt;
+
+    /* slice the first mb belongs to */
+    ps_ent_ctxt->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_ent_ctxt->i4_mb_start_add];
+
+    /* the start of frame flag is raised only for the job starting at address 0 */
+    if (0 == ps_ent_ctxt->i4_mb_start_add)
+    {
+        ps_ent_ctxt->i4_sof = 1;
+    }
+
+    /* at the first mb of a row, rebase the packed data pointers to that row */
+    if (0 == ps_ent_ctxt->i4_mb_x)
+    {
+        UWORD8 *pu1_coeff_base = (UWORD8 *)ps_ent_ctxt->pv_pic_mb_coeff_data;
+        UWORD8 *pu1_hdr_base = (UWORD8 *)ps_ent_ctxt->pv_pic_mb_header_data;
+
+        ps_ent_ctxt->pv_mb_coeff_data = pu1_coeff_base + ps_ent_ctxt->i4_mb_y * ps_enc->u4_size_coeff_data;
+        ps_ent_ctxt->pv_mb_header_data = pu1_hdr_base + ps_ent_ctxt->i4_mb_y * ps_enc->u4_size_header_data;
+    }
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief entry point for entropy coding
+*
+* @par Description
+*  This function calls lower level functions to perform entropy coding for a
+*  group (n rows) of mb's.
After encoding 1 row of mb's, the function takes +* back the control, updates the ctxt and calls lower level functions again. +* This process is repeated till all the rows or group of mb's (which ever is +* minimum) are coded +* +* @param[in] ps_proc +* process context +* +* @returns error status +* +* @remarks +* +******************************************************************************* +*/ +#define GET_NUM_BITS(ps_bitstream) ((ps_bitstream->u4_strm_buf_offset << 3) + WORD_SIZE - ps_bitstream->i4_bits_left_in_cw) + +IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) +{ + /* codec context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* entropy context */ + entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; + + /* sps */ + sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT); + + /* pps */ + pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT); + + /* slice header */ + slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT); + + /* slice type */ + WORD32 i4_slice_type = ps_proc->i4_slice_type; + + /* Bitstream structure */ + bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; + + /* output buff */ + out_buf_t s_out_buf; + + /* proc map */ + UWORD8 *pu1_proc_map; + + /* entropy map */ + UWORD8 *pu1_entropy_map_curr; + + /* proc base idx */ + WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1; + + /* temp var */ + WORD32 i4_wd_mbs, i4_ht_mbs; + UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx; + + /********************************************************************/ + /* BEGIN INIT */ + /********************************************************************/ + + /* entropy encode start address */ + u4_mb_idx = ps_entropy->i4_mb_start_add; + + /* entropy encode end address */ + u4_mb_end_idx = ps_entropy->i4_mb_end_add; + + /* width in mbs */ + i4_wd_mbs = ps_entropy->i4_wd_mbs; + + /* height in mbs */ + i4_ht_mbs = ps_entropy->i4_ht_mbs; + + /* total mb 
cnt */ + u4_mb_cnt = i4_wd_mbs * i4_ht_mbs; + + /* proc map */ + pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; + + /* entropy map */ + pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; + + /********************************************************************/ + /* @ start of frame / slice, */ + /* initialize the output buffer, */ + /* initialize the bit stream buffer, */ + /* check if sps and pps headers have to be generated, */ + /* populate and generate slice header */ + /********************************************************************/ + if (ps_entropy->i4_sof) + { + /********************************************************************/ + /* initialize the output buffer */ + /********************************************************************/ + s_out_buf = ps_codec->as_out_buf[ctxt_sel]; + + /* is last frame to encode */ + s_out_buf.u4_is_last = ps_entropy->u4_is_last; + + /* frame idx */ + s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high; + s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low; + + /********************************************************************/ + /* initialize the bit stream buffer */ + /********************************************************************/ + ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize); + + /********************************************************************/ + /* BEGIN HEADER GENERATION */ + /********************************************************************/ + if (1 == ps_entropy->i4_gen_header) + { + /* generate sps */ + ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps); + + /* generate pps */ + ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps); + + /* reset i4_gen_header */ + ps_entropy->i4_gen_header = 0; + } + + /* populate slice header */ + ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps); + + /* generate slice header 
*/ + ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr, + ps_pps, ps_sps); + + /* once start of frame / slice is done, you can reset it */ + /* it is the responsibility of the caller to set this flag */ + ps_entropy->i4_sof = 0; + } + + /* begin entropy coding for the mb set */ + while (u4_mb_idx < u4_mb_end_idx) + { + /* init ptrs/indices */ + if (ps_entropy->i4_mb_x == i4_wd_mbs) + { + ps_entropy->i4_mb_y ++; + ps_entropy->i4_mb_x = 0; + + /* packed mb coeff data */ + ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) + + ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data; + + /* packed mb header data */ + ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) + + ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; + + /* proc map */ + pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; + + /* entropy map */ + pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; + } + + DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y); + ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x); + ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y); + + /* wait until the curr mb is core coded */ + /* The wait for curr mb to be core coded is essential when entropy is launched + * as a separate job + */ + while (1) + { + volatile UWORD8 *pu1_buf1; + WORD32 idx = ps_entropy->i4_mb_x; + + pu1_buf1 = pu1_proc_map + idx; + if(*pu1_buf1) + break; + ithread_yield(); + } + + /* write mb layer */ + ps_codec->pf_write_mb_syntax_layer[i4_slice_type](ps_entropy); + + /* set entropy map */ + pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1; + + u4_mb_idx ++; + ps_entropy->i4_mb_x ++; + + if (ps_entropy->i4_mb_x == i4_wd_mbs) + { + /* if slices are enabled */ + if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS) + { + /* current slice index */ + WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx; + + /* slice map */ + UWORD8 
*pu1_slice_idx = ps_entropy->pu1_slice_idx; + + /* No need to open a slice at end of frame. The current slice can be closed at the time + * of signaling eof flag. + */ + if ( (u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx != pu1_slice_idx[u4_mb_idx])) + { + /* mb skip run */ + if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run) + { + if (*ps_entropy->pi4_mb_skip_run) + { + PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run"); + *ps_entropy->pi4_mb_skip_run = 0; + } + } + + /* put rbsp trailing bits for the previous slice */ + ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); + + /* update slice header pointer */ + i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx]; + ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx; + ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (i4_curr_slice_idx % MAX_SLICE_HDR_CNT); + + /* populate slice header */ + ps_entropy->i4_mb_start_add = u4_mb_idx; + ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps); + + /* generate slice header */ + ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr, + ps_pps, ps_sps); + } + } + + /* Dont execute any further instructions until store synchronization took place */ + DATA_SYNC(); + } + } + + /* check for eof */ + if (u4_mb_idx == u4_mb_cnt) + { + /* set end of frame flag */ + ps_entropy->i4_eof = 1; + } + + if (ps_entropy->i4_eof) + { + /* mb skip run */ + if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run) + { + if (*ps_entropy->pi4_mb_skip_run) + { + PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run"); + *ps_entropy->pi4_mb_skip_run = 0; + } + } + + /* put rbsp trailing bits */ + ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); + + /* update current frame stats to rc library */ + if (IVE_RC_NONE != ps_codec->s_cfg.e_rc_mode) + { + /* number of bytes to stuff */ + WORD32 i4_stuff_bytes; + + /* update */ + 
i4_stuff_bytes = ih264e_update_rc_post_enc(ps_codec, ctxt_sel, ps_proc->i4_pic_cnt); + + /* cbr rc - house keeping */ + if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel]) + { + ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0; + } + else if (i4_stuff_bytes) + { + /* add filler nal units */ + ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes); + } + } + + /********************************************************************/ + /* signal the output */ + /********************************************************************/ + ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = ps_entropy->ps_bitstrm->u4_strm_buf_offset; + + DEBUG("entropy status %x", ps_entropy->i4_error_code); + } + + /* allow threads to dequeue entropy jobs */ + ps_codec->au4_entropy_thread_active[ctxt_sel] = 0; + + return ps_entropy->i4_error_code; +} + +/** +******************************************************************************* +* +* @brief Packs header information of a mb in to a buffer +* +* @par Description: +* After the deciding the mode info of a macroblock, the syntax elements +* associated with the mb are packed and stored. The entropy thread unpacks +* this buffer and generates the end bit stream. 
+* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc) +{ + /* curr mb type */ + UWORD32 u4_mb_type = ps_proc->u4_mb_type; + + /* pack mb syntax layer of curr mb (used for entropy coding) */ + if (u4_mb_type == I4x4) + { + /* pointer to mb header storage space */ + UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; + + /* temp var */ + WORD32 i4, byte; + + /* mb type plus mode */ + *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type; + + /* cbp */ + *pu1_ptr++ = ps_proc->u4_cbp; + + /* mb qp delta */ + *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; + + /* sub mb modes */ + for (i4 = 0; i4 < 16; i4 ++) + { + byte = 0; + + if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == + ps_proc->au1_intra_luma_mb_4x4_modes[i4]) + { + byte |= 1; + } + else + { + + if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < + ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) + { + byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1); + } + else + { + byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1; + } + } + + i4++; + + if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == + ps_proc->au1_intra_luma_mb_4x4_modes[i4]) + { + byte |= 16; + } + else + { + + if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < + ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) + { + byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5); + } + else + { + byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5; + } + } + + *pu1_ptr++ = byte; + } + + /* end of mb layer */ + ps_proc->pv_mb_header_data = pu1_ptr; + } + else if (u4_mb_type == I16x16) + { + /* pointer to mb header storage space */ + UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; + + /* mb type plus mode */ + *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type; + + /* cbp */ + 
*pu1_ptr++ = ps_proc->u4_cbp; + + /* mb qp delta */ + *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; + + /* end of mb layer */ + ps_proc->pv_mb_header_data = pu1_ptr; + } + else if (u4_mb_type == P16x16) + { + /* pointer to mb header storage space */ + UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; + + WORD16 *i2_mv_ptr; + + /* mb type plus mode */ + *pu1_ptr++ = u4_mb_type; + + /* cbp */ + *pu1_ptr++ = ps_proc->u4_cbp; + + /* mb qp delta */ + *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; + + i2_mv_ptr = (WORD16 *)pu1_ptr; + + *i2_mv_ptr++ = ps_proc->ps_pu->s_l0_mv.i2_mvx - ps_proc->ps_pred_mv->i2_mvx; + + *i2_mv_ptr++ = ps_proc->ps_pu->s_l0_mv.i2_mvy - ps_proc->ps_pred_mv->i2_mvy; + + /* end of mb layer */ + ps_proc->pv_mb_header_data = i2_mv_ptr; + } + else if (u4_mb_type == PSKIP) + { + /* pointer to mb header storage space */ + UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; + + /* mb type plus mode */ + *pu1_ptr++ = u4_mb_type; + + /* end of mb layer */ + ps_proc->pv_mb_header_data = pu1_ptr; + } + + return IH264E_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief update process context after encoding an mb. This involves preserving +* the current mb information for later use, initialize the proc ctxt elements to +* encode next mb. +* +* @par Description: +* This function performs house keeping tasks after encoding an mb. +* After encoding an mb, various elements of the process context needs to be +* updated to encode the next mb. For instance, the source, recon and reference +* pointers, mb indices have to be adjusted to the next mb. The slice index of +* the current mb needs to be updated. If mb qp modulation is enabled, then if +* the qp changes the quant param structure needs to be updated. Also to encoding +* the next mb, the current mb info is used as part of mode prediction or mv +* prediction. Hence the current mb info has to preserved at top/top left/left +* locations. 
+* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc) +{ + /* error status */ + WORD32 error_status = IH264_SUCCESS; + + /* codec context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* curr mb indices */ + WORD32 i4_mb_x = ps_proc->i4_mb_x; + WORD32 i4_mb_y = ps_proc->i4_mb_y; + + /* mb syntax elements of neighbors */ + mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele; + mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x; + mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele; + + /* curr mb type */ + UWORD32 u4_mb_type = ps_proc->u4_mb_type; + + /* curr mb type */ + UWORD32 u4_is_intra = ps_proc->u4_is_intra; + + /* width in mbs */ + WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; + + /*height in mbs*/ + WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs; + + /* proc map */ + UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs); + + /* deblk context */ + deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; + + /* deblk bs context */ + bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); + + /* top row motion vector info */ + enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x; + + /* top left mb motion vector */ + enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu; + + /* left mb motion vector */ + enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu; + + /* sub mb modes */ + UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4); + +// /* zero mv */ +// mv_t zero_mv = {0, 0}; + + /* Pad the MB to support non standard sizes */ + UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd; + UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht; + + /*************************************************************/ + /* During MV prediction, when top right mb is not available, */ 
+ /* top left mb info. is used for prediction. Hence the curr */ + /* top, which will be top left for the next mb needs to be */ + /* preserved before updating it with curr mb info. */ + /*************************************************************/ + + /* mb type, mb class, csbp */ + *ps_top_left_syn = *ps_top_syn; + + if (ps_proc->i4_slice_type == PSLICE) + { + /*****************************************/ + /* update top left with top info results */ + /*****************************************/ + + /* mv */ + *ps_top_left_mb_pu = *ps_top_row_pu; + } + + /*************************************************/ + /* update top and left with curr mb info results */ + /*************************************************/ + + /* mb type */ + ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type; + + /* mb class */ + ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra; + + /* csbp */ + ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp; + + /* distortion */ + ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion; + + if (u4_is_intra) + { + /* mb / sub mb modes */ + if (I16x16 == u4_mb_type) + { + pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode; + } + else if (I4x4 == u4_mb_type) + { + ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); + ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); + } + else if (I8x8 == u4_mb_type) + { + memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); + memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); + } + + if (ps_proc->i4_slice_type == PSLICE) + { + /* mv */ + *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); + +// /* reset ngbr mv's */ +// ps_top_row_pu->i1_l0_ref_idx = -1; +// ps_top_row_pu->s_l0_mv = zero_mv; +// +// *ps_left_mb_pu = *ps_top_row_pu; + } + } + else + { + /* mv */ + 
*ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); + } + + /* + * Mark that the MB has been coded intra + * So that future AIRs can skip it + */ + ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra; + + /**************************************************/ + /* pack mb header info. for entropy coding */ + /**************************************************/ + ih264e_pack_header_data(ps_proc); + + /* update previous mb qp */ + ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp; + + /* store qp */ + ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp; + + /* + * We need to sync the cache to make sure that the nmv content of proc + * is updated to cache properly + */ + DATA_SYNC(); + + /* Just before finishing the row, enqueue the job in to entropy queue. + * The master thread depending on its convenience shall dequeue it and + * performs entropy. + * + * WARN !! Placing this block post proc map update can cause queuing of + * entropy jobs in out of order. + */ + if (i4_mb_x == i4_wd_mbs - 1) + { + /* job structures */ + job_t s_job; + + /* job class */ + s_job.i4_cmd = CMD_ENTROPY; + + /* number of mbs to be processed in the current job */ + s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs; + + /* job start index x */ + s_job.i2_mb_x = 0; + + /* job start index y */ + s_job.i2_mb_y = ps_proc->i4_mb_y; + + /* proc base idx */ + s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt & 1) ? 
(MAX_PROCESS_CTXT / 2): 0 ; + + /* queue the job */ + error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1); + + if(ps_proc->i4_mb_y == (i4_ht_mbs - 1)) + ih264_list_terminate(ps_codec->pv_entropy_jobq); + } + + /* update proc map */ + pu1_proc_map[i4_mb_x] = 1; + + /**************************************************/ + /* update proc ctxt elements for encoding next mb */ + /**************************************************/ + /* update indices */ + i4_mb_x ++; + ps_proc->i4_mb_x = i4_mb_x; + + if (ps_proc->i4_mb_x == i4_wd_mbs) + { + ps_proc->i4_mb_y++; + ps_proc->i4_mb_x = 0; + } + + /* update slice index */ + ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x]; + + /* update buffers pointers */ + ps_proc->pu1_src_buf_luma += MB_SIZE; + ps_proc->pu1_rec_buf_luma += MB_SIZE; + ps_proc->pu1_ref_buf_luma += MB_SIZE; + + /* + * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, + * the stride per MB is MB_SIZE + */ + ps_proc->pu1_src_buf_chroma += MB_SIZE; + ps_proc->pu1_rec_buf_chroma += MB_SIZE; + ps_proc->pu1_ref_buf_chroma += MB_SIZE; + + /* pad right edge */ + if (u4_pad_right_sz && (ps_proc->i4_mb_x == i4_wd_mbs - 1)) + { + ih264_pad_right_luma( + ps_proc->pu1_src_buf_luma + MB_SIZE - u4_pad_right_sz, + ps_proc->i4_src_strd, MB_SIZE, u4_pad_right_sz); + + ih264_pad_right_chroma( + ps_proc->pu1_src_buf_chroma + MB_SIZE - u4_pad_right_sz, + ps_proc->i4_src_strd, BLK8x8SIZE, u4_pad_right_sz); + } + + /* pad bottom edge */ + if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == i4_ht_mbs - 1) && + ps_proc->i4_mb_x != 0) + { + ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd, + ps_proc->i4_src_strd, MB_SIZE, u4_pad_bottom_sz); + + ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd / 2, + ps_proc->i4_src_strd, MB_SIZE, (u4_pad_bottom_sz / 2)); + } + + /* Reset cost, distortion params */ + 
ps_proc->i4_mb_cost = INT_MAX; + ps_proc->i4_mb_distortion = SHRT_MAX; + + ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt; + + ps_proc->pu4_mb_pu_cnt += 1; + + /* deblk ctxts */ + if (ps_proc->u4_disable_deblock_level != 1) + { + /* indices */ + ps_bs->i4_mb_x = ps_proc->i4_mb_x; + ps_bs->i4_mb_y = ps_proc->i4_mb_y; + +#ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */ + ps_deblk->i4_mb_x ++; + + ps_deblk->pu1_cur_pic_luma += MB_SIZE; + /* + * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, + * the stride per MB is MB_SIZE + */ + ps_deblk->pu1_cur_pic_chroma += MB_SIZE; +#endif + } + + return error_status; +} + +/** +******************************************************************************* +* +* @brief initialize process context. +* +* @par Description: +* Before dispatching the current job to process thread, the process context +* associated with the job is initialized. Usually every job aims to encode one +* row of mb's. Basing on the row indices provided by the job, the process +* context's buffer ptrs, slice indices and other elements that are necessary +* during core-coding are initialized. 
+* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc) +{ + /* codec context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* nmb processing context*/ + n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt; + + /* indices */ + WORD32 i4_mb_x, i4_mb_y; + + /* strides */ + WORD32 i4_src_strd = ps_proc->i4_src_strd; + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + + /* quant params */ + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + + /* deblk ctxt */ + deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; + + /* deblk bs context */ + bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); + + /* Pointer to mv_buffer of current frame */ + mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf; + + /* Pointers for color space conversion */ + UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base; + + /* Pad the MB to support non standard sizes */ + UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht; + + /********************************************************************/ + /* BEGIN INIT */ + /********************************************************************/ + + i4_mb_x = ps_proc->i4_mb_x; + i4_mb_y = ps_proc->i4_mb_y; + + /* Number of mbs processed in one loop of process function */ + ps_proc->i4_nmb_ntrpy = (ps_proc->i4_wd_mbs > MAX_NMB) ? MAX_NMB : ps_proc->i4_wd_mbs; + ps_proc->u4_nmb_me = (ps_proc->i4_wd_mbs > MAX_NMB)? 
MAX_NMB : ps_proc->i4_wd_mbs; + + /* init buffer pointers */ + ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE); + ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * BLK8x8SIZE); + ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); + ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); + ps_proc->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); + ps_proc->pu1_ref_buf_chroma = ps_proc->pu1_ref_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); + + /* + * Do color space conversion + * NOTE : We assume there that the number of MB's to process will not span multiple rows + */ + switch (ps_codec->s_cfg.e_inp_color_fmt) + { + case IV_YUV_420SP_UV: + case IV_YUV_420SP_VU: + break; + + case IV_YUV_420P : + pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) + + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); + + pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) + + ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE); + + pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) + + ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE); + + ps_codec->pf_ih264e_conv_420p_to_420sp( + pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base, + ps_proc->pu1_src_buf_luma, + ps_proc->pu1_src_buf_chroma, MB_SIZE, + ps_proc->i4_wd_mbs * MB_SIZE, + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0], + ps_proc->s_inp_buf.s_raw_buf.au4_strd[1], + ps_proc->s_inp_buf.s_raw_buf.au4_strd[2], + ps_proc->i4_src_strd, ps_proc->i4_src_strd, 1); + break; + + case IV_YUV_422ILE : + pu1_y_buf_base = (UWORD8 
*)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2) + + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); + + ps_codec->pf_ih264e_fmt_conv_422i_to_420sp( + ps_proc->pu1_src_buf_luma, + ps_proc->pu1_src_buf_chroma, + ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base, + ps_proc->i4_wd_mbs * MB_SIZE, MB_SIZE, + ps_proc->i4_src_strd, ps_proc->i4_src_strd, + ps_proc->i4_src_strd, + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1); + break; + + default: + break; + } + + /* pad bottom edge */ + if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0) + { + ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd, + ps_proc->i4_src_strd, MB_SIZE, u4_pad_bottom_sz); + + ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd / 2, + ps_proc->i4_src_strd, MB_SIZE, (u4_pad_bottom_sz / 2)); + } + + /* packed mb coeff data */ + ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data; + + /* packed mb header data */ + ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data; + + /* slice index */ + ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x]; + + /*********************************************************************/ + /* ih264e_init_quant_params() routine is called at the pic init level*/ + /* this would have initialized the qp. */ + /* TODO_LATER: currently it is assumed that quant params donot change*/ + /* across mb's. 
When they do calculate update ps_qp_params accordingly*/ + /*********************************************************************/ + + /* init mv buffer ptr */ + ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE)); + + if (i4_mb_y == 0) + { + ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu; + } + else + { + ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE)); + } + + ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs); + + /* mb type */ + ps_proc->u4_mb_type = I16x16; + + /* lambda */ + ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp]; + + /* mb distortion */ + ps_proc->i4_mb_distortion = SHRT_MAX; + + if (i4_mb_x == 0) + { + ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0; + + ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0; + + ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0; + + if (i4_mb_y == 0) + { + memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t)); + } + } + + /* mb cost */ + ps_proc->i4_mb_cost = INT_MAX; + + /**********************/ + /* init deblk context */ + /**********************/ + ps_deblk->i4_mb_x = ps_proc->i4_mb_x; + /* deblk lags the current mb proc by 1 row */ + /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */ + /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. 
*/ + /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */ + ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1; + + /* buffer ptrs */ + ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE); + ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE); + + /* init deblk bs context */ + /* mb indices */ + ps_bs->i4_mb_x = ps_proc->i4_mb_x; + ps_bs->i4_mb_y = ps_proc->i4_mb_y; + + /* init n_mb_process context */ + ps_n_mb_ctxt->i4_mb_x = 0; + ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y; + ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy; + + return IH264E_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief This function performs luma & chroma padding +* +* @par Description: +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @param[in] pu1_curr_pic_luma +* Pointer to luma buffer +* +* @param[in] pu1_curr_pic_chroma +* Pointer to chroma buffer +* +* @param[in] i4_mb_x +* mb index x +* +* @param[in] i4_mb_y +* mb index y +* +* @param[in] i4_pad_ht +* number of rows to be padded +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc, + UWORD8 *pu1_curr_pic_luma, + UWORD8 *pu1_curr_pic_chroma, + WORD32 i4_mb_x, + WORD32 i4_mb_y, + WORD32 i4_pad_ht) +{ + /* codec context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* strides */ + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + + if (i4_mb_x == 0) + { + /* padding left luma */ + ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT); + + /* padding left chroma */ + ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT); + } + else if (i4_mb_x == ps_proc->i4_wd_mbs - 1) + { + /* padding right luma */ + 
ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
+
+        /* padding right chroma */
+        ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
+
+        if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
+        {
+            UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
+            UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
+
+            /* padding bottom luma */
+            ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
+
+            /* padding bottom chroma */
+            ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
+        }
+    }
+
+    if (i4_mb_y == 0)
+    {
+        UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
+        UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
+        WORD32 wd = MB_SIZE;
+
+        if (i4_mb_x == 0)
+        {
+            pu1_rec_luma -= PAD_LEFT;
+            pu1_rec_chroma -= PAD_LEFT;
+
+            wd += PAD_LEFT;
+        }
+        else if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
+        {
+            wd += PAD_RIGHT;
+        }
+
+        /* padding top luma */
+        ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
+
+        /* padding top chroma */
+        ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
+    }
+
+    return IH264E_SUCCESS;
+}
+
+
+
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking and padding for 'n' MBs
+*
+* @par Description:
+*  When deblocking is disabled (u4_disable_deblock_level == 1) the current MB
+*  is padded left/right as soon as it is the first/last MB of its row.
+*  For i4_mb_y > 0 the function returns early until 'n' MBs (i4_n_mbs of the
+*  n_mb context) have accumulated or the end of the row is reached. It then
+*  checks in the deblk map that the MBs of the previous map row (including the
+*  one to the top right) are deblocked, deblocks the pending MBs through
+*  ih264e_deblock_mb(), and pads left/right the MB row located one MB row above
+*  the deblk context pointers. Top padding is issued while i4_mb_y == 2. At the
+*  end of the last MB row, the whole last row is deblocked in one stretch, the
+*  last two MB rows are padded left/right and bottom padding is applied.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+*  Current MB being processed(Luma)
+*
+* @param[in] pu1_curr_pic_chroma
+*  Current MB being processed(Chroma)
+*
+* @param[in] i4_mb_x
+*  Column value of current MB processed
+*
+* @param[in] i4_mb_y
+*  Current row processed
+*
+* @returns error status (every return path in this body returns IH264E_SUCCESS)
+*
+* @remarks Despite the 'hpel' in the name, no half pel generation is performed
+*  in this function body.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
+                                                     UWORD8 *pu1_curr_pic_luma,
+                                                     UWORD8 *pu1_curr_pic_chroma,
+                                                     WORD32 i4_mb_x,
+                                                     WORD32 i4_mb_y)
+{
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* n_mb processing context */
+    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
+
+    /* deblk context */
+    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+    /* strides */
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* loop variables */
+    WORD32 row, i, j, col;
+
+    /* Padding Width */
+    UWORD32 u4_pad_wd;
+
+    /* deblk_map of the row being deblocked */
+    UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
+
+    /* deblk_map_previous row */
+    UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
+
+    WORD32 u4_pad_top = 0; /* offset from the recon base pointers at which top padding starts (signed despite the u4 prefix) */
+
+    WORD32 u4_deblk_prev_row = 0; /* stays 1 only if every checked entry of the previous map row is 1 */
+
+    /* Number of mbs to be processed */
+    WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
+
+    /* Number of mbs actually processed
+     * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
+    WORD32 i4_n_mb_process_count = 0;
+
+    UWORD8 *pu1_pad_bottom_src = NULL;
+
+    UWORD8 *pu1_pad_src_luma = NULL;
+    UWORD8 *pu1_pad_src_chroma = NULL;
+
+    if (ps_proc->u4_disable_deblock_level == 1)
+    {
+        /* If left most MB is processed, then pad left */
+        if (i4_mb_x == 0)
+        {
+            /* padding left luma */
+            ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+            /* padding left chroma */
+            ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
+        }
+        /* last column: pad right */
+        if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
+        {
+            /* padding right luma */
+            ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+            /* padding right chroma */
+            ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
+        }
+    }
+
+    if (i4_mb_y > 0)
+    {
+        /* if number of mb's to be processed are less than 'N', go back.
+         * exception to the above clause is end of row */
+        if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
+        {
+            return IH264E_SUCCESS;
+        }
+        else
+        {
+            i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
+
+            u4_deblk_prev_row = 1;
+
+            /* checking whether the top rows are deblocked */
+            for (col = 0; col < i4_n_mb_process_count; col++)
+            {
+                u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
+            }
+
+            /* checking whether the top right MB is deblocked */
+            if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
+            {
+                u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
+            }
+
+            /* performing deblocking for required number of MBs */
+            if (ps_proc->u4_disable_deblock_level != 1)
+            {
+                /* Top or Top right MBs not deblocked */
+                if (u4_deblk_prev_row != 1)
+                {
+                    return IH264E_SUCCESS;
+                }
+
+                for (row = 0; row < i4_n_mb_process_count; row++) /* 'row' counts MBs along the row here */
+                {
+                    ih264e_deblock_mb(ps_proc, ps_deblk);
+
+                    pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
+
+                    if (ps_deblk->i4_mb_y > 0)
+                    {
+                        if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left the MB row one above the deblk pointers */
+                        {
+                            /* padding left luma */
+                            ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+                            /* padding left chroma */
+                            ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
+                        }
+
+                        if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/* last column: pad right the MB row one above the deblk pointers */
+                        {
+                            /* padding right luma */
+                            ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+                            /* padding right chroma */
+                            ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
+                        }
+                    }
+                    ps_deblk->i4_mb_x++;
+
+                    ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+                    ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+
+                }
+            }
+            else
+            {
+                ps_deblk->i4_mb_x += i4_n_mb_process_count;
+
+                ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
+                ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
+            }
+
+            if (i4_mb_y == 2) /* top padding is issued while MB row 2 is being processed */
+            {
+                u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
+                u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
+
+                if (ps_n_mb_ctxt->i4_mb_x == 0)
+                {
+                    u4_pad_wd += PAD_LEFT;
+                    u4_pad_top = -PAD_LEFT;
+                }
+
+                if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
+                {
+                    u4_pad_wd += PAD_RIGHT;
+                }
+
+                /* padding top luma */
+                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
+
+                /* padding top chroma */
+                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
+            }
+
+            ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
+
+            if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
+            {
+                if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
+                {
+                    /* Last MB of the picture: deblock the entire last row, then pad.
+                     * Bottom Padding is done in one stretch for the entire width */
+                    if (ps_proc->u4_disable_deblock_level != 1)
+                    {
+                        ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
+
+                        ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
+
+                        ps_n_mb_ctxt->i4_mb_x = 0;
+                        ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
+                        ps_deblk->i4_mb_x = 0;
+                        ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
+
+                        /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
+                        ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
+
+                        i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
+
+                        j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
+
+                        for (i = 0; i < j; i++)
+                        {
+                            for (col = 0; col < i4_n_mbs; col++)
+                            {
+                                ih264e_deblock_mb(ps_proc, ps_deblk);
+
+                                pu1_deblk_map[ps_deblk->i4_mb_x] = 1; /* NOTE(review): pu1_deblk_map was derived from ps_deblk->i4_mb_y at function entry, before it was reset above - confirm the intended map row */
+
+                                ps_deblk->i4_mb_x++;
+                                ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+                                ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+                                ps_n_mb_ctxt->i4_mb_x++;
+                            }
+                        }
+
+                        for (col = 0; col < i4_n_mb_process_count; col++) /* remaining (wd_mbs % n) MBs */
+                        {
+                            ih264e_deblock_mb(ps_proc, ps_deblk);
+
+                            pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
+
+                            ps_deblk->i4_mb_x++;
+                            ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+                            ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+                            ps_n_mb_ctxt->i4_mb_x++;
+                        }
+
+                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
+
+                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
+
+                        /* padding left luma (last but one MB row) */
+                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+                        /* padding left chroma (last but one MB row) */
+                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
+
+                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
+                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
+
+                        /* padding left luma (last MB row) */
+                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+                        /* padding left chroma (last MB row) */
+                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
+
+                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
+
+                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
+
+                        /* padding right luma (last but one MB row) */
+                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+                        /* padding right chroma (last but one MB row) */
+                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
+
+                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
+                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
+
+                        /* padding right luma (last MB row) */
+                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+                        /* padding right chroma (last MB row) */
+                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
+
+                    }
+
+                    /* padding bottom luma */
+                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
+                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
+
+                    /* padding bottom chroma */
+                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
+                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
+                }
+            }
+        }
+    }
+
+    return IH264E_SUCCESS;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma core coding for a set of mb's.
+*
+* @par Description:
+*  The mb to be coded is taken and is evaluated over a predefined set of modes
+*  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
+*  is selected and using intra/inter prediction filters, prediction is carried out.
+*  The deviation between src and pred signal constitutes error signal. This error
+*  signal is transformed (hierarchical transform if necessary) and quantized. The
+*  quantized residue is packed in to entropy buffer for entropy coding. This is
+*  repeated for all the mb's enlisted under the job.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns error status (bitwise OR of the statuses returned by
+*  ih264e_update_proc_ctxt() and by the buffer manager release calls)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process(process_ctxt_t *ps_proc)
+{
+    /* error status */
+    WORD32 error_status = IH264_SUCCESS;
+
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* cbp luma, chroma */
+    UWORD32 u4_cbp_l, u4_cbp_c;
+
+    /* width in mbs */
+    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+    /* loop var */
+    WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
+
+    /* valid modes */
+    UWORD32 u4_valid_modes = 0;
+
+    /* gate threshold */
+    WORD32 i4_gate_threshold = 0;
+
+    /* indices into the luma/chroma energy compaction function tables, intra flag */
+    WORD32 luma_idx, chroma_idx, is_intra;
+
+    /* recon buffer context selector: parity of the encode api call count */
+    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
+
+    /* list of modes for evaluation */
+    if (ps_proc->i4_slice_type == ISLICE)
+    {
+        /* enable intra 16x16 */
+        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
+
+        /* enable intra 8x8 */
+        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
+
+        /* enable intra 4x4 */
+        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
+    }
+    else if (ps_proc->i4_slice_type == PSLICE)
+    {
+        /* enable intra 16x16 */
+        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
+
+        /* enable intra 4x4 (only for the slowest speed preset) */
+        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
+        {
+            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
+        }
+
+        /* enable inter 16x16 */
+        u4_valid_modes |= (1 << P16x16);
+    }
+
+
+    /* init entropy */
+    ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
+    ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
+    ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
+
+    /* compute recon when :
+     *   1. current frame is to be used as a reference
+     *   2. dump recon for bit stream sanity check
+     */
+    ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
+                                ps_codec->s_cfg.u4_enable_recon;
+
+    /* Encode 'n' macroblocks,
+     * 'n' being the number of mbs dictated by current proc ctxt */
+    for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
+    {
+        /* since we have not yet found sad, we have not yet got min sad */
+        /* we need to initialize these variables for each MB */
+        /* TODO how to get the min sad into the codec */
+        ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
+        ps_proc->u4_min_sad_reached = 0;
+
+        /* mb analysis */
+        {
+            /* raster scan index of the current mb */
+            WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
+
+            /* force intra refresh ? (0 => inter is not allowed for this mb) */
+            WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
+                                         (ps_proc->pu1_is_intra_coded[i4_mb_id] != 0) ||
+                                         (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
+
+            /* evaluate inter 16x16 modes */
+            if (u4_valid_modes & (1 << P16x16))
+            {
+                /* compute nmb me */
+                if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
+                {
+                    ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
+                                                       i4_wd_mbs - ps_proc->i4_mb_x));
+                }
+
+                /* set pointers to ME data appropriately for other modules to use */
+                {
+                    UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
+
+                    /* get the min sad condition for current mb */
+                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
+                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
+
+                    ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_skip_mv);
+                    ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
+                    ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_pred_mv);
+
+                    ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
+                    ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
+                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; /* repeats the assignment made above */
+                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; /* repeats the assignment made above */
+                    ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
+
+                    /* get the best sub pel buffer */
+                    ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
+                    ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
+                }
+                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
+            }
+            else
+            {
+                /* Derive neighbor availability for the current macroblock */
+                ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
+
+                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
+            }
+
+            /*
+             * If air says intra, we need to force the following code path to evaluate intra
+             * The easy way is just to say that the inter cost is too much
+             */
+            if (!i4_air_enable_inter)
+            {
+                ps_proc->u4_min_sad_reached = 0;
+                ps_proc->i4_mb_cost = INT_MAX;
+                ps_proc->i4_mb_distortion = INT_MAX;
+            }
+            else if (ps_proc->u4_mb_type == PSKIP)
+            {
+                goto UPDATE_MB_INFO; /* skip mb: no intra evaluation, no core coding */
+            }
+
+            /* wait until the proc of [top + 1] mb is computed.
+             * We wait till the proc dependencies are satisfied */
+            if(ps_proc->i4_mb_y > 0)
+            {
+                /* proc map */
+                UWORD8  *pu1_proc_map_top;
+
+                pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
+
+                while (1)
+                {
+                    volatile UWORD8 *pu1_buf;
+                    WORD32 idx = i4_mb_idx + 1; /* NOTE(review): uses the loop counter, not ps_proc->i4_mb_x; the two match only if the job starts at column 0 - confirm */
+
+                    idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
+                    pu1_buf =  pu1_proc_map_top + idx;
+                    if(*pu1_buf)
+                        break;
+                    ithread_yield();
+                }
+            }
+
+            /* If we already have the minimum sad, there is no point in searching for sad again */
+            if (ps_proc->u4_min_sad_reached == 0)
+            {
+                /* intra gating in inter slices */
+                /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
+                if (i4_air_enable_inter && ps_proc->i4_slice_type == PSLICE && ps_codec->u4_inter_gate)
+                {
+                    /* distortion of neighboring blocks (left, top, top right, top left) */
+                    WORD32 i4_distortion[4];
+
+                    i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
+
+                    i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
+
+                    i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
+
+                    i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
+
+                    /* gate threshold is the mean of the four neighbor distortions */
+                    i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
+
+                }
+
+                /* If we are going to force intra we need to evaluate intra irrespective of gating */
+                if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
+                {
+                    /* evaluate intra 4x4 modes */
+                    if (u4_valid_modes & (1 << I4x4))
+                    {
+                        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
+                        {
+                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
+                        }
+                        else
+                        {
+                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
+                        }
+                    }
+
+                    /* evaluate intra 16x16 modes */
+                    if (u4_valid_modes & (1 << I16x16))
+                    {
+                        ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
+                    }
+
+                    /* evaluate intra 8x8 modes */
+                    if (u4_valid_modes & (1 << I8x8))
+                    {
+                        ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
+                    }
+                }
+
+            }
+        }
+
+        /* is intra */
+        if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
+        {
+            luma_idx = ps_proc->u4_mb_type;
+            chroma_idx = 0;
+            is_intra = 1;
+
+            /* evaluate chroma blocks for intra */
+            ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
+        }
+        else
+        {
+            luma_idx = 3;
+            chroma_idx = 1;
+            is_intra = 0;
+        }
+        ps_proc->u4_is_intra = is_intra;
+
+        /* redo MV pred of neighbors in the case intra mb */
+        /* TODO : currently called unconditionally, needs to be called only in the case of intra
+         * to modify neighbors */
+        if (ps_proc->i4_slice_type != ISLICE)
+        {
+            ih264e_mv_pred(ps_proc);
+        }
+
+        /* Perform luma mb core coding */
+        u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
+
+        /* Perform chroma mb core coding */
+        u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
+
+        /* coded block pattern */
+        ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
+
+        /* mb skip */
+        if (is_intra == 0)
+        {
+            if (ps_proc->u4_cbp == 0)
+            {
+                /* get skip mv */
+                UWORD32 u4_for_me = 0;
+                ih264e_find_skip_motion_vector(ps_proc,u4_for_me);
+
+                /* skip ? (no residue and the l0 mv equals the skip mv) */
+                if (ps_proc->ps_skip_mv->i2_mvx == ps_proc->ps_pu->s_l0_mv.i2_mvx &&
+                    ps_proc->ps_skip_mv->i2_mvy == ps_proc->ps_pu->s_l0_mv.i2_mvy)
+                {
+                    ps_proc->u4_mb_type = PSKIP;
+                }
+            }
+        }
+
+UPDATE_MB_INFO:
+
+        /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
+        ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
+
+        /**********************************************************************/
+        /* if disable deblock level is '0' this implies enable deblocking for */
+        /* all edges of all macroblocks with out any restrictions             */
+        /*                                                                    */
+        /* if disable deblock level is '1' this implies disable deblocking for*/
+        /* all edges of all macroblocks with out any restrictions             */
+        /*                                                                    */
+        /* if disable deblock level is '2' this implies enable deblocking for */
+        /* all edges of all macroblocks except edges overlapping with slice   */
+        /* boundaries. This option is not currently supported by the encoder  */
+        /* hence the slice map should be of no significance to perform debloc */
+        /* king                                                               */
+        /**********************************************************************/
+
+        if (ps_proc->u4_compute_recon)
+        {
+            /* deblk context */
+            /* src pointers */
+            UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
+            UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
+
+            /* src indices */
+            UWORD32 i4_mb_x = ps_proc->i4_mb_x;
+            UWORD32 i4_mb_y = ps_proc->i4_mb_y;
+
+            /* compute blocking strength */
+            if (ps_proc->u4_disable_deblock_level != 1)
+            {
+                ih264e_compute_bs(ps_proc);
+            }
+
+            /* nmb deblocking and hpel and padding */
+            ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
+                                                  pu1_cur_pic_chroma, i4_mb_x,
+                                                  i4_mb_y);
+        }
+
+        /* update the context after for coding next mb */
+        error_status |= ih264e_update_proc_ctxt(ps_proc);
+
+        /* Once the last row is processed, mark the buffer status appropriately */
+        if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
+        {
+            /* Pointer to current picture buffer structure */
+            pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
+
+            /* Pointer to current picture's mv buffer structure */
+            mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
+
+            /**********************************************************************/
+            /* release the codec's (BUF_MGR_CODEC) hold on the current picture's  */
+            /* mv buffer and on its reference/recon picture buffer through the    */
+            /* buffer manager                                                     */
+            /*                                                                    */
+            /* when recon output is enabled, the recon buffer entry selected by   */
+            /* ctxt_sel is then filled with the pic count, the picture buffer,    */
+            /* the is-last flag and the time stamp taken from the process and     */
+            /* entropy contexts                                                   */
+            /*                                                                    */
+            /*                                                                    */
+            /*                                                                    */
+            /**********************************************************************/
+            error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
+
+            error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
+
+            if (ps_codec->s_cfg.u4_enable_recon)
+            {
+                /* pic cnt */
+                ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
+
+                /* rec buffers */
+                ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
+
+                /* is last? */
+                ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
+
+                /* frame time stamp */
+                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
+                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
+            }
+
+        }
+    }
+
+    DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  function to receive frame qp and pic type before encoding
+*
+* @par Description:
+*  Before encoding the frame, this function calls the rc library for frame qp
+*  and picture type
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @param[in] cur_pic_cnt
+*  pic count
+*
+* @param[out] pi4_pic_type
+*  pic type
+
+* @returns skip_src
+*  if the source frame rate and target frame rate are not identical, the encoder
+*  skips few source frames. skip_src is set when the source need not be encoded.
+* +* @remarks none +* +******************************************************************************* +*/ +WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type) +{ + /* rate control context */ + rate_control_ctxt_t *ps_rate_control = &ps_codec->s_rate_control; + + /* frame qp */ + UWORD8 u1_frame_qp; + + /* pic type */ + PIC_TYPE_T pic_type = PIC_NA; + + /* should src be skipped */ + WORD32 skip_src = 0; + + /* temp var */ + WORD32 delta_time_stamp = 1; + + /* see if the app requires any specific frame */ + if (ps_codec->force_curr_frame_type == IV_IDR_FRAME || ps_codec->force_curr_frame_type == IV_I_FRAME) + { + irc_force_I_frame(ps_codec->s_rate_control.pps_rate_control_api); + } + + /* call rate control lib to get curr pic type and qp to be used */ + skip_src = ih264e_rc_pre_enc(ps_rate_control->pps_rate_control_api, + ps_rate_control->pps_pd_frm_rate, + ps_rate_control->pps_time_stamp, + ps_rate_control->pps_frame_time, + delta_time_stamp, + (ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs), + &ps_rate_control->e_pic_type, + &u1_frame_qp); + + switch (ps_rate_control->e_pic_type) + { + case I_PIC: + pic_type = PIC_I; + break; + + case P_PIC: + pic_type = PIC_P; + break; + + case B_PIC: + pic_type = PIC_B; + break; + + default: + break; + } + + /* is idr? 
*/ + if ((0 == cur_pic_cnt % ps_codec->s_cfg.u4_idr_frm_interval) || + ps_codec->force_curr_frame_type == IV_IDR_FRAME) + { + pic_type = PIC_IDR; + } + + /* force frame tag is not sticky */ + if (ps_codec->force_curr_frame_type == IV_IDR_FRAME || ps_codec->force_curr_frame_type == IV_I_FRAME) + { + ps_codec->force_curr_frame_type = IV_NA_FRAME; + } + + /* qp */ + ps_codec->u4_frame_qp = gau1_mpeg2_to_h264_qmap[u1_frame_qp]; + + /* pic type */ + *pi4_pic_type = pic_type; + + return skip_src; +} + +/** +******************************************************************************* +* +* @brief +* Function to update rc context after encoding +* +* @par Description +* This function updates the rate control context after the frame is encoded. +* Number of bits consumed by the current frame, frame distortion, frame cost, +* number of intra/inter mb's, ... are passed on to rate control context for +* updating the rc model. +* +* @param[in] ps_codec +* Handle to codec context +* +* @param[in] ctxt_sel +* frame context selector +* +* @param[in] pic_cnt +* pic count +* +* @returns i4_stuffing_byte +* number of stuffing bytes (if necessary) +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_cnt) +{ + /* proc set base idx */ + WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? 
(MAX_PROCESS_CTXT / 2) : 0; + + /* proc ctxt */ + process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base]; + + /* frame qp */ + UWORD8 u1_frame_qp = ps_codec->u4_frame_qp; + + /* cbr rc return status */ + WORD32 i4_stuffing_byte = 0; + + /* current frame stats */ + frame_info_t s_frame_info; + picture_type_e rc_pic_type; + + /* temp var */ + WORD32 i, j; + + /********************************************************************/ + /* BEGIN INIT */ + /********************************************************************/ + + /* init frame info */ + irc_init_frame_info(&s_frame_info); + + /* get frame info */ + for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++) + { + /*****************************************************************/ + /* One frame can be encoded by max of u4_num_cores threads */ + /* Accumulating the num mbs, sad, qp and intra_mb_cost from */ + /* u4_num_cores threads */ + /*****************************************************************/ + for (j = 0; j< MAX_MB_TYPE; j++) + { + s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j]; + + s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j]; + + s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j]; + } + + s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum; + + s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum; + + /*****************************************************************/ + /* gather number of residue and header bits consumed by the frame*/ + /*****************************************************************/ + ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy); + } + + /* get pic type */ + switch (ps_codec->pic_type) + { + case PIC_I: + case PIC_IDR: + rc_pic_type = I_PIC; + break; + case PIC_P: + rc_pic_type = P_PIC; + break; + case PIC_B: + rc_pic_type = B_PIC; + break; + default: + assert(0); + break; + } + + /* update rc lib with current frame stats */ + i4_stuffing_byte = 
ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api, + &(s_frame_info), + ps_codec->s_rate_control.pps_pd_frm_rate, + ps_codec->s_rate_control.pps_time_stamp, + ps_codec->s_rate_control.pps_frame_time, + (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs), + &rc_pic_type, + pic_cnt, + &ps_codec->s_rate_control.post_encode_skip[ctxt_sel], + u1_frame_qp, + &ps_codec->s_rate_control.num_intra_in_prev_frame, + &ps_codec->s_rate_control.i4_avg_activity); + + /* in case the frame needs to be skipped, the frame num should not be incremented */ + if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel]) + { + ps_codec->i4_frame_num --; + } + + return i4_stuffing_byte; +} + +/** +******************************************************************************* +* +* @brief +* entry point of a spawned encoder thread +* +* @par Description: +* The encoder thread dequeues a proc/entropy job from the encoder queue and +* calls necessary routines. +* +* @param[in] pv_proc +* Process context corresponding to the thread +* +* @returns error status +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_process_thread(void *pv_proc) +{ + /* error status */ + IH264_ERROR_T ret = IH264_SUCCESS; + WORD32 error_status = IH264_SUCCESS; + + /* proc ctxt */ + process_ctxt_t *ps_proc = pv_proc; + + /* codec ctxt */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* structure to represent a processing job entry */ + job_t s_job; + + /* blocking call : entropy dequeue is non-blocking till all + * the proc jobs are processed */ + WORD32 is_blocking = 0; + + /* set affinity */ + ithread_set_affinity(ps_proc->i4_id); + + while(1) + { + /* dequeue a job from the entropy queue */ + { + int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex); + + /* codec context selector */ + WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1; + + volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel]; + + /* have the lock */ + 
if (error == 0) + { + if (*pu4_buf == 0) + { + /* no entropy threads are active, try dequeuing a job from the entropy queue */ + ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking); + if (IH264_SUCCESS == ret) + { + *pu4_buf = 1; + ithread_mutex_unlock(ps_codec->pv_entropy_mutex); + goto WORKER; + } + else if(is_blocking) + { + ithread_mutex_unlock(ps_codec->pv_entropy_mutex); + break; + } + } + ithread_mutex_unlock(ps_codec->pv_entropy_mutex); + } + } + + /* dequeue a job from the process queue */ + ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1); + if (IH264_SUCCESS != ret) + { + if(ps_proc->i4_id) + break; + else + { + is_blocking = 1; + continue; + } + } + +WORKER: + /* choose appropriate proc context based on proc_base_idx */ + ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx]; + + switch (s_job.i4_cmd) + { + case CMD_PROCESS: + ps_proc->i4_mb_cnt = s_job.i2_mb_cnt; + ps_proc->i4_mb_x = s_job.i2_mb_x; + ps_proc->i4_mb_y = s_job.i2_mb_y; + + /* init process context */ + ih264e_init_proc_ctxt(ps_proc); + + /* core code all mbs enlisted under the current job */ + error_status |= ih264e_process(ps_proc); + break; + + case CMD_ENTROPY: + ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x; + ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y; + ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt; + + /* init entropy */ + ih264e_init_entropy_ctxt(ps_proc); + + /* entropy code all mbs enlisted under the current job */ + error_status |= ih264e_entropy(ps_proc); + break; + + default: + error_status |= IH264_FAIL; + break; + } + } + + /* send error code */ + ps_proc->i4_error_code = error_status; + return ret; +} diff --git a/encoder/ih264e_process.h b/encoder/ih264e_process.h new file mode 100755 index 0000000..9715434 --- /dev/null +++ b/encoder/ih264e_process.h @@ -0,0 +1,364 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_process.h +* +* @brief +* Contains functions for codec thread +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264E_PROCESS_H_ +#define IH264E_PROCESS_H_ + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief This function generates sps, pps set on request +* +* @par Description +* When the encoder is set in header generation mode, the following function +* is called. This generates sps and pps headers and returns the control back +* to caller. +* +* @param[in] ps_codec +* pointer to codec context +* +* @return success or failure error code +* +****************************************************************************** +*/ +IH264E_ERROR_T ih264e_generate_sps_pps + ( + codec_t *ps_codec + ); + +/** +******************************************************************************* +* +* @brief initialize entropy context. 
+* +* @par Description: +* Before invoking the call to perform to entropy coding the entropy context +* associated with the job needs to be initialized. This involves the start +* mb address, end mb address, slice index and the pointer to location at +* which the mb residue info and mb header info are packed. +* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc); + +/** +******************************************************************************* +* +* @brief entry point for entropy coding +* +* @par Description +* This function calls lower level functions to perform entropy coding for a +* group (n rows) of mb's. After encoding 1 row of mb's, the function takes +* back the control, updates the ctxt and calls lower level functions again. +* This process is repeated till all the rows or group of mb's (which ever is +* minimum) are coded +* +* @param[in] ps_proc +* process context +* +* @returns error status +* +* @remarks +* NOTE : It is assumed that this routine is invoked at the start of a slice, +* so the slice header is generated by default. +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc); + +/** +******************************************************************************* +* +* @brief Packs header information of a mb in to a buffer +* +* @par Description: +* After the deciding the mode info of a macroblock, the syntax elements +* associated with the mb are packed and stored. The entropy thread unpacks +* this buffer and generates the end bit stream. 
+* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_pack_header_data + ( + process_ctxt_t *ps_proc + ); + +/** +******************************************************************************* +* +* @brief update process context after encoding an mb. This involves preserving +* the current mb information for later use, initialize the proc ctxt elements to +* encode next mb. +* +* @par Description: +* This function performs house keeping tasks after encoding an mb. +* After encoding an mb, various elements of the process context need to be +* updated to encode the next mb. For instance, the source, recon and reference +* pointers, mb indices have to be adjusted to the next mb. The slice index of +* the current mb needs to be updated. If mb qp modulation is enabled, then if +* the qp changes the quant param structure needs to be updated. Also, while +* encoding the next mb, the current mb info is used as part of mode prediction +* or mv prediction. Hence the current mb info has to be preserved at top/top +* left/left locations. +* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns WORD32 status value (see the definition for the exact codes) +* +* @remarks none +* +******************************************************************************* +*/ +WORD32 ih264e_update_proc_ctxt + ( + process_ctxt_t *ps_proc + ); + +/** +******************************************************************************* +* +* @brief initialize process context. +* +* @par Description: +* Before dispatching the current job to process thread, the process context +* associated with the job is initialized. Usually every job aims to encode one +* row of mb's. Based on the row indices provided by the job, the process +* context's buffer ptrs, slice indices and other elements that are necessary +* during core-coding are initialized.
+* +* @param[in] ps_proc +* Pointer to the current process context +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc); + +/** +******************************************************************************* +* +* @brief This function performs luma & chroma padding +* +* @par Description: +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @param[in] pu1_curr_pic_luma +* Pointer to luma buffer +* +* @param[in] pu1_curr_pic_chroma +* Pointer to chroma buffer +* +* @param[in] i4_mb_x +* mb index x +* +* @param[in] i4_mb_y +* mb index y +* +* @param[in] i4_pad_ht +* number of rows to be padded +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_pad_recon_buffer + ( + process_ctxt_t *ps_proc, + UWORD8 *pu1_curr_pic_luma, + UWORD8 *pu1_curr_pic_chroma, + WORD32 i4_mb_x, + WORD32 i4_mb_y, + WORD32 i4_pad_ht + ); + +/** +******************************************************************************* +* +* @brief This function performs luma half pel planes generation +* +* @par Description: +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @param[in] pu1_curr_pic_luma +* Pointer to luma buffer +* +* @param[in] i4_mb_x +* mb index x +* +* @param[in] i4_mb_y +* mb index y +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_halfpel_generation + ( + process_ctxt_t *ps_proc, + UWORD8 *pu1_curr_pic_luma, + WORD32 i4_mb_x, + WORD32 i4_mb_y + ); + +/** +******************************************************************************* +* +* @brief This function performs luma & chroma core coding for a set of mb's. +* +* @par Description: +* The mb to be coded is taken and is evaluated over a predefined set of modes +* (intra (i16, i4, i8)/inter (mv, skip)) for best cost.
The mode with least cost +* is selected and using intra/inter prediction filters, prediction is carried out. +* The deviation between src and pred signal constitutes error signal. This error +* signal is transformed (hierarchical transform if necessary) and quantized. The +* quantized residue is packed in to entropy buffer for entropy coding. This is +* repeated for all the mb's enlisted under the job. +* +* @param[in] ps_proc +* Process context corresponding to the job +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +WORD32 ih264e_process(process_ctxt_t *ps_proc); + +/** +******************************************************************************* +* +* @brief +* function to receive frame qp and pic type before encoding +* +* @par Description: +* Before encoding the frame, this function calls the rc library for frame qp +* and picture type +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] cur_pic_cnt +* current pic count +* +* @param[out] pi4_pic_type +* pic type +* +* @returns skip_src +* if the source frame rate and target frame rate are not identical, the encoder +* skips a few source frames. skip_src is set when the source need not be encoded. +* +* @remarks none +* +******************************************************************************* +*/ +WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type); + + +/** +******************************************************************************* +* +* @brief +* Function to update rc context after encoding +* +* @par Description +* This function updates the rate control context after the frame is encoded. +* Number of bits consumed by the current frame, frame distortion, frame cost, +* number of intra/inter mb's, ... are passed on to rate control context for +* updating the rc model.
+* +* @param[in] ps_codec +* Handle to codec context +* +* @param[in] ctxt_sel +* frame context selector +* +* @param[in] pic_cnt +* pic count +* +* @returns i4_stuffing_byte +* number of stuffing bytes (if necessary) +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_cnt); + +/** +******************************************************************************* +* +* @brief +* entry point of a spawned encoder thread +* +* @par Description: +* The encoder thread dequeues a proc/entropy job from the encoder queue and +* calls necessary routines. +* +* @param[in] pv_proc +* Process context corresponding to the thread +* +* @returns error status +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_process_thread(void *pv_proc); + +#endif /* IH264E_PROCESS_H_ */ diff --git a/encoder/ih264e_rate_control.c b/encoder/ih264e_rate_control.c new file mode 100755 index 0000000..1e2fe4f --- /dev/null +++ b/encoder/ih264e_rate_control.c @@ -0,0 +1,801 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_rate_control.c +* +* @brief +* Contains api function definitions for h264 rate control +* +* @author +* ittiam +* +* @par List of Functions: +* - ih264e_rc_init() +* - ih264e_rc_get_picture_details() +* - ih264e_rc_pre_enc() +* - ih264e_update_rc_mb_info() +* - ih264e_rc_get_buffer_status() +* - ih264e_rc_post_enc() +* - ih264e_update_rc_bits_info() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "irc_datatypes.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264e.h" +#include "ih264_defs.h" +#include "ih264_macros.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_common_tables.h" +#include "ih264e_defs.h" +#include "ih264e_globals.h" +#include "irc_mem_req_and_acq.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "irc_rate_control_api.h" +#include "ih264e_time_stamp.h" +#include "ih264e_modify_frm_rate.h" +#include "ih264e_rate_control.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264e_structs.h" +#include "ih264e_utils.h" +#include "irc_trace_support.h" + + +/*****************************************************************************/ +/* Function Definitions */ 
+/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief This function does nothing +* +* @par Description +* This function does nothing +* +* @param[in] variadic function + +* @returns none +* +* @remarks This function is used by the rc library for debugging purposes. +* However this function was not part of rc library. So this is defined here +* to resolve link issues. +* +******************************************************************************* +*/ +int trace_printf(const WORD8 *format, ...) +{ + UNUSED(format); + return(0); +}; + +/** +******************************************************************************* +* +* @brief +* This function initializes rate control context and variables +* +* @par Description +* This function initializes rate control type, source and target frame rate, +* average and peak bitrate, intra-inter frame interval and initial +* quantization parameter +* +* @param[in] pv_rc_api +* Handle to rate control api +* +* @param[in] pv_frame_time +* Handle to frame time context +* +* @param[in] pv_time_stamp +* Handle to time stamp context +* +* @param[in] pv_pd_frm_rate +* Handle to pull down frame time context +* +* @param[in] u4_max_frm_rate +* Maximum frame rate +* +* @param[in] u4_src_frm_rate +* Source frame rate +* +* @param[in] u4_tgt_frm_rate +* Target frame rate +* +* @param[in] e_rate_control_type +* Rate control type +* +* @param[in] u4_avg_bit_rate +* Average bit rate +* +* @param[in] u4_peak_bit_rate +* Peak bit rate +* +* @param[in] u4_max_delay +* Maximum delay between frames +* +* @param[in] u4_intra_frame_interval +* Intra frame interval +* +* @param[in] pu1_init_qp +* Initial qp +* +* @param[in] i4_max_inter_frm_int +* Maximum inter frame interval +* +* @param[in] pu1_min_max_qp +* Array of min/max qp +* +* @param[in] u1_profile_level +* Encoder profile level +* +* @returns none +* +* @remarks 
+* +******************************************************************************* +*/ +void ih264e_rc_init(void *pv_rc_api, + void *pv_frame_time, + void *pv_time_stamp, + void *pv_pd_frm_rate, + UWORD32 u4_max_frm_rate, + UWORD32 u4_src_frm_rate, + UWORD32 u4_tgt_frm_rate, + rc_type_e e_rate_control_type, + UWORD32 u4_avg_bit_rate, + UWORD32 u4_peak_bit_rate, + UWORD32 u4_max_delay, + UWORD32 u4_intra_frame_interval, + UWORD8 *pu1_init_qp, + WORD32 i4_max_inter_frm_int, + UWORD8 *pu1_min_max_qp, + UWORD8 u1_profile_level) +{ +// UWORD8 u1_is_mb_level_rc_on = 0; + UWORD32 au4_peak_bit_rate[2] = {0,0}; + UWORD32 u4_min_bit_rate = 0; + WORD32 i4_is_gop_closed = 0; +// WORD32 i4_use_est_intra_sad = 1; + UWORD32 u4_src_ticks = 0; + UWORD32 u4_tgt_ticks = 0; + UWORD8 u1_level_idx = ih264e_get_lvl_idx(u1_profile_level); + UWORD32 u4_max_cpb_size = 1200 * gas_ih264_lvl_tbl[u1_level_idx].u4_max_cpb_size; + + /* Fill the params needed for the RC init */ + if (e_rate_control_type == CBR_NLDRC) + { + au4_peak_bit_rate[0] = u4_avg_bit_rate; + au4_peak_bit_rate[1] = u4_avg_bit_rate; + } + else + { + au4_peak_bit_rate[0] = u4_peak_bit_rate; + au4_peak_bit_rate[1] = u4_peak_bit_rate; + } + + /* Initialize frame time computation module*/ + ih264e_init_frame_time(pv_frame_time, + u4_src_frm_rate, /* u4_src_frm_rate */ + u4_tgt_frm_rate); /* u4_tgt_frm_rate */ + + /* Initialize the pull_down frame rate */ + ih264e_init_pd_frm_rate(pv_pd_frm_rate, + u4_src_frm_rate); /* u4_input_frm_rate */ + + /* Initialize time stamp structure */ + ih264e_init_time_stamp(pv_time_stamp, + u4_max_frm_rate, /* u4_max_frm_rate */ + u4_src_frm_rate); /* u4_src_frm_rate */ + + u4_src_ticks = ih264e_frame_time_get_src_ticks(pv_frame_time); + u4_tgt_ticks = ih264e_frame_time_get_tgt_ticks(pv_frame_time); + + /* Initialize the rate control */ + irc_initialise_rate_control(pv_rc_api, /* RC handle */ + e_rate_control_type, /* RC algo type */ + 0, /* MB activity on/off */ + u4_avg_bit_rate, /* Avg Bitrate */ 
+ au4_peak_bit_rate, /* Peak bitrate array[2]:[I][P] */ + u4_min_bit_rate, /* Min Bitrate */ + u4_src_frm_rate, /* Src frame_rate */ + u4_max_delay, /* Max buffer delay */ + u4_intra_frame_interval, /* Intra frm_interval */ + pu1_init_qp, /* Init QP array[3]:[I][P][B] */ + u4_max_cpb_size, /* Max VBV/CPB Buffer Size */ + i4_max_inter_frm_int, /* Max inter frm_interval */ + i4_is_gop_closed, /* Open/Closed GOP */ + pu1_min_max_qp, /* Min-max QP array[6]:[Imax][Imin][Pmax][Pmin][Bmax][Bmin] */ + 0, /* How to calc the I-frame estimated_sad */ + u4_src_ticks, /* Src_ticks = LCM(src_frm_rate,tgt_frm_rate)/src_frm_rate */ + u4_tgt_ticks); /* Tgt_ticks = LCM(src_frm_rate,tgt_frm_rate)/tgt_frm_rate */ +} + +/** +******************************************************************************* +* +* @brief Function to get picture details +* +* @par Description +* This function returns the Picture type(I/P/B) +* +* @param[in] pv_rc_api +* Handle to Rate control api +* +* @returns +* Picture type +* +* @remarks none +* +******************************************************************************* +*/ +picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api) +{ + WORD32 i4_pic_id = 0; + WORD32 i4_pic_disp_order_no = 0; + picture_type_e e_rc_pic_type = P_PIC; + + irc_get_picture_details(pv_rc_api, &i4_pic_id, &i4_pic_disp_order_no, + &e_rc_pic_type); + + return (e_rc_pic_type); +} + +/** +******************************************************************************* +* +* @brief Function to get rate control output before encoding +* +* @par Description +* This function is called before encoding the current frame and gets the qp +* for the current frame from rate control module +* +* @param[in] ps_rate_control_api +* Handle to rate control api +* +* @param[in] ps_pd_frm_rate +* Handle to pull down frm rate context +* +* @param[in] ps_time_stamp +* Handle to time stamp context +* +* @param[in] ps_frame_time +* Handle to frame time context +* +* @param[in] 
i4_delta_time_stamp +* Time stamp difference between frames +* +* @param[in] i4_total_mb_in_frame +* Total Macro Blocks in frame +* +* @param[in/out] pe_vop_coding_type +* Picture coding type(I/P/B) +* +* @param[in/out] pu1_frame_qp +* QP for current frame +* +* @returns +* Skip or encode the current frame +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_rc_pre_enc(void * ps_rate_control_api, + void * ps_pd_frm_rate, + void * ps_time_stamp, + void * ps_frame_time, + WORD32 i4_delta_time_stamp, + WORD32 i4_total_mb_in_frame, + picture_type_e *pe_vop_coding_type, + UWORD8 *pu1_frame_qp) +{ + WORD8 i4_skip_src = 0, i4_num_app_skips = 0; + UWORD32 u4_src_not_skipped_for_dts = 0; + + /* Variables for the update_frm_level_info */ + WORD32 ai4_tot_mb_in_type[MAX_MB_TYPE]; + WORD32 ai4_tot_mb_type_qp[MAX_MB_TYPE] = {0, 0}; + WORD32 ai4_mb_type_sad[MAX_MB_TYPE] = {0, 0}; + WORD32 ai4_mb_type_tex_bits[MAX_MB_TYPE] = {0, 0}; + WORD32 i4_total_frame_bits = 0; + WORD32 i4_total_hdr_bits = 0; + WORD32 i4_avg_mb_activity = 0; + WORD32 i4_intra_frm_cost = 0; + UWORD8 u1_is_scd = 0; + + /* Set all the MBs to Intra */ + ai4_tot_mb_in_type[0] = i4_total_mb_in_frame; + ai4_tot_mb_in_type[1] = 0; + + /* If delta time stamp is greater than 1, do rcupdate that many times */ + for (i4_num_app_skips = 0; (i4_num_app_skips < i4_delta_time_stamp - 1); i4_num_app_skips++) + { + /*update the missing frames frm_rate with 0 */ + ih264e_update_pd_frm_rate(ps_pd_frm_rate,0); + + /* Update the time stamp */ + ih264e_update_time_stamp(ps_time_stamp); + + /* Do a pre encode skip update */ + + irc_update_frame_level_info(ps_rate_control_api, + (*pe_vop_coding_type), + ai4_mb_type_sad, /* Frame level SAD for each type of MB[Intra/Inter] */ + i4_total_frame_bits, /* Total frame bits actually consumed */ + i4_total_hdr_bits, /*header bits for model updation*/ + ai4_mb_type_tex_bits, /* Total texture bits consumed for each type of 
MB[Intra/Inter] used for model */ + ai4_tot_mb_type_qp, /* Total qp of all MBs based on mb type */ + ai4_tot_mb_in_type, /* total number of mbs in each mb type */ + i4_avg_mb_activity, /* Average mb activity in frame */ + u1_is_scd, /* Is a scene change detected at the current frame */ + 1, /* If it's a pre-encode skip */ + i4_intra_frm_cost, /* Sum of Intra cost for each frame */ + 0); /* Is pic handling [irc_update_pic_handling_state] done before update */ + } + + /* Update the time stamp for the current frame */ + ih264e_update_time_stamp(ps_time_stamp); + + /* Check if a src not needs to be skipped */ + i4_skip_src = ih264e_should_src_be_skipped(ps_frame_time, + i4_delta_time_stamp, + &u4_src_not_skipped_for_dts); + + /*********************************************************************** + Based on difference in source and target frame rate frames are skipped + ***********************************************************************/ + if (i4_skip_src) + { + /*update the missing frames frm_rate with 0 */ + ih264e_update_pd_frm_rate(ps_pd_frm_rate,0); + + /* Do a pre encode skip update */ + irc_update_frame_level_info(ps_rate_control_api, + (*pe_vop_coding_type), + ai4_mb_type_sad, /* Frame level SAD for each type of MB[Intra/Inter] */ + i4_total_frame_bits, /* Total frame bits actually consumed */ + i4_total_hdr_bits, /*header bits for model updation*/ + ai4_mb_type_tex_bits, /* Total texture bits consumed for each type of MB[Intra/Inter] used for model */ + ai4_tot_mb_type_qp, /* Total qp of all MBs based on mb type */ + ai4_tot_mb_in_type, /* total number of mbs in each mb type */ + i4_avg_mb_activity, /* Average mb activity in frame */ + u1_is_scd, /* Is a scene change detected at the current frame */ + 1, /* If it's a pre-encode skip */ + i4_intra_frm_cost, /* Sum of Intra cost for each frame */ + 0); /* Is pic handling [irc_update_pic_handling_state] done before update */ + + /* Set the current frame type to NA */ + *pe_vop_coding_type = BUF_PIC; + } + 
else + { +#define MAX_FRAME_BITS 0x7FFFFFFF +// WORD32 i4_pic_id; +// WORD32 i4_pic_disp_order_no; + WORD32 i4_avg_frm_rate, i4_source_frame_rate; + + i4_source_frame_rate = ih264e_frame_time_get_src_frame_rate(ps_frame_time); + + /* Update the frame rate of the frame present with the tgt_frm_rate */ + /* If the frm was not skipped due to delta_time_stamp, update the + frame_rate with double the tgt_frame_rate value, so that it makes + up for one of the frames skipped by the application */ + ih264e_update_pd_frm_rate(ps_pd_frm_rate, + i4_source_frame_rate); + + /* Based on the update get the average frame rate */ + i4_avg_frm_rate = ih264e_get_pd_avg_frm_rate(ps_pd_frm_rate); + + /* Call the RC library function to change the frame_rate to the + actually achieved frm_rate */ + irc_change_frm_rate_for_bit_alloc(ps_rate_control_api, i4_avg_frm_rate); + + /* --------Rate control related things. Get pic type and frame Qp---------*/ + /* Add picture to the stack. For IPP encoder we push the variable + into the stack and get back the variables by requesting RC. + This interface is designed for IPB encoder */ + irc_add_picture_to_stack(ps_rate_control_api, 1); + + /* Query the picture_type */ + *pe_vop_coding_type = ih264e_rc_get_picture_details(ps_rate_control_api); + + /* Get current frame Qp */ + pu1_frame_qp[0] = (UWORD8)irc_get_frame_level_qp(ps_rate_control_api, + (picture_type_e)(pe_vop_coding_type[0]), + MAX_FRAME_BITS); + } + + return(i4_skip_src); +} + +/** +******************************************************************************* +* +* @brief Function to update mb info for rate control context +* +* @par Description +* After encoding a mb, information such as mb type, qp used, mb distortion +* resulted in encoding the block and so on needs to be preserved for modeling +* RC. This is preserved via this function call. 
+* +* @param[in] ps_frame_info +* Handle Frame info context +* +* @param[in] ps_proc +* Process context +* +* @returns +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc) +{ + /* proc ctxt */ + process_ctxt_t *ps_proc = pv_proc; + + /* is intra or inter */ + WORD32 mb_type = !ps_proc->u4_is_intra; + + /* distortion */ + ps_frame_info->tot_mb_sad[mb_type] += ps_proc->i4_mb_distortion; + + /* qp */ + ps_frame_info->qp_sum[mb_type] += gau1_h264_to_mpeg2_qmap[ps_proc->u4_mb_qp]; + + /* mb cnt */ + ps_frame_info->num_mbs[mb_type]++; + + /* cost */ + if (ps_proc->u4_is_intra) + { + ps_frame_info->intra_mb_cost_sum += ps_proc->i4_mb_cost; + } +} + +/** +******************************************************************************* +* +* @brief Function to get rate control buffer status +* +* @par Description +* This function is used to get buffer status(underflow/overflow) by rate +* control module +* +* @param[in] pv_rc_api +* Handle to rate control api context +* +* @param[in] i4_total_frame_bits +* Total frame bits +* +* @param[in] u1_pic_type +* Picture type +* +* @param[in] pi4_num_bits_to_prevent_vbv_underflow +* Number of bits to prevent underflow +* +* @param[out] pu1_is_enc_buf_overflow +* Buffer overflow indication flag +* +* @param[out] pu1_is_enc_buf_underflow +* Buffer underflow indication flag +* +* @returns +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_rc_get_buffer_status(void *pv_rc_api, + WORD32 i4_total_frame_bits, + picture_type_e e_pic_type, + WORD32 *pi4_num_bits_to_prevent_vbv_underflow, + UWORD8 *pu1_is_enc_buf_overflow, + UWORD8 *pu1_is_enc_buf_underflow) +{ + vbv_buf_status_e e_vbv_buf_status = VBV_NORMAL; + + e_vbv_buf_status = irc_get_buffer_status(pv_rc_api, + i4_total_frame_bits, + e_pic_type, + pi4_num_bits_to_prevent_vbv_underflow); + + if 
(e_vbv_buf_status == VBV_OVERFLOW) + { + *pu1_is_enc_buf_underflow = 1; + *pu1_is_enc_buf_overflow = 0; + } + else if (e_vbv_buf_status == VBV_UNDERFLOW) + { + *pu1_is_enc_buf_underflow = 0; + *pu1_is_enc_buf_overflow = 1; + } + else + { + *pu1_is_enc_buf_underflow = 0; + *pu1_is_enc_buf_overflow = 0; + } +} + +/** +******************************************************************************* +* +* @brief Function to update rate control module after encoding +* +* @par Description +* This function is used to update the rate control module after the current +* frame encoding is done with details such as bits consumed, SAD for I/P/B, +* intra cost ,mb type and other +* +* @param[in] ps_rate_control_api +* Handle to rate control api context +* +* @param[in] ps_frame_info +* Handle to frame info context +* +* @param[in] ps_pd_frm_rate +* Handle to pull down frame rate context +* +* @param[in] ps_time_stamp +* Handle to time stamp context +* +* @param[in] ps_frame_time +* Handle to frame time context +* +* @param[in] i4_total_mb_in_frame +* Total mb in frame +* +* @param[in] pe_vop_coding_type +* Picture coding type +* +* @param[in] i4_is_first_frame +* Is first frame +* +* @param[in] pi4_is_post_encode_skip +* Post encoding skip flag +* +* @param[in] u1_frame_qp +* Frame qp +* +* @param[in] pi4_num_intra_in_prev_frame +* Numberf of intra mbs in previous frame +* +* @param[in] pi4_avg_activity +* Average activity +* +* @returns +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_rc_post_enc(void * ps_rate_control_api, + frame_info_t *ps_frame_info, + void * ps_pd_frm_rate, + void * ps_time_stamp, + void * ps_frame_time, + WORD32 i4_total_mb_in_frame, + picture_type_e *pe_vop_coding_type, + WORD32 i4_is_first_frame, + WORD32 *pi4_is_post_encode_skip, + UWORD8 u1_frame_qp, + WORD32 *pi4_num_intra_in_prev_frame, + WORD32 *pi4_avg_activity) +{ + /* Variables for the update_frm_level_info */ + WORD32 
ai4_tot_mb_in_type[MAX_MB_TYPE]; + WORD32 ai4_tot_mb_type_qp[MAX_MB_TYPE] = {0, 0}; + WORD32 ai4_mb_type_sad[MAX_MB_TYPE] = {0, 0}; + WORD32 ai4_mb_type_tex_bits[MAX_MB_TYPE] = {0, 0}; + WORD32 i4_total_frame_bits = 0; + WORD32 i4_total_hdr_bits = 0; + WORD32 i4_total_texturebits; + WORD32 i4_avg_mb_activity = 0; + WORD32 i4_intra_frm_cost = 0; + UWORD8 u1_is_scd = 0; + WORD32 i4_cbr_bits_to_stuff = 0; + UWORD32 u4_num_intra_in_prev_frame = *pi4_num_intra_in_prev_frame; + UNUSED(ps_pd_frm_rate); + UNUSED(ps_time_stamp); + UNUSED(ps_frame_time); + UNUSED(u1_frame_qp); + /* Accumulate RC stats */ + ai4_tot_mb_in_type[MB_TYPE_INTRA] = irc_fi_get_total_mb(ps_frame_info,MB_TYPE_INTRA); + ai4_tot_mb_in_type[MB_TYPE_INTER] = irc_fi_get_total_mb(ps_frame_info,MB_TYPE_INTER); + /* ai4_tot_mb_type_qp[MB_TYPE_INTRA] = 0; + ai4_tot_mb_type_qp[MB_TYPE_INTER] = ps_enc->pu1_h264_mpg2quant[u1_frame_qp] * i4_total_mb_in_frame;*/ + ai4_tot_mb_type_qp[MB_TYPE_INTRA] = irc_fi_get_total_mb_qp(ps_frame_info,MB_TYPE_INTRA); + ai4_tot_mb_type_qp[MB_TYPE_INTER] = irc_fi_get_total_mb_qp(ps_frame_info,MB_TYPE_INTER); + ai4_mb_type_sad[MB_TYPE_INTRA] = irc_fi_get_total_mb_sad(ps_frame_info,MB_TYPE_INTRA); + ai4_mb_type_sad[MB_TYPE_INTER] = irc_fi_get_total_mb_sad(ps_frame_info,MB_TYPE_INTER); + i4_intra_frm_cost = irc_fi_get_total_intra_mb_cost(ps_frame_info); + i4_avg_mb_activity = irc_fi_get_avg_activity(ps_frame_info); + i4_total_hdr_bits = irc_fi_get_total_header_bits(ps_frame_info); + i4_total_texturebits = irc_fi_get_total_mb_texture_bits(ps_frame_info,MB_TYPE_INTRA); + i4_total_texturebits += irc_fi_get_total_mb_texture_bits(ps_frame_info,MB_TYPE_INTER); + i4_total_frame_bits = i4_total_hdr_bits + i4_total_texturebits ; + + *pi4_avg_activity = i4_avg_mb_activity; + + + /* Texture bits are not accumulated. 
Hence subtracting hdr bits from total bits */ + ai4_mb_type_tex_bits[MB_TYPE_INTRA] = 0; + ai4_mb_type_tex_bits[MB_TYPE_INTER] = i4_total_frame_bits - i4_total_hdr_bits; + + /* Set post encode skip to zero */ + pi4_is_post_encode_skip[0]= 0; + + /* For NLDRC, get the buffer status for stuffing or skipping */ + if (irc_get_rc_type(ps_rate_control_api) == CBR_NLDRC) + { + WORD32 i4_get_num_bit_to_prevent_vbv_overflow; + UWORD8 u1_enc_buf_overflow,u1_enc_buf_underflow; + + /* Getting the buffer status */ + ih264e_rc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits, + pe_vop_coding_type[0], &i4_get_num_bit_to_prevent_vbv_overflow, + &u1_enc_buf_overflow,&u1_enc_buf_underflow); + + /* We skip the frame if decoder buffer is underflowing. But we never skip first I frame */ + // if((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 1)) + if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 0)) + { + irc_post_encode_frame_skip(ps_rate_control_api, (picture_type_e)pe_vop_coding_type[0]); + // i4_total_frame_bits = imp4_write_skip_frame_header(ps_enc); + i4_total_frame_bits = 0; + + *pi4_is_post_encode_skip = 1; + + /* Adjust the GOP if in case we skipped an I-frame */ + if (*pe_vop_coding_type == I_PIC) + irc_force_I_frame(ps_rate_control_api); + + /* Since this frame is skipped by writing 7 bytes header, we say this is a P frame */ + // *pe_vop_coding_type = P; + + /* Getting the buffer status again,to check if it underflows */ + irc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits, + (picture_type_e)pe_vop_coding_type[0], &i4_get_num_bit_to_prevent_vbv_overflow); + + } + + /* In this case we stuff bytes as buffer is overflowing */ + if (u1_enc_buf_underflow == 1) + { + /* The stuffing function is directly pulled out from split controller workspace. + encode_vop_data() function makes sure alignment data is dumped at the end of a + frame. 
Split controller was identifying this alignment byte, overwriting it with + the stuff data and then finally aligning the buffer. Here every thing is inside + the DSP. So, ideally encode_vop_data needn't align, and we can start stuffing directly. + But in that case, it'll break the logic for a normal frame. + Hence for simplicity, not changing this part since it is ok to align and + then overwrite since stuffing is not done for every frame */ + i4_cbr_bits_to_stuff = irc_get_bits_to_stuff(ps_rate_control_api, i4_total_frame_bits, pe_vop_coding_type[0]); + + /* Just add extra 32 bits to make sure we don't stuff lesser */ + i4_cbr_bits_to_stuff += 32; + + /* We can not stuff more than the outbuf size. So have a check here */ + /* Add stuffed bits to total bits */ + i4_total_frame_bits += i4_cbr_bits_to_stuff; + } + } + +#define ENABLE_SCD 1 +#if ENABLE_SCD + /* If number of intra MBs are more than 2/3rd of total MBs, assume it as a scene change */ + if ((ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((2 * i4_total_mb_in_frame) / 3)) && + (*pe_vop_coding_type == P_PIC) && + (ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((11 * (WORD32)u4_num_intra_in_prev_frame) / 10))) + { + u1_is_scd = 1; + } +#endif + + /* Update num intra mbs of this frame */ + if (pi4_is_post_encode_skip[0] == 0) + { + *pi4_num_intra_in_prev_frame = ai4_tot_mb_in_type[MB_TYPE_INTRA]; + } + + /* Reset intra count to zero, if u encounter an I frame */ + if (*pe_vop_coding_type == I_PIC) + { + *pi4_num_intra_in_prev_frame = 0; + } + + /* Do an update of rate control after post encode */ + irc_update_frame_level_info(ps_rate_control_api, /* RC state */ + pe_vop_coding_type[0], /* PIC type */ + ai4_mb_type_sad, /* SAD for [Intra/Inter] */ + i4_total_frame_bits, /* Total frame bits */ + i4_total_hdr_bits, /* header bits for */ + ai4_mb_type_tex_bits, /* for MB[Intra/Inter] */ + ai4_tot_mb_type_qp, /* for MB[Intra/Inter] */ + ai4_tot_mb_in_type, /* for MB[Intra/Inter] */ + i4_avg_mb_activity, /* Average mb activity in frame 
*/ + u1_is_scd, /* Is a scene change detected */ + 0, /* Pre encode skip */ + (WORD32)i4_intra_frm_cost, /* Intra cost for frame */ + 0); /* Not done outside */ + + return (i4_cbr_bits_to_stuff >> 3); +} + +/** +******************************************************************************* +* +* @brief Function to update bits consumed info to rate control context +* +* @par Description +* Function to update bits consume info to rate control context +* +* @param[in] ps_frame_info +* Frame info context +* +* @param[in] ps_entropy +* Entropy context +* +* @returns +* total bits consumed by the frame +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy) +{ + entropy_ctxt_t *ps_entropy = pv_entropy; + + ps_frame_info->mb_header_bits[MB_TYPE_INTRA] += ps_entropy->u4_header_bits[MB_TYPE_INTRA]; + + ps_frame_info->mb_texture_bits[MB_TYPE_INTRA] += ps_entropy->u4_residue_bits[MB_TYPE_INTRA]; + + ps_frame_info->mb_header_bits[MB_TYPE_INTER] += ps_entropy->u4_header_bits[MB_TYPE_INTER]; + + ps_frame_info->mb_texture_bits[MB_TYPE_INTER] += ps_entropy->u4_residue_bits[MB_TYPE_INTER]; + + return; +} + diff --git a/encoder/ih264e_rate_control.h b/encoder/ih264e_rate_control.h new file mode 100755 index 0000000..de9466a --- /dev/null +++ b/encoder/ih264e_rate_control.h @@ -0,0 +1,351 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_rate_control.h +* +* @brief +* This file contains function declarations of api functions for h264 rate +* control +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264E_RATE_CONTROL_H_ +#define IH264E_RATE_CONTROL_H_ + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* This function initializes rate control context and variables +* +* @par Description +* This function initializes rate control type, source and target frame rate, +* average and peak bitrate, intra-inter frame interval and initial +* quantization parameter +* +* @param[in] pv_rc_api +* Handle to rate control api +* +* @param[in] pv_frame_time +* Handle to frame time context +* +* @param[in] pv_time_stamp +* Handle to time stamp context +* +* @param[in] pv_pd_frm_rate +* Handle to pull down frame time context +* +* @param[in] u4_max_frm_rate +* Maximum frame rate +* +* @param[in] u4_src_frm_rate +* Source frame rate +* +* @param[in] u4_tgt_frm_rate +* Target frame rate +* +* 
@param[in] e_rate_control_type +* Rate control type +* +* @param[in] u4_avg_bit_rate +* Average bit rate +* +* @param[in] u4_peak_bit_rate +* Peak bit rate +* +* @param[in] u4_max_delay +* Maximum delay between frames +* +* @param[in] u4_intra_frame_interval +* Intra frame interval +* +* @param[in] pu1_init_qp +* Initial qp +* +* @param[in] i4_max_inter_frm_int +* Maximum inter frame interval +* +* @param[in] pu1_min_max_qp +* Array of min/max qp +* +* @param[in] u1_profile_level +* Encoder profile level +* +* @returns none +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_rc_init(void *pv_rc_api, + void *pv_frame_time, + void *pv_time_stamp, + void *pv_pd_frm_rate, + UWORD32 u4_max_frm_rate, + UWORD32 u4_src_frm_rate, + UWORD32 u4_tgt_frm_rate, + rc_type_e e_rate_control_type, + UWORD32 u4_avg_bit_rate, + UWORD32 u4_peak_bit_rate, + UWORD32 u4_max_delay, + UWORD32 u4_intra_frame_interval, + UWORD8 *pu1_init_qp, + WORD32 i4_max_inter_frm_int, + UWORD8 *pu1_min_max_qp, + UWORD8 u1_profile_level); + +/** +******************************************************************************* +* +* @brief Function to get picture details +* +* @par Description +* This function returns the Picture type(I/P/B) +* +* @param[in] pv_rc_api +* Handle to Rate control api +* +* @returns +* Picture type +* +* @remarks none +* +******************************************************************************* +*/ +picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api); + + +/** +******************************************************************************* +* +* @brief Function to get rate control output before encoding +* +* @par Description +* This function is called before encoding the current frame and gets the qp +* for the current frame from rate control module +* +* @param[in] ps_rate_control_api +* Handle to rate control api +* +* @param[in] ps_pd_frm_rate +* Handle to pull down frm rate context +* +* 
@param[in] ps_time_stamp +* Handle to time stamp context +* +* @param[in] ps_frame_time +* Handle to frame time context +* +* @param[in] i4_delta_time_stamp +* Time stamp difference between frames +* +* @param[in] i4_total_mb_in_frame +* Total Macro Blocks in frame +* +* @param[in/out] pe_vop_coding_type +* Picture coding type(I/P/B) +* +* @param[in/out] pu1_frame_qp +* QP for current frame +* +* @returns +* Skip or encode the current frame +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_rc_pre_enc(void * ps_rate_control_api, + void * ps_pd_frm_rate, + void * ps_time_stamp, + void * ps_frame_time, + WORD32 i4_delta_time_stamp, + WORD32 i4_total_mb_in_frame, + picture_type_e *pe_vop_coding_type, + UWORD8 *pu1_frame_qp); + +/** +******************************************************************************* +* +* @brief Function to update mb info for rate control context +* +* @par Description +* After encoding a mb, information such as mb type, qp used, mb distortion +* resulted in encoding the block and so on needs to be preserved for modelling +* RC. This is preserved via this function call. 
+* +* @param[in] ps_frame_info +* Handle Frame info context +* +* @param[in] ps_proc +* Process context +* +* @returns +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc); + +/** +******************************************************************************* +* +* @brief Function to get rate control buffer status +* +* @par Description +* This function is used to get buffer status(underflow/overflow) by rate +* control module +* +* @param[in] pv_rc_api +* Handle to rate control api context +* +* @param[in] i4_total_frame_bits +* Total frame bits +* +* @param[in] e_pic_type +* Picture type +* +* @param[in] pi4_num_bits_to_prevent_vbv_underflow +* Number of bits to prevent underflow +* +* @param[out] pu1_is_enc_buf_overflow +* Buffer overflow indication flag +* +* @param[out] pu1_is_enc_buf_underflow +* Buffer underflow indication flag +* +* @returns none +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_rc_get_buffer_status(void *pv_rc_api, + WORD32 i4_total_frame_bits, + picture_type_e e_pic_type, + WORD32 *pi4_num_bits_to_prevent_vbv_underflow, + UWORD8 *pu1_is_enc_buf_overflow, + UWORD8 *pu1_is_enc_buf_underflow); + +/** +******************************************************************************* +* +* @brief Function to update rate control module after encoding +* +* @par Description +* This function is used to update the rate control module after the current +* frame encoding is done with details such as bits consumed, SAD for I/P/B, +* intra cost ,mb type and other +* +* @param[in] ps_rate_control_api +* Handle to rate control api context +* +* @param[in] ps_frame_info +* Handle to frame info context +* +* @param[in] ps_pd_frm_rate +* Handle to pull down frame rate context +* +* @param[in] ps_time_stamp +* Handle to time stamp context +* +* @param[in] ps_frame_time +* 
Handle to frame time context +* +* @param[in] i4_total_mb_in_frame +* Total mb in frame +* +* @param[in] pe_vop_coding_type +* Picture coding type +* +* @param[in] i4_is_first_frame +* Is first frame +* +* @param[in] pi4_is_post_encode_skip +* Post encoding skip flag +* +* @param[in] u1_frame_qp +* Frame qp +* +* @param[in] pi4_num_intra_in_prev_frame +* Number of intra mbs in previous frame +* +* @param[in] pi4_avg_activity +* Average activity +* +* @returns +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_rc_post_enc(void *ps_rate_control_api, + frame_info_t *ps_frame_info, + void *ps_pd_frm_rate, + void *ps_time_stamp, + void *ps_frame_time, + WORD32 i4_total_mb_in_frame, + picture_type_e *pe_vop_coding_type, + WORD32 i4_is_first_frame, + WORD32 *pi4_is_post_encode_skip, + UWORD8 u1_frame_qp, + WORD32 *pi4_num_intra_in_prev_frame, + WORD32 *pi4_avg_activity); + +/** +******************************************************************************* +* +* @brief Function to update bits consumed info to rate control context +* +* @par Description +* Adds the intra and inter MB header and residue bit counts from the entropy context to the frame info context +* +* @param[in,out] ps_frame_info +* Frame info context +* +* @param[in] pv_entropy +* Entropy context +* +* @returns +* none +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy); + +#endif /* IH264E_RATE_CONTROL_H_ */ + diff --git a/encoder/ih264e_rc_mem_interface.c b/encoder/ih264e_rc_mem_interface.c new file mode 100755 index 0000000..e4d5781 --- /dev/null +++ b/encoder/ih264e_rc_mem_interface.c @@ -0,0 +1,395 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file +* ih264e_rc_mem_interface.c +* +* @brief +* This file contains api function definitions for rate control memtabs +* +* @author +* ittiam +* +* List of Functions +* - fill_memtab() +* - use_or_fill_base() +* - ih264e_map_rc_mem_recs_to_itt_api() +* - ih264e_map_itt_mem_rec_to_rc_mem_rec() +* - ih264e_get_rate_control_mem_tab() +* +* @remarks +* None +* +******************************************************************************* +*/ + + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include <stdarg.h> +#include <math.h> + +/* User Include Files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "ih264_size_defs.h" +#include "iv2.h" +#include "ive2.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264e.h" +#include "ithread.h" +#include "ih264e.h" +#include "ih264_defs.h" +#include "ih264_debug.h" +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include 
"ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_common_tables.h" +#include "ih264_list.h" +#include "ih264e_error.h" +#include "ih264e_defs.h" +#include "ih264e_bitstream.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_master.h" +#include "ih264_buf_mgr.h" +#include "ih264_dpb_mgr.h" +#include "ih264e_utils.h" +#include "ih264e_platform_macros.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_config.h" +#include "ih264e_statistics.h" +#include "ih264e_trace.h" +#include "ih264e_statistics.h" +#include "ih264e_error.h" +#include "ih264e_utils.h" +#include "ih264e_fmt_conv.h" +#include "ih264e_cavlc.h" +#include "ih264e_rc_mem_interface.h" +#include "ih264e_time_stamp.h" +#include "irc_common.h" +#include "irc_rd_model.h" +#include "irc_est_sad.h" +#include "irc_fixed_point_error_bits.h" +#include "irc_vbr_storage_vbv.h" +#include "irc_picture_type.h" +#include "irc_bit_allocation.h" +#include "irc_mb_model_based.h" +#include "irc_cbr_buffer_control.h" +#include "irc_vbr_str_prms.h" +#include "irc_rate_control_api.h" +#include "irc_rate_control_api_structs.h" +#include "ih264e_modify_frm_rate.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief This function fills memory record attributes +* +* @par Description +* This function fills memory record attributes +* +* @param[in] ps_mem_tab +* pointer to mem records +* +* @param[in] u4_size +* size of the record +* +* @param[in] i4_alignment +* memory alignment size +* +* @param[in] e_usage +* usage 
+* +* @param[in] e_mem_region +* mem region +* +* @return void +* +****************************************************************************** +*/ +void fill_memtab(itt_memtab_t *ps_mem_tab, + WORD32 u4_size, + WORD32 i4_alignment, + ITT_MEM_USAGE_TYPE_E e_usage, + ITT_MEM_REGION_E e_mem_region) +{ + /* Round the size up to the next multiple of alignment (the mask form is exact only for power-of-two alignments) */ + WORD32 i4_aligned_size = (((u4_size) + (i4_alignment-1)) & (~(i4_alignment-1))); + + /* Fill the memtab; pv_base is left untouched */ + ps_mem_tab->u4_size = i4_aligned_size; + ps_mem_tab->i4_alignment = i4_alignment; + ps_mem_tab->e_usage = e_usage; + ps_mem_tab->e_mem_region = e_mem_region; +} + +/** +****************************************************************************** +* +* @brief This function exchanges a base pointer between a mem record and the caller +* +* @par Description +* FILL_BASE stores ptr_to_be_filled[0] in the mem record; USE_BASE loads ptr_to_be_filled[0] from the mem record +* +* @param[in,out] ps_mem_tab +* pointer to mem record; pv_base is written for FILL_BASE and read for USE_BASE +* +* @param[in,out] ptr_to_be_filled +* handle whose first entry is read for FILL_BASE and written for USE_BASE +* +* @param[in] e_func_type +* FILL_BASE or USE_BASE; any other value leaves both arguments untouched +* +* @return 0 on success or when nothing is done, -1 if the pointer to be copied is NULL +* +****************************************************************************** +*/ +WORD32 use_or_fill_base(itt_memtab_t *ps_mem_tab, + void **ptr_to_be_filled, + ITT_FUNC_TYPE_E e_func_type) +{ + /* FILL_BASE: record the caller's pointer as the memtab base; fail if it is NULL */ + if (e_func_type == FILL_BASE) + { + if (ptr_to_be_filled[0] != 0) + { + ps_mem_tab->pv_base = ptr_to_be_filled[0]; + return (0); + } + else + { + return (-1); + } + } + /* USE_BASE: hand the memtab base back to the caller; fail if it is NULL */ + if (e_func_type == USE_BASE) + { + if (ps_mem_tab->pv_base != 0) + { + ptr_to_be_filled[0] = ps_mem_tab->pv_base; + return (0); + } + else + { + return (-1); + } + } + return (0); /* any other e_func_type is a no-op */ +} + +/** +****************************************************************************** +* +* @brief This function maps rc mem records structure to encoder lib mem records +* structure +* +* @par Description +* This function maps rc mem records structure to 
encoder lib mem records +* structure +* +* @param[in] ps_mem +* pointer to encoder lib mem records +* +* @param[in] rc_memtab +* pointer to rc mem records +* +* @param[in] num_mem_recs +* number of memory records +* +* @return void +* +****************************************************************************** +*/ +void ih264e_map_rc_mem_recs_to_itt_api(iv_mem_rec_t *ps_mem, + itt_memtab_t *rc_memtab, + UWORD32 num_mem_recs) +{ + UWORD32 j; + UWORD32 Size, align; + + for (j = 0; j < num_mem_recs; j++) + { + Size = rc_memtab->u4_size; + align = rc_memtab->i4_alignment; + + /* we always ask for external persistent cacheable memory; ps_mem is indexed by j while rc_memtab is advanced as a pointer */ + FILL_MEMTAB(ps_mem, j, Size, align, IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM); + + rc_memtab++; + } +} + +/** +******************************************************************************* +* +* @brief This function maps encoder lib mem records structure to RC memory +* records structure +* +* @par Description +* Copies the alignment, size and base pointer of each encoder lib mem record +* into the matching RC mem record and tags it as DDR / PERSISTENT +* +* @param[in] ps_mem +* pointer to encoder lib mem records (read only here) +* +* @param[out] rc_memtab +* pointer to rc mem records; every field of each record is overwritten +* +* @param[in] num_mem_recs +* Number of memory records +* +* @returns none +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_map_itt_mem_rec_to_rc_mem_rec(iv_mem_rec_t *ps_mem, + itt_memtab_t *rc_memtab, + UWORD32 num_mem_recs) +{ + UWORD32 i; + + for (i = 0; i < num_mem_recs; i++) + { + rc_memtab->i4_alignment = ps_mem->u4_mem_alignment; + rc_memtab->u4_size = ps_mem->u4_mem_size; + rc_memtab->pv_base = ps_mem->pv_base; + + /* only DDR memory is available; the encoder record's e_mem_type is not consulted */ + rc_memtab->e_mem_region = DDR; + rc_memtab->e_usage = PERSISTENT; + + rc_memtab++; + ps_mem++; + } +} + +/** +****************************************************************************** +* +* @brief Get memtabs for rate control +* +* @par Description +* This routine is used to Get/init memtabs for 
rate control +* +* @param[in] pv_rate_control +* pointer to rate control context (handle) +* +* @param[in] ps_mem +* pointer to encoder lib mem records +* +* @param[in] e_func_type +* enum that dictates fill memory records or Init memory records +* +* @return total number of mem records +* +****************************************************************************** +*/ +WORD32 ih264e_get_rate_control_mem_tab(void *pv_rate_control, + iv_mem_rec_t *ps_mem, + ITT_FUNC_TYPE_E e_func_type) +{ /* For each of the four modules in turn (rate controller, frame time, time stamp, pd frame rate): query the memtab count, seed as_itt_memtab from ps_mem, run e_func_type, write size/alignment back to ps_mem; j accumulates the record count */ + static itt_memtab_t as_itt_memtab[NUM_RC_MEMTABS]; /* NOTE(review): static scratch shared by every call, so concurrent calls would clash - confirm callers serialize; indexing by j is not checked against NUM_RC_MEMTABS here */ + WORD32 i4_num_memtab = 0, j = 0; + void *refptr2[4]; /* placeholder handle storage, never initialized in this function */ + void **refptr1[4]; /* per-module handle addresses: [0] rate control api, [1] frame time, [2] time stamp, [3] pd frame rate */ + rate_control_ctxt_t *ps_rate_control = pv_rate_control; + + for (j = 0; j < 4; j++) + refptr1[j] = &(refptr2[j]); /* default: point every handle at the local placeholders */ + + j = 0; + + if (e_func_type == USE_BASE || e_func_type == FILL_BASE) /* these modes work on the real handles held in the rate control context */ + { + refptr1[1] = &ps_rate_control->pps_frame_time; + refptr1[2] = &ps_rate_control->pps_time_stamp; + refptr1[3] = &ps_rate_control->pps_pd_frm_rate; + refptr1[0] = &ps_rate_control->pps_rate_control_api; + } + + /* Get the total number of memtabs used by Rate Controller */ + i4_num_memtab = irc_rate_control_num_fill_use_free_memtab((rate_control_api_t **)refptr1[0], NULL, GET_NUM_MEMTAB); + /* Seed the RC memtabs (alignment, size, base) from the encoder mem records */ + ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab); + /* Run the requested operation (e_func_type) on the Rate Controller memtabs */ + i4_num_memtab = irc_rate_control_num_fill_use_free_memtab((rate_control_api_t **)refptr1[0],as_itt_memtab+j,e_func_type); + /* Mapping ittiam memtabs to App. 
memtabs */ + ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab); + j += i4_num_memtab; + + /* Get the total number of memtabs used by Frame time Module */ + i4_num_memtab = ih264e_frame_time_get_init_free_memtab((frame_time_t **)refptr1[1], NULL, GET_NUM_MEMTAB); + /* Seed the RC memtabs (alignment, size, base) from the encoder mem records */ + ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab); + /* Run the requested operation (e_func_type) on the Frame time Module memtabs */ + i4_num_memtab = ih264e_frame_time_get_init_free_memtab((frame_time_t **)refptr1[1], as_itt_memtab+j, e_func_type); + /* Mapping ittiam memtabs to App. memtabs */ + ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab); + j += i4_num_memtab; + + /* Get the total number of memtabs used by Time stamp Module */ + i4_num_memtab = ih264e_time_stamp_get_init_free_memtab((time_stamp_t **)refptr1[2], NULL, GET_NUM_MEMTAB); + /* Seed the RC memtabs (alignment, size, base) from the encoder mem records */ + ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab); + /* Run the requested operation (e_func_type) on the Time Stamp Module memtabs */ + i4_num_memtab = ih264e_time_stamp_get_init_free_memtab((time_stamp_t **)refptr1[2], as_itt_memtab+j, e_func_type); + /* Mapping ittiam memtabs to App. memtabs */ + ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab); + j += i4_num_memtab; + + /* Get the total number of memtabs used by Frame rate Module */ + i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab((pd_frm_rate_t **)refptr1[3], NULL, GET_NUM_MEMTAB); + /* Seed the RC memtabs (alignment, size, base) from the encoder mem records */ + ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab); + /* Run the requested operation (e_func_type) on the Frame Rate Module memtabs */ + i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab((pd_frm_rate_t **)refptr1[3], as_itt_memtab+j, e_func_type); + /* Mapping ittiam memtabs to App. 
memtabs */ + ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab); + j += i4_num_memtab; + + return j; /* Total MemTabs Needed by Rate Control Module */ +} diff --git a/encoder/ih264e_rc_mem_interface.h b/encoder/ih264e_rc_mem_interface.h new file mode 100755 index 0000000..a2946a7 --- /dev/null +++ b/encoder/ih264e_rc_mem_interface.h @@ -0,0 +1,179 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file +* ih264e_rc_mem_interface.h +* +* @brief +* This file contains function declaration and structures for rate control +* memtabs +* +* @author +* ittiam +* +* @remarks +* The rate control library is a global library across various codecs. It +* anticipates certain structures definitions. Those definitions are to be +* imported from global workspace. Instead of that, the structures needed for +* rc library are copied in to this file and exported to rc library. If the +* structures / enums / ... in the global workspace change, this file also needs +* to be modified accordingly. 
+* +****************************************************************************** +*/ +#ifndef IH264E_RC_MEM_INTERFACE_H_ +#define IH264E_RC_MEM_INTERFACE_H_ + + +/*****************************************************************************/ +/* Function Macros */ +/*****************************************************************************/ + +#define FILL_MEMTAB(m_pv_mem_rec, m_j, m_mem_size, m_align, m_type) \ +{ \ + m_pv_mem_rec[m_j].u4_size = sizeof(iv_mem_rec_t); \ + m_pv_mem_rec[m_j].u4_mem_size = m_mem_size; \ + m_pv_mem_rec[m_j].u4_mem_alignment = m_align; \ + m_pv_mem_rec[m_j].e_mem_type = m_type; \ +} + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ +typedef enum +{ + ALIGN_BYTE = 1, + ALIGN_WORD16 = 2, + ALIGN_WORD32 = 4, + ALIGN_WORD64 = 8, + ALIGN_128_BYTE = 128 +}ITT_MEM_ALIGNMENT_TYPE_E; + +typedef enum +{ + SCRATCH = 0, + PERSISTENT = 1, + WRITEONCE = 2 +}ITT_MEM_USAGE_TYPE_E; + +typedef enum +{ + L1D = 0, + SL2 = 1, + DDR = 3 /* NOTE(review): value 2 is skipped - confirm this matches the rc library's own enum */ +}ITT_MEM_REGION_E; + +typedef enum +{ + GET_NUM_MEMTAB = 0, /* ask a module how many memtabs it uses */ + FILL_MEMTAB = 1, /* NOTE(review): same identifier as the function-like FILL_MEMTAB() macro in this header; only a use followed by '(' expands the macro */ + USE_BASE = 2, /* copy a memtab's pv_base out to the caller's handle */ + FILL_BASE =3 /* copy the caller's handle into a memtab's pv_base */ +}ITT_FUNC_TYPE_E; + + +/*****************************************************************************/ +/* Structures */ +/*****************************************************************************/ + +/*NOTE : This should be an exact replica of IALG_MemRec, any change in IALG_MemRec + must be replicated here*/ +typedef struct +{ + /* Size in bytes */ + UWORD32 u4_size; + + /* Alignment in bytes */ + WORD32 i4_alignment; + + /* decides which memory region to be placed */ + ITT_MEM_REGION_E e_mem_region; + + /* memory usage class: scratch, persistent or write-once */ + ITT_MEM_USAGE_TYPE_E e_usage; + + /* Base pointer for allocated memory */ + void *pv_base; +} itt_memtab_t; + + +/*****************************************************************************/ +/* Extern Function Declarations */ 
+/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief This function fills memory record attributes +* +* @par Description +* Stores size (rounded up to a multiple of the alignment), alignment, usage and region in the record +* +* @param[out] ps_mem_tab +* pointer to the mem record to fill +* +* @param[in] u4_size +* requested size of the record +* +* @param[in] i4_alignment +* memory alignment size +* +* @param[in] e_usage +* usage +* +* @param[in] e_mem_region +* mem region +* +* @return void +* +****************************************************************************** +*/ +void fill_memtab(itt_memtab_t *ps_mem_tab, WORD32 u4_size, WORD32 i4_alignment, + ITT_MEM_USAGE_TYPE_E e_usage, ITT_MEM_REGION_E e_mem_region); + +/** +****************************************************************************** +* +* @brief This function exchanges a base pointer between a mem record and the caller +* +* @par Description +* FILL_BASE stores ptr_to_be_filled[0] in the mem record; USE_BASE loads ptr_to_be_filled[0] from the mem record +* +* @param[in,out] ps_mem_tab +* pointer to mem record +* +* @param[in,out] ptr_to_be_filled +* handle whose first entry is read for FILL_BASE and written for USE_BASE +* +* @param[in] e_func_type +* FILL_BASE or USE_BASE +* +* @return 0 on success, -1 if the pointer to be copied is NULL +* +****************************************************************************** +*/ +WORD32 use_or_fill_base(itt_memtab_t *ps_mem_tab, void **ptr_to_be_filled, + ITT_FUNC_TYPE_E e_func_type); + + +#endif // IH264E_RC_MEM_INTERFACE_H_ + diff --git a/encoder/ih264e_statistics.h b/encoder/ih264e_statistics.h new file mode 100755 index 0000000..0ab33ca --- /dev/null +++ b/encoder/ih264e_statistics.h @@ -0,0 +1,141 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_statistics.h +* +* @brief +* Contains macros for generating stats about h264 encoder +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264E_STATISTICS_H_ +#define IH264E_STATISTICS_H_ + +#if CAVLC_LEVEL_STATS + +/*****************************************************************************/ +/* Extern global declarations */ +/*****************************************************************************/ + +/** + ****************************************************************************** + * @brief In cavlc encoding, a lut is used for encoding levels. It is not possible + * to use look up for all possible levels. The extent to which look up is generated + * is based on the statistics that were collected in the following global variables. + * + * gu4_cavlc_level_bin_lt_4 represents the number of coefficients with abs(level) < 4 + * gu4_cavlc_level_bin_lt_16 represents the number of coefficients with 4 <= abs(level) < 16 + * gu4_cavlc_level_bin_lt_32 represents the number of coefficients with 16 <= abs(level) < 32 + * and so on ... 
+ * ****************************************************************************** + */ +extern UWORD32 gu4_cavlc_level_bin_lt_4; +extern UWORD32 gu4_cavlc_level_bin_lt_16; +extern UWORD32 gu4_cavlc_level_bin_lt_32; +extern UWORD32 gu4_cavlc_level_bin_lt_64; +extern UWORD32 gu4_cavlc_level_bin_lt_128; +extern UWORD32 gu4_cavlc_level_bin_else_where; +extern UWORD32 gu4_cavlc_level_lut_hit_rate; + +/*****************************************************************************/ +/* Extern function declarations */ +/*****************************************************************************/ + +/** +****************************************************************************** +* @brief print cavlc stats +****************************************************************************** +*/ +void print_cavlc_level_stats(void); + +#define GATHER_CAVLC_STATS1() \ + if (u4_abs_level < 4)\ + gu4_cavlc_level_bin_lt_4 ++; \ + else if (u4_abs_level < 16) \ + gu4_cavlc_level_bin_lt_16 ++; \ + else if (u4_abs_level < 32) \ + gu4_cavlc_level_bin_lt_32 ++; \ + else if (u4_abs_level < 64) \ + gu4_cavlc_level_bin_lt_64 ++; \ + else if (u4_abs_level < 128) \ + gu4_cavlc_level_bin_lt_128 ++; \ + else \ + gu4_cavlc_level_bin_else_where ++; + +#define GATHER_CAVLC_STATS2() \ + gu4_cavlc_level_lut_hit_rate ++; + +#else + +#define GATHER_CAVLC_STATS1() + +#define GATHER_CAVLC_STATS2() + +#endif + + +#if GATING_STATS + +/*****************************************************************************/ +/* Extern global declarations */ +/*****************************************************************************/ + +/** +****************************************************************************** +* @brief During encoding at fastest preset, some times if the inter threshold +* is lesser than the predefined threshold, intra analysis is not done. 
The +* below variable keeps track of the number of mb for which intra analysis is not +* done +* ****************************************************************************** +*/ +extern UWORD32 gu4_mb_gated_cnt; + +/*****************************************************************************/ +/* Extern function declarations */ +/*****************************************************************************/ + +/** +****************************************************************************** +* @brief print gating stats +****************************************************************************** +*/ +void print_gating_stats(void); + +#define GATHER_GATING_STATS() \ + gu4_mb_gated_cnt ++; + +#else + +#define GATHER_GATING_STATS() + +#endif + + +#endif /* IH264E_STATISTICS_H_ */ diff --git a/encoder/ih264e_structs.h b/encoder/ih264e_structs.h new file mode 100755 index 0000000..1043a53 --- /dev/null +++ b/encoder/ih264e_structs.h @@ -0,0 +1,2566 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_structs.h +* +* @brief +* Structure definitions used in the encoder +* +* @author +* Harish +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264E_STRUCTS_H_ +#define IH264E_STRUCTS_H_ + +/*****************************************************************************/ +/* Extern Function type definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief intra prediction filters leaf level +****************************************************************************** + */ +typedef void (*pf_intra_pred)(UWORD8 *pu1_src, UWORD8 *pu1_dst, + WORD32 src_strd, WORD32 dst_strd, + WORD32 ui_neighboravailability); + +/** +****************************************************************************** + * @brief inter prediction filters leaf level +****************************************************************************** + */ + +typedef void (*pf_inter_pred_luma_bilinear)(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst, + WORD32 src_strd1, WORD32 src_strd2, WORD32 dst_strd, + WORD32 height, WORD32 width); + +/** +****************************************************************************** + * @brief fwd transform leaf level +****************************************************************************** + */ +typedef void (*pf_trans_quant)(UWORD8*pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, + WORD32 i4_src_stride, UWORD32 u4_pred_stride, UWORD32 u4_dst_stride, + const UWORD16 *pu2_scale_mat, const UWORD16 *pu2_thresh_mat, + UWORD32 u4_qbit, UWORD32 u4_round_fact, UWORD8 *pu1_nnz); + +typedef void (*pf_iquant_itrans)(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, + WORD32 i4_src_stride, UWORD32 u4_pred_stride, UWORD32 u4_out_stride, + 
const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat, + UWORD32 qp_div, WORD32 *pi4_tmp); + +/** +****************************************************************************** + * @brief Padding leaf level +****************************************************************************** + */ +typedef void (*pf_pad)(UWORD8 *pu1_src, WORD32 src_strd, WORD32 wd, WORD32 pad_size); + +/** +****************************************************************************** + * @brief memory handling leaf level +****************************************************************************** + */ +typedef void (*pf_memcpy)(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes); + +typedef void (*pf_memset)(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes); + +typedef void (*pf_memcpy_mul8)(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes); + +typedef void (*pf_memset_mul8)(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes); + +/** +****************************************************************************** + * @brief Sad computation +****************************************************************************** + */ +typedef void (*pf_compute_sad)(UWORD8 *pu1_src, UWORD8 *pu1_est, + UWORD32 src_strd, UWORD32 est_strd, + WORD32 i4_max_sad, WORD32 *pi4_mb_distortion); + +/** +****************************************************************************** + * @brief Intra mode eval:encoder level +****************************************************************************** + */ +typedef void (*pf_evaluate_intra_modes)(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16, UWORD8 *pu1_dst, + UWORD32 src_strd, UWORD32 dst_strd, + WORD32 u4_n_avblty, UWORD32 *u4_intra_mode, + WORD32 *pu4_sadmin, + UWORD32 u4_valid_intra_modes); + +typedef void (*pf_evaluate_intra_4x4_modes)(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst, + UWORD32 src_strd, UWORD32 dst_strd, + WORD32 u4_n_avblty, UWORD32 *u4_intra_mode, + WORD32 *pu4_sadmin, + UWORD32 u4_valid_intra_modes, UWORD32 u4_lambda, + 
UWORD32 u4_predictd_mode); + +/** +****************************************************************************** + * @brief half_pel generation :encoder level +****************************************************************************** + */ +typedef void (*pf_sixtapfilter_horz)(UWORD8 *pu1_src, UWORD8 *pu1_dst, + WORD32 src_strd, WORD32 dst_strd); + +typedef void (*pf_sixtap_filter_2dvh_vert)(UWORD8 *pu1_src, UWORD8 *pu1_dst1, UWORD8 *pu1_dst2, + WORD32 src_strd, WORD32 dst_strd, + WORD32 *pi16_pred1, + WORD32 pi16_pred1_strd); +/** +****************************************************************************** + * @brief color space conversion +****************************************************************************** + */ +typedef void (*pf_fmt_conv_420p_to_420sp)(UWORD8 *pu1_y_src, UWORD8 *pu1_u_src, UWORD8 *pu1_v_src, + UWORD8 *pu1_y_dst, UWORD8 *pu1_uv_dst, + UWORD16 u2_height, UWORD16 u2_width, + UWORD16 src_y_strd, UWORD16 src_u_strd, UWORD16 src_v_strd, + UWORD16 dst_y_strd, UWORD16 dst_uv_strd, + UWORD32 convert_uv_only); + +typedef void (*pf_fmt_conv_422ile_to_420sp)(UWORD8 *pu1_y_buf, UWORD8 *pu1_u_buf, UWORD8 *pu1_v_buf, + UWORD8 *pu1_422i_buf, + WORD32 u4_y_width, WORD32 u4_y_height, WORD32 u4_y_stride, + WORD32 u4_u_stride, WORD32 u4_v_stride, + WORD32 u4_422i_stride); + + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ + +/** + ****************************************************************************** + * @enum CODEC_STATE_T + * @brief codec state + ****************************************************************************** + */ +typedef enum +{ + INIT_DONE, + HEADER_DONE, + FIRST_FRAME_DONE, +} CODEC_STATE_T; + + +/** + ****************************************************************************** + * @enum JOBQ_CMD_T + * @brief list of job commands (used during job instantiation) + 
****************************************************************************** + */ +typedef enum +{ + CMD_PROCESS, + CMD_ENTROPY, + CMD_FMTCONV, + CMD_ME, +}JOBQ_CMD_T; + + +/*****************************************************************************/ +/* Structures */ +/*****************************************************************************/ + +/** + * PU information + */ +typedef struct +{ + + /** + * L0 Motion Vector + */ + mv_t s_l0_mv; + + /** + * PU X position in terms of min PU (4x4) units + */ + UWORD32 b4_pos_x : 4; + + /** + * PU Y position in terms of min PU (4x4) units + */ + UWORD32 b4_pos_y : 4; + + /** + * PU width in pixels = (b4_wd + 1) << 2 + */ + UWORD32 b4_wd : 2; + + /** + * PU height in pixels = (b4_ht + 1) << 2 + */ + UWORD32 b4_ht : 2; + + /** + * L0 Ref index + */ + WORD8 i1_l0_ref_idx; + +} enc_pu_t; + +typedef struct _codec_t codec_t; + +typedef struct +{ + /** Descriptor of raw buffer */ + iv_raw_buf_t s_raw_buf; + + /** Lower 32bits of time stamp corresponding to the above buffer */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to the above buffer */ + UWORD32 u4_timestamp_high; + + /** Flag to indicate if the current buffer is last buffer */ + UWORD32 u4_is_last; + + /** Flag to indicate if mb info is sent along with input buffer */ + UWORD32 u4_mb_info_type; + + /** Size of the mb info structure */ + UWORD32 u4_mb_info_size; + + /** Buffer containing mb info if mb_info_type is non-zero */ + void *pv_mb_info; + + /** Flag to indicate if pic info is sent along with input buffer */ + UWORD32 u4_pic_info_type; + + /** Buffer containing pic info if pic_info_type is non-zero */ + void *pv_pic_info; + +}inp_buf_t; + +typedef struct +{ + /** Descriptor of bitstream buffer */ + iv_bits_buf_t s_bits_buf; + + /** Lower 32bits of time stamp corresponding to the above buffer */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to the above buffer */ + UWORD32 
u4_timestamp_high; + + /** Flag to indicate if the current buffer is last buffer */ + UWORD32 u4_is_last; + +}out_buf_t; + +typedef struct +{ + /** Descriptor of picture buffer */ + pic_buf_t s_pic_buf; + + /** Lower 32bits of time stamp corresponding to the above buffer */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to the above buffer */ + UWORD32 u4_timestamp_high; + + /** Flag to indicate if the current buffer is last buffer */ + UWORD32 u4_is_last; + + /** Picture count corresponding to current picture */ + WORD32 i4_pic_cnt; + +}rec_buf_t; + +typedef struct +{ + /** maximum width for which codec should request memory requirements */ + UWORD32 u4_max_wd; + + /** maximum height for which codec should request memory requirements */ + UWORD32 u4_max_ht; + + /** Maximum number of reference frames */ + UWORD32 u4_max_ref_cnt; + + /** Maximum number of reorder frames */ + UWORD32 u4_max_reorder_cnt; + + /** Maximum level supported */ + UWORD32 u4_max_level; + + /** Input color format */ + IV_COLOR_FORMAT_T e_inp_color_fmt; + + /** Flag to enable/disable - To be used only for debugging/testing */ + UWORD32 u4_enable_recon; + + /** Recon color format */ + IV_COLOR_FORMAT_T e_recon_color_fmt; + + /** Encoder Speed preset - Value between 0 (slowest) and 100 (fastest) */ + IVE_SPEED_CONFIG u4_enc_speed_preset; + + /** Rate control mode */ + IVE_RC_MODE_T e_rc_mode; + + /** Maximum frame rate to be supported */ + UWORD32 u4_max_framerate; + + /** Maximum bitrate to be supported */ + UWORD32 u4_max_bitrate; + + /** Maximum number of consecutive B frames */ + UWORD32 u4_max_num_bframes; + + /** Content type Interlaced/Progressive */ + IV_CONTENT_TYPE_T e_content_type; + + /** Maximum search range to be used in X direction */ + UWORD32 u4_max_srch_rng_x; + + /** Maximum search range to be used in Y direction */ + UWORD32 u4_max_srch_rng_y; + + /** Slice Mode */ + IVE_SLICE_MODE_T e_slice_mode; + + /** Slice parameter */ + UWORD32 
u4_slice_param; + + /** Processor architecture */ + IV_ARCH_T e_arch; + + /** SOC details */ + IV_SOC_T e_soc; + + /** Input width to be sent in bitstream */ + UWORD32 u4_disp_wd; + + /** Input height to be sent in bitstream */ + UWORD32 u4_disp_ht; + + /** Input width */ + UWORD32 u4_wd; + + /** Input height */ + UWORD32 u4_ht; + + /** Input stride */ + UWORD32 u4_strd; + + /** Source frame rate */ + UWORD32 u4_src_frame_rate; + + /** Target frame rate */ + UWORD32 u4_tgt_frame_rate; + + /** Target bitrate in kilobits per second */ + UWORD32 u4_target_bitrate; + + /** Force current frame type */ + IV_PICTURE_CODING_TYPE_T e_frame_type; + + /** Encoder mode */ + IVE_ENC_MODE_T e_enc_mode; + + /** Set initial Qp for I pictures */ + UWORD32 u4_i_qp; + + /** Set initial Qp for P pictures */ + UWORD32 u4_p_qp; + + /** Set initial Qp for B pictures */ + UWORD32 u4_b_qp; + + /** Set minimum Qp for I pictures */ + UWORD32 u4_i_qp_min; + + /** Set maximum Qp for I pictures */ + UWORD32 u4_i_qp_max; + + /** Set minimum Qp for P pictures */ + UWORD32 u4_p_qp_min; + + /** Set maximum Qp for P pictures */ + UWORD32 u4_p_qp_max; + + /** Set minimum Qp for B pictures */ + UWORD32 u4_b_qp_min; + + /** Set maximum Qp for B pictures */ + UWORD32 u4_b_qp_max; + + /** Adaptive intra refresh mode */ + IVE_AIR_MODE_T e_air_mode; + + /** Adaptive intra refresh period in frames */ + UWORD32 u4_air_refresh_period; + + /** VBV buffer delay */ + UWORD32 u4_vbv_buffer_delay; + + /** VBV buffer size */ + UWORD32 u4_vbv_buf_size; + + /** Number of cores to be used */ + UWORD32 u4_num_cores; + + /** ME speed preset - Value between 0 (slowest) and 100 (fastest) */ + UWORD32 u4_me_speed_preset; + + /** Flag to enable/disable half pel motion estimation */ + UWORD32 u4_enable_hpel; + + /** Flag to enable/disable quarter pel motion estimation */ + UWORD32 u4_enable_qpel; + + /** Flag to enable/disable intra 4x4 analysis */ + UWORD32 u4_enable_intra_4x4; + + /** Flag to enable/disable intra 8x8 
analysis */ + UWORD32 u4_enable_intra_8x8; + + /** Flag to enable/disable intra 16x16 analysis */ + UWORD32 u4_enable_intra_16x16; + + /** Flag to enable/disable fast SAD approximation */ + UWORD32 u4_enable_fast_sad; + + /** Flag to enable/disable alternate reference frames */ + UWORD32 u4_enable_alt_ref; + + /** Flag to enable/disable computation of SATQD in ME */ + UWORD32 u4_enable_satqd; + + /** Minimum SAD to search for */ + WORD32 i4_min_sad; + + /** Maximum search range in X direction for farthest reference */ + UWORD32 u4_srch_rng_x; + + /** Maximum search range in Y direction for farthest reference */ + UWORD32 u4_srch_rng_y; + + /** I frame interval */ + UWORD32 u4_i_frm_interval; + + /** IDR frame interval */ + UWORD32 u4_idr_frm_interval; + + /** Number of consecutive B frames */ + UWORD32 u4_num_b_frames; + + /** Disable deblock level (0: Enable completely, 3: Disable completely) */ + UWORD32 u4_disable_deblock_level; + + /** Profile */ + IV_PROFILE_T e_profile; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + + /** Flag to say if the current config parameter set is valid + * Will be zero to start with and will be set to 1, when configured + * Once encoder uses the parameter set, this will be set to zero */ + UWORD32 u4_is_valid; + + /** Command associated with this config param set */ + IVE_CONTROL_API_COMMAND_TYPE_T e_cmd; + + /** Input width in mbs */ + UWORD32 i4_wd_mbs; + + /** Input height in mbs */ + UWORD32 i4_ht_mbs; + + /** entropy coding mode flag */ + UWORD32 u4_entropy_coding_mode; + + /** enable weighted prediction */ + UWORD32 u4_weighted_prediction; + + /** enable constrained intra prediction */ + UWORD32 u4_constrained_intra_pred; + + /** Pic info type */ + UWORD32 u4_pic_info_type; + /** + * MB info type + */ + UWORD32 
u4_mb_info_type; + +}cfg_params_t; + + + +/** Structure to hold format conversion context */ +typedef struct +{ + /** Current row for which format conversion should be done */ + WORD32 i4_cur_row; + + /** Number of rows for which format conversion should be done */ + WORD32 i4_num_rows; + +}fmt_conv_t; + + +/** + * Structure to represent a processing job entry + */ +typedef struct +{ + /** + * Command + */ + WORD32 i4_cmd; + + /** + * MB x of the starting MB + */ + WORD16 i2_mb_x; + + /** + * MB y of the starting MB + */ + + WORD16 i2_mb_y; + + /** + * Number of MBs that need to be processed in this job + */ + WORD16 i2_mb_cnt; + + /** + * Process contexts base index + * Will toggle between 0 and MAX_PROCESS_THREADS + */ + WORD16 i2_proc_base_idx; + +} job_t; + + +/** + * Structure to represent a MV Bank buffer + */ +typedef struct +{ + /** + * Pointer to hold num PUs each MB in a picture + */ + UWORD32 *pu4_mb_pu_cnt; + + /** + * Pointer to hold enc_pu_t for each PU in a picture + */ + enc_pu_t *ps_pic_pu; + + /** + * Pointer to hold PU map for each MB in a picture + */ + UWORD8 *pu1_pic_pu_map; + + /** + * Pointer to hold the Slice map + */ + UWORD16 *pu1_pic_slice_map; + + /** + * Absolute POC for the current MV Bank + */ + WORD32 i4_abs_poc; + + /** + * Buffer Id + */ + WORD32 i4_buf_id; + +} mv_buf_t; + + +/** + * Reference set containing pointers to MV buf and pic buf + */ +typedef struct +{ + /** Picture count */ + WORD32 i4_pic_cnt; + + /** POC */ + WORD32 i4_poc; + + /** picture buffer */ + pic_buf_t *ps_pic_buf; + + /** mv buffer */ + mv_buf_t *ps_mv_buf; + +}ref_set_t; + +typedef struct +{ + + /** + * Pointer to current PPS + */ + pps_t *ps_pps; + + /** + * Pointer to current SPS + */ + sps_t *ps_sps; + + /** + * Pointer to current slice header structure + */ + slice_header_t *ps_slice_hdr; + + /** + * MB's x position within a picture in raster scan in MB units + */ + WORD32 i4_mb_x; + + /** + * MB's y position within a picture in raster scan in MB units 
+ */ + + WORD32 i4_mb_y; + + /** + * Current PU structure - set to MB enc_pu_t pointer at the start of MB processing and incremented + * for every TU + */ + enc_pu_t *ps_pu; + + /** + * Pointer to frame level enc_pu_t for the current frame being parsed + * where MVs and Intra pred modes will be updated + */ + enc_pu_t *ps_pic_pu; + + /** + * Pointer to hold the number of PUs in each MB of a picture + */ + UWORD32 *pu4_mb_pu_cnt; + + /** PU Index map per MB. The indices in this map are w.r.t picture pu array and not + * w.r.t MB pu array. + * This will be used during mv prediction and since neighbors will have different MB pu map + * it will be easier if they all have indices w.r.t picture level PU array rather than MB level + * PU array. + * pu1_pic_pu_map is map w.r.t MB's enc_pu_t array + */ + UWORD32 *pu4_pic_pu_idx_map; + + /** + * Pointer to pu_map for the current frame being parsed + * where MVs and Intra pred modes will be updated + */ + UWORD8 *pu1_pic_pu_map; + + /** + * PU count in current MB + */ + WORD32 i4_mb_pu_cnt; + + /** + * Index of the first PU of the current MB (presumably w.r.t the picture level PU array - TODO confirm) + */ + WORD32 i4_mb_start_pu_idx; + + /** + * Top availability for current MB level + */ + UWORD8 u1_top_mb_avail; + + /** + * Top right availability for current MB level + */ + UWORD8 u1_top_rt_mb_avail; + /** + * Top left availability for current MB level + */ + UWORD8 u1_top_lt_mb_avail; + /** + * left availability for current MB level + */ + UWORD8 u1_left_mb_avail; + +}mv_ctxt_t; + +typedef struct +{ + /** + * MB's x position within a picture in raster scan in MB units + */ + WORD32 i4_mb_x; + + /** + * MB's y position within a picture in raster scan in MB units + */ + WORD32 i4_mb_y; + + /** + * MB's x position within a Slice in raster scan in MB units + */ + WORD32 i4_mb_slice_x; + + /** + * MB's y position within a Slice in raster scan in MB units + */ + WORD32 i4_mb_slice_y; + + /** + * Vertical strength, Two bits per edge. + * Stored in format. BS[15] | BS[14] | .. 
|BS[0] + */ + UWORD32 *pu4_pic_vert_bs; + + /** + * Boundary strength, Two bits per edge. + * Stored in format. BS[15] | BS[14] | .. |BS[0] + */ + UWORD32 *pu4_pic_horz_bs; + + /** + * Qp array stored for each mb + */ + UWORD8 *pu1_pic_qp; + +}bs_ctxt_t; + +typedef struct +{ + /** + * MB's x position within a picture in raster scan in MB units + */ + WORD32 i4_mb_x; + + /** + * MB's y position within a picture in raster scan in MB units + */ + WORD32 i4_mb_y; + + /** + * structure that contains BS and QP frame level arrays + */ + bs_ctxt_t s_bs_ctxt; + + /** + * Pointer to 0th luma pixel in current pic + */ + UWORD8 *pu1_cur_pic_luma; + + /** + * Pointer to 0th chroma pixel in current pic + */ + UWORD8 *pu1_cur_pic_chroma; + + /** + * Points to the array of slice indices which is used to identify the slice + * to which each MB in a frame belongs. + */ + UWORD8 *pu1_slice_idx; + +}deblk_ctxt_t; + + +/** + ****************************************************************************** + * @brief Structure to hold data and flags for 'n' mb processing for + * deblocking , padding and half pel generation. + ****************************************************************************** + */ +typedef struct +{ + /** + * MB's x position last processed + 1 + */ + WORD32 i4_mb_x; + + /** + * MB's y position ,current processing. + */ + WORD32 i4_mb_y; + + /** + * Number of MBs processed in a stretch + */ + WORD32 i4_n_mbs; + +}n_mb_process_ctxt_t; + + +/** +****************************************************************************** + * @brief Structure to hold coefficient info for a 4x4 subblock. + * The following can be used to type-cast coefficient data that is stored + * per subblock. Note that though i2_level is shown as an array that + * holds 16 coefficients, only the first few entries will be valid. Next + * subblocks data starts after the valid number of coefficients. 
Number + * of non-zero coefficients will be derived using number of non-zero bits + * in sig coeff map +****************************************************************************** + */ +typedef struct +{ + /** + * significant coefficient map and nnz are packed in + * to msb (2 bytes) and lsb (2 bytes) respectively + */ + WORD32 i4_sig_map_nnz; + + /** + * array of non zero residue coefficients + */ + WORD16 ai2_residue[16]; + +}tu_sblk_coeff_data_t; + +/** +****************************************************************************** + * @brief Structure contains few common state variables such as MB indices, + * current SPS, PPS etc which are to be used in the entropy thread. By keeping + * it a different structure it is being explicitly signaled that these + * variables are specific to entropy threads context and other threads should + * not update these elements +****************************************************************************** + */ +typedef struct +{ + + /** + * start of frame / start of slice flag + */ + WORD32 i4_sof; + + /** + * end of frame / end of slice flag + */ + WORD32 i4_eof; + + /** + * generate header upon request + */ + WORD32 i4_gen_header; + + /** + * seq_parameter_set_id + */ + UWORD32 u4_sps_id; + + /** + * Pointer to base of sequence parameter set structure array + */ + sps_t *ps_sps_base; + + /** + * pic_parameter_set_id + */ + UWORD32 u4_pps_id; + + /** + * Pointer to base of Picture parameter set structure array + */ + pps_t *ps_pps_base; + + /** + * Current slice idx + */ + WORD32 i4_cur_slice_idx; + + /** + * Points to the array of slice indices which is used to identify the independent slice + * to which each MB in a frame belongs. 
+ */ + UWORD8 *pu1_slice_idx; + + /** + * Pointer to base of slice header structure array + */ + slice_header_t *ps_slice_hdr_base; + + /** + * entropy status + */ + UWORD8 *pu1_entropy_map; + + /** + * MB's x position within a picture in raster scan in MB units + */ + WORD32 i4_mb_x; + + /** + * MB's y position within a picture in raster scan in MB units + */ + WORD32 i4_mb_y; + + /** + * MB count (presumably the number of MBs to be entropy coded - TODO confirm) + */ + WORD32 i4_mb_cnt; + + /** + * MB start address + */ + WORD32 i4_mb_start_add; + + /** + * MB end address + */ + WORD32 i4_mb_end_add; + + /** + * Input width in mbs + */ + WORD32 i4_wd_mbs; + + /** + * Input height in mbs + */ + WORD32 i4_ht_mbs; + + /** + * Bitstream structure + */ + bitstrm_t *ps_bitstrm; + + /** + * transform_8x8_mode_flag + */ + WORD8 i1_transform_8x8_mode_flag; + + /** + * entropy_coding_mode_flag + */ + WORD8 u1_entropy_coding_mode_flag; + + /** + * Pointer to the top row nnz for luma + */ + UWORD8 (*pu1_top_nnz_luma)[4]; + + /** + * left nnz for luma + */ + UWORD32 u4_left_nnz_luma; + + /** + * Array of zero runs before for the mb + */ + UWORD8 au1_zero_run[16]; + + /** + * Pointer to the top row nnz for chroma + */ + UWORD8 (*pu1_top_nnz_cbcr)[4]; + + /** + * left nnz for chroma (NOTE: declared UWORD8 despite the u4_ prefix) + */ + UWORD8 u4_left_nnz_cbcr; + + /** + * Pointer to frame level mb subblock coeff data + */ + void *pv_pic_mb_coeff_data; + + /** + * Pointer to mb subblock coeff data and number of subblocks and scan idx + * Incremented each time a coded subblock is processed + */ + void *pv_mb_coeff_data; + + /** + * Pointer to frame level mb header data + */ + void *pv_pic_mb_header_data; + + /** + * Pointer to mb header data and + * incremented each time a coded mb is encoded + */ + void *pv_mb_header_data; + + /** + * Error code during parse stage + */ + IH264E_ERROR_T i4_error_code; + + /** + * Void pointer to job context + */ + void *pv_proc_jobq, *pv_entropy_jobq; + + /** + * Flag to signal end of frame + */ + WORD32 i4_end_of_frame; + + /** + * Abs POC count of the 
frame + */ + WORD32 i4_abs_pic_order_cnt; + + /** + * mb skip run + */ + WORD32 *pi4_mb_skip_run; + + /** + * Flag to signal end of sequence + */ + UWORD32 u4_is_last; + + /** + * Lower 32bits of time-stamp corresponding to the buffer being encoded + */ + UWORD32 u4_timestamp_low; + + /** + * Upper 32bits of time-stamp corresponding to the buffer being encoded + */ + UWORD32 u4_timestamp_high; + + /** + * Current Picture count - used for synchronization + */ + WORD32 i4_pic_cnt; + + /** + * Number of bits consumed by header for I and P mb types + */ + UWORD32 u4_header_bits[MAX_MB_TYPE]; + + /** + * Number of bits consumed by residue for I and P mb types + */ + UWORD32 u4_residue_bits[MAX_MB_TYPE]; + +} entropy_ctxt_t; + +/** +****************************************************************************** +* @brief macro block info. +****************************************************************************** +*/ +typedef struct +{ + /** + * is intra + */ + UWORD16 u2_is_intra; + + /** + * mb type + */ + UWORD16 u2_mb_type; + + /** + * csbp + */ + UWORD32 u4_csbp; + + /** + * mb distortion + */ + WORD32 i4_mb_distortion; + +}mb_info_t; + +/** +****************************************************************************** +* @brief structure representing the neighbor availability of a mb +* or subblk or any other partition +****************************************************************************** +*/ +typedef struct +{ + /** + * left blk/subblk/partition + */ + UWORD8 u1_mb_a; + + /** + * top blk/subblk/partition + */ + UWORD8 u1_mb_b; + + /** + * topright blk/subblk/partition + */ + UWORD8 u1_mb_c; + + /** + * topleft blk/subblk/partition + */ + UWORD8 u1_mb_d; + +}block_neighbors_t; + +/** + ****************************************************************************** + * @brief MB info related variables used during NMB processing + ****************************************************************************** + */ +typedef struct +{ + UWORD32 u4_mb_type; + 
UWORD32 u4_min_sad; + UWORD32 u4_min_sad_reached; + WORD32 i4_mb_cost; + WORD32 i4_mb_distortion; + + + mv_t s_skip_mv; + mv_t s_pred_mv; + + block_neighbors_t s_ngbr_avbl; + + /** + * Buffer to hold best subpel buffer in each MB of NMB + */ + UWORD8 *pu1_best_sub_pel_buf; + + /** + * Stride for subpel buffer + */ + UWORD32 u4_bst_spel_buf_strd; + +}mb_info_nmb_t; + +/** + ****************************************************************************** + * @brief Pixel processing thread context + ****************************************************************************** + */ +typedef struct +{ + /** + * entropy context + */ + entropy_ctxt_t s_entropy; + + /** + * me context + */ + me_ctxt_t s_me_ctxt; + + /** + * Pointer to codec context + */ + codec_t *ps_codec; + + /** + * N mb process context + */ + n_mb_process_ctxt_t s_n_mb_ctxt; + + /** + * Source pointer to current MB luma + */ + UWORD8 *pu1_src_buf_luma; + + /** + * Source pointer to current MB chroma + */ + UWORD8 *pu1_src_buf_chroma; + + /** + * Recon pointer to current MB luma + */ + UWORD8 *pu1_rec_buf_luma; + + /** + * Recon pointer to current MB chroma + */ + UWORD8 *pu1_rec_buf_chroma; + + /** + * Ref pointer to current MB luma + */ + UWORD8 *pu1_ref_buf_luma; + + /** + * Ref pointer to current MB chroma + */ + UWORD8 *pu1_ref_buf_chroma; + + /** + * pointer to luma plane of input buffer (base :: mb (0,0)) + */ + UWORD8 *pu1_src_buf_luma_base; + + /** + * pointer to luma plane of reconstructed buffer (base :: mb (0,0)) + */ + UWORD8 *pu1_rec_buf_luma_base; + + /** + * pointer to luma plane of ref buffer (base :: mb (0,0)) + */ + UWORD8 *pu1_ref_buf_luma_base; + + /** + * pointer to chroma plane of input buffer (base :: mb (0,0)) + */ + UWORD8 *pu1_src_buf_chroma_base; + + /** + * Buffer for color space conversion of luma + */ + UWORD8 *pu1_y_csc_buf; + + /** + * Buffer for color space conversion of chroma + */ + + UWORD8 *pu1_uv_csc_buf; + + /** + * pointer to chroma plane of reconstructed buffer (base 
:: mb (0,0)) + */ + UWORD8 *pu1_rec_buf_chroma_base; + + /** + * pointer to chroma plane of ref buffer (base :: mb (0,0)) + */ + UWORD8 *pu1_ref_buf_chroma_base; + + /** + * Pointer to ME NMB info + */ + mb_info_nmb_t *ps_nmb_info; + + mb_info_nmb_t *ps_cur_mb; + + /** + * source stride + * (strides for luma and chroma are the same) + */ + WORD32 i4_src_strd; + + /** + * recon stride & ref stride + * (strides for luma and chroma are the same) + */ + WORD32 i4_rec_strd; + + /** + * Offset for half pel x plane from the pic buf + */ + UWORD32 u4_half_x_offset; + + /** + * Offset for half pel y plane from half x plane + */ + UWORD32 u4_half_y_offset; + + /** + * Offset for half pel xy plane from half y plane + */ + UWORD32 u4_half_xy_offset; + + /** + * pred buffer pointer (temp buffer 1) + */ + UWORD8 *pu1_pred_mb; + + /** + * pred buffer pointer (prediction buffer for intra 16x16) + */ + UWORD8 *pu1_pred_mb_intra_16x16; + + /** + * pred buffer pointer (prediction buffer for intra 16x16_plane) + */ + UWORD8 *pu1_pred_mb_intra_16x16_plane; + + /** + * pred buffer pointer (prediction buffer for intra chroma) + */ + UWORD8 *pu1_pred_mb_intra_chroma; + + /** + * pred buffer pointer (prediction buffer for intra chroma plane) + */ + UWORD8 *pu1_pred_mb_intra_chroma_plane; + + /** + * temp. reference buffer ptr for intra 4x4 when rdopt is on + */ + UWORD8 *pu1_ref_mb_intra_4x4; + + /** + * prediction buffer stride + */ + WORD32 i4_pred_strd; + + /** + * transform buffer pointer (temp buffer 2) + */ + WORD16 *pi2_res_buf; + + /** + * temp. 
transform buffer ptr for intra 4x4 when rdopt is on + */ + WORD16 *pi2_res_buf_intra_4x4; + + /** + * transform buffer stride + */ + WORD32 i4_res_strd; + + /** + * scratch buffer for inverse transform (temp buffer 3) + */ + void *pv_scratch_buff; + + /** + * frame num + */ + WORD32 i4_frame_num; + + /** + * start address of frame / sub-frame + */ + WORD32 i4_frame_strt_add; + + /** + * IDR pic + */ + UWORD32 u4_is_idr; + + /** + * idr_pic_id + */ + UWORD32 u4_idr_pic_id; + + /** + * Input width in mbs + */ + WORD32 i4_wd_mbs; + + /** + * Input height in mbs + */ + WORD32 i4_ht_mbs; + + /** + * slice_type + */ + WORD32 i4_slice_type; + + /** + * Current slice idx + */ + WORD32 i4_cur_slice_idx; + + /** + * MB's x position within a picture in raster scan in MB units + */ + WORD32 i4_mb_x; + + /** + * MB's y position within a picture in raster scan in MB units + */ + WORD32 i4_mb_y; + + /** + * MB's x position within a Slice in raster scan in MB units + */ + WORD32 i4_mb_slice_x; + + /** + * MB's y position within a Slice in raster scan in MB units + */ + WORD32 i4_mb_slice_y; + + /** + * mb type + */ + UWORD32 u4_mb_type; + + /** + * is intra + */ + UWORD32 u4_is_intra; + + /** + * mb neighbor availability pointer + */ + block_neighbors_t *ps_ngbr_avbl; + + /** + * lambda (lagrange multiplier for cost computation) + */ + UWORD32 u4_lambda; + + /** + * mb distortion + */ + WORD32 i4_mb_distortion; + + /** + * mb cost + */ + WORD32 i4_mb_cost; + + /********************************************************************/ + /* i4_ngbr_avbl_mb_16 - ngbr avbl of curr mb */ + /* i4_ngbr_avbl_sb_8 - ngbr avbl of all 8x8 sub blocks of curr mb */ + /* i4_ngbr_avbl_sb_4 - ngbr avbl of all 4x4 sub blocks of curr mb */ + /* i4_ngbr_avbl_mb_c - chroma ngbr avbl of curr mb */ + /********************************************************************/ + WORD32 i4_ngbr_avbl_16x16_mb; + WORD32 ai4_neighbor_avail_8x8_subblks[4]; + UWORD8 au1_ngbr_avbl_4x4_subblks[16]; + WORD32 
i4_chroma_neighbor_avail_8x8_mb; + + /** + * array to store the mode of mb sub blocks + */ + UWORD8 au1_intra_luma_mb_4x4_modes[16]; + + /** + * array to store the predicted mode of mb sub blks + */ + UWORD8 au1_predicted_intra_luma_mb_4x4_modes[16]; + + /** + * macro block intra 16x16 mode + */ + UWORD8 u1_l_i16_mode; + + /** + * array to store the mode of the macro block intra 8x8 4 modes + */ + UWORD8 au1_intra_luma_mb_8x8_modes[4]; + + /** + * intra chroma mb mode + */ + UWORD8 u1_c_i8_mode; + + /********************************************************************/ + /* array to store pixels from the neighborhood for intra prediction */ + /* i16 - 16 left pels + 1 top left pel + 16 top pels = 33 pels */ + /* i8 - 8 lpels + 1 tlpels + 8 tpels + 8 tr pels = 25 pels */ + /* i4 - 4 lpels + 1 tlpels + 4 tpels + 4 tr pels = 13 pels */ + /* ic - (8 left pels + 1 top left pel + 8 top pels) * 2 = 34 pels */ + /********************************************************************/ + UWORD8 au1_ngbr_pels[34]; + + /** + * array for 8x8 intra pels filtering (temp buff 4) + */ + UWORD8 au1_neighbor_pels_i8x8_unfiltered[25]; + + /** + * Number of sub partitions in the inter pred MB + */ + UWORD32 u4_num_sub_partitions; + + /** + * Pointer to hold the number of PUs in each MB of a picture + */ + UWORD32 *pu4_mb_pu_cnt; + + /** + * Pointer to the array of structures having motion vectors, size + * and position of sub partitions + */ + enc_pu_t *ps_pu; + + /** + * predicted motion vector + */ + mv_t *ps_pred_mv; + + /** + * top row mb syntax information base + * In normal working scenarios, for a given context set, + * the mb syntax info pointer is identical across all process threads. + * But when the hard bound on slices are enabled, in multi core, frame + * is partitioned in to sections equal to set number of cores and each + * partition is run independently. In this scenario, a ctxt set will alone + * appear to run multiple frames at a time. 
For this to occur, the common + * pointers across the proc ctxt should disappear. + * + * This is done by allocating MAX_PROCESS_THREADS memory and distributing + * across individual ctxts when byte bnd per slice is enabled. + */ + mb_info_t *ps_top_row_mb_syntax_ele_base; + + /** + * top row mb syntax information + */ + mb_info_t *ps_top_row_mb_syntax_ele; + + /** + * left mb syntax information + */ + mb_info_t s_left_mb_syntax_ele; + + /** + * top left mb syntax information + */ + mb_info_t s_top_left_mb_syntax_ele; + + /** + * top left mb syntax information (ME copy - presumably used by motion estimation, TODO confirm) + */ + + mb_info_t s_top_left_mb_syntax_ME; + + /** + * left mb motion vector (ME copy - presumably used by motion estimation, TODO confirm) + */ + enc_pu_t s_left_mb_pu_ME; + + /** + * top left mb motion vector (ME copy - presumably used by motion estimation, TODO confirm) + */ + enc_pu_t s_top_left_mb_pu_ME; + + + /** + * mb neighbor availability structure (stored by value, not a pointer) + */ + block_neighbors_t s_ngbr_avbl; + + /** + * In case the macroblock type is intra, the intra modes of all + * partitions for the left mb are stored in the array below + */ + UWORD8 au1_left_mb_intra_modes[16]; + + /** + * In case the macroblock type is intra, the intra modes of all + * partitions for the top mb are stored in the array below + * + * In normal working scenarios, for a given context set, + * the mb syntax info pointer is identical across all process threads. + * But when the hard bound on slices are enabled, in multi core, frame + * is partitioned in to sections equal to set number of cores and each + * partition is run independently. In this scenario, a ctxt set will alone + * appear to run multiple frames at a time. For this to occur, the common + * pointers across the proc ctxt should disappear. + * + * This is done by allocating MAX_PROCESS_THREADS memory and distributing + * across individual ctxts when byte bnd per slice is enabled. 
+ */ + UWORD8 *pu1_top_mb_intra_modes_base; + + /** + * In case the macroblock type is intra, the intra modes of all + * partitions for the top mb are stored in the array below + */ + UWORD8 *pu1_top_mb_intra_modes; + + /** + * skip motion vector info + */ + mv_t *ps_skip_mv; + + /** + * left mb motion vector + */ + enc_pu_t s_left_mb_pu; + + /** + * top left mb motion vector + */ + enc_pu_t s_top_left_mb_pu; + + /** + * top row motion vector info + * + * In normal working scenarios, for a given context set, + * the top row pu pointer is identical across all process threads. + * But when the hard bound on slices are enabled, in multi core, frame + * is partitioned in to sections equal to set number of cores and each + * partition is run independently. In this scenario, a ctxt set will alone + * appear to run multiple frames at a time. For this to occur, the common + * pointers across the proc ctxt should disappear. + * + * This is done by allocating MAX_PROCESS_THREADS memory and distributing + * across individual ctxts when byte bnd per slice is enabled. 
+ */ + enc_pu_t *ps_top_row_pu_base; + + /** + * top row motion vector info + */ + enc_pu_t *ps_top_row_pu; + + enc_pu_t *ps_top_row_pu_ME; + + /** + * coded block pattern + */ + UWORD32 u4_cbp; + + /** + * csbp + */ + UWORD32 u4_csbp; + + /** + * number of non zero coeffs + */ + UWORD32 au4_nnz[5]; + + /** + * number of non zero coeffs for intra 4x4 when rdopt is on + */ + UWORD32 au4_nnz_intra_4x4[4]; + + /** + * frame qp & mb qp + */ + UWORD32 u4_frame_qp, u4_mb_qp; + + /** + * mb qp previous + */ + UWORD32 u4_mb_qp_prev; + + /** + * quantization parameters for luma & chroma planes + */ + quant_params_t *ps_qp_params[3]; + + /** + * Pointer frame level mb subblock coeff data + */ + void *pv_pic_mb_coeff_data; + + /** + * Pointer to mb subblock coeff data and number of subblocks and scan idx + * Incremented each time a coded subblock is processed + */ + void *pv_mb_coeff_data; + + /** + * Pointer frame level mb header data + */ + void *pv_pic_mb_header_data; + + /** + * Pointer to mb header data and + * incremented each time a coded mb is encoded + */ + void *pv_mb_header_data; + + /** + * Signal that pic_init is called first time + */ + WORD32 i4_first_pic_init; + + /** + * Current MV Bank's buffer ID + */ + WORD32 i4_cur_mv_bank_buf_id; + + /** + * Void pointer to job context + */ + void *pv_proc_jobq, *pv_entropy_jobq; + + /** + * Number of MBs to be processed in the current Job + */ + WORD32 i4_mb_cnt; + + /** + * ID for the current context - Used for debugging + */ + WORD32 i4_id; + + /** + * Pointer to current picture buffer structure + */ + pic_buf_t *ps_cur_pic; + + /** + * Pointer to current picture's mv buffer structure + */ + mv_buf_t *ps_cur_mv_buf; + + /** + * Flag to indicate if ps_proc was initialized at least once in a frame. 
+ * This is needed to handle cases where a core starts to handle format + * conversion jobs directly + */ + WORD32 i4_init_done; + + /** + * Process status: one byte per MB + */ + UWORD8 *pu1_proc_map; + + /** + * Deblk status: one byte per MB + */ + UWORD8 *pu1_deblk_map; + + /** + * Process status: one byte per MB + */ + UWORD8 *pu1_me_map; + + /* + * Intra refresh mask. + * Indicates if an Mb is coded in intra mode within the current AIR interval + * NOTE Refreshes after each AIR period + * NOTE The map is shared between process + */ + UWORD8 *pu1_is_intra_coded; + + /** + * Disable deblock level (0: Enable completely, 3: Disable completely + */ + UWORD32 u4_disable_deblock_level; + + /** + * Pointer to the structure that contains deblock context + */ + deblk_ctxt_t s_deblk_ctxt; + + /** + * Points to the array of slice indices which is used to identify the independent + * slice to which each MB in a frame belongs. + */ + UWORD8 *pu1_slice_idx; + + /** + * Pointer to base of slice header structure array + */ + slice_header_t *ps_slice_hdr_base; + + /** + * Number of mb's to process in one loop + */ + WORD32 i4_nmb_ntrpy; + + /** + * Number of mb's to process in one loop + */ + UWORD32 u4_nmb_me; + + /** + * Structure for current input buffer + */ + inp_buf_t s_inp_buf; + + /** + * api call cnt + */ + WORD32 i4_encode_api_call_cnt; + + /** + * Current Picture count - used for synchronization + */ + WORD32 i4_pic_cnt; + + /** + * Intermediate buffer for interpred leaf level functions + */ + WORD32 ai16_pred1[HP_BUFF_WD * HP_BUFF_HT]; + + /** + * Reference picture for the current picture + * TODO: Only 1 reference assumed currently + */ + pic_buf_t *ps_ref_pic; + + /** + * frame info used by RC + */ + frame_info_t s_frame_info; + + /* + * NOTE NOT PERSISTANT INSIDE FUNCTIONS + * Min sad for current MB + * will be populated initially + * Once a sad less than eq to u4_min_sad is reached, the value will be copied to the cariable + */ + UWORD32 u4_min_sad; + + /* + * 
indicates weather we have rached minimum sa or not + */ + UWORD32 u4_min_sad_reached; + + /** + * Current error code + */ + WORD32 i4_error_code; + + /* + * Enables or disables computation of recon + */ + UWORD32 u4_compute_recon; + + /* + * Buffer for holding half_x (1/2,1 - interpolated) + * values when halfpel generation + * for the entire plane is not enabled + */ + UWORD8 *pu1_half_x; + + /* + * Buffer for holding half_x (1,1/2 - interpolated) + * values when halfpel generation + * for the entire plane is not enabled + */ + UWORD8 *pu1_half_y; + + /* + * Buffer for holding half_x (1/2,1/2 - interpolated) + * values when halfpel generation + * for the entire plane is not enabled + * + */ + UWORD8 *pu1_half_xy; + + /* + * Buffer holding best sub pel values + */ + UWORD8 *pu1_best_subpel_buf; + + /* + * Stride for buffer holding best sub pel + */ + UWORD32 u4_bst_spel_buf_strd; + +} process_ctxt_t; + +/** + ****************************************************************************** + * @brief Rate control related variables + ****************************************************************************** + */ +typedef struct +{ + void *pps_rate_control_api; + + void *pps_frame_time; + + void *pps_time_stamp; + + void *pps_pd_frm_rate; + + /** + * frame rate pull down + */ + WORD32 pre_encode_skip[MAX_CTXT_SETS]; + + /** + * skip frame (cbr) + */ + WORD32 post_encode_skip[MAX_CTXT_SETS]; + + /** + * rate control type + */ + rc_type_e e_rc_type; + + /** + * pic type + */ + picture_type_e e_pic_type; + + /** + * intra cnt in previous frame + */ + WORD32 num_intra_in_prev_frame; + + /** + * avg activity of prev frame + */ + WORD32 i4_avg_activity; + +}rate_control_ctxt_t; + +/** + * Codec context + */ +struct _codec_t +{ + /** + * Number of coded pictures + */ + WORD32 i4_coded_pic_cnt; + + /** + * Number of encode frame API calls made + */ + WORD32 i4_encode_api_call_cnt; + + /** + * Number of pictures encoded + */ + WORD32 i4_pic_cnt; + + /** + * Number of threads
created + */ + WORD32 i4_proc_thread_cnt; + + /** + * Mutex used to keep the control calls thread-safe + */ + void *pv_ctl_mutex; + + /** + * Current active config parameters + */ + cfg_params_t s_cfg; + + /** + * Array containing the config parameter sets + */ + cfg_params_t as_cfg[MAX_ACTIVE_CONFIG_PARAMS]; + + /** + * Color format used by encoder internally + */ + IV_COLOR_FORMAT_T e_codec_color_format; + + /** + * source stride + * (strides for luma and chroma are the same) + */ + WORD32 i4_src_strd; + + /** + * recon stride + * (strides for luma and chroma are the same) + */ + WORD32 i4_rec_strd; + + /** + * Flag to enable/disable deblocking of a frame + */ + WORD32 i4_disable_deblk_pic; + + /** + * Number of continuous frames where deblocking was disabled + */ + WORD32 i4_disable_deblk_pic_cnt; + + /** + * frame type + */ + PIC_TYPE_T pic_type; + + /** + * frame qp + */ + UWORD32 u4_frame_qp; + + /** + * frame num + */ + WORD32 i4_frame_num; + + /** + * slice_type + */ + WORD32 i4_slice_type; + + /* + * Force current frame to specific type + */ + IV_PICTURE_CODING_TYPE_T force_curr_frame_type; + + /** + * IDR pic + */ + UWORD32 u4_is_idr; + + /** + * idr_pic_id + */ + WORD32 i4_idr_pic_id; + + /** + * Flush mode + */ + WORD32 i4_flush_mode; + + /** + * Encode header mode + */ + WORD32 i4_header_mode; + + /** + * Flag to indicate if header has already + * been generated when the api call count (presumably i4_encode_api_call_cnt) is 0 + */ + UWORD32 u4_header_generated; + + /** + * Encode generate header + */ + WORD32 i4_gen_header; + + /** + * To signal successful completion of init + */ + WORD32 i4_init_done; + + /** + * To signal that at least one picture has been encoded (NOTE(review): comment said 'decoded' - confirm) + */ + WORD32 i4_first_pic_done; + + /** + * Reset flag - Codec is reset if this flag is set + */ + WORD32 i4_reset_flag; + + /** + * Current error code + */ + WORD32 i4_error_code; + + /** + * threshold residue (NOTE: declared as signed WORD32 despite the u4_ prefix) + */ + WORD32 u4_thres_resi; + + /** + * disable intra inter gating + */ + UWORD32 u4_inter_gate; + + /** + * Holds mem
records passed during init. + * This will be used to return the mem records during retrieve call + */ + iv_mem_rec_t *ps_mem_rec_backup; + + /** + * Flag to determine if the entropy thread is active + */ + volatile UWORD32 au4_entropy_thread_active[MAX_CTXT_SETS]; + + /** + * Mutex used to keep the entropy calls thread-safe + */ + void *pv_entropy_mutex; + + /** + * Job queue buffer base + */ + void *pv_proc_jobq_buf, *pv_entropy_jobq_buf; + + /** + * Job Queue mem tab size + */ + WORD32 i4_proc_jobq_buf_size, i4_entropy_jobq_buf_size; + + /** + * Memory for MV Bank buffer manager + */ + void *pv_mv_buf_mgr_base; + + /** + * MV Bank buffer manager + */ + void *pv_mv_buf_mgr; + + /** + * Pointer to MV Buf structure array + */ + void *ps_mv_buf; + + /** + * Base address for Motion Vector bank buffer + */ + void *pv_mv_bank_buf_base; + + /** + * MV Bank size allocated + */ + WORD32 i4_total_mv_bank_size; + + /** + * Memory for Picture buffer manager for reference pictures + */ + void *pv_ref_buf_mgr_base; + + /** + * Picture buffer manager for reference pictures + */ + void *pv_ref_buf_mgr; + + /** + * Number of reference buffers added to the buffer manager + */ + WORD32 i4_ref_buf_cnt; + + /** + * Pointer to Pic Buf structure array + */ + void *ps_pic_buf; + + /** + * Base address for Picture buffer + */ + void *pv_pic_buf_base; + + /** + * Total pic buffer size allocated + */ + WORD32 i4_total_pic_buf_size; + + /** + * Memory for Buffer manager for output buffers + */ + void *pv_out_buf_mgr_base; + + /** + * Buffer manager for output buffers + */ + void *pv_out_buf_mgr; + + /** + * Current output buffer's buffer ID + */ + WORD32 i4_out_buf_id; + + /** + * Number of output buffers added to the buffer manager + */ + WORD32 i4_out_buf_cnt; + + /** + * Memory for Picture buffer manager for input buffers + */ + void *pv_inp_buf_mgr_base; + + /** + * Picture buffer manager for input buffers + */ + void *pv_inp_buf_mgr; + + /** + * Current input buffer's buffer ID + */ +
WORD32 i4_inp_buf_id; + + /** + * Number of input buffers added to the buffer manager + */ + WORD32 i4_inp_buf_cnt; + + /** + * Current input buffer + */ + pic_buf_t *ps_inp_buf; + + /** + * Pointer to dpb manager structure + */ + void *pv_dpb_mgr; + + /** + * Pointer to base of Sequence parameter set structure array + */ + sps_t *ps_sps_base; + + /** + * Pointer to base of Picture parameter set structure array + */ + pps_t *ps_pps_base; + + /** + * seq_parameter_set_id + */ + WORD32 i4_sps_id; + + /** + * pic_parameter_set_id + */ + WORD32 i4_pps_id; + + /** + * Pointer to base of slice header structure array + */ + slice_header_t *ps_slice_hdr_base; + + /** + * packed residue coeff data size for 1 row of mbs + */ + UWORD32 u4_size_coeff_data; + + /** + * packed header data size for 1 row of mbs + */ + UWORD32 u4_size_header_data; + + /** + * Processing context - One for each processing thread + * Create two sets, each set used for alternate frames + */ + process_ctxt_t as_process[MAX_PROCESS_CTXT]; + + /** + * Thread handle for each of the processing threads + */ + void *apv_proc_thread_handle[MAX_PROCESS_THREADS]; + + /** + * Thread created flag for each of the processing threads + */ + WORD32 ai4_process_thread_created[MAX_PROCESS_THREADS]; + + /** + * Void pointer to process job context + */ + void *pv_proc_jobq, *pv_entropy_jobq; + + /** + * Number of MBs processed together for better instruction cache handling + */ + WORD32 i4_proc_nmb; + + /** + * Previous POC lsb + */ + WORD32 i4_prev_poc_lsb; + + /** + * Previous POC msb + */ + WORD32 i4_prev_poc_msb; + + /** + * Max POC lsb that has arrived till now + */ + WORD32 i4_max_prev_poc_lsb; + + /** + * Context for format conversion + */ + fmt_conv_t s_fmt_conv; + + /** + * Absolute pic order count + */ + WORD32 i4_abs_pic_order_cnt; + + /** + * Pic order count of lsb + */ + WORD32 i4_pic_order_cnt_lsb; + + /** + * Array giving current picture being processed in each context set + */ + WORD32
ai4_pic_cnt[MAX_CTXT_SETS]; + + /* + * Min sad to search for + */ + UWORD32 u4_min_sad; + + /** + * Reference picture set + */ + ref_set_t as_ref_set[MAX_DPB_SIZE + MAX_CTXT_SETS]; + + /* + * Air pic cnt + * Contains the number of pictures that have been encoded with air + * This value is modulo air refresh period + */ + WORD32 i4_air_pic_cnt; + + /* + * Intra refresh map + * Stores the frames at which intra refresh should occur for a MB + */ + UWORD16 *pu2_intr_rfrsh_map; + + /* + * Alternate reference frames + * Indicates if the current frame is used as a reference frame + */ + UWORD32 u4_is_curr_frm_ref; + + /* + * Memory for color space conversion for luma plane + */ + UWORD8 *pu1_y_csc_buf_base; + + /* + * Memory for color space conversion for chroma plane + */ + UWORD8 *pu1_uv_csc_buf_base; + + /** + * Function pointers for intra pred leaf level functions luma + */ + pf_intra_pred apf_intra_pred_16_l[MAX_I16x16]; + pf_intra_pred apf_intra_pred_8_l[MAX_I8x8]; + pf_intra_pred apf_intra_pred_4_l[MAX_I4x4]; + + /** + * Function pointers for intra pred leaf level functions chroma + */ + pf_intra_pred apf_intra_pred_c[MAX_CH_I8x8]; + + /** + * luma core coding function pointer + */ + UWORD8 (*luma_energy_compaction[4])(process_ctxt_t *ps_proc); + + /** + * chroma core coding function pointer + */ + UWORD8 (*chroma_energy_compaction[2])(process_ctxt_t *ps_proc); + + /** + * forward transform for intra blk of mb type 16x16 + */ + ih264_luma_16x16_resi_trans_dctrans_quant_ft *pf_resi_trans_dctrans_quant_16x16; + + /** + * inverse transform for intra blk of mb type 16x16 + */ + ih264_luma_16x16_idctrans_iquant_itrans_recon_ft *pf_idctrans_iquant_itrans_recon_16x16; + + /** + * forward transform for 4x4 blk luma + */ + ih264_resi_trans_quant_ft *pf_resi_trans_quant_4x4; + + /** + * forward transform for 4x4 blk chroma + */ + ih264_resi_trans_quant_ft *pf_resi_trans_quant_chroma_4x4; + + /* + * hadamard transform and quant for a 4x4 block + */ + ih264_hadamard_quant_ft
*pf_hadamard_quant_4x4; + + /* + * hadamard transform and quant for a 2x2 chroma (uv) block + */ + ih264_hadamard_quant_ft *pf_hadamard_quant_2x2_uv; + + /** + * inverse transform for 4x4 blk + */ + ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_4x4; + + /** + * inverse transform for chroma 4x4 blk + */ + ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4; + + /** + * inverse transform for 4x4 blk with only single dc coeff + */ + ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_4x4_dc; + + /** + * inverse transform for chroma 4x4 blk with only single dc coeff + */ + ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4_dc; + + /* + * Inverse hadamard transform and iquant for a 4x4 block + */ + ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_4x4; + + /* + * Inverse hadamard transform and iquant for a 2x2 chroma (uv) block + */ + ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_2x2_uv; + + /* + * Function for interleave copy + */ + ih264_interleave_copy_ft *pf_interleave_copy; + + /** + * forward transform for 8x8 blk + */ + ih264_resi_trans_quant_ft *pf_resi_trans_quant_8x8; + + /** + * inverse transform for 8x8 blk + */ + /** + * (iquant + itrans + recon of an 8x8 blk) + */ + ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_8x8; + + /** + * forward transform for chroma MB + */ + ih264_chroma_8x8_resi_trans_dctrans_quant_ft *pf_resi_trans_dctrans_quant_8x8_chroma; + + /** + * inverse transform for chroma MB + */ + ih264_idctrans_iquant_itrans_recon_ft *pf_idctrans_iquant_itrans_recon_8x8_chroma; + + /** + * deblock vertical luma edge with blocking strength 4 + */ + ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4; + + /** + * deblock vertical chroma edge with blocking strength 4 + */ + ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4; + + /** + * deblock vertical luma edge with blocking strength less than 4 + */ + ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4; + + /** + * deblock vertical chroma edge with blocking strength less than 4
+ */ + ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4; + + /** + * deblock horizontal luma edge with blocking strength 4 + */ + ih264_deblk_edge_bs4_ft *pf_deblk_luma_horz_bs4; + + /** + * deblock horizontal chroma edge with blocking strength 4 + */ + ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_horz_bs4; + + /** + * deblock horizontal luma edge with blocking strength less than 4 + */ + ih264_deblk_edge_bslt4_ft *pf_deblk_luma_horz_bslt4; + + /** + * deblock horizontal chroma edge with blocking strength less than 4 + */ + ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_horz_bslt4; + + + /** + * functions for padding + */ + pf_pad pf_pad_top; + pf_pad pf_pad_bottom; + pf_pad pf_pad_left_luma; + pf_pad pf_pad_left_chroma; + pf_pad pf_pad_right_luma; + pf_pad pf_pad_right_chroma; + + /** + * Inter pred leaf level functions + */ + ih264_inter_pred_luma_ft *pf_inter_pred_luma_copy; + ih264_inter_pred_luma_ft *pf_inter_pred_luma_horz; + ih264_inter_pred_luma_ft *pf_inter_pred_luma_vert; + pf_inter_pred_luma_bilinear pf_inter_pred_luma_bilinear; + ih264_inter_pred_chroma_ft *pf_inter_pred_chroma; + + /** + * fn ptrs for compute sad routines + */ + ime_compute_sad_ft *apf_compute_sad_16x16[2]; + ime_compute_sad_ft *pf_compute_sad_16x8; + + /** + * fn ptrs for memory handling operations + */ + pf_memcpy pf_mem_cpy; + pf_memset pf_mem_set; + pf_memcpy_mul8 pf_mem_cpy_mul8; + pf_memset_mul8 pf_mem_set_mul8; + + /** + * intra mode eval -encoder level function + */ + pf_evaluate_intra_modes pf_ih264e_evaluate_intra16x16_modes; + pf_evaluate_intra_modes pf_ih264e_evaluate_intra_chroma_modes; + pf_evaluate_intra_4x4_modes pf_ih264e_evaluate_intra_4x4_modes; + + /* Half pel generation function - encoder level + * + */ + pf_sixtapfilter_horz pf_ih264e_sixtapfilter_horz; + pf_sixtap_filter_2dvh_vert pf_ih264e_sixtap_filter_2dvh_vert; + + /** + * color space conversion from YUV 420P to YUV 420Sp + */ + pf_fmt_conv_420p_to_420sp pf_ih264e_conv_420p_to_420sp; + + +
/** + * color space conversion from YUV 422 interleaved to YUV 420Sp + */ + pf_fmt_conv_422ile_to_420sp pf_ih264e_fmt_conv_422i_to_420sp; + + /** + * write mb layer for a given slice I, P, B + */ + IH264E_ERROR_T (*pf_write_mb_syntax_layer[3]) ( entropy_ctxt_t *ps_ent_ctxt ); + + + /** + * Output buffer + */ + out_buf_t as_out_buf[MAX_CTXT_SETS]; + + /** + * recon buffer + */ + rec_buf_t as_rec_buf[MAX_CTXT_SETS]; + + /** + * rate control context + */ + rate_control_ctxt_t s_rate_control; +}; +#endif /* IH264E_STRUCTS_H_ */ diff --git a/encoder/ih264e_time_stamp.c b/encoder/ih264e_time_stamp.c new file mode 100755 index 0000000..a6a7f3c --- /dev/null +++ b/encoder/ih264e_time_stamp.c @@ -0,0 +1,748 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_time_stamp.c +* +* @brief +* This file contains functions used for source and target time stamp management +* +* @author +* ittiam +* +* @par List of Functions: +* - gcd() +* - ih264e_get_range() +* - ih264e_frame_time_get_init_free_memtab() +* - ih264e_init_frame_time() +* - ih264e_should_src_be_skipped() +* - ih264e_time_stamp_get_init_free_memtab() +* - ih264e_init_time_stamp() +* - ih264e_update_time_stamp() +* - ih264e_frame_time_get_src_frame_rate() +* - ih264e_frame_time_get_tgt_frame_rate() +* - ih264e_frame_time_get_src_ticks() +* - ih264e_frame_time_get_tgt_ticks() +* - ih264e_frame_time_get_src_time() +* - ih264e_frame_time_get_tgt_time() +* - ih264e_frame_time_update_src_frame_rate() +* - ih264e_frame_time_update_tgt_frame_rate() +* - ih264_time_stamp_update_frame_rate() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* user include files */ +#include "irc_datatypes.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ih264_defs.h" +#include "ih264e_defs.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264e_structs.h" +#include "ih264e_rc_mem_interface.h" +#include "ih264e_time_stamp.h" +#include "irc_rate_control_api.h" + + 
+/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief Function to compute gcd of two numbers +* +* @par Description +* Function to compute gcd of two numbers +* +* @param[in] i4_x +* value 1 +* +* @param[in] i4_y +* value 2 +* +* @returns +* GCD(value 1, value 2) +* +* @remarks none +* +******************************************************************************* +*/ +static WORD32 gcd(WORD32 i4_x, WORD32 i4_y) +{ + if (i4_x > i4_y) + { + i4_x = i4_y + i4_x; + i4_y = i4_x - i4_y; + i4_x = i4_x - i4_y; + } + while (i4_y != 0) + { + WORD32 temp; + i4_x = i4_x % i4_y; + temp = i4_x; + i4_x = i4_y; + i4_y = temp; + } + return (i4_x); +} + +/** +******************************************************************************* +* +* @brief Function to determine number of bits required to represent a given +* value +* +* @par Description +* This function determines the number of bits required to represent the given +* value. It is used to find out number of bits to read when the data size is +* not fixed (e.g. vop_time_increment_resolution). 
+* +* @param[in] u4_value +* Value for which the number of bits required to represent is to be determined +* +* @param[in] u1_no_of_bits +* Represents the value's word type = 8/16/32 +* +* @returns +* The number of bits required to represent the given number +* +* @remarks none +* +******************************************************************************* +*/ +static UWORD8 ih264e_get_range(UWORD32 u4_value, UWORD8 u1_no_of_bits) +{ + UWORD8 count; + UWORD32 temp; + + if (u4_value > (UWORD32) ((1 << (u1_no_of_bits >> 1)) - 1)) + { + temp = (1 << (u1_no_of_bits - 1)); + for (count = 0; count < (u1_no_of_bits >> 1); count++) + { + if ((temp & u4_value) != 0) + { + return (UWORD8) (u1_no_of_bits - count); + } + else + { + temp >>= 1; + } + } + return 0; + } + else + { + temp = (1 << ((u1_no_of_bits >> 1) - 1)); + for (count = 0; count < ((u1_no_of_bits >> 1) - 1); count++) + { + if ((temp & u4_value) != 0) + { + return (UWORD8) ((u1_no_of_bits >> 1) - count); + } + else + { + temp >>= 1; + } + } + return 1; + } +} + +/** +******************************************************************************* +* +* @brief +* Function to init frame time memtabs +* +* @par Description +* Function to init frame time memtabs +* +* @param[in] pps_frame_time +* Pointer to frame time contexts +* +* @param[in] ps_memtab +* Pointer to memtab +* +* @param[in] e_func_type +* Function type (get memtabs/init memtabs) +* +* @returns +* none +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_init_free_memtab(frame_time_handle *pps_frame_time, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type) +{ + WORD32 i4_mem_tab_idx = 0; + static frame_time_t s_temp_frame_time_t; + + /* Hack for al alloc, during which we dont have any state memory. 
+ Dereferencing can cause issues */ + if (e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB) + (*pps_frame_time) = &s_temp_frame_time_t; + + /* for src rate control state structure */ + if (e_func_type != GET_NUM_MEMTAB) + { + fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(frame_time_t), + ALIGN_128_BYTE, PERSISTENT, DDR); + use_or_fill_base(&ps_memtab[0], (void**) pps_frame_time, e_func_type); + } + i4_mem_tab_idx++; + + return (i4_mem_tab_idx); +} + +/** +******************************************************************************* +* +* @brief +* Function to init frame time context +* +* @par Description +* Frame time structure stores the time of the source and the target frames to +* be encoded. Based on the time we decide whether or not to encode the source +* frame +* +* @param[in] ps_frame_time +* Pointer Frame time context +* +* @param[in] u4_src_frm_rate +* Source frame rate +* +* @param[in] u4_tgt_frm_rate +* Target frame rate +* +* @returns +* none +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_init_frame_time(frame_time_t *ps_frame_time, + UWORD32 u4_src_frm_rate, + UWORD32 u4_tgt_frm_rate) +{ + /* Initialise the common time base based on which the source and target + * frame times increase */ + WORD32 i4_gcd = gcd(u4_src_frm_rate, u4_tgt_frm_rate); + + ps_frame_time->common_time_base = (u4_src_frm_rate * u4_tgt_frm_rate) + / i4_gcd; + + /* The source and target increment per vop is initialized */ + ps_frame_time->u4_src_frm_time_incr = ps_frame_time->common_time_base + / u4_src_frm_rate; + ps_frame_time->u4_tgt_frm_time_incr = ps_frame_time->common_time_base + / u4_tgt_frm_rate; + + /* Initialise the source and target times to 0 (RESET) */ + ps_frame_time->u4_src_frm_time = 0; + ps_frame_time->u4_tgt_frm_time = 0; + + /* Initialize the number of frms not to be skipped to 0 */ + ps_frame_time->u4_num_frms_dont_skip = 0; +} + +/** 
+******************************************************************************* +* +* @brief +* Function to check if frame can be skipped +* +* @par Description +* Based on the source and target frame time and the delta time stamp +* we decide whether to code the source or not. +* This is based on the assumption +* that the source frame rate is greater that target frame rate. +* Updates the time_stamp structure +* +* @param[in] ps_frame_time +* Handle to frame time context +* +* @param[in] u4_delta_time_stamp +* Time stamp difference between frames +* +* @param[out] pu4_frm_not_skipped_for_dts +* Flag to indicate if frame is already skipped by application +* +* @returns +* Flag to skip frame +* +* @remarks +* +******************************************************************************* +*/ +UWORD8 ih264e_should_src_be_skipped(frame_time_t *ps_frame_time, + UWORD32 u4_delta_time_stamp, + UWORD32 *pu4_frm_not_skipped_for_dts) +{ + UWORD8 skip_src = 0; + + if (ps_frame_time->u4_tgt_frm_time > ps_frame_time->u4_src_frm_time && + ps_frame_time->u4_tgt_frm_time >= (ps_frame_time->u4_src_frm_time + + ps_frame_time->u4_src_frm_time_incr)) + { + skip_src = 1; + } + + /* source time gets updated every frame */ + ps_frame_time->u4_src_frm_time += ps_frame_time->u4_src_frm_time_incr; + + /* target time gets updated only when the source is coded */ + if (!skip_src) + { + ps_frame_time->u4_tgt_frm_time += ps_frame_time->u4_tgt_frm_time_incr; + } + + /* If the source and target frame times get incremented properly + both should be equal to the common time base at the same time. 
If + that happens we reset the time to zero*/ + if (( ps_frame_time->common_time_base ==(WORD32)ps_frame_time->u4_src_frm_time) + && (ps_frame_time->common_time_base ==(WORD32) ps_frame_time->u4_tgt_frm_time )) + { + ps_frame_time->u4_src_frm_time = 0; + ps_frame_time->u4_tgt_frm_time = 0; + } + + /* This keeps a count of how many frames need not be skipped in order + to take care of the delta time stamp */ + ps_frame_time->u4_num_frms_dont_skip += (u4_delta_time_stamp - 1); + + /** If this frame is to be skipped in order to maintain the tgt_frm_rate + check if already a frame has been skipped by the application. + In that case, do not skip this frame **/ + if (ps_frame_time->u4_num_frms_dont_skip && skip_src) + { + skip_src = 0; + *pu4_frm_not_skipped_for_dts = 1; + ps_frame_time->u4_num_frms_dont_skip -= 1; + } + else + { + pu4_frm_not_skipped_for_dts[0] = 0; + } + + return (skip_src); +} + +/** +******************************************************************************* +* +* @brief +* Function to inititialize time stamp memtabs +* +* @par Description +* Function to initialize time stamp memtabs +* +* @param[in] pps_time_stamp +* Pointer to time stamp context +* +* @param[in] ps_memtab +* Pointer to memtab +* +* @param[in] e_func_type +* Funcion type (Get memtab/ init memtab) +* +* @returns +* number of memtabs used +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_time_stamp_get_init_free_memtab(time_stamp_handle *pps_time_stamp, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type) +{ + WORD32 i4_mem_tab_idx = 0; + static time_stamp_t s_temp_time_stamp_t; + + /* Hack for al alloc, during which we dont have any state memory. 
+ Dereferencing can cause issues */ + if (e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB) + (*pps_time_stamp) = &s_temp_time_stamp_t; + + /* for src rate control state structure */ + if (e_func_type != GET_NUM_MEMTAB) + { + fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(time_stamp_t), + ALIGN_128_BYTE, PERSISTENT, DDR); + use_or_fill_base(&ps_memtab[0], (void**) pps_time_stamp, e_func_type); + } + i4_mem_tab_idx++; + + return (i4_mem_tab_idx); +} + +/** +******************************************************************************* +* +* @brief +* Function to initialize time stamp context +* +* @par Description +* Time stamp structure stores the time stamp data that +* needs to be sent in to the header of MPEG4. Based on the +* max target frame rate the vop_time increment resolution is set +* so as to support all the frame rates below max frame rate. +* A support till the third decimal point is assumed. +* +* @param[in] ps_time_stamp +* Pointer to time stamp structure +* +* @param[in] u4_max_frm_rate +* Maximum frame rate +* +* @param[in] u4_src_frm_rate +* Source frame rate +* +* @returns +* none +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_init_time_stamp(time_stamp_t *ps_time_stamp, + UWORD32 u4_max_frm_rate, + UWORD32 u4_src_frm_rate) +{ + /* We expect the max frame rate to be less than 60000, + * if not we divide it by zero and work with it */ + if (u4_max_frm_rate > 60000) + { + u4_max_frm_rate >>= 1; + ps_time_stamp->is_max_frame_rate_scaled = 1; + } + else + { + ps_time_stamp->is_max_frame_rate_scaled = 0; + } + + ps_time_stamp->u4_vop_time_incr_res = u4_max_frm_rate; + ps_time_stamp->u4_vop_time_incr_range = ih264e_get_range(u4_max_frm_rate, 32); + ps_time_stamp->u4_vop_time_incr = (ps_time_stamp->u4_vop_time_incr_res * 1000) / u4_src_frm_rate;/* Since frm rate is in millisec */ + ps_time_stamp->u4_vop_time = 0; + ps_time_stamp->u4_cur_tgt_vop_time = 0; + 
ps_time_stamp->u4_prev_tgt_vop_time = 0; +} + +/** +******************************************************************************* +* +* @brief Function to update time stamp context +* +* @par Description +* Vop time is incremented by the increment value. When vop time reaches +* or exceeds the vop time resolution, it is reduced by the vop time +* resolution (this function does not set any modulo time base) so that the +* excess value stays in vop time and accumulates over time, +* so that the corresponding frame rate is achieved at an average of +* 1000 seconds +* +* @param[in,out] ps_time_stamp +* Pointer to time stamp structure +* +* @returns +* none +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_update_time_stamp(time_stamp_t *ps_time_stamp) +{ + /* Since get time stamp is called after the update, + a copy of the pre-update vop time is stored */ + ps_time_stamp->u4_cur_tgt_vop_time = ps_time_stamp->u4_vop_time; + + ps_time_stamp->u4_vop_time += ps_time_stamp->u4_vop_time_incr; + if (ps_time_stamp->u4_vop_time >= ps_time_stamp->u4_vop_time_incr_res) + { + ps_time_stamp->u4_vop_time -= ps_time_stamp->u4_vop_time_incr_res; + } +} + +/**************************************************************************** + Run-Time Modifying functions +****************************************************************************/ + +/** +******************************************************************************* +* +* @brief Function to get source frame rate +* +* @par Description +* Returns common_time_base divided by the source frame time increment +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @returns +* source frame rate +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_src_frame_rate(frame_time_t *ps_frame_time) +{ + return (ps_frame_time->common_time_base / ps_frame_time->u4_src_frm_time_incr); +} + +/**
+******************************************************************************* +* +* @brief Function to get target frame rate +* +* @par Description +* Function to get target frame rate +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @returns +* target frame rate +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_tgt_frame_rate(frame_time_t *ps_frame_time) +{ + return (ps_frame_time->common_time_base / ps_frame_time->u4_tgt_frm_time_incr); +} + +/** +******************************************************************************* +* +* @brief Function to get source time increment +* +* @par Description +* Function to get source time increment +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @returns +* source time increment +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_src_ticks(frame_time_t *ps_frame_time) +{ + return (ps_frame_time->u4_src_frm_time_incr); +} + +/** +******************************************************************************* +* +* @brief Function to get target time increment +* +* @par Description +* Function to get target time increment +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @returns +* target time increment +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_tgt_ticks(frame_time_t *ps_frame_time) +{ + return (ps_frame_time->u4_tgt_frm_time_incr); +} + +/** +******************************************************************************* +* +* @brief Function to get src frame time +* +* @par Description +* Function to get src frame time +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @returns +* src frame time +* +* @remarks +* 
+******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_src_time(frame_time_t *frame_time) +{ + return (frame_time->u4_src_frm_time); +} + +/** +******************************************************************************* +* +* @brief Function to get tgt frame time +* +* @par Description +* Function to get tgt frame time +* +* @param[in] frame_time +* Pointer to frame time context +* +* @returns +* tgt frame time +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_tgt_time(frame_time_t *frame_time) +{ + return (frame_time->u4_tgt_frm_time); +} + +/** +******************************************************************************* +* +* @brief Function to update source frame time with a new source frame rate +* +* @par Description +* Derives the current target frame rate from common_time_base and re-initialises the context with the new source frame rate +* +* @param[in,out] ps_frame_time +* Pointer to frame time context +* +* @param[in] src_frm_rate +* source frame rate +* +* @returns +* None +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_frame_time_update_src_frame_rate(frame_time_t *ps_frame_time, + WORD32 src_frm_rate) +{ + /* Since tgt frame rate does not change deriving the tgt_frm rate from + * common_time_base */ + WORD32 tgt_frm_rate = ps_frame_time->common_time_base / ps_frame_time->u4_tgt_frm_time_incr; + + /* Re-initialise frame_time based on the new src_frame_rate and + * old tgt_frame_rate */ + ih264e_init_frame_time(ps_frame_time, src_frm_rate, tgt_frm_rate); +} + +/** +******************************************************************************* +* +* @brief Function to update target frame time with a new target frame rate +* +* @par Description +* Derives the current source frame rate from common_time_base and re-initialises the context with the new target frame rate +* +* @param[in,out] ps_frame_time +* Pointer to frame time context +* +* @param[in] 
tgt_frm_rate +* target frame rate +* +* @returns +* None +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_frame_time_update_tgt_frame_rate(frame_time_t *ps_frame_time, + WORD32 tgt_frm_rate) +{ + /* Since src frame rate does not change deriving the src_frm rate from + * common_time_base */ + WORD32 src_frm_rate = ps_frame_time->common_time_base / ps_frame_time->u4_src_frm_time_incr; + + /* Re-initialise frame_time based on the new tgt_frame_rate and + * old src_frame_rate */ + ih264e_init_frame_time(ps_frame_time, src_frm_rate, tgt_frm_rate); +} + +/** +******************************************************************************* +* +* @brief Function to update the time stamp increment with a new source frame rate +* +* @par Description +* When the frame rate changes the time increment is modified by appropriate ticks +* +* @param[in,out] ps_time_stamp +* Pointer to time stamp structure +* +* @param[in] src_frm_rate +* source frame rate (used as a divisor; must be non-zero) +* +* @returns +* None +* +* @remarks +* +******************************************************************************* +*/ +void ih264_time_stamp_update_frame_rate(time_stamp_t *ps_time_stamp, + UWORD32 src_frm_rate) +{ + ps_time_stamp->u4_vop_time_incr = (ps_time_stamp->u4_vop_time_incr_res * 1000) / src_frm_rate;/* Since frm rate is in millisec */ +} diff --git a/encoder/ih264e_time_stamp.h b/encoder/ih264e_time_stamp.h new file mode 100755 index 0000000..1ee559d --- /dev/null +++ b/encoder/ih264e_time_stamp.h @@ -0,0 +1,498 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_time_stamp.h +* +* @brief +* This file contains function declarations used for managing input and output +* frame time stamps +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264E_TIME_STAMP_H_ +#define IH264E_TIME_STAMP_H_ + +/*****************************************************************************/ +/* Structures */ +/*****************************************************************************/ + +/** + * Parameters for Src/Tgt frames that are encoded + */ +typedef struct frame_time_t +{ + /* common time base(=LCM) between source and target frame rate (in ticks)*/ + WORD32 common_time_base; + + /* number of ticks between two source frames */ + UWORD32 u4_src_frm_time_incr; + + /* number of ticks between two target frames */ + UWORD32 u4_tgt_frm_time_incr; + + /* Source frame time - measured as modulo of common time base + and incremented by src_frm_time_incr */ + UWORD32 u4_src_frm_time; + + /* Target frame time - measured as modulo of common time base + and incremented by tgt_frm_time_incr */ + UWORD32 u4_tgt_frm_time; + + /* Number of frames not to be skipped while maintaining + tgt_frm_rate due to delta_time_stamp */ + UWORD32 u4_num_frms_dont_skip; +}frame_time_t; + 
+typedef struct frame_time_t *frame_time_handle; + +/** + * Parameters that go in the bitstream based on tgt_frm_rate + * 1) Initialize the vop_time_incr_res with the max_frame_rate (presumably frames per 1000 seconds; the original text said "bits" - TODO confirm) + * - To represent all kinds of frame rates + * 2) Decide the vop_time_incr based on the source frame rate + * - The decoder would like to know which source frame is encoded i.e. the source time + * id of the target frame encoded and thereby adjusting its time of delay + * 3) vop_time increments every source frame and whenever a frame is encoded (target frame), + * the encoder queries the vop time of the source frame and sends it in the bit stream. + * 4) Since the Source frame skip logic is taken care by the frame_time module, whenever the + * encoder queries the time stamp module (which gets updated outside the encoder) the + * time stamp module would have the source time + */ +typedef struct time_stamp_t +{ + /*vop_time_incr_res is an integer that indicates + the number of evenly spaced subintervals, called ticks, + within one modulo time. */ + UWORD32 u4_vop_time_incr_res; + + /* number of bits to represent vop_time_incr_res */ + UWORD32 u4_vop_time_incr_range; + + /* The number of ticks elapsed between two source vops */ + UWORD32 u4_vop_time_incr; + + /* incremented by vop_time_incr for every source frame. + Represents the time offset after a modulo_time_base = 1 is sent + in bit stream*/ + UWORD32 u4_vop_time; + + /* Copy of vop time taken before the update, + kept because update is called before query (get time stamp); + hence the extra variables cur_tgt_vop_time and prev_tgt_vop_time */ + UWORD32 u4_cur_tgt_vop_time; + + UWORD32 u4_prev_tgt_vop_time; + + /* This variable is set to 1 if we scale max frame rate by a factor of 2. + For mpeg4 standard, we just have 16bits and we can't accommodate more than 60000 as frame rate.
+ So we scale it and work with it */ + WORD32 is_max_frame_rate_scaled; +} time_stamp_t; + +typedef struct time_stamp_t *time_stamp_handle; + +/*****************************************************************************/ +/* Extern function declarations */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* Function to init frame time context +* +* @par Description +* Frame time structure stores the time of the source and the target frames to +* be encoded. Based on the time we decide whether or not to encode the source +* frame +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @param[in] u4_src_frm_rate +* Source frame rate +* +* @param[in] u4_tgt_frm_rate +* Target frame rate +* +* @returns +* none +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_init_frame_time(frame_time_t *ps_frame_time, + UWORD32 u4_src_frm_rate, + UWORD32 u4_tgt_frm_rate); + +/** +******************************************************************************* +* +* @brief +* Function to check if frame can be skipped +* +* @par Description +* Based on the source and target frame time and the delta time stamp +* we decide whether to code the source or not. +* This is based on the assumption +* that the source frame rate is greater than target frame rate. 
+* Updates the frame time structure +* +* @param[in,out] ps_frame_time +* Handle to frame time context +* +* @param[in] u4_delta_time_stamp +* Time stamp difference between frames +* +* @param[out] pu4_frm_not_skipped_for_dts +* Set to 1 when a frame that would be skipped is kept because of pending delta time stamp credit, else 0 +* +* @returns +* Flag to skip frame +* +* @remarks +* +******************************************************************************* +*/ +UWORD8 ih264e_should_src_be_skipped(frame_time_t *ps_frame_time, + UWORD32 u4_delta_time_stamp, + UWORD32 *pu4_frm_not_skipped_for_dts); + +/** +******************************************************************************* +* +* @brief +* Function to initialize time stamp context +* +* @par Description +* Time stamp structure stores the time stamp data that +* needs to be sent in to the header of MPEG4. Based on the +* max target frame rate the vop_time increment resolution is set +* so as to support all the frame rates below max frame rate. +* A support till the third decimal point is assumed. +* +* @param[out] time_stamp +* Pointer to time stamp structure +* +* @param[in] max_frm_rate +* Maximum frame rate +* +* @param[in] src_frm_rate +* Source frame rate +* +* @returns +* none +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_init_time_stamp(time_stamp_handle time_stamp, + UWORD32 max_frm_rate, + UWORD32 src_frm_rate); + +/** +******************************************************************************* +* +* @brief Function to update time stamp context +* +* @par Description +* Vop time is incremented by increment value. 
When vop time reaches +* or exceeds the vop time resolution, it is reduced by the vop time +* resolution (no modulo time base is set by the function) so that the +* excess value stays in vop time and accumulates over time, +* so that the corresponding frame rate is achieved at an average of +* 1000 seconds +* +* @param[in,out] time_stamp +* Pointer to time stamp structure +* +* @returns +* none +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_update_time_stamp(time_stamp_handle time_stamp); + +/** +******************************************************************************* +* +* @brief +* Function to init frame time memtabs +* +* @par Description +* Function to init frame time memtabs +* +* @param[in] pps_frame_time +* Pointer to frame time contexts +* +* @param[in] ps_memtab +* Pointer to memtab +* +* @param[in] e_func_type +* Function type (get memtabs/init memtabs) +* +* @returns +* WORD32 (presumably the number of memtabs used, as for the time stamp variant - TODO confirm) +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_init_free_memtab(frame_time_handle *pps_frame_time, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type); + +/** +******************************************************************************* +* +* @brief +* Function to initialize time stamp memtabs +* +* @par Description +* Function to initialize time stamp memtabs +* +* @param[in,out] pps_time_stamp +* Pointer to time stamp context +* +* @param[in] ps_memtab +* Pointer to memtab +* +* @param[in] e_func_type +* Function type (Get memtab/ init memtab) +* +* @returns +* number of memtabs used +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_time_stamp_get_init_free_memtab(time_stamp_handle *pps_time_stamp, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type); + +/**************************************************************************** + Run-Time Modifying
functions +****************************************************************************/ +/** +******************************************************************************* +* +* @brief Function to get source frame rate +* +* @par Description +* Function to get source frame rate +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @returns +* source frame rate +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_src_frame_rate(frame_time_t *ps_frame_time); + +/** +******************************************************************************* +* +* @brief Function to get target frame rate +* +* @par Description +* Function to get target frame rate +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @returns +* target frame rate +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_tgt_frame_rate(frame_time_t *ps_frame_time); + +/** +******************************************************************************* +* +* @brief Function to get source time increment +* +* @par Description +* Function to get source time increment +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @returns +* source time increment +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_src_ticks(frame_time_t *ps_frame_time); + +/** +******************************************************************************* +* +* @brief Function to get target time increment +* +* @par Description +* Function to get target time increment +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @returns +* target time increment +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_tgt_ticks(frame_time_t *ps_frame_time); + +/** 
+******************************************************************************* +* +* @brief Function to get src frame time +* +* @par Description +* Function to get src frame time +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @returns +* src frame time +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_src_time(frame_time_t *frame_time); + +/** +******************************************************************************* +* +* @brief Function to get tgt frame time +* +* @par Description +* Function to get tgt frame time +* +* @param[in] frame_time +* Pointer to frame time context +* +* @returns +* tgt frame time +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_frame_time_get_tgt_time(frame_time_t *frame_time); + +/** +******************************************************************************* +* +* @brief Function to update source frame time with a new source frame rate +* +* @par Description +* Function to update source frame time with a new source frame rate +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @param[in] src_frm_rate +* source frame rate +* +* @returns +* None +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_frame_time_update_src_frame_rate(frame_time_t *ps_frame_time, WORD32 src_frm_rate); + +/** +******************************************************************************* +* +* @brief Function to update target frame time with a new target frame rate +* +* @par Description +* Function to update target frame time with a new target frame rate +* +* @param[in] ps_frame_time +* Pointer to frame time context +* +* @param[in] tgt_frm_rate +* target frame rate +* +* @returns +* None +* +* @remarks +* +******************************************************************************* +*/
+void ih264e_frame_time_update_tgt_frame_rate(frame_time_t *ps_frame_time, WORD32 tgt_frm_rate); + +/** +******************************************************************************* +* +* @brief Function to update the time stamp increment with a new source frame rate +* +* @par Description +* When the frame rate changes the time increment is modified by appropriate ticks +* +* @param[in,out] ps_time_stamp +* Pointer to time stamp structure +* +* @param[in] src_frm_rate +* source frame rate +* +* @returns +* None +* +* @remarks +* +******************************************************************************* +*/ +void ih264_time_stamp_update_frame_rate(time_stamp_t *ps_time_stamp, UWORD32 src_frm_rate); + +#endif /*IH264E_TIME_STAMP_H_*/ + diff --git a/encoder/ih264e_trace.h b/encoder/ih264e_trace.h new file mode 100755 index 0000000..8134524 --- /dev/null +++ b/encoder/ih264e_trace.h @@ -0,0 +1,161 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file +* ih264e_trace.h +* +* @brief +* This file contains extern declarations of routines that could be helpful +* for debugging purposes. +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264E_TRACE_H_ +#define IH264E_TRACE_H_ + +#if ENABLE_TRACE +/*****************************************************************************/ +/* Structures */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief Data for the trace functionality +****************************************************************************** + */ +typedef struct +{ + /** + * fp + */ + FILE *fp; +}enc_trace_t; + +/*****************************************************************************/ +/* Extern variable declarations */ +/*****************************************************************************/ +extern enc_trace_t g_enc_trace; + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief defines flag used for enabling trace +****************************************************************************** + */ + + +/*****************************************************************************/ +/* Function Macros */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief Macro to print trace messages +****************************************************************************** + */ + +#define ENTROPY_TRACE(syntax_string, 
value) \ + { \ + if(g_enc_trace.fp) \ + { \ + fprintf( g_enc_trace.fp, "%-40s : %d\n", syntax_string, value ); \ + fflush ( g_enc_trace.fp); \ + } \ + } + + +/** +****************************************************************************** + * @brief Macro to print CABAC trace messages (prints only when range is non-zero and the trace file is open; NOTE(review): expands to a bare if statement with no do/while(0) wrapper, so do not follow a use of it with an else) +****************************************************************************** + */ + +#define AEV_TRACE(string, value, range) \ + if(range && g_enc_trace.fp) \ + { \ + fprintf( g_enc_trace.fp, "%-40s:%8d R:%d\n", string, value, range); \ + fflush ( g_enc_trace.fp); \ + } + +#else + +/* Dummy macros when trace is disabled */ +#define ENTROPY_TRACE(syntax_string, value) + +#define AEV_TRACE(string, value, range) + +#endif + + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + + +/** +****************************************************************************** +* +* @brief Dummy trace init when trace is disabled in encoder +* +* @par Description +* This routine needs to be called at start of trace +* +* @param[in] pu1_file_name +* Name of file where trace outputs need to be stored +* +* @return success or failure error code +* +****************************************************************************** +*/ +extern WORD32 ih264e_trace_init + ( + const char *pu1_file_name + ); + +/** +****************************************************************************** +* +* @brief Dummy trace de-init function when trace is disabled +* +* @par Description +* This routine needs to be called at end of trace +* +* @return success or failure error code +* +****************************************************************************** +*/ +extern WORD32 ih264e_trace_deinit + ( + void + ); + +#endif // IH264E_TRACE_H_ diff --git a/encoder/ih264e_trace_support.h b/encoder/ih264e_trace_support.h new file mode 100755 index 0000000..c35bd4f
--- /dev/null +++ b/encoder/ih264e_trace_support.h @@ -0,0 +1,61 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_trace_support.h +* +* @brief +* This file contains extern declarations of routines that could be helpful +* for debugging purposes. 
+* +* @author +* Harish +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef TRACE_SUPPORT_H_ +#define TRACE_SUPPORT_H_ + +/*****************************************************************************/ +/* Structures */ +/*****************************************************************************/ + +typedef struct +{ + WORD8 * pu1_buf; + WORD32 i4_offset; + WORD32 i4_max_size; +}trace_support_t; + +/*****************************************************************************/ +/* Extern function declarations */ +/*****************************************************************************/ + +void init_trace_support(WORD8 *pu1_buf, WORD32 i4_size); + +int trace_printf(const WORD8 *format, ...); + +#endif // TRACE_SUPPORT_H_ diff --git a/encoder/ih264e_utils.c b/encoder/ih264e_utils.c new file mode 100755 index 0000000..f0086cb --- /dev/null +++ b/encoder/ih264e_utils.c @@ -0,0 +1,1804 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_utils.c +* +* @brief +* Contains miscellaneous utility functions used by the encoder +* +* @author +* ittiam +* +* @par List of Functions: +* - ih264e_get_min_level() +* - ih264e_get_lvl_idx() +* - ih264e_get_dpb_size() +* - ih264e_get_total_pic_buf_size() +* - ih264e_get_pic_mv_bank_size() +* - ih264e_pic_buf_mgr_add_bufs() +* - ih264e_mv_buf_mgr_add_bufs() +* - ih264e_init_quant_params() +* - ih264e_init_air_map() +* - ih264e_codec_init() +* - ih264e_pic_init() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* system include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +/* user include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264e.h" +#include "ithread.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_macros.h" +#include "ih264_common_tables.h" +#include "ih264_debug.h" +#include "ih264_trans_data.h" +#include "ih264e_defs.h" +#include "ih264e_globals.h" +#include "ih264_buf_mgr.h" +#include "ih264_dpb_mgr.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" 
+#include "ih264e_utils.h" +#include "ih264e_config.h" +#include "ih264e_statistics.h" +#include "ih264e_trace.h" +#include "ih264_list.h" +#include "ih264e_encode_header.h" +#include "ih264e_me.h" +#include "ime_defs.h" +#include "ime.h" +#include "ih264e_rate_control.h" +#include "ih264e_core_coding.h" +#include "ih264e_rc_mem_interface.h" +#include "ih264e_time_stamp.h" +#include "ih264e_debug.h" +#include "ih264e_process.h" +#include "ih264e_master.h" +#include "irc_rate_control_api.h" +#include "ime_statistics.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* Used to get minimum level for a given picture size +* +* @par Description: +* Finds the first index whose gai4_ih264_max_luma_pic_size entry is >= pic_size +* and returns the gai4_ih264_levels entry at that index +* +* @param[in] pic_size +* Picture size, compared against the gai4_ih264_max_luma_pic_size table +* +* @returns Entry of gai4_ih264_levels for the selected index +* +* @remarks NOTE(review): if no table entry fits, lvl_idx stays MAX_LEVEL and gai4_ih264_levels[MAX_LEVEL] is read - confirm the table has that entry +* +******************************************************************************* +*/ +WORD32 ih264e_get_min_level(WORD32 pic_size) +{ + WORD32 lvl_idx = MAX_LEVEL, i; + + for (i = 0; i < MAX_LEVEL; i++) + { + if (pic_size <= gai4_ih264_max_luma_pic_size[i]) + { + lvl_idx = i; + break; + } + } + + return gai4_ih264_levels[lvl_idx]; +} + +/** +******************************************************************************* +* +* @brief +* Used to get level index for a given level +* +* @par Description: +* Converts from level_idc (which is multiplied by 30) to an index that can be +* used as a lookup.
Also used to ignore invalid levels like 2.2 , 3.2 etc +* +* @param[in] level +* Level of the stream +* +* @returns Level index for a given level +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_get_lvl_idx(WORD32 level) +{ + WORD32 lvl_idx = 0; + + if (level < IH264_LEVEL_11) + { + lvl_idx = 0; + } + else if (level < IH264_LEVEL_12) + { + lvl_idx = 1; + } + else if (level < IH264_LEVEL_13) + { + lvl_idx = 2; + } + else if (level < IH264_LEVEL_20) + { + lvl_idx = 3; + } + else if (level < IH264_LEVEL_21) + { + lvl_idx = 4; + } + else if (level < IH264_LEVEL_22) + { + lvl_idx = 5; + } + else if (level < IH264_LEVEL_30) + { + lvl_idx = 6; + } + else if (level < IH264_LEVEL_31) + { + lvl_idx = 7; + } + else if (level < IH264_LEVEL_32) + { + lvl_idx = 8; + } + else if (level < IH264_LEVEL_40) + { + lvl_idx = 9; + } + else if (level < IH264_LEVEL_41) + { + lvl_idx = 10; + } + else if (level < IH264_LEVEL_42) + { + lvl_idx = 11; + } + else if (level < IH264_LEVEL_50) + { + lvl_idx = 12; + } + + return (lvl_idx); +} + +/** +******************************************************************************* +* +* @brief returns maximum number of pictures allowed in dpb for a given level +* +* @par Description: +* For given width, height and level, number of pictures allowed in decoder +* picture buffer is computed as per Annex A.3.1 +* +* @param[in] level +* level of the bit-stream +* +* @param[in] pic_size +* width * height +* +* @returns Number of buffers in DPB +* +* @remarks +* From annexure A.3.1 of H264 specification, +* max_dec_frame_buffering <= MaxDpbSize, where MaxDpbSize is equal to +* Min( 1024 * MaxDPB / ( PicWidthInMbs * FrameHeightInMbs * 384 ), 16 ) and +* MaxDPB is given in Table A-1 in units of 1024 bytes. However the MaxDPB size +* presented in the look up table gas_ih264_lvl_tbl is in units of 512 +* bytes. Hence the expression is modified accordingly. 
+* +******************************************************************************* +*/ +WORD32 ih264e_get_dpb_size(WORD32 level, WORD32 pic_size) +{ + /* dpb size */ + WORD32 max_dpb_size_bytes = 0; + + /* dec frame buffering */ + WORD32 max_dpb_size_frames = 0; + + /* temp var */ + WORD32 i; + + /* determine max luma samples */ + for (i = 0; i < 16; i++) + if (level == (WORD32)gas_ih264_lvl_tbl[i].u4_level_idc) + max_dpb_size_bytes = gas_ih264_lvl_tbl[i].u4_max_dpb_size; + + /* from Annexure A.3.1 h264 specification */ + max_dpb_size_frames = + MIN( 1024 * max_dpb_size_bytes / ( pic_size * 3 ), MAX_DPB_SIZE ); + + return max_dpb_size_frames; +} + +/** +******************************************************************************* +* +* @brief +* Used to get reference picture buffer size for a given level and +* and padding used +* +* @par Description: +* Used to get reference picture buffer size for a given level and padding used +* Each picture is padded on all four sides +* +* @param[in] pic_size +* Number of luma samples (Width * Height) +* +* @param[in] level +* Level +* +* @param[in] horz_pad +* Total padding used in horizontal direction +* +* @param[in] vert_pad +* Total padding used in vertical direction +* +* @returns Total picture buffer size +* +* @remarks +* +* +******************************************************************************* +*/ +WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size, + WORD32 level, + WORD32 horz_pad, + WORD32 vert_pad, + WORD32 num_ref_frames, + WORD32 num_reorder_frames) +{ + WORD32 size; + WORD32 num_luma_samples; + WORD32 lvl_idx; + WORD32 max_wd, min_ht; + WORD32 num_samples; + WORD32 max_num_bufs; + WORD32 pad = MAX(horz_pad, vert_pad); + UNUSED(pic_size); + /* + * If num_ref_frames and num_reorder_frmaes is specified + * Use minimum value + */ + max_num_bufs = (num_ref_frames + num_reorder_frames + MAX_CTXT_SETS); + + /* Get level index */ + lvl_idx = ih264e_get_lvl_idx(level); + + /* Maximum number of luma 
samples in a picture at given level */ + num_luma_samples = gai4_ih264_max_luma_pic_size[lvl_idx]; + + /* Account for chroma */ + num_samples = num_luma_samples * 3 / 2; + + /* Maximum width of luma samples in a picture at given level */ + max_wd = gai4_ih264_max_wd_ht[lvl_idx]; + + /* Minimum height of luma samples in a picture at given level */ + min_ht = gai4_ih264_min_wd_ht[lvl_idx]; + + /* Allocation is required for + * (Wd + horz_pad) * (Ht + vert_pad) * (2 * max_dpb_size + 1) + * + * Above expanded as + * ((Wd * Ht) + (horz_pad * vert_pad) + Wd * vert_pad + Ht * horz_pad) * (2 * max_dpb_size + 1) + * (Wd * Ht) * (2 * max_dpb_size + 1) + ((horz_pad * vert_pad) + Wd * vert_pad + Ht * horz_pad) * (2 * max_dpb_size + 1) + * Now max_dpb_size increases with smaller Wd and Ht, but Wd * ht * max_dpb_size will still be lesser or equal to max_wd * max_ht * dpb_size + * + * In the above equation (Wd * Ht) * (2 * max_dpb_size + 1) is accounted by using num_samples * (2 * max_dpb_size + 1) below + * + * For the padded area use MAX(horz_pad, vert_pad) as pad + * ((pad * pad) + pad * (Wd + Ht)) * (2 * max_dpb_size + 1) has to accounted from the above for padding + * + * Since Width and Height can change worst Wd + Ht is when One of the dimensions is max and other is min + * So use max_wd and min_ht + */ + + /* Number of bytes in reference pictures */ + size = num_samples * max_num_bufs; + + /* Account for padding area */ + size += ((pad * pad) + pad * (max_wd + min_ht)) * max_num_bufs; + + return size; +} + +/** +******************************************************************************* +* +* @brief Returns MV bank buffer size for a given number of luma samples +* +* @par Description: +* For given number of luma samples one MV bank size is computed. 
+* Each MV bank includes pu_map and enc_pu_t for all the min PUs(4x4) in a picture +* +* @param[in] num_luma_samples +* Max number of luma pixels in the frame +* +* @returns Total MV Bank size +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples) +{ + /* mv bank buffer size */ + WORD32 mv_bank_size = 0; + + /* number of sub mb partitions possible */ + WORD32 num_pu = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE); + + /* number of mbs */ + WORD32 num_mb = num_luma_samples / (MB_SIZE * MB_SIZE); + + /* Size for storing enc_pu_t start index each MB */ + /* One extra entry is needed to compute number of PUs in the last MB */ + mv_bank_size += num_mb * sizeof(WORD32); + + /* Size for pu_map */ + mv_bank_size += num_pu; + + /* Size for storing enc_pu_t for each PU */ + mv_bank_size += num_pu * sizeof(enc_pu_t); + + return mv_bank_size; +} + +/** +******************************************************************************* +* +* @brief +* Function to initialize ps_pic_buf structs add pic buffers to +* buffer manager in case of non-shared mode +* +* @par Description: +* Function to initialize ps_pic_buf structs add pic buffers to +* buffer manager in case of non-shared mode +* To be called once per stream or for every reset +* +* @param[in] ps_codec +* Pointer to codec context +* +* @returns error status +* +* @remarks +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_pic_buf_mgr_add_bufs(codec_t *ps_codec) +{ + /* error status */ + IH264E_ERROR_T ret = IH264E_SUCCESS; + + /* max ref buffer cnt */ + WORD32 max_num_bufs = ps_codec->i4_ref_buf_cnt; + + /* total size for pic buffers */ + WORD32 pic_buf_size_allocated = ps_codec->i4_total_pic_buf_size + - BUF_MGR_MAX_CNT * sizeof(pic_buf_t); + + /* temp var */ + UWORD8 *pu1_buf = (UWORD8 *) ps_codec->ps_pic_buf; + pic_buf_t *ps_pic_buf = (pic_buf_t *) 
ps_codec->ps_pic_buf; + WORD32 i; + + pu1_buf += BUF_MGR_MAX_CNT * sizeof(pic_buf_t); + + /* In case of non-shared mode, add picture buffers to buffer manager + * In case of shared mode, buffers are added in the run-time + */ + { + WORD32 buf_ret; + + WORD32 luma_samples = (ps_codec->i4_rec_strd) + * (ps_codec->s_cfg.u4_ht + PAD_HT); + + WORD32 chroma_samples = luma_samples >> 1; + + /* Try and add as many buffers as possible for the memory that is allocated */ + /* If the number of buffers that can be added is less than max_num_bufs + * return with an error */ + for (i = 0; i < max_num_bufs; i++) + { + pic_buf_size_allocated -= (luma_samples + chroma_samples); + + if (pic_buf_size_allocated < 0) + { + ps_codec->i4_error_code = IH264E_INSUFFICIENT_MEM_PICBUF; + return IH264E_INSUFFICIENT_MEM_PICBUF; + } + + ps_pic_buf->pu1_luma = pu1_buf + ps_codec->i4_rec_strd * PAD_TOP + + PAD_LEFT; + pu1_buf += luma_samples; + + ps_pic_buf->pu1_chroma = pu1_buf + + ps_codec->i4_rec_strd * (PAD_TOP / 2)+ PAD_LEFT; + pu1_buf += chroma_samples; + + buf_ret = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_ref_buf_mgr, + ps_pic_buf, i); + + if (0 != buf_ret) + { + ps_codec->i4_error_code = IH264E_BUF_MGR_ERROR; + return IH264E_BUF_MGR_ERROR; + } + pu1_buf += (HPEL_PLANES_CNT - 1) * (chroma_samples + luma_samples); + ps_pic_buf++; + } + } + + return ret; +} + +/** +******************************************************************************* +* +* @brief Function to add buffers to MV Bank buffer manager +* +* @par Description: +* Function to add buffers to MV Bank buffer manager. 
To be called once per +* stream or for every reset +* +* @param[in] ps_codec +* Pointer to codec context +* +* @returns error status +* +* @remarks +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec) +{ + /* error status */ + IH264E_ERROR_T error_status = IH264E_SUCCESS; + IH264_ERROR_T ret; + + /* max dpb size in frames */ + WORD32 max_dpb_size = 0; + + /* mv bank size for the entire dpb */ + WORD32 mv_bank_size_allocated = 0; + + /* mv bank size per pic */ + WORD32 pic_mv_bank_size = 0; + + /* mv buffer ptr */ + mv_buf_t *ps_mv_buf = NULL; + + /* num of luma samples */ + WORD32 num_luma_samples = ALIGN16(ps_codec->s_cfg.u4_wd) + * ALIGN16(ps_codec->s_cfg.u4_ht); + + /* number of mb's & frame partitions */ + WORD32 num_pu, num_mb; + + /* temp var */ + UWORD8 *pu1_buf = NULL; + WORD32 i; + + /* Compute the number of MB Bank buffers needed */ + max_dpb_size = ps_codec->i4_ref_buf_cnt; + + /* allocate memory for mv buffer array */ + ps_codec->ps_mv_buf = ps_codec->pv_mv_bank_buf_base; + pu1_buf = ps_codec->pv_mv_bank_buf_base; + pu1_buf += BUF_MGR_MAX_CNT * sizeof(mv_buf_t); + + /********************************************************************/ + /* allocate memory for individual elements of mv buffer ptr */ + /********************************************************************/ + mv_bank_size_allocated = ps_codec->i4_total_mv_bank_size + - (BUF_MGR_MAX_CNT * sizeof(mv_buf_t)); + + /* compute MV bank size per picture */ + pic_mv_bank_size = ih264e_get_pic_mv_bank_size(num_luma_samples); + + num_pu = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE); + num_mb = num_luma_samples / (MB_SIZE * MB_SIZE); + i = 0; + ps_mv_buf = ps_codec->pv_mv_bank_buf_base; + + while (i < max_dpb_size) + { + mv_bank_size_allocated -= pic_mv_bank_size; + + if (mv_bank_size_allocated < 0) + { + ps_codec->i4_error_code = IH264E_INSUFFICIENT_MEM_MVBANK; + + error_status = 
IH264E_INSUFFICIENT_MEM_MVBANK; + + return error_status; + } + + ps_mv_buf->pu4_mb_pu_cnt = (UWORD32 *) pu1_buf; + + ps_mv_buf->pu1_pic_pu_map = (pu1_buf + num_mb * sizeof(WORD32)); + + ps_mv_buf->ps_pic_pu = (enc_pu_t *) (pu1_buf + num_mb * sizeof(WORD32) + + num_pu); + + ret = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_mv_buf_mgr, + ps_mv_buf, i); + + if (IH264_SUCCESS != ret) + { + ps_codec->i4_error_code = IH264E_BUF_MGR_ERROR; + error_status = IH264E_BUF_MGR_ERROR; + return error_status; + } + + pu1_buf += pic_mv_bank_size; + ps_mv_buf++; + i++; + } + + return error_status; +} + +/** +******************************************************************************* +* +* @brief Function to initialize quant params structure +* +* @par Description: +* The forward quantization modules depends on qp/6, qp mod 6, forward scale +* matrix, forward threshold matrix, weight list. The inverse quantization +* modules depends on qp/6, qp mod 6, inverse scale matrix, weight list. +* These params are initialized in this function. 
+* +* @param[in] ps_proc +* pointer to process context +* +* @param[in] qp +* quantization parameter +* +* @returns none +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_init_quant_params(process_ctxt_t *ps_proc, int qp) +{ + /* quant params */ + quant_params_t *ps_qp_params; + + /* ptr to forward quant threshold matrix */ + const UWORD16 *pu2_thres_mat = NULL; + + /* ptr to forward scale matrix */ + const UWORD16 *pu2_scale_mat = gu2_quant_scale_matrix_4x4; + + /* ptr to inverse scale matrix */ + const UWORD16 *pu2_iscale_mat = gau2_ih264_iquant_scale_matrix_4x4; + + /* temp var */ + UWORD32 u4_qp[3], u4_qp_div6, u4_qp_mod6; + COMPONENT_TYPE plane; + WORD32 i; + UWORD32 u4_satdq_t; + const UWORD16 *pu2_smat; + + /********************************************************************/ + /* init quant params for all planes Y, U and V */ + /********************************************************************/ + /* luma qp */ + u4_qp[Y] = qp; + + /* chroma qp + * TODO_LATER : just in case if the chroma planes use different qp's this + * needs to be corrected accordingly. 
+ */ + u4_qp[U] = gu1_qpc_fqpi[qp]; + u4_qp[V] = gu1_qpc_fqpi[qp]; + + plane = Y; + while (plane <= V) + { + u4_qp_div6 = (u4_qp[plane] / 6); + u4_qp_mod6 = (u4_qp[plane] % 6); + + ps_qp_params = ps_proc->ps_qp_params[plane]; + + /* mb qp */ + ps_qp_params->u1_mb_qp = u4_qp[plane]; + + /* mb qp / 6 */ + ps_qp_params->u1_qp_div = u4_qp_div6; + + /* mb qp % 6 */ + ps_qp_params->u1_qp_rem = u4_qp_mod6; + + /* QP bits */ + ps_qp_params->u1_qbits = QP_BITS_h264_4x4 + u4_qp_div6; + + /* forward scale matrix */ + ps_qp_params->pu2_scale_mat = pu2_scale_mat + (u4_qp_mod6 * 16); + + /* threshold matrix & weight for quantization */ + pu2_thres_mat = gu2_forward_quant_threshold_4x4 + (u4_qp_mod6 * 16); + for (i = 0; i < 16; i++) + { + ps_qp_params->pu2_thres_mat[i] = pu2_thres_mat[i] + >> (8 - u4_qp_div6); + ps_qp_params->pu2_weigh_mat[i] = 16; + } + + /* qp dependent rounding constant */ + ps_qp_params->u4_dead_zone = + gu4_forward_quant_round_factor_4x4[u4_qp_div6]; + + /* slice dependent rounding constant */ + if (ps_proc->i4_slice_type != ISLICE + && ps_proc->i4_slice_type != SISLICE) + { + ps_qp_params->u4_dead_zone >>= 1; + } + + /* SATQD threshold for zero block prediction */ + if (ps_proc->ps_codec->s_cfg.u4_enable_satqd) + { + pu2_smat = ps_qp_params->pu2_scale_mat; + + u4_satdq_t = ((1 << (ps_qp_params->u1_qbits)) - ps_qp_params->u4_dead_zone); + + ps_qp_params->pu2_sad_thrsh[0] = u4_satdq_t / MAX(pu2_smat[3], pu2_smat[11]); + ps_qp_params->pu2_sad_thrsh[1] = u4_satdq_t / MAX(pu2_smat[1], pu2_smat[9]); + ps_qp_params->pu2_sad_thrsh[2] = u4_satdq_t / pu2_smat[15]; + ps_qp_params->pu2_sad_thrsh[3] = u4_satdq_t / pu2_smat[7]; + ps_qp_params->pu2_sad_thrsh[4] = u4_satdq_t / MAX(pu2_smat[12], pu2_smat[14]); + ps_qp_params->pu2_sad_thrsh[5] = u4_satdq_t / MAX(pu2_smat[4], pu2_smat[6]); + ps_qp_params->pu2_sad_thrsh[6] = u4_satdq_t / pu2_smat[13]; + ps_qp_params->pu2_sad_thrsh[7] = u4_satdq_t / pu2_smat[5]; + ps_qp_params->pu2_sad_thrsh[8] = u4_satdq_t / 
MAX(MAX3(pu2_smat[0], pu2_smat[2], pu2_smat[8]), pu2_smat[10]); + } + + /* inverse scale matrix */ + ps_qp_params->pu2_iscale_mat = pu2_iscale_mat + (u4_qp_mod6 * 16); + + plane += 1; + } + return ; +} + +/** +******************************************************************************* +* +* @brief +* Initialize AIR mb frame Map +* +* @par Description: +* Initialize AIR mb frame map +* MB frame map indicates which frame an Mb should be coded as intra according to AIR +* +* @param[in] ps_codec +* Pointer to codec context +* +* @returns error_status +* +* @remarks +* +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_init_air_map(codec_t *ps_codec) +{ + /* intra refresh map */ + UWORD16 *pu2_intr_rfrsh_map = ps_codec->pu2_intr_rfrsh_map; + + /* air mode */ + IVE_AIR_MODE_T air_mode = ps_codec->s_cfg.e_air_mode; + + /* refresh period */ + UWORD32 air_period = ps_codec->s_cfg.u4_air_refresh_period; + + /* mb cnt */ + UWORD32 u4_mb_cnt = ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs; + + /* temp var */ + UWORD32 curr_mb, seed_rand = 1; + + switch (air_mode) + { + case IVE_AIR_MODE_CYCLIC: + + for (curr_mb = 0; curr_mb < u4_mb_cnt; curr_mb++) + { + pu2_intr_rfrsh_map[curr_mb] = curr_mb % air_period; + } + break; + + case IVE_AIR_MODE_RANDOM: + + for (curr_mb = 0; curr_mb < u4_mb_cnt; curr_mb++) + { + seed_rand = (seed_rand * 32719 + 3) % 32749; + pu2_intr_rfrsh_map[curr_mb] = seed_rand % air_period; + } + break; + + default: + + break; + } + + return IH264E_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Codec level initializations +* +* @par Description: +* Initializes the codec with parameters that needs to be set before encoding +* first frame +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_inp_buf +* Pointer to input buffer context +* +* @returns error_status +* +* @remarks +* +* 
+******************************************************************************* +*/ +IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec) +{ + /******************************************************************** + * INITIALIZE CODEC CONTEXT * + ********************************************************************/ + /* encoder presets */ + if (ps_codec->s_cfg.u4_enc_speed_preset != IVE_CONFIG) + { + if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) + {/* high quality */ + /* enable diamond search */ + ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH; + ps_codec->s_cfg.u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_codec->s_cfg.u4_enable_intra_4x4 = 1; + ps_codec->luma_energy_compaction[1] = + ih264e_code_luma_intra_macroblock_4x4_rdopt_on; + + /* sub pel off */ + ps_codec->s_cfg.u4_enable_hpel = 1; + + /* deblocking off */ + ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 0; + } + else if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_NORMAL) + {/* normal */ + /* enable diamond search */ + ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH; + ps_codec->s_cfg.u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_codec->s_cfg.u4_enable_intra_4x4 = 1; + + /* sub pel off */ + ps_codec->s_cfg.u4_enable_hpel = 1; + + /* deblocking off */ + ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 0; + } + else if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST) + {/* normal */ + /* enable diamond search */ + ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH; + ps_codec->s_cfg.u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_codec->s_cfg.u4_enable_intra_4x4 = 0; + + /* sub pel off */ + ps_codec->s_cfg.u4_enable_hpel = 1; + + /* deblocking off */ + ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0; + + /* disabled intra inter gating in Inter slices */ + 
ps_codec->u4_inter_gate = 1; + } + else if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_HIGH_SPEED) + {/* fast */ + /* enable diamond search */ + ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH; + ps_codec->s_cfg.u4_enable_fast_sad = 0; + + /* disable intra 4x4 */ + ps_codec->s_cfg.u4_enable_intra_4x4 = 0; + + /* sub pel off */ + ps_codec->s_cfg.u4_enable_hpel = 0; + + /* deblocking off */ + ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 0; + } + else if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) + {/* fastest */ + /* enable diamond search */ + ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH; + + /* disable intra 4x4 */ + ps_codec->s_cfg.u4_enable_intra_4x4 = 0; + + /* sub pel off */ + ps_codec->s_cfg.u4_enable_hpel = 0; + + /* deblocking off */ + ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4; + + /* disabled intra inter gating in Inter slices */ + ps_codec->u4_inter_gate = 1; + } + } + + /***************************************************************** + * Initialize AIR inside codec + *****************************************************************/ + if (IVE_AIR_MODE_NONE != ps_codec->s_cfg.e_air_mode) + { + ih264e_init_air_map(ps_codec); + + ps_codec->i4_air_pic_cnt = -1; + } + + /****************************************************/ + /* INITIALIZE RATE CONTROL */ + /****************************************************/ + { + /* init qp */ + UWORD8 au1_init_qp[MAX_PIC_TYPE]; + + /* min max qp */ + UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE]; + + /* init i,p,b qp */ + au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp]; + au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp]; + au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp]; + + /* init min max qp */ + au1_min_max_qp[2 * I_PIC] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min]; + au1_min_max_qp[2 * I_PIC + 1] = + 
gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max]; + + au1_min_max_qp[2 * P_PIC] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min]; + au1_min_max_qp[2 * P_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max]; + + au1_min_max_qp[2 * B_PIC] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min]; + au1_min_max_qp[2 * B_PIC + 1] = + gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max]; + + /* get rc mode */ + switch (ps_codec->s_cfg.e_rc_mode) + { + case IVE_RC_STORAGE: + ps_codec->s_rate_control.e_rc_type = VBR_STORAGE; + break; + case IVE_RC_CBR_NON_LOW_DELAY: + ps_codec->s_rate_control.e_rc_type = CBR_NLDRC; + break; + case IVE_RC_CBR_LOW_DELAY: + ps_codec->s_rate_control.e_rc_type = CBR_LDRC; + break; + case IVE_RC_NONE: + ps_codec->s_rate_control.e_rc_type = CONST_QP; + break; + default: + break; + } + + /* init rate control */ + ih264e_rc_init(ps_codec->s_rate_control.pps_rate_control_api, + ps_codec->s_rate_control.pps_frame_time, + ps_codec->s_rate_control.pps_time_stamp, + ps_codec->s_rate_control.pps_pd_frm_rate, + ps_codec->s_cfg.u4_max_framerate, + ps_codec->s_cfg.u4_src_frame_rate, + ps_codec->s_cfg.u4_tgt_frame_rate, + ps_codec->s_rate_control.e_rc_type, + ps_codec->s_cfg.u4_target_bitrate, + ps_codec->s_cfg.u4_max_bitrate, + ps_codec->s_cfg.u4_vbv_buffer_delay, + ps_codec->s_cfg.u4_i_frm_interval, au1_init_qp, + H264_ALLOC_INTER_FRM_INTV, au1_min_max_qp, + ps_codec->s_cfg.u4_max_level); + } + + /* src stride */ + ps_codec->i4_src_strd = ps_codec->s_cfg.u4_strd; + + /* recon stride */ + ps_codec->i4_rec_strd = ALIGN16(ps_codec->s_cfg.u4_max_wd) + PAD_WD; + + /* max ref and reorder cnt */ + ps_codec->i4_ref_buf_cnt = ps_codec->s_cfg.u4_max_ref_cnt + + ps_codec->s_cfg.u4_max_reorder_cnt; + ps_codec->i4_ref_buf_cnt += MAX_CTXT_SETS; + + DEBUG_HISTOGRAM_INIT(); + + return IH264E_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Picture level initializations +* +* @par 
Description: +* Before beginning to encode the frame, the current function initializes all +* the ctxts (proc, entropy, me, ...) basing on the input configured params. +* It locates space for storing recon in the encoder picture buffer set, fetches +* reference frame from encoder picture buffer set. Calls RC pre-enc to get +* qp and pic type for the current frame. Queues proc jobs so that +* the other threads can begin encoding. In brief, this function sets up the +* tone for the entire encoder. +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_inp_buf +* Pointer to input buffer context +* +* @returns error_status +* +* @remarks +* +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) +{ + /* error status */ + IH264E_ERROR_T error_status = IH264E_SUCCESS; + IH264_ERROR_T ret = IH264_SUCCESS; + + /* mv buff bank */ + mv_buf_t *ps_mv_buf = NULL; + WORD32 cur_mv_bank_buf_id; + + /* recon buffer set */ + pic_buf_t *ps_cur_pic; + WORD32 cur_pic_buf_id; + UWORD8 *pu1_cur_pic_luma, *pu1_cur_pic_chroma; + + /* ref buffer set */ + pic_buf_t *ps_ref_pic; + WORD32 ref_set_id; + + /* pic time stamp */ + UWORD32 u4_timestamp_high = ps_inp_buf->u4_timestamp_high; + UWORD32 u4_timestamp_low = ps_inp_buf->u4_timestamp_low; + + /* indices to access curr/prev frame info */ + WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1; + + /* curr pic type */ + PIC_TYPE_T *pic_type = &ps_codec->pic_type; + + /* should src be skipped */ + WORD32 *skip_src = &ps_codec->s_rate_control.pre_encode_skip[ctxt_sel]; + + /* Diamond search Iteration Max Cnt */ + UWORD32 u4_num_layers = + (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) ? 
+ (NUM_LAYERS >> 2) : NUM_LAYERS; + + /* enable fast sad */ + UWORD32 u4_enable_fast_sad = ps_codec->s_cfg.u4_enable_fast_sad; + + /********************************************************************/ + /* INITIALIZE CODEC CONTEXT */ + /********************************************************************/ + + /* pre enc rc call */ + *skip_src = ih264e_set_rc_pic_params(ps_codec, + ps_codec->i4_encode_api_call_cnt, + (WORD32 *) pic_type); + if (*skip_src == 1) + { + ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_inp_buf = + *ps_inp_buf; + + /* inform output bytes generated as zero */ + ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = 0; + + return error_status; + } + + /********************************************************************/ + /* Alternate reference frame */ + /********************************************************************/ + if (ps_codec->s_cfg.u4_enable_alt_ref) + { + if (PIC_IDR == *pic_type || PIC_I == *pic_type) + { + ps_codec->u4_is_curr_frm_ref = 1; + } + else + { + ps_codec->u4_is_curr_frm_ref = 1; + if(ps_codec->i4_encode_api_call_cnt % (ps_codec->s_cfg.u4_enable_alt_ref + 1)) + ps_codec->u4_is_curr_frm_ref = 0; + } + + if ((ps_codec->u4_is_curr_frm_ref == 1) || (ps_codec->i4_frame_num < 0)) + { + ps_codec->i4_frame_num++; + } + } + else + { + ps_codec->u4_is_curr_frm_ref = 1; + + ps_codec->i4_frame_num++; + } + + /* slice_type */ + ps_codec->i4_slice_type = PSLICE; + + if ((PIC_I == *pic_type) || (PIC_IDR == *pic_type)) + { + ps_codec->i4_slice_type = ISLICE; + } + else if (PIC_P == *pic_type) + { + ps_codec->i4_slice_type = PSLICE; + } + + /* is this an IDR pic */ + ps_codec->u4_is_idr = 0; + + if (PIC_IDR == *pic_type) + { + /* set idr flag */ + ps_codec->u4_is_idr = 1; + + /* reset frame num */ + ps_codec->i4_frame_num = 0; + + /* idr_pic_id */ + ps_codec->i4_idr_pic_id++; + } + + /* set deblock disable flags based on disable deblock level */ + ps_codec->i4_disable_deblk_pic = 1; + + if 
(ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_0) + { + /* enable deblocking */ + ps_codec->i4_disable_deblk_pic = 0; + } + else if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_2) + { + /* enable deblocking after a period of frames */ + if (ps_codec->i4_disable_deblk_pic_cnt == DISABLE_DEBLOCK_INTERVAL + || ps_codec->i4_slice_type == ISLICE) + { + ps_codec->i4_disable_deblk_pic = 0; + } + } + else if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_3) + { + if (ps_codec->i4_slice_type == ISLICE) + { + ps_codec->i4_disable_deblk_pic = 0; + } + } + + if (ps_codec->i4_disable_deblk_pic) + { + ps_codec->i4_disable_deblk_pic_cnt++; + } + else + { + ps_codec->i4_disable_deblk_pic_cnt = 0; + } + + /* In slice mode - lets not deblk mb edges that lie along slice boundaries */ + if (ps_codec->i4_disable_deblk_pic == 0) + { + if (ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_NONE) + { + ps_codec->i4_disable_deblk_pic = 2; + } + } + + /* error status */ + ps_codec->i4_error_code = IH264E_SUCCESS; + + /* populate header */ + if (ps_codec->i4_gen_header) + { + /* sps */ + sps_t *ps_sps = NULL; + + /* pps */ + pps_t *ps_pps = NULL; + + /*ps_codec->i4_pps_id ++;*/ + ps_codec->i4_pps_id %= MAX_PPS_CNT; + + /*ps_codec->i4_sps_id ++;*/ + ps_codec->i4_sps_id %= MAX_SPS_CNT; + + /* populate sps header */ + ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id; + ih264e_populate_sps(ps_codec, ps_sps); + + /* populate pps header */ + ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id; + ih264e_populate_pps(ps_codec, ps_pps); + } + + /* Reference and MV bank Buffer Manager */ + { + /* min pic cnt among the list of pics stored in ref list */ + WORD32 min_pic_cnt; + + /* max pic cnt among the list of pics stored in ref list */ + WORD32 max_pic_cnt; + + /* temp var */ + WORD32 i; + + ps_ref_pic = NULL; + + /* get reference picture when necessary */ + /* Only nearest picture encoded (max pic cnt) is used as reference */ + if ((*pic_type 
!= PIC_IDR) && (*pic_type != PIC_I)) + { + max_pic_cnt = ps_codec->as_ref_set[0].i4_pic_cnt; + + ps_ref_pic = ps_codec->as_ref_set[0].ps_pic_buf; + + /* loop through to get the max pic cnt among the list of pics stored in ref list */ + for (i = 1; i < ps_codec->i4_ref_buf_cnt; i++) + { + if (max_pic_cnt < ps_codec->as_ref_set[i].i4_pic_cnt) + { + max_pic_cnt = ps_codec->as_ref_set[i].i4_pic_cnt; + ps_ref_pic = ps_codec->as_ref_set[i].ps_pic_buf; + } + } + } + + /* get a location at which the curr pic info can be stored for future reference */ + ref_set_id = -1; + + for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++) + { + if (-1 == ps_codec->as_ref_set[i].i4_pic_cnt) + { + ref_set_id = i; + break; + } + } + + /* If all the entries in the ref_set array are filled, then remove the entry with least pic_cnt */ + if (ref_set_id == -1) + { + /* pic info */ + pic_buf_t *ps_cur_pic; + + /* mv info */ + mv_buf_t *ps_cur_mv_buf; + + ref_set_id = 0; + min_pic_cnt = ps_codec->as_ref_set[0].i4_pic_cnt; + + /* loop through to get the min pic cnt among the list of pics stored in ref list */ + for (i = 1; i < ps_codec->i4_ref_buf_cnt; i++) + { + if (min_pic_cnt > ps_codec->as_ref_set[i].i4_pic_cnt) + { + min_pic_cnt = ps_codec->as_ref_set[i].i4_pic_cnt; + ref_set_id = i; + } + } + + ps_cur_pic = ps_codec->as_ref_set[ref_set_id].ps_pic_buf; + + ps_cur_mv_buf = ps_codec->as_ref_set[ref_set_id].ps_mv_buf; + + /* release this frame from reference list */ + ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, + ps_cur_mv_buf->i4_buf_id, BUF_MGR_REF); + + ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, + ps_cur_pic->i4_buf_id, BUF_MGR_REF); + } + + if (ps_codec->s_cfg.u4_enable_recon) + { + ret = ih264_buf_mgr_check_free((buf_mgr_t *)ps_codec->pv_ref_buf_mgr); + + if (ret != IH264_SUCCESS) + { + return IH264E_NO_FREE_RECONBUF; + } + } + } + + { + /*****************************************************************/ + /* Get free MV Bank to hold current picture's motion vector data */ + /* If there 
are no free buffers then return with an error code. */ + /* If the buffer is to be freed by another thread, change the */ + /* following to call thread yield and wait for buffer to be freed*/ + /*****************************************************************/ + ps_mv_buf = (mv_buf_t *) ih264_buf_mgr_get_next_free( + (buf_mgr_t *) ps_codec->pv_mv_buf_mgr, + &cur_mv_bank_buf_id); + + if (NULL == ps_mv_buf) + { + ps_codec->i4_error_code = IH264E_NO_FREE_MVBANK; + return IH264E_NO_FREE_MVBANK; + } + + /* mark the buffer as needed for reference if the curr pic is available for ref */ + if (ps_codec->u4_is_curr_frm_ref) + { + ih264_buf_mgr_set_status(ps_codec->pv_mv_buf_mgr, + cur_mv_bank_buf_id, BUF_MGR_REF); + } + + /* Set current ABS poc to ps_mv_buf, so that while freeing a reference buffer + * corresponding mv buffer can be found by looping through ps_codec->ps_mv_buf array + * and getting a buffer id to free + */ + ps_mv_buf->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt; + + ps_mv_buf->i4_buf_id = cur_mv_bank_buf_id; + } + + { + /*****************************************************************/ + /* Get free pic buf to hold current picture's recon data */ + /* If there are no free buffers then return with an error code. 
*/ + /* If the buffer is to be freed by another thread, change the */ + /* following to call thread yield and wait for buffer to be freed*/ + /*****************************************************************/ + ps_cur_pic = (pic_buf_t *) ih264_buf_mgr_get_next_free( + (buf_mgr_t *) ps_codec->pv_ref_buf_mgr, + &cur_pic_buf_id); + + if (NULL == ps_cur_pic) + { + ps_codec->i4_error_code = IH264E_NO_FREE_PICBUF; + return IH264E_NO_FREE_PICBUF; + } + + /* mark the buffer as needed for reference if the curr pic is available for ref */ + if (1 == ps_codec->u4_is_curr_frm_ref) + { + ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id, + BUF_MGR_REF); + } + + /* Mark the current buffer as needed for IO if recon is enabled */ + if (1 == ps_codec->s_cfg.u4_enable_recon) + { + ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id, + BUF_MGR_IO); + } + + /* Associate input timestamp with current buffer */ + ps_cur_pic->u4_timestamp_high = ps_inp_buf->u4_timestamp_high; + ps_cur_pic->u4_timestamp_low = ps_inp_buf->u4_timestamp_low; + + ps_cur_pic->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt; + ps_cur_pic->i4_poc_lsb = ps_codec->i4_pic_order_cnt_lsb; + + ps_cur_pic->i4_buf_id = cur_pic_buf_id; + + pu1_cur_pic_luma = ps_cur_pic->pu1_luma; + pu1_cur_pic_chroma = ps_cur_pic->pu1_chroma; + } + + /* in case the current picture is used for reference then add it to the reference set */ + if (ps_codec->u4_is_curr_frm_ref + && ((*pic_type == PIC_IDR) || (*pic_type == PIC_I) + || (*pic_type == PIC_P))) + { + ps_codec->as_ref_set[ref_set_id].i4_pic_cnt = ps_codec->i4_pic_cnt; + + /* TODO: Currently pic_cnt and poc are same - Once frame drops are introduced change appropriately */ + ps_codec->as_ref_set[ref_set_id].i4_poc = ps_codec->i4_pic_cnt; + + ps_codec->as_ref_set[ref_set_id].ps_mv_buf = ps_mv_buf; + + ps_codec->as_ref_set[ref_set_id].ps_pic_buf = ps_cur_pic; + } + + /********************************************************************/ + /* INITIALIZE 
PROCESS CONTEXT */ + /********************************************************************/ + { + /* temp var */ + WORD32 i, j = 0; + + /* curr proc ctxt */ + process_ctxt_t *ps_proc = NULL; + + j = ctxt_sel * MAX_PROCESS_THREADS; + + /* begin init */ + for (i = j; i < (j + MAX_PROCESS_THREADS); i++) + { + ps_proc = &ps_codec->as_process[i]; + + /* luma src buffer */ + if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE) + { + ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base; + } + else + { + ps_proc->pu1_src_buf_luma_base = + ps_inp_buf->s_raw_buf.apv_bufs[0]; + } + + /* chroma src buffer */ + if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE + || ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P) + { + ps_proc->pu1_src_buf_chroma_base = + ps_codec->pu1_uv_csc_buf_base; + } + else + { + ps_proc->pu1_src_buf_chroma_base = + ps_inp_buf->s_raw_buf.apv_bufs[1]; + } + + /* luma rec buffer */ + ps_proc->pu1_rec_buf_luma_base = pu1_cur_pic_luma; + + /* chroma rec buffer */ + ps_proc->pu1_rec_buf_chroma_base = pu1_cur_pic_chroma; + + /* src stride */ + ps_proc->i4_src_strd = ps_codec->i4_src_strd; + + /* rec stride */ + ps_proc->i4_rec_strd = ps_codec->i4_rec_strd; + + /* frame num */ + ps_proc->i4_frame_num = ps_codec->i4_frame_num; + + /* is idr */ + ps_proc->u4_is_idr = ps_codec->u4_is_idr; + + /* idr pic id */ + ps_proc->u4_idr_pic_id = ps_codec->i4_idr_pic_id; + + /* slice_type */ + ps_proc->i4_slice_type = ps_codec->i4_slice_type; + + /* Input width in mbs */ + ps_proc->i4_wd_mbs = ps_codec->s_cfg.i4_wd_mbs; + + /* Input height in mbs */ + ps_proc->i4_ht_mbs = ps_codec->s_cfg.i4_ht_mbs; + + /* Half x plane offset from pic buf */ + ps_proc->u4_half_x_offset = 0; + + /* Half y plane offset from half x plane */ + ps_proc->u4_half_y_offset = 0; + + /* Half x plane offset from half y plane */ + ps_proc->u4_half_xy_offset = 0; + + /* top row syntax elements */ + ps_proc->ps_top_row_mb_syntax_ele = + ps_proc->ps_top_row_mb_syntax_ele_base; + + 
ps_proc->pu1_top_mb_intra_modes = + ps_proc->pu1_top_mb_intra_modes_base; + + ps_proc->ps_top_row_pu = ps_proc->ps_top_row_pu_base; + + /* initialize quant params */ + ps_proc->u4_frame_qp = ps_codec->u4_frame_qp; + ps_proc->u4_mb_qp = ps_codec->u4_frame_qp; + ih264e_init_quant_params(ps_proc, ps_proc->u4_frame_qp); + + /* previous mb qp*/ + ps_proc->u4_mb_qp_prev = ps_proc->u4_frame_qp; + + /* Reset frame info */ + memset(&ps_proc->s_frame_info, 0, sizeof(frame_info_t)); + + /* initialize proc, deblk and ME map */ + if (i == j) + { + /* row '-1' */ + memset(ps_proc->pu1_proc_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs); + /* row 0 to ht in mbs */ + memset(ps_proc->pu1_proc_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs); + + /* row '-1' */ + memset(ps_proc->pu1_deblk_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs); + /* row 0 to ht in mbs */ + memset(ps_proc->pu1_deblk_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs); + + /* row '-1' */ + memset(ps_proc->pu1_me_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs); + /* row 0 to ht in mbs */ + memset(ps_proc->pu1_me_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs); + + /* at the start of air refresh period, reset intra coded map */ + if (IVE_AIR_MODE_NONE != ps_codec->s_cfg.e_air_mode) + { + ps_codec->i4_air_pic_cnt = (ps_codec->i4_air_pic_cnt + 1) + % ps_codec->s_cfg.u4_air_refresh_period; + + if (!ps_codec->i4_air_pic_cnt) + { + memset(ps_proc->pu1_is_intra_coded, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs); + } + } + } + + /* deblock level */ + ps_proc->u4_disable_deblock_level = ps_codec->i4_disable_deblk_pic; + + /* slice index map */ + /* no slice */ + if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_NONE) + { + memset(ps_proc->pu1_slice_idx, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs); + } + /* generate slices for every 'n' rows, 'n' is given through slice param */ + else if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS) + { + /* slice idx map */ + UWORD8 *pu1_slice_idx = ps_proc->pu1_slice_idx; 
+ + /* temp var */ + WORD32 i4_mb_y = 0, slice_idx = 0, cnt; + + while (i4_mb_y < ps_proc->i4_ht_mbs) + { + if (i4_mb_y +(WORD32)ps_codec->s_cfg.u4_slice_param < ps_proc->i4_ht_mbs) + { + cnt = ps_codec->s_cfg.u4_slice_param * ps_proc->i4_wd_mbs; + i4_mb_y += ps_codec->s_cfg.u4_slice_param; + } + else + { + cnt = (ps_proc->i4_ht_mbs - i4_mb_y) * ps_proc->i4_wd_mbs; + i4_mb_y += (ps_proc->i4_ht_mbs - i4_mb_y); + } + memset(pu1_slice_idx, slice_idx, cnt); + slice_idx++; + pu1_slice_idx += cnt; + } + } + + /* Current MV Bank's buffer ID */ + ps_proc->i4_cur_mv_bank_buf_id = cur_mv_bank_buf_id; + + /* Pointer to current picture buffer structure */ + ps_proc->ps_cur_pic = ps_cur_pic; + + /* Pointer to current pictures mv buffers */ + ps_proc->ps_cur_mv_buf = ps_mv_buf; + + /* pointer to ref picture */ + ps_proc->ps_ref_pic = ps_ref_pic; + + if ((*pic_type != PIC_IDR) && (*pic_type != PIC_I)) + { + /* ref pointer luma */ + ps_proc->pu1_ref_buf_luma_base = ps_ref_pic->pu1_luma; + + /* ref pointer chroma */ + ps_proc->pu1_ref_buf_chroma_base = ps_ref_pic->pu1_chroma; + } + + /* Structure for current input buffer */ + ps_proc->s_inp_buf = *ps_inp_buf; + + /* Number of encode frame API calls made */ + ps_proc->i4_encode_api_call_cnt = ps_codec->i4_encode_api_call_cnt; + + /* Current Picture count */ + ps_proc->i4_pic_cnt = ps_codec->i4_pic_cnt; + + /* error status */ + ps_proc->i4_error_code = 0; + + /********************************************************************/ + /* INITIALIZE ENTROPY CONTEXT */ + /********************************************************************/ + { + entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; + + /* start of frame */ + ps_entropy->i4_sof = 0; + + /* end of frame */ + ps_entropy->i4_eof = 0; + + /* generate header */ + ps_entropy->i4_gen_header = ps_codec->i4_gen_header; + + /* sps ref_set_id */ + ps_entropy->u4_sps_id = ps_codec->i4_sps_id; + + /* sps base */ + ps_entropy->ps_sps_base = ps_codec->ps_sps_base; + + /* sps id */ + 
ps_entropy->u4_pps_id = ps_codec->i4_pps_id; + + /* sps base */ + ps_entropy->ps_pps_base = ps_codec->ps_pps_base; + + /* slice map */ + ps_entropy->pu1_slice_idx = ps_proc->pu1_slice_idx; + + /* slice hdr base */ + ps_entropy->ps_slice_hdr_base = ps_proc->ps_slice_hdr_base; + + /* initialize entropy map */ + if (i == j) + { + /* row '-1' */ + memset(ps_entropy->pu1_entropy_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs); + /* row 0 to ht in mbs */ + memset(ps_entropy->pu1_entropy_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs); + } + + /* wd in mbs */ + ps_entropy->i4_wd_mbs = ps_proc->i4_wd_mbs; + + /* ht in mbs */ + ps_entropy->i4_ht_mbs = ps_proc->i4_ht_mbs; + + /* transform_8x8_mode_flag */ + ps_entropy->i1_transform_8x8_mode_flag = 0; + + /* entropy_coding_mode_flag */ + ps_entropy->u1_entropy_coding_mode_flag = + ps_codec->s_cfg.u4_entropy_coding_mode; + + /* error code */ + ps_entropy->i4_error_code = IH264E_SUCCESS; + + /* mb skip run */ + *(ps_proc->s_entropy.pi4_mb_skip_run) = 0; + + /* last frame to encode */ + ps_proc->s_entropy.u4_is_last = ps_inp_buf->u4_is_last; + + /* Current Picture count */ + ps_proc->s_entropy.i4_pic_cnt = ps_codec->i4_pic_cnt; + + /* time stamps */ + ps_entropy->u4_timestamp_low = u4_timestamp_low; + ps_entropy->u4_timestamp_high = u4_timestamp_high; + + /* init frame statistics */ + ps_entropy->u4_header_bits[MB_TYPE_INTRA] = 0; + ps_entropy->u4_header_bits[MB_TYPE_INTER] = 0; + ps_entropy->u4_residue_bits[MB_TYPE_INTRA] = 0; + ps_entropy->u4_residue_bits[MB_TYPE_INTER] = 0; + } + + /********************************************************************/ + /* INITIALIZE DEBLOCK CONTEXT */ + /********************************************************************/ + { + /* deblk ctxt */ + deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; + + /* slice idx map */ + ps_deblk->pu1_slice_idx = ps_proc->pu1_slice_idx; + } + + /********************************************************************/ + /* INITIALIZE ME CONTEXT */ + 
/********************************************************************/ + { + /* me ctxt */ + me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt; + + /* srch range x */ + ps_me_ctxt->ai2_srch_boundaries[0] = + ps_codec->s_cfg.u4_srch_rng_x; + + /* srch range y */ + ps_me_ctxt->ai2_srch_boundaries[1] = + ps_codec->s_cfg.u4_srch_rng_y; + + /* src stride */ + ps_me_ctxt->i4_src_strd = ps_codec->i4_src_strd; + + /* rec stride */ + ps_me_ctxt->i4_rec_strd = ps_codec->i4_rec_strd; + + /* Half x plane offset from pic buf */ + ps_me_ctxt->u4_half_x_offset = ps_proc->u4_half_x_offset; + + /* Half y plane offset from half x plane */ + ps_me_ctxt->u4_half_y_offset = ps_proc->u4_half_y_offset; + + /* Half x plane offset from half y plane */ + ps_me_ctxt->u4_half_xy_offset = ps_proc->u4_half_xy_offset; + + /* enable fast sad */ + ps_me_ctxt->u4_enable_fast_sad = u4_enable_fast_sad; + + /* half pel */ + ps_me_ctxt->u4_enable_hpel = ps_codec->s_cfg.u4_enable_hpel; + + /* Diamond search Iteration Max Cnt */ + ps_me_ctxt->u4_num_layers = u4_num_layers; + + /* me speed preset */ + ps_me_ctxt->u4_me_speed_preset = + ps_codec->s_cfg.u4_me_speed_preset; + + /* qp */ + ps_me_ctxt->u1_mb_qp = ps_codec->u4_frame_qp; + + if ((i == 0) && (0 == ps_codec->i4_pic_cnt)) + { + /* init mv bits tables */ + ih264e_init_mv_bits(ps_me_ctxt); + } + } + + ps_proc->ps_ngbr_avbl = &(ps_proc->s_ngbr_avbl); + + } + + /* reset encoder header */ + ps_codec->i4_gen_header = 0; + } + + /********************************************************************/ + /* ADD JOBS TO THE QUEUE */ + /********************************************************************/ + { + /* job structures */ + job_t s_job; + + /* temp var */ + WORD32 i; + + /* job class */ + s_job.i4_cmd = CMD_PROCESS; + + /* number of mbs to be processed in the current job */ + s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs; + + /* job start index x */ + s_job.i2_mb_x = 0; + + /* proc base idx */ + s_job.i2_proc_base_idx = ctxt_sel ? 
(MAX_PROCESS_CTXT / 2) : 0; + + for (i = 0; i < (WORD32)ps_codec->s_cfg.i4_ht_mbs; i++) + { + /* job start index y */ + s_job.i2_mb_y = i; + + /* queue the job */ + ret = ih264_list_queue(ps_codec->pv_proc_jobq, &s_job, 1); + if (ret != IH264_SUCCESS) + { + ps_codec->i4_error_code = ret; + return IH264E_FAIL; + } + } + + /* Once all the jobs are queued, terminate the queue */ + /* Since the threads are created and deleted in each call, terminating + here is not an issue */ + ih264_list_terminate(ps_codec->pv_proc_jobq); + } + + return error_status; +} diff --git a/encoder/ih264e_utils.h b/encoder/ih264e_utils.h new file mode 100755 index 0000000..651dad9 --- /dev/null +++ b/encoder/ih264e_utils.h @@ -0,0 +1,327 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_utils.h +* +* @brief +* Contains declarations of miscellaneous utility functions used by the encoder +* +* @author +* Harish +* +* @par List of Functions: +* -ih264e_get_min_level() +* -ih264e_get_lvl_idx() +* -ih264e_get_dpb_size() +* -ih264e_get_total_pic_buf_size() +* -ih264e_get_pic_mv_bank_size() +* -ih264e_pic_buf_mgr_add_bufs() +* -ih264e_mv_buf_mgr_add_bufs() +* -ih264e_init_quant_params() +* -ih264e_init_air_map() +* -ih264e_codec_init() +* -ih264e_pic_init() +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264E_UTILS_H_ +#define IH264E_UTILS_H_ + +/** +******************************************************************************* +* +* @brief +* Used to get minimum level index for a given picture size +* +* @par Description: +* Gets the minimum level index and then gets corresponding level. +* Also used to ignore invalid levels like 2.3, 3.3 etc +* +* @param[in] pic_size +* Picture size (presumably width x height in luma samples, as documented for +* ih264e_get_dpb_size() - TODO confirm against the definition) +* +* @returns Level index for a given level +* (NOTE(review): unclear from this header whether the level or its index is +* returned - confirm against the definition) +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_get_min_level(WORD32 pic_size); + +/** +******************************************************************************* +* +* @brief +* Used to get level index for a given level +* +* @par Description: +* Converts from level_idc (which is multiplied by 30) to an index that can be +* used as a lookup. 
Also used to ignore invalid levels like 2.2, 3.2 etc +* +* @param[in] level +* Level of the stream +* +* @returns Level index for a given level +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ih264e_get_lvl_idx(WORD32 level); + +/** +******************************************************************************* +* +* @brief returns maximum number of pictures allowed in dpb for a given level +* +* @par Description: +* For given width, height and level, number of pictures allowed in decoder +* picture buffer is computed as per Annex A.3.1 +* +* @param[in] level +* level of the bit-stream +* +* @param[in] pic_size +* width * height +* +* @returns Number of buffers in DPB +* +* @remarks +* From annexure A.3.1 of H264 specification, +* max_dec_frame_buffering <= MaxDpbSize, where MaxDpbSize is equal to +* Min( 1024 * MaxDPB / ( PicWidthInMbs * FrameHeightInMbs * 384 ), 16 ) and +* MaxDPB is given in Table A-1 in units of 1024 bytes. However the MaxDPB size +* presented in the look up table gas_ih264_lvl_tbl is in units of 512 +* bytes. Hence the expression is modified accordingly. 
+* +******************************************************************************* +*/ +WORD32 ih264e_get_dpb_size(WORD32 level, WORD32 pic_size); + +/** +******************************************************************************* +* +* @brief +* Used to get reference picture buffer size for a given level and +* padding used +* +* @par Description: +* Used to get reference picture buffer size for a given level and padding used +* Each picture is padded on all four sides +* +* @param[in] pic_size +* Number of luma samples (Width * Height) +* +* @param[in] level +* Level +* +* @param[in] horz_pad +* Total padding used in horizontal direction +* +* @param[in] vert_pad +* Total padding used in vertical direction +* +* @param[in] num_ref_frames +* Number of reference frames (NOTE(review): this parameter was not described +* in the original header - confirm its semantics against the definition) +* +* @param[in] num_reorder_frames +* Number of reorder frames (NOTE(review): same caveat as num_ref_frames) +* +* @returns Total picture buffer size +* +* @remarks +* +* +******************************************************************************* +*/ +WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size, WORD32 level, + WORD32 horz_pad, WORD32 vert_pad, + WORD32 num_ref_frames, + WORD32 num_reorder_frames); + +/** +******************************************************************************* +* +* @brief Returns MV bank buffer size for a given number of luma samples +* +* @par Description: +* For given number of luma samples one MV bank size is computed. 
+* Each MV bank includes pu_map and enc_pu_t for all the min PUs(4x4) in a picture +* +* @param[in] num_luma_samples +* Max number of luma pixels in the frame +* +* @returns Total MV Bank size +* +* @remarks +* +* +******************************************************************************* +*/ +WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples); + +/** +******************************************************************************* +* +* @brief +* Function to initialize ps_pic_buf structs and add pic buffers to +* buffer manager in case of non-shared mode +* +* @par Description: +* Function to initialize ps_pic_buf structs and add pic buffers to +* buffer manager in case of non-shared mode +* To be called once per stream or for every reset +* +* @param[in] ps_codec +* Pointer to codec context +* +* @returns error status +* +* @remarks +* +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_pic_buf_mgr_add_bufs(codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief Function to add buffers to MV Bank buffer manager +* +* @par Description: +* Function to add buffers to MV Bank buffer manager. To be called once per +* stream or for every reset +* +* @param[in] ps_codec +* Pointer to codec context +* +* @returns error status +* +* @remarks +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief Function to initialize quant params structure +* +* @par Description: +* The forward quantization modules depend on qp/6, qp mod 6, forward scale +* matrix, forward threshold matrix, weight list. The inverse quantization +* modules depend on qp/6, qp mod 6, inverse scale matrix, weight list. +* These params are initialized in this function. 
+* +* @param[in] ps_proc +* pointer to process context +* +* @param[in] qp +* quantization parameter +* +* @returns none +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_init_quant_params(process_ctxt_t *ps_proc, int qp); + +/** +******************************************************************************* +* +* @brief +* Initialize AIR mb frame Map +* +* @par Description: +* Initialize AIR mb frame map +* MB frame map indicates which frame an Mb should be coded as intra according to AIR +* +* @param[in] ps_codec +* Pointer to codec context +* +* @returns error_status +* +* @remarks +* +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_init_air_map(codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief +* Codec level initializations +* +* @par Description: +* Initializes the codec with parameters that need to be set before encoding +* first frame +* +* @param[in] ps_codec +* Pointer to codec context +* +* @returns error_status +* +* @remarks +* +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief +* Picture level initializations +* +* @par Description: +* Before beginning to encode the frame, the current function initializes all +* the ctxts (proc, entropy, me, ...) based on the input configured params. +* It locates space for storing recon in the encoder picture buffer set, fetches +* reference frame from encoder picture buffer set. Calls RC pre-enc to get +* qp and pic type for the current frame. Queues proc jobs so that +* the other threads can begin encoding. 
In brief, this function sets up the +* tone for the entire encoder. +* +* @param[in] ps_codec +* Pointer to codec context +* +* @param[in] ps_inp_buf +* Pointer to input buffer context +* +* @returns error_status +* +* @remarks +* +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf); + +#endif /* IH264E_UTILS_H_ */ diff --git a/encoder/ih264e_version.c b/encoder/ih264e_version.c new file mode 100755 index 0000000..3dcba8d --- /dev/null +++ b/encoder/ih264e_version.c @@ -0,0 +1,143 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_version.c +* +* @brief +* Contains version info for H264 encoder +* +* @author +* ittiam +* +* @par List of Functions: +* - ih264e_get_version() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* system include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* user include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264e.h" +#include "ih264_defs.h" +#include "ih264_debug.h" +#include "ih264_structs.h" +#include "ih264e_version.h" + + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/** + * Name of the codec and target platform (All Cortex A processors in this case) + */ +#define CODEC_NAME "H264ENC" +/** + * Codec release type, production or evaluation + */ +#define CODEC_RELEASE_TYPE "production" +/** + * Version string. 
First two digits signify major version and last two minor + */ +#define CODEC_RELEASE_VER "01.00" +/** + * Vendor name + */ +#define CODEC_VENDOR "ITTIAM" + +#define MAX_STRLEN 511 +/** +******************************************************************************* +* Concatenates various strings to form a version string +******************************************************************************* +*/ +#define VERSION(version_string, codec_name, codec_release_type, codec_release_ver, codec_vendor) \ + strncpy(version_string,"@(#)Id:", MAX_STRLEN); \ + strncat(version_string,codec_name, MAX_STRLEN); \ + strncat(version_string,"_", MAX_STRLEN); \ + strncat(version_string,codec_release_type, MAX_STRLEN); \ + strncat(version_string," Ver:", MAX_STRLEN); \ + strncat(version_string,codec_release_ver, MAX_STRLEN); \ + strncat(version_string," Released by ", MAX_STRLEN); \ + strncat(version_string,codec_vendor, MAX_STRLEN); \ + strncat(version_string," Build: ", MAX_STRLEN); \ + strncat(version_string,__DATE__, MAX_STRLEN); \ + strncat(version_string," @ ", MAX_STRLEN); \ + strncat(version_string,__TIME__, MAX_STRLEN); + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief +* Fills the version info in the given char pointer +* +* @par Description: +* Fills the version info in the given char pointer +* +* @param[in] pc_version +* Pointer to hold version info +* +* @param[in] u4_version_bufsize +* Size of the buffer passed +* +* @returns error status +* +* @remarks none +* +******************************************************************************* +*/ +IV_STATUS_T ih264e_get_version(CHAR *pc_version, UWORD32 u4_version_bufsize) +{ + CHAR ac_version_tmp[MAX_STRLEN]; + + VERSION(ac_version_tmp, CODEC_NAME, CODEC_RELEASE_TYPE, 
CODEC_RELEASE_VER, + CODEC_VENDOR); + + if (u4_version_bufsize >= (strnlen(ac_version_tmp, MAX_STRLEN) + 1)) + { + memcpy(pc_version, ac_version_tmp, (strnlen(ac_version_tmp, MAX_STRLEN) + 1)); + return IV_SUCCESS; + } + else + { + return IV_FAIL; + } +} diff --git a/encoder/ih264e_version.h b/encoder/ih264e_version.h new file mode 100755 index 0000000..303a1e2 --- /dev/null +++ b/encoder/ih264e_version.h @@ -0,0 +1,64 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_version.h +* +* @brief +* Contains the declaration of the function that reports the encoder version +* +* @author +* ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef IH264E_VERSION_H_ +#define IH264E_VERSION_H_ + +/** +******************************************************************************* +* +* @brief +* Fills the version info in the given char pointer +* +* @par Description: +* Fills the version info in the given char pointer +* +* @param[out] pc_version +* Buffer that receives the NUL terminated version string +* +* @param[in] u4_version_bufsize +* Size of the buffer passed +* +* @returns IV_SUCCESS when the version string was copied, IV_FAIL otherwise +* (for example when the buffer is too small to hold the string) +* +* @remarks none +* +******************************************************************************* +*/ +IV_STATUS_T ih264e_get_version(CHAR *pc_version, UWORD32 u4_version_bufsize); + +#endif /* IH264E_VERSION_H_ */ diff --git a/encoder/ime.c b/encoder/ime.c new file mode 100755 index 0000000..c89aaab --- /dev/null +++ b/encoder/ime.c @@ -0,0 +1,836 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ime.c + * + * @brief + * Motion estimation search routines, e.g. ime_diamond_search_16x16() and + * ime_evaluate_init_srchposn_16x16() + * + * @author + * Ittiam + * + * @par List of Functions: + * - + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <assert.h> +#include <limits.h> +#include <string.h> + +/* User include files */ +#include "ime_typedefs.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ime_defs.h" +#include "ime_macros.h" +#include "ime.h" +#include "ime_statistics.h" + +/** +******************************************************************************* +* +* @brief Diamond Search +* +* @par Description: +* This function computes the sad at vertices of several layers of diamond grid +* at a time. The number of layers of diamond grid that would be evaluated is +* configurable. The function computes the sad at vertices of a diamond grid. If +* the sad at the center of the diamond grid is lesser than the sad at any other +* point of the diamond grid, the function marks the candidate Mb partition as +* mv. 
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. The search starts at s_mb_part.s_mv_curr, runs for at
+*  most u4_num_layers iterations and stops as soon as a neighbour of the
+*  current centre would fall outside the i4_srch_range_n/s/e/w window.
+*  On return, s_mb_part.i4_mb_cost, s_mb_part.i4_mb_distortion and
+*  s_mb_part.s_mv_curr are updated if a cheaper position was found, and
+*  s_mb_part.u4_exit is set to 1 if the centre of the diamond was the best
+*  point of an iteration
+*
+* @returns none (results are written to ps_me_ctxt->s_mb_part)
+*
+* @remarks Diamond Srch, radius is 1
+*
+*******************************************************************************
+*/
+void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt)
+{
+    /* MB partition info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /* lagrange parameter */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* srch range*/
+    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
+    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
+    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
+    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
+
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
+
+    /* least cost */
+    WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
+
+    /* least sad */
+    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
+
+    /* mv pair (centre of the current diamond) */
+    WORD16 i2_mvx, i2_mvy;
+
+    /* mv bits */
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+
+    /* temp var: cost / sad of the left, right, top and bottom neighbours */
+    WORD32 i4_cost[4];
+    WORD32 i4_sad[4];
+    UWORD8 *pu1_ref;
+
+    /* best position found in the current iteration */
+    WORD16 i2_mv_u_x, i2_mv_u_y;
+
+    /* Diamond search Iteration Max Cnt */
+    UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers;
+
+    /* mv with best sad during initial evaluation */
+    i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
+    i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
+
+    i2_mv_u_x = i2_mvx;
+    i2_mv_u_y = i2_mvy;
+
+    while (u4_num_layers--)
+    {
+        /* FIXME : is this the right way to check for out of bounds ? */
+        if ( (i2_mvx - 1 < i4_srch_range_w) ||
+             (i2_mvx + 1 > i4_srch_range_e) ||
+             (i2_mvy - 1 < i4_srch_range_n) ||
+             (i2_mvy + 1 > i4_srch_range_s) )
+        {
+            break;
+        }
+
+        pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
+
+        ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref,
+                                                pu1_curr_mb,
+                                                i4_ref_strd,
+                                                i4_src_strd,
+                                                i4_sad);
+
+        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
+        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
+        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
+        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);
+
+        /* compute cost */
+        /* The mv bits table is indexed with the candidate mv scaled by 4
+         * (presumably full-pel to quarter-pel - TODO confirm) minus the
+         * predicted mv. The scaling is a multiplication rather than a left
+         * shift because the mv components can be negative and left shifting
+         * a negative value is undefined behaviour in C. */
+        i4_cost[0] = i4_sad[0] + u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx - 1) * 4) - ps_mb_part->s_mv_pred.i2_mvx]
+                        + pu1_mv_bits[(i2_mvy * 4) - ps_mb_part->s_mv_pred.i2_mvy] );
+        i4_cost[1] = i4_sad[1] + u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx + 1) * 4) - ps_mb_part->s_mv_pred.i2_mvx]
+                        + pu1_mv_bits[(i2_mvy * 4) - ps_mb_part->s_mv_pred.i2_mvy] );
+        i4_cost[2] = i4_sad[2] + u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx * 4) - ps_mb_part->s_mv_pred.i2_mvx]
+                        + pu1_mv_bits[((i2_mvy - 1) * 4) - ps_mb_part->s_mv_pred.i2_mvy] );
+        i4_cost[3] = i4_sad[3] + u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx * 4) - ps_mb_part->s_mv_pred.i2_mvx]
+                        + pu1_mv_bits[((i2_mvy + 1) * 4) - ps_mb_part->s_mv_pred.i2_mvy] );
+
+
+        /* left neighbour */
+        if (i4_cost_least > i4_cost[0])
+        {
+            i4_cost_least = i4_cost[0];
+            i4_distortion_least = i4_sad[0];
+
+            i2_mv_u_x = (i2_mvx - 1);
+            i2_mv_u_y = i2_mvy;
+        }
+
+        /* right neighbour */
+        if (i4_cost_least > i4_cost[1])
+        {
+            i4_cost_least = i4_cost[1];
+            i4_distortion_least = i4_sad[1];
+
+            i2_mv_u_x = (i2_mvx + 1);
+            i2_mv_u_y = i2_mvy;
+        }
+
+        /* top neighbour */
+        if (i4_cost_least > i4_cost[2])
+        {
+            i4_cost_least = i4_cost[2];
+            i4_distortion_least = i4_sad[2];
+
+            i2_mv_u_x = i2_mvx;
+            i2_mv_u_y = i2_mvy - 1;
+        }
+
+        /* bottom neighbour */
+        if (i4_cost_least > i4_cost[3])
+        {
+            i4_cost_least = i4_cost[3];
+            i4_distortion_least = i4_sad[3];
+
+            i2_mv_u_x = i2_mvx;
+            i2_mv_u_y = i2_mvy + 1;
+        }
+
+        /* no neighbour beat the centre: the search has converged */
+        if( (i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
+        {
+            ps_mb_part->u4_exit = 1;
+            break;
+        }
+        else
+        {
+            /* recentre the diamond on the best neighbour */
+            i2_mvx = i2_mv_u_x;
+            i2_mvy = i2_mv_u_y;
+        }
+
+
+    }
+
+    /* publish the result only if the search improved on the initial cost */
+    if (i4_cost_least < ps_mb_part->i4_mb_cost)
+    {
+        ps_mb_part->i4_mb_cost = i4_cost_least;
+        ps_mb_part->i4_mb_distortion = i4_distortion_least;
+        ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
+        ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
+    }
+
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes the best motion vector among the tentative mv
+* candidates chosen.
+*
+* @par Description:
+*  This function determines the position in the search window at which the motion
+*  estimation should begin in order to minimise the number of search iterations.
+*
+* @param[in] ps_mb_part
+*  pointer to current mb partition ctxt with respect to ME
+*
+* @param[in] u4_lambda_motion
+*  lambda motion
+*
+* @param[in] u4_fast_flag
+*  enable/disable fast sad computation
+*
+* @returns  mv pair & corresponding distortion and cost
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ime_evaluate_init_srchposn_16x16
+    (
+         me_ctxt_t *ps_me_ctxt
+    )
+{
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* candidate mv cnt */
+    UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates;
+
+    /* list of candidate mvs */
+    ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search;
+
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
+
+    /* enabled fast sad computation */
+    UWORD32 u4_enable_fast_sad = 
ps_me_ctxt->u4_enable_fast_sad; + + /* SAD(distortion metric) of the 16x16 mb */ + WORD32 i4_mb_distortion; + + /* cost = distortion + u4_lambda_motion * rate */ + WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX; + + /* mb partitions info */ + mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part; + + /* mv bits */ + UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; + + /* temp var */ + UWORD32 i, j, u4_srch_pos_idx = 0; + UWORD8 *pu1_ref = NULL; + WORD16 mv_x, mv_y; + + if (0) /* disabled: the skip cost block below never executes */ + { + /************************************************************/ + /* Compute SKIP Cost */ + /************************************************************/ + mv_x = ps_mv_list[SKIP_CAND].i2_mvx; + mv_y = ps_mv_list[SKIP_CAND].i2_mvy; + + /* adjust ref pointer */ + pu1_ref = pu1_ref_mb + mv_x + (mv_y * i4_ref_strd); + + /* compute distortion */ + ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion); + + /* for skip mode cost & distortion are identical + * But we shall add a bias to favor skip mode. + * Doc. JVT B118 Suggests SKIP_BIAS as 16. + * TODO : Empirical analysis of SKIP_BIAS is necessary */ + + i4_distortion_least = i4_mb_distortion; + + u4_srch_pos_idx = 0; + +#define SKIP_BIAS 8 + + i4_mb_cost_least = i4_mb_distortion - (u4_lambda_motion * SKIP_BIAS); + +#undef SKIP_BIAS + } + + + /* Carry out a search using each of the motion vector pairs identified above as predictors. 
*/ + /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */ + for(i = 0; i < u4_num_candidates; i++) + { + /* set when sad has to be computed; cleared if this mv duplicates an earlier candidate */ + WORD32 c_sad = 1; + + for(j = 0; j < i; j++ ) + { + if ( (ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) && + (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy) ) + { + c_sad = 0; + break; + } + } + if(c_sad) + { + /* adjust ref pointer */ + pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd); + + /* compute distortion */ + ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion); + DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3); + /* compute cost; the mv is scaled by 4 with a multiply because left shifting a negative mv is undefined behaviour in C */ + i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ (ps_mv_list[i].i2_mvx * 4) - ps_mb_part->s_mv_pred.i2_mvx] + + pu1_mv_bits[(ps_mv_list[i].i2_mvy * 4) - ps_mb_part->s_mv_pred.i2_mvy] ); + + if (i4_mb_cost < i4_mb_cost_least) + { + i4_mb_cost_least = i4_mb_cost; + + i4_distortion_least = i4_mb_distortion; + + u4_srch_pos_idx = i; + } + } + } + + if (i4_mb_cost_least < ps_mb_part->i4_mb_cost) + { + ps_mb_part->u4_srch_pos_idx = u4_srch_pos_idx; + ps_mb_part->i4_mb_cost = i4_mb_cost_least; + ps_mb_part->i4_mb_distortion = i4_distortion_least; + ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[u4_srch_pos_idx].i2_mvx; + ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[u4_srch_pos_idx].i2_mvy; + } +} + + +/** +******************************************************************************* +* +* @brief Searches for the best matching full pixel predictor within the search +* range +* +* @par Description: +* This function restricts the search range to ai2_srch_boundaries around the +* current best candidate mv (s_mb_part.s_mv_curr). Further, based on the algo. chosen +* (u4_me_speed_preset, only DMND_SRCH is handled), it searches around that candidate +* and stores the resulting mv scaled by 4. 
+* +* @param[in] ps_proc +* pointer to current proc ctxt +* +* @param[in] ps_me_ctxt +* pointer to me context +* +* @returns mv pair & corresponding distortion and cost +* +* @remarks none +* +******************************************************************************* +*/ +void ime_full_pel_motion_estimation_16x16 + ( + me_ctxt_t *ps_me_ctxt + ) +{ + /* mb part info */ + mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part; + + /******************************************************************/ + /* Modify Search range about initial candidate instead of zero mv */ + /******************************************************************/ + /* + * FIXME: The motion vectors in a way can become unbounded. It may so happen that + * MV might exceed the limit of the profile configured. + */ + ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w, + -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx); + ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e, + ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx); + ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n, + -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy); + ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s, + ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy); + + /************************************************************/ + /* Traverse about best initial candidate for mv */ + /************************************************************/ + + switch (ps_me_ctxt->u4_me_speed_preset) + { + case DMND_SRCH: + ime_diamond_search_16x16(ps_me_ctxt); + break; + default: + assert(0); + break; + } + + ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx << 2; + ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy << 2; + +} + + +/** +******************************************************************************* +* +* @brief Searches for the best matching sub pixel 
predictor within the search +* range +* +* @par Description: +* This function evaluates the 8 half pel sample points +* around the full pel motion vector. The vector with least cost is chosen as +* the mv for the current mb. Skip mode is not analysed in this function +* (see ime_compute_skip_cost). +* +* @param[in] ps_proc +* pointer to current proc ctxt (not an argument of this function) +* +* @param[in] ps_me_ctxt +* pointer to me context +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ime_sub_pel_motion_estimation_16x16 + ( + me_ctxt_t *ps_me_ctxt + ) +{ + /* pointer to src macro block */ + UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; + + + /* pointers to ref. half pel planes */ + UWORD8 *pu1_ref_mb_half_x; + UWORD8 *pu1_ref_mb_half_y; + UWORD8 *pu1_ref_mb_half_xy; + + /* copies of the above, used to derive the best half pel buffer */ + UWORD8 *pu1_ref_mb_half_x_temp; + UWORD8 *pu1_ref_mb_half_y_temp; + UWORD8 *pu1_ref_mb_half_xy_temp; + + /* strides */ + WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; + + WORD32 i4_ref_strd = ps_me_ctxt->u4_hp_buf_strd; + + /* mb partitions info */ + mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part; + + /* SAD(distortion metric) of an mb */ + WORD32 i4_mb_distortion; + WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion; + + /* cost = distortion + u4_lambda_motion * rate */ + WORD32 i4_mb_cost; + WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost; + + /*Best half pel buffer*/ + UWORD8 *pu1_best_hpel_buf = NULL; + + + /* mv bits */ + UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; + + /* Motion vectors in full-pel units */ + WORD16 mv_x, mv_y; + + /* lambda - lagrange constant */ + UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; + + /* Half pel candidates; no flag word is used here, the order below matches the ai4_sad[] index order */ + /**************************************/ + /* 1 bit for each half pel candidate */ + /* bit 0 - half x = 1, half y = 0 */ + 
/* bit 1 - half x = -1, half y = 0 */ + /* bit 2 - half x = 0, half y = 1 */ + /* bit 3 - half x = 0, half y = -1 */ + /* bit 4 - half x = 1, half y = 1 */ + /* bit 5 - half x = -1, half y = 1 */ + /* bit 6 - half x = 1, half y = -1 */ + /* bit 7 - half x = -1, half y = -1 */ + /**************************************/ + /* temp var */ + WORD16 i2_mv_u_x, i2_mv_u_y; + WORD32 i, j; + WORD32 ai4_sad[8]; + + i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx; + i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy; + + /************************************************************/ + /* Evaluate half pel */ + /************************************************************/ + mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2; + mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2; + + + /**************************************************************/ + /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */ + /* left side of full pel */ + /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */ + /* top side of full pel */ + /* ps_me_ctxt->pu1_half_xy points to the half pel pixel */ + /* on the top left side of full pel */ + /* for the function pf_ime_sub_pel_compute_sad_16x16 the */ + /* default positions are */ + /* ps_me_ctxt->pu1_half_x = right half_pel */ + /* ps_me_ctxt->pu1_half_y = bottom half_pel */ + /* ps_me_ctxt->pu1_half_xy = bottom right half_pel */ + /* Hence corresponding adjustments made here */ + /**************************************************************/ + + pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->pu1_half_x + 1; + pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->pu1_half_y + 1 + i4_ref_strd; + pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->pu1_half_xy + 1 + i4_ref_strd; + + + ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x, + pu1_ref_mb_half_y, + pu1_ref_mb_half_xy, + i4_src_strd, i4_ref_strd, + ai4_sad); + + /* Half x plane; mv rebuilt with '* 4' because left shifting a negative mv is undefined behaviour in C */ + for(i = 0; i < 2; i++) + { + WORD32 mv_x_tmp = (mv_x * 4) + 2; + WORD32 mv_y_tmp = 
(mv_y * 4); + + mv_x_tmp -= (i * 4); + + i4_mb_distortion = ai4_sad[i]; + + /* compute cost */ + i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] + + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ); + + if (i4_mb_cost < i4_mb_cost_least) + { + i4_mb_cost_least = i4_mb_cost; + + i4_distortion_least = i4_mb_distortion; + + i2_mv_u_x = mv_x_tmp; + + i2_mv_u_y = mv_y_tmp; + +#ifndef HP_PL /*choosing whether left or right half_x*/ + ps_me_ctxt->pu1_half_x = pu1_ref_mb_half_x_temp - i; + pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i; +#endif + } + + } + + /* Half y plane */ + for(i = 0; i < 2; i++) + { + WORD32 mv_x_tmp = (mv_x * 4); + WORD32 mv_y_tmp = (mv_y * 4) + 2; + + mv_y_tmp -= (i * 4); + + i4_mb_distortion = ai4_sad[2 + i]; + + /* compute cost */ + i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] + + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ); + + if (i4_mb_cost < i4_mb_cost_least) + { + i4_mb_cost_least = i4_mb_cost; + + i4_distortion_least = i4_mb_distortion; + + i2_mv_u_x = mv_x_tmp; + + i2_mv_u_y = mv_y_tmp; + +#ifndef HP_PL/*choosing whether top or bottom half_y*/ + ps_me_ctxt->pu1_half_y = pu1_ref_mb_half_y_temp - i*(i4_ref_strd); + pu1_best_hpel_buf = pu1_ref_mb_half_y_temp - i*(i4_ref_strd); +#endif + } + + } + + /* Half xy plane */ + for(j = 0; j < 2; j++) + { + for(i = 0; i < 2; i++) + { + WORD32 mv_x_tmp = (mv_x * 4) + 2; + WORD32 mv_y_tmp = (mv_y * 4) + 2; + + mv_x_tmp -= (i * 4); + mv_y_tmp -= (j * 4); + + i4_mb_distortion = ai4_sad[4 + i + 2 * j]; + + /* compute cost */ + i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] + + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ); + + if (i4_mb_cost < i4_mb_cost_least) + { + i4_mb_cost_least = i4_mb_cost; + + i4_distortion_least = i4_mb_distortion; + + i2_mv_u_x = mv_x_tmp; + + i2_mv_u_y = mv_y_tmp; + 
+#ifndef HP_PL /*choosing between four half_xy */ + ps_me_ctxt->pu1_half_xy = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i; + pu1_best_hpel_buf = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i; +#endif + } + + } + } + + ps_mb_part->i4_mb_cost = i4_mb_cost_least; + ps_mb_part->i4_mb_distortion = i4_distortion_least; + ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x; + ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y; + ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf; + +} + + +/** +******************************************************************************* +* +* @brief This function computes cost of skip macroblocks +* +* @par Description: +* Computes the sad at the skip mv and updates ps_smb_part_info when the biased cost is not larger than its current cost; u4_use_stat_sad == 1 selects pf_ime_compute_sad_stat_luma_16x16 +* @param[in] ps_me_ctxt +* pointer to me ctxt +* +* @param[in] pv_skip_mv +* pointer to skip mv (used as ime_mv_t *) +* +* @returns none +* +* @remarks +* NOTE: while computing the skip cost, do not enable early exit from compute +* sad function because, a negative bias gets added later +* +******************************************************************************* +*/ +void ime_compute_skip_cost + ( + me_ctxt_t *ps_me_ctxt, + void *pv_skip_mv, + mb_part_ctxt *ps_smb_part_info, + UWORD32 u4_use_stat_sad + ) +{ + + /* pointers to src & ref macro block */ + UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; + UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma; + + /* strides */ + WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; + WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd; + + /* enabled fast sad computation */ + UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad; + + /* SAD(distortion metric) of an mb */ + WORD32 i4_mb_distortion; + + /* cost = distortion + u4_lambda_motion * rate */ + WORD32 i4_mb_cost; + + /* Motion vectors in full-pel units */ + WORD16 mv_x, mv_y; + + /* lambda - lagrange constant */ + UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; + + /* skip mv */ + ime_mv_t *ps_skip_mv = pv_skip_mv, s_clip_skip_mv; + + /* temp var */ + UWORD8 *pu1_ref = NULL; + UWORD32 u4_is_nonzero; + /* NOTE(review): the skip mv is treated as quarter pel below ((mv + 2) >> 2), while ime_diamond_search_16x16 compares the srch range with full pel mvs - confirm the units of this clip */ + s_clip_skip_mv.i2_mvx = 
CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, ps_skip_mv->i2_mvx); + s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, ps_skip_mv->i2_mvy); + + if ((s_clip_skip_mv.i2_mvx != ps_skip_mv->i2_mvx) || + (s_clip_skip_mv.i2_mvy != ps_skip_mv->i2_mvy)) + { + /* skip motion vector not within bounds */ + /* it is possible that mv is already evaluated */ + return ; + } + + mv_x = (ps_skip_mv->i2_mvx + 2) >> 2; + mv_y = (ps_skip_mv->i2_mvy + 2) >> 2; + + if ((mv_x * 4) != ps_skip_mv->i2_mvx || (mv_y * 4) != ps_skip_mv->i2_mvy) + { + /* skip mv is not a multiple of 4, it is not evaluated here ('* 4' avoids left shifting a negative mv) */ + + return ; + + + } + else + { + /* adjust ref pointer */ + pu1_ref = pu1_ref_mb + mv_x + (mv_y * i4_ref_strd); + } + + if(u4_use_stat_sad == 1) + { + ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, + ps_me_ctxt->pu2_sad_thrsh, &i4_mb_distortion,&u4_is_nonzero); + + /* + *NOTE The check here is two fold + * One is checking if the sad has been reached, ie min sad, which is a configurable parameter + * If that is reached,we need not do any mode evaluation + * Similarly if we find a distortion of zero there is no point of doing any further mode evaluation + * as sad is a non negative quantity + * hence in this case too, no further evaluation is necessary + */ + /* + *NOTE in case we need to disable the zero check using satdq, + * we need only to set the u4_is_nonzero to a non zero value + */ + if(u4_is_nonzero==0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad) + { + ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad*/ + ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0)?0:i4_mb_distortion; + } + } + else + { + ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, INT_MAX, &i4_mb_distortion); + + if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad) + { + ps_me_ctxt->i4_min_sad = i4_mb_distortion; + ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad*/ + } + } + + /* for skip mode cost & distortion are identical 
+ * But we shall add a bias to favor skip mode. + * Doc. JVT B118 Suggests SKIP_BIAS as 16. + * TODO : Empirical analysis of SKIP_BIAS is necessary */ +#define SKIP_BIAS 8 + i4_mb_cost = i4_mb_distortion - (u4_lambda_motion * SKIP_BIAS); +#undef SKIP_BIAS + + if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost) + { + ps_smb_part_info->i4_mb_cost = i4_mb_cost; + ps_smb_part_info->i4_mb_distortion = i4_mb_distortion; + ps_smb_part_info->s_mv_curr.i2_mvx = ps_skip_mv->i2_mvx; + ps_smb_part_info->s_mv_curr.i2_mvy = ps_skip_mv->i2_mvy; + } +} + diff --git a/encoder/ime.h b/encoder/ime.h new file mode 100755 index 0000000..5c039e8 --- /dev/null +++ b/encoder/ime.h @@ -0,0 +1,209 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ime.h + * + * @brief + * Contains the NUM_LAYERS constant and extern declarations of the motion estimation functions + * + * @author + * Ittiam + * + * @remarks + * + ******************************************************************************* + */ + +#ifndef IME_H_ +#define IME_H_ + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/** +****************************************************************************** + * @brief Number of iterations before exiting during diamond search +****************************************************************************** + */ +#define NUM_LAYERS 16 + + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + + +/** +******************************************************************************* +* +* @brief Diamond Search +* +* @par Description: +* This function computes the sad at vertices of several layers of diamond grid +* at a time. The number of layers of diamond grid that would be evaluated is +* configurable.The function computes the sad at vertices of a diamond grid. If +* the sad at the center of the diamond grid is lesser than the sad at any other +* point of the diamond grid, the function marks the candidate Mb partition as +* mv. +* +* @param[in] ps_mb_part +* pointer to current mb partition ctxt with respect to ME +* +* @param[in] ps_me_ctxt +* pointer to me context +* +* @param[in] u4_lambda +* lambda motion +* +* @param[in] u4_fast_flag +* enable/disable fast sad computation +* +* @returns mv pair & corresponding distortion and cost +* +* @remarks This module cannot be part of the final product due to its lack of +* computational feasibility. 
This is only for quality eval purposes. The prototype takes only ps_me_ctxt and returns void. +* +******************************************************************************* +*/ +extern void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt); + + +/** +******************************************************************************* +* +* @brief This function computes the best motion vector among the tentative mv +* candidates chosen. +* +* @par Description: +* This function determines the position in the search window at which the motion +* estimation should begin in order to minimise the number of search iterations. +* +* @param[in] ps_mb_part +* pointer to current mb partition ctxt with respect to ME +* +* @param[in] u4_lambda_motion +* lambda motion +* +* @param[in] u4_fast_flag +* enable/disable fast sad computation +* +* @returns mv pair & corresponding distortion and cost +* +* @remarks the prototype takes only ps_me_ctxt and returns void +* +******************************************************************************* +*/ +extern void ime_evaluate_init_srchposn_16x16 + ( + me_ctxt_t *ps_me_ctxt + ); + +/** +******************************************************************************* +* +* @brief Searches for the best matching full pixel predictor within the search +* range +* +* @par Description: +* This function begins by computing the mv predict vector for the current mb. +* This is used for cost computations. Further basing on the algo. chosen, it +* looks through a set of candidate vectors that best represent the mb at least +* cost and returns this information. 
+* +* @param[in] ps_proc +* pointer to current proc ctxt (not in the prototype below) +* +* @param[in] ps_me_ctxt +* pointer to me context +* +* @returns mv pair & corresponding distortion and cost +* +* @remarks the prototype returns void +* +******************************************************************************* +*/ +extern void ime_full_pel_motion_estimation_16x16 + ( + me_ctxt_t *ps_me_ctxt + ); + +/** +******************************************************************************* +* +* @brief Searches for the best matching sub pixel predictor within the search +* range +* +* @par Description: +* This function begins by searching across all sub pixel sample points +* around the full pel motion vector. The vector with least cost is chosen as +* the mv for the current mb. If the skip mode is not evaluated while analysing +* the initial search candidates then analyse it here and update the mv. +* +* @param[in] ps_proc +* pointer to current proc ctxt (not in the prototype below) +* +* @param[in] ps_me_ctxt +* pointer to me context +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +extern void ime_sub_pel_motion_estimation_16x16 + ( + me_ctxt_t *ps_me_ctxt + ); + +/** +******************************************************************************* +* +* @brief This function computes cost of skip macroblocks +* +* @par Description: +* +* @param[in] ps_me_ctxt +* pointer to me ctxt +* +* @param[in] pv_skip_mv +* pointer to skip mv +* +* @returns none +* +* @remarks +* NOTE: while computing the skip cost, do not enable early exit from compute +* sad function because, a negative bias gets added later +* +******************************************************************************* +*/ +extern void ime_compute_skip_cost + ( + me_ctxt_t *ps_me_ctxt, + void *pv_skip_mv, + mb_part_ctxt *ps_smb_part_info, + UWORD32 u4_use_stat_sad + ); + + +#endif /* IME_H_ */ diff --git a/encoder/ime_defs.h b/encoder/ime_defs.h new file mode 100755 index 0000000..14d9c55 --- 
/dev/null +++ b/encoder/ime_defs.h @@ -0,0 +1,59 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ime_defs.h +* +* @brief +* Constant macros (candidate labels, directions, mb size, search presets) used in the code +* +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _IME_DEFS_H_ +#define _IME_DEFS_H_ + + +/* Macros to Label candidates */ +#define SKIP_CAND 0 +#define ZERO_CAND 1 +#define LEFT_CAND 2 +#define TOP_CAND 3 +#define TOPR_CAND 4 + +#define NONE 0 +#define LEFT 1 +#define RIGHT 2 +#define TOP 3 +#define BOTTOM 4 + +#define MB_SIZE 16 + +#define FULL_SRCH 0 +#define DMND_SRCH 100 +#define NSTEP_SRCH 50 +#define HEX_SRCH 75 + +#endif /*_IME_DEFS_H_*/ + diff --git a/encoder/ime_distortion_metrics.c b/encoder/ime_distortion_metrics.c new file mode 100755 index 0000000..23a1fbc --- /dev/null +++ b/encoder/ime_distortion_metrics.c @@ -0,0 +1,1262 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file ime_distortion_metrics.c +* +* @brief +* This file contains definitions of routines that compute distortion +* between two macro/sub blocks of identical dimensions +* +* @author +* Ittiam +* +* @par List of Functions: +* - ime_sub_pel_compute_sad_16x16() +* - ime_calculate_sad4_prog() +* - ime_calculate_sad3_prog() +* - ime_calculate_sad2_prog() +* - ime_compute_sad_16x16() +* - ime_compute_sad_16x16_fast() +* - ime_compute_sad_16x16_ea8() +* - ime_compute_sad_8x8() +* - ime_compute_sad_4x4() +* - ime_compute_sad_16x8() +* - ime_compute_satqd_16x16_lumainter() +* - ime_compute_satqd_8x16_chroma() +* - ime_compute_satqd_16x16_lumaintra() +* +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* User include files */ +#include "ime_typedefs.h" +#include "ime_defs.h" +#include "ime_macros.h" +#include "ime_statistics.h" +#include "ime_platform_macros.h" 
+#include "ime_distortion_metrics.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief computes distortion (SAD) at all subpel points about the src location +* +* @par Description +* This function computes SAD at all points at a subpel distance from the +* current source location. +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_ref_half_x +* UWORD8 pointer to half pel buffer +* +* @param[in] pu1_ref_half_y +* UWORD8 pointer to half pel buffer +* +* @param[in] pu1_ref_half_xy +* UWORD8 pointer to half pel buffer +* +* @param[in] src_strd +* integer source stride +* +* @param[in] ref_strd +* integer ref stride +* +* @param[out] pi4_sad +* integer evaluated sad +* pi4_sad[0] - half x +* pi4_sad[1] - half x - 1 +* pi4_sad[2] - half y +* pi4_sad[3] - half y - 1 +* pi4_sad[4] - half xy +* pi4_sad[5] - half xy - 1 +* pi4_sad[6] - half xy - strd +* pi4_sad[7] - half xy - 1 - strd +* +* @remarks +* +****************************************************************************** +*/ +void ime_sub_pel_compute_sad_16x16(UWORD8 *pu1_src, + UWORD8 *pu1_ref_half_x, + UWORD8 *pu1_ref_half_y, + UWORD8 *pu1_ref_half_xy, + WORD32 src_strd, + WORD32 ref_strd, + WORD32 *pi4_sad) +{ + UWORD8 *pu1_ref_half_x_left = pu1_ref_half_x - 1; + UWORD8 *pu1_ref_half_y_top = pu1_ref_half_y - ref_strd; + UWORD8 *pu1_ref_half_xy_left = pu1_ref_half_xy - 1; + UWORD8 *pu1_ref_half_xy_top = pu1_ref_half_xy - ref_strd; + UWORD8 *pu1_ref_half_xy_top_left = pu1_ref_half_xy - ref_strd - 1; + + WORD32 row, col; + + memset(pi4_sad, 0, 8 * sizeof(WORD32)); + + for(row = 0; row < MB_SIZE; row++) + { + for(col = 0; col < MB_SIZE; col++) + { + WORD32 src; + WORD32 diff; + + src = pu1_src[col]; + + diff = src - pu1_ref_half_x[col]; + 
pi4_sad[0] += ABS(diff); + + diff = src - pu1_ref_half_x_left[col]; + pi4_sad[1] += ABS(diff); + + diff = src - pu1_ref_half_y[col]; + pi4_sad[2] += ABS(diff); + + diff = src - pu1_ref_half_y_top[col]; + pi4_sad[3] += ABS(diff); + + diff = src - pu1_ref_half_xy[col]; + pi4_sad[4] += ABS(diff); + + diff = src - pu1_ref_half_xy_left[col]; + pi4_sad[5] += ABS(diff); + + diff = src - pu1_ref_half_xy_top[col]; + pi4_sad[6] += ABS(diff); + + diff = src - pu1_ref_half_xy_top_left[col]; + pi4_sad[7] += ABS(diff); + } + + pu1_src += src_strd; + + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + } +} + +/** +******************************************************************************* +* +* @brief compute sad +* +* @par Description: This function computes the sad at vertices of diamond grid +* centered at reference pointer and at unit distance from it. 
+*
+* @param[in] pu1_ref
+*  UWORD8 pointer to the reference (centre of the diamond)
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+*  integer reference stride
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[out] pi4_sad
+*  pointer to integer array evaluated sad, cleared on entry:
+*  pi4_sad[0] - left, pi4_sad[1] - right, pi4_sad[2] - top, pi4_sad[3] - bottom
+*
+* @returns  sad at all evaluated vertexes
+*
+* @remarks rows are stepped with the signed strides, so the pointer
+*  arithmetic does not depend on a 32-bit unsigned wrap-around
+*
+*******************************************************************************
+*/
+void ime_calculate_sad4_prog(UWORD8 *pu1_ref,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+
+    /* reference ptrs at unit 1 distance in diamond pattern centered at pu1_ref */
+    UWORD8 *left_ptr = pu1_ref - 1;
+    UWORD8 *right_ptr = pu1_ref + 1;
+    UWORD8 *top_ptr = pu1_ref - ref_strd;
+    UWORD8 *bot_ptr = pu1_ref + ref_strd;
+
+    WORD32 row, col;
+
+    memset(pi4_sad, 0, 4 * sizeof(WORD32));
+
+    for(row = 0; row < MB_SIZE; row++)
+    {
+        for(col = 0; col < MB_SIZE; col++)
+        {
+            WORD32 src = pu1_src[col];
+            WORD32 diff;
+
+            diff = src - left_ptr[col];
+            pi4_sad[0] += ABS(diff);
+
+            diff = src - right_ptr[col];
+            pi4_sad[1] += ABS(diff);
+
+            diff = src - top_ptr[col];
+            pi4_sad[2] += ABS(diff);
+
+            diff = src - bot_ptr[col];
+            pi4_sad[3] += ABS(diff);
+        }
+
+        /* a UWORD32 row offset would be zero-extended on 64-bit targets; */
+        /* adding the signed stride is correct for every stride value     */
+        bot_ptr += ref_strd;
+        left_ptr += ref_strd;
+        right_ptr += ref_strd;
+        top_ptr += ref_strd;
+
+        pu1_src += src_strd;
+    }
+
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad
+*
+* @par Description: This function computes the sad at vertices of diamond grid
+* centered at reference pointer and at unit distance from it.
+*
+* @param[in] pu1_ref1, pu1_ref2, pu1_ref3
+*  UWORD8 pointer to the reference
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+*  integer reference stride
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[out] pi4_sad
+*  pointer to integer array evaluated sad; pi4_sad[k] is the sad against
+*  pu1_ref(k+1). The three entries are cleared before accumulation.
+*
+* @returns  sad at all evaluated vertexes
+*
+* @remarks USADA8 is defined outside this file; it is used here as
+*  "add the sad of 4 pixels to the third argument"
+*
+*******************************************************************************
+*/
+void ime_calculate_sad3_prog(UWORD8 *pu1_ref1,
+                             UWORD8 *pu1_ref2,
+                             UWORD8 *pu1_ref3,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+    /* temp var */
+    WORD32 i, j;
+    /* signed: a UWORD32 offset is zero-extended when added to a 64-bit pointer */
+    WORD32 i4_ref_buf_offset = ref_strd - MB_SIZE;
+    WORD32 i4_cur_buf_offset = src_strd - MB_SIZE;
+
+    /* outputs are accumulated into, so they must start from zero */
+    memset(pi4_sad, 0, 3 * sizeof(WORD32));
+
+    for(i = 16; i > 0; i--)
+    {
+        /* one row = 4 groups of 4 pixels */
+        for(j = 4; j > 0; j--)
+        {
+            USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
+            USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
+            USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
+            pu1_src += 4;
+            pu1_ref1 += 4;
+            pu1_ref2 += 4;
+            pu1_ref3 += 4;
+        }
+
+        pu1_src += i4_cur_buf_offset;
+        pu1_ref1 += i4_ref_buf_offset;
+        pu1_ref2 += i4_ref_buf_offset;
+        pu1_ref3 += i4_ref_buf_offset;
+    }
+
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad
+*
+* @par Description: This function computes the sad at vertices of diamond grid
+* centered at reference pointer and at unit distance from it.
+*
+* @param[in] pu1_ref1, pu1_ref2
+*  UWORD8 pointer to the reference
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+*  integer reference stride
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[out] pi4_sad
+*  pointer to integer array evaluated sad; pi4_sad[k] is the sad against
+*  pu1_ref(k+1). The two entries are cleared before accumulation.
+*
+* @returns  sad at all evaluated vertexes
+*
+* @remarks USADA8 is defined outside this file; it is used here as
+*  "add the sad of 4 pixels to the third argument"
+*
+*******************************************************************************
+*/
+void ime_calculate_sad2_prog(UWORD8 *pu1_ref1,
+                             UWORD8 *pu1_ref2,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+    /* temp var */
+    WORD32 i, j;
+    /* signed: a UWORD32 offset is zero-extended when added to a 64-bit pointer */
+    WORD32 i4_ref_buf_offset = ref_strd - MB_SIZE;
+    WORD32 i4_cur_buf_offset = src_strd - MB_SIZE;
+
+    /* outputs are accumulated into, so they must start from zero */
+    memset(pi4_sad, 0, 2 * sizeof(WORD32));
+
+    for(i = 16; i > 0; i--)
+    {
+        /* one row = 4 groups of 4 pixels */
+        for(j = 4; j > 0; j--)
+        {
+            USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
+            USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
+            pu1_src += 4;
+            pu1_ref1 += 4;
+            pu1_ref2 += 4;
+        }
+
+        pu1_src += i4_cur_buf_offset;
+        pu1_ref1 += i4_ref_buf_offset;
+        pu1_ref2 += i4_ref_buf_offset;
+    }
+
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks
+*
+* @par Description
+*   This functions computes SAD between 2 16x16 blocks. There is a provision
+*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion; compared after every row
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad (partial sad when the early exit is taken)
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16(UWORD8 *pu1_src,
+                           UWORD8 *pu1_est,
+                           WORD32 src_strd,
+                           WORD32 est_strd,
+                           WORD32 i4_max_sad,
+                           WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    /* signed: a UWORD32 offset is zero-extended when added to a 64-bit pointer */
+    WORD32 i4_src_offset = src_strd - 16;
+    WORD32 i4_est_offset = est_strd - 16;
+    UWORD32 i, j;
+
+GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16);
+
+    for(i = 16; i > 0; i--)
+    {
+        /* one row = 4 groups of 4 pixels */
+        for(j = 4; j > 0; j--)
+        {
+            USADA8(pu1_src, pu1_est, i4_sad);
+            pu1_src += 4;
+            pu1_est += 4;
+        }
+
+        /* early exit */
+        if(i4_max_sad < i4_sad)
+        {
+
+GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16-i);
+
+            break;
+        }
+        pu1_src += i4_src_offset;
+        pu1_est += i4_est_offset;
+    }
+
+    *pi4_mb_distortion = i4_sad;
+    return ;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+*
+* @par Description
+*   This functions computes an approximate SAD between 2 16x16 blocks: only
+*   every second row is compared and the sum is doubled. There is no early
+*   exit; i4_max_sad is ignored.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] i4_max_sad
+*  unused by this routine
+*
+* @param[out] pi4_mb_distortion
+*  twice the sad of rows 0, 2, ..., 14
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_fast(UWORD8 *pu1_src,
+                                UWORD8 *pu1_est,
+                                WORD32 src_strd,
+                                WORD32 est_strd,
+                                WORD32 i4_max_sad,
+                                WORD32 *pi4_mb_distortion)
+{
+
+    WORD32 i4_sad = 0;
+    /* signed: a UWORD32 offset is zero-extended when added to a 64-bit pointer */
+    WORD32 i4_src_offset = 2 * src_strd - 16;
+    WORD32 i4_est_offset = 2 * est_strd - 16;
+    UWORD32 i, j;
+
+    UNUSED(i4_max_sad);
+
+    for(i = 16; i > 0; i-= 2)
+    {
+        /* one row = 4 groups of 4 pixels */
+        for(j = 4; j > 0; j--)
+        {
+            USADA8(pu1_src, pu1_est, i4_sad);
+            pu1_src += 4;
+            pu1_est += 4;
+        }
+
+        /* skip the following row */
+        pu1_src += i4_src_offset;
+        pu1_est += i4_est_offset;
+    }
+
+    *pi4_mb_distortion = (i4_sad << 1);
+    return ;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 8x8 blocks
+*
+* @par Description
+*   This functions computes SAD between 2 8x8 blocks. There is a provision
+*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion; compared after every row
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad (partial sad when the early exit is taken)
+*
+* @remarks
+*
+******************************************************************************
+ */
+
+void ime_compute_sad_8x8(UWORD8 *pu1_src,
+                         UWORD8 *pu1_est,
+                         WORD32 src_strd,
+                         WORD32 est_strd,
+                         WORD32 i4_max_sad,
+                         WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    /* signed: a UWORD32 offset is zero-extended when added to a 64-bit pointer */
+    WORD32 i4_src_offset = src_strd - 8;
+    WORD32 i4_est_offset = est_strd - 8;
+    UWORD32 i, j;
+    WORD16 temp;
+
+    for(i = 8; i > 0; i--)
+    {
+        for(j = 8; j > 0; j--)
+        {
+            /* SAD */
+            temp = *pu1_src++ - *pu1_est++;
+            i4_sad += ABS(temp);
+        }
+        /* early exit */
+        if(i4_max_sad < i4_sad)
+        {
+            break;
+        }
+        pu1_src += i4_src_offset;
+        pu1_est += i4_est_offset;
+    }
+    *pi4_mb_distortion = i4_sad;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 4x4 blocks
+*
+* @par Description
+*   This functions computes SAD between 2 4x4 blocks. There is no early
+*   exit; i4_max_sad is ignored.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] i4_max_sad
+*  unused by this routine
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad
+*
+* @remarks USADA8 is defined outside this file; it is invoked once per row
+*
+******************************************************************************
+*/
+void ime_compute_sad_4x4
+    (
+        UWORD8 *pu1_src,
+        UWORD8 *pu1_est,
+        WORD32 src_strd,
+        WORD32 est_strd,
+        WORD32 i4_max_sad,
+        WORD32 *pi4_mb_distortion
+    )
+{
+    WORD32 i4_sad = 0;
+    WORD32 i4_rows_left = 4;
+
+    UNUSED(i4_max_sad);
+
+    while(1)
+    {
+        USADA8(pu1_src, pu1_est, i4_sad);
+
+        /* the pointers are not advanced past the last row */
+        if(--i4_rows_left == 0)
+        {
+            break;
+        }
+
+        pu1_src += src_strd;
+        pu1_est += est_strd;
+    }
+
+    *pi4_mb_distortion = i4_sad;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x8 blocks
+*
+*
+* @par Description
+*   This functions computes SAD between 2 16x8 blocks. There is a provision
+*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion; compared after every row
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad (partial sad when the early exit is taken)
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x8
+    (
+        UWORD8 *pu1_src,
+        UWORD8 *pu1_est,
+        WORD32 src_strd,
+        WORD32 est_strd,
+        WORD32 i4_max_sad,
+        WORD32 *pi4_mb_distortion
+    )
+{
+    WORD32 i4_sad = 0;
+    /* signed: a UWORD32 offset is zero-extended when added to a 64-bit pointer */
+    WORD32 i4_src_offset = src_strd - 16;
+    WORD32 i4_est_offset = est_strd - 16;
+    UWORD32 i, j;
+    WORD16 temp;
+
+GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8);
+
+    for(i = 8; i > 0; i--)
+    {
+        for(j = 16; j > 0; j--)
+        {
+            /* SAD */
+            temp = *pu1_src++ - *pu1_est++;
+            i4_sad += ABS(temp);
+        }
+        /* early exit */
+        if(i4_max_sad < i4_sad)
+        {
+
+GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8-i);
+
+            break;
+        }
+        pu1_src += i4_src_offset;
+        pu1_est += i4_est_offset;
+    }
+
+    *pi4_mb_distortion = i4_sad;
+    return;
+
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks
+*
+* @par Description
+*   This functions computes SAD between 2 16x16 blocks. There is a provision
+*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion; compared once, after the 8 even rows
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad (sad of the even rows only when the early exit is
+*  taken)
+*
+* @remarks the even rows are summed first, then the odd rows
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_ea8(UWORD8 *pu1_src,
+                               UWORD8 *pu1_est,
+                               WORD32 src_strd,
+                               WORD32 est_strd,
+                               WORD32 i4_max_sad,
+                               WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    WORD32 i4_pass, i4_row, i4_col;
+
+    /* pass 0 : rows 0, 2, ..., 14      pass 1 : rows 1, 3, ..., 15 */
+    for(i4_pass = 0; i4_pass < 2; i4_pass++)
+    {
+        UWORD8 *pu1_src_row = pu1_src + i4_pass * src_strd;
+        UWORD8 *pu1_est_row = pu1_est + i4_pass * est_strd;
+
+        for(i4_row = 0; i4_row < 8; i4_row++)
+        {
+            for(i4_col = 0; i4_col < 16; i4_col++)
+            {
+                /* SAD */
+                WORD32 i4_diff = pu1_src_row[i4_col] - pu1_est_row[i4_col];
+
+                i4_sad += ABS(i4_diff);
+            }
+
+            /* signed strides; no unsigned 32-bit offset is added to a pointer */
+            pu1_src_row += 2 * src_strd;
+            pu1_est_row += 2 * est_strd;
+        }
+
+        /* early exit */
+        if((0 == i4_pass) && (i4_max_sad < i4_sad))
+        {
+            break;
+        }
+    }
+
+    *pi4_mb_distortion = i4_sad;
+    return;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes SAD between two 16x16 blocks
+*        It also computes if the block will be zero after H264 transform and quant for
+*        Intra 16x16 blocks
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] pu2_thrsh
+*
Threshold for each element of transformed quantized block;
+*  entries [0] to [8] are read
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad, summed over all sixteen 4x4 sub-blocks
+*
+* @param[out] pu4_is_non_zero
+*  Pointer to store the flag: 1 when some 4x4 sub-block reaches one of the
+*  thresholds, 0 otherwise
+*
+* @remarks
+*  Once the flag is set the threshold test is skipped for the remaining
+*  sub-blocks; their sad is still accumulated.
+*
+******************************************************************************
+*/
+void ime_compute_satqd_16x16_lumainter(UWORD8 *pu1_src,
+                                         UWORD8 *pu1_est,
+                                         WORD32 src_strd,
+                                         WORD32 est_strd,
+                                         UWORD16 *pu2_thrsh,
+                                         WORD32 *pi4_mb_distortion,
+                                         UWORD32 *pu4_is_non_zero)
+{
+    UWORD32 i,j;
+    WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
+    UWORD8 *pu1_src_lp,*pu1_est_lp;
+    /* despite its name, this is the "non zero" flag returned to the caller */
+    UWORD32 sad = 0;
+
+    (*pi4_mb_distortion) = 0;
+    /* i : row of 4x4 sub-blocks, j : sub-block within that row */
+    for(i=0;i<4;i++)
+    {
+        for(j=0;j<4;j++)
+        {
+            pu1_src_lp = pu1_src + 4*j;
+            pu1_est_lp = pu1_est + 4*j;
+
+            /* row 0 : s1 = outer columns (0,3), s4 = inner columns (1,2) */
+            s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s4 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            pu1_src_lp += src_strd;
+            pu1_est_lp += est_strd;
+
+            /* row 1 : s2 = outer columns, s3 = inner columns */
+            s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s3 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            pu1_src_lp += src_strd;
+            pu1_est_lp += est_strd;
+
+            /* row 2 : added to the row 1 sums */
+            s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s3 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            pu1_src_lp += src_strd;
+            pu1_est_lp += est_strd;
+
+            /* row 3 : added to the row 0 sums */
+            s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s4 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            /* sad of the whole 4x4 sub-block */
+            sad_1 = s1+s2+s3+s4;
+
+            /* thresholds are tested only while no sub-block has failed yet */
+            if(sad == 0)
+            {
+                sad_2 = sad_1<<1;
+
+                ls1 = sad_2 -(s2 + s3);
+                ls2 = sad_2 -(s1 + s4);
+                ls3 = sad_2 -(s3 + s4);
+                ls4 = sad_2 -(s3 - (s1<<1));
+                ls5 = sad_2 -(s4 - (s2<<1));
+                ls6 = sad_2 -(s1 + s2);
+                ls7 = sad_2 -(s2 - (s4<<1));
+                ls8 = sad_2 -(s1 - (s3<<1));
+
+                /* any value reaching its threshold marks the MB as non zero */
+                if(
+                        pu2_thrsh[8] <= sad_1 ||
+                        pu2_thrsh[0] <= ls2 ||
+                        pu2_thrsh[1] <= ls1 ||
+                        pu2_thrsh[2] <= ls8 ||
+                        pu2_thrsh[3] <= ls5 ||
+
+                        pu2_thrsh[4] <= ls6 ||
+                        pu2_thrsh[5] <= ls3 ||
+                        pu2_thrsh[6] <= ls7 ||
+                        pu2_thrsh[7] <= ls4
+
+                  )sad = 1;
+            }
+            (*pi4_mb_distortion) += sad_1;
+        }
+        /* next row of 4x4 sub-blocks */
+        pu1_src += (src_strd *4);
+        pu1_est += (est_strd *4);
+    }
+    *pu4_is_non_zero = sad;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD and SAQTD) between 2 16x8 (interleaved) chroma blocks
+*
+*
+* @par Description
+*   This functions computes SAD between 2 16x8 chroma blocks (interleaved)
+*   It also checks the SAQTD (sum of absolute transformed quantized
+*   difference) between the blocks
+*   If SAQTD is zero, it gives back zero
+*   Otherwise sad is returned
+*   There is no provision for early exit on max_sad
+*
+*   The transform done here is the transform for chroma blocks in H264
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] max_sad
+*  unused by this routine
+*
+* @param[in] thrsh
+*  Threshold for each element of transformed quantized block
+*
+* @remarks
+*  NOTE(review): the routine returns void and writes through no pointer, so
+*  the description above is not observable by the caller.
+*  Function code is not updated.
+*  Will require debugging and minor modifications
+*
+******************************************************************************
+*/
+void ime_compute_satqd_8x16_chroma(UWORD8 *pu1_src,
+                                     UWORD8 *pu1_est,
+                                     WORD32 src_strd,
+                                     WORD32 est_strd,
+                                     WORD32 max_sad,
+                                     UWORD16 *thrsh)
+{
+    WORD32 i,j,plane;
+    WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
+    UWORD8 *pu1_src_lp,*pu1_est_lp,*pu1_src_plane,*pu1_est_plane;
+    WORD32 sad =0;
+    UNUSED(max_sad);
+
+    /* remembered so that the second pass can start one byte further on */
+    pu1_src_plane = pu1_src;
+    pu1_est_plane = pu1_est;
+
+    /* plane 0 uses even byte offsets, plane 1 the same offsets plus one */
+    for(plane =0;plane<2;plane++)
+    {
+        for(i=0;i<4;i++)
+        {
+            for(j=0;j<4;j++)
+            {
+                /* a 4x4 sub-block spans 8 interleaved bytes per row */
+                pu1_src_lp = pu1_src + 8*j;
+                pu1_est_lp = pu1_est + 8*j;
+
+                /* row 0 : s1 = samples 0 and 6, s4 = samples 2 and 4 */
+                s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+                s4 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+                pu1_src_lp += src_strd;
+                pu1_est_lp += est_strd;
+
+                /* row 1 */
+                s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+                s3 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+                pu1_src_lp += src_strd;
+                pu1_est_lp += est_strd;
+
+                /* row 2 : added to the row 1 sums */
+                s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+                s3 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+                pu1_src_lp += src_strd;
+                pu1_est_lp += est_strd;
+
+                /* row 3 : added to the row 0 sums */
+                s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+                s4 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+                sad_1 = s1+s2+s3+s4;
+                sad_2 = sad_1<<1;
+
+                ls1 = sad_2 -(s2 + s3);
+                ls2 = sad_2 -(s1 + s4);
+                ls3 = sad_2 -(s3 + s4);
+                ls4 = sad_2 -(s3 - (s1<<1));
+                ls5 = sad_2 -(s4 - (s2<<1));
+                ls6 = sad_2 -(s1 + s2);
+                ls7 = sad_2 -(s2 - (s4<<1));
+                ls8 = sad_2 -(s1 - (s3<<1));
+
+                /* every value must stay below its threshold, else return */
+                if(
+                        //thrsh[0] > sad_1 && Chroma Dc is checked later
+                        thrsh[1] > ls1 &&
+                        thrsh[2] > sad_1 &&
+                        thrsh[3] > ls2 &&
+
+                        thrsh[4] > ls3 &&
+                        thrsh[5] > ls4 &&
+                        thrsh[6] > ls3 &&
+                        thrsh[7] > ls5 &&
+
+                        thrsh[8] > sad_1 &&
+                        thrsh[9] > ls1 &&
+                        thrsh[10]> sad_1 &&
+                        thrsh[11]> ls2 &&
+
+                        thrsh[12]> ls6 &&
+                        thrsh[13]> ls7 &&
+                        thrsh[14]> ls6 &&
+                        thrsh[15]> ls8
+                  )
+                {
+                    /*set current sad to be zero*/
+                }
+                else
+                    return ;
+
+                sad += sad_1;
+            }
+            pu1_src += (src_strd *4);
+            pu1_est += (est_strd *4);
+        }
+        /* check of the plane total against twice thrsh[0] */
+        if(sad < (thrsh[0]<<1))sad = 0;
+        else return ;
+
+        pu1_src = pu1_src_plane+1;
+        pu1_est = pu1_est_plane+1;
+    }
+    /* NOTE(review): nothing is reported back to the caller on any path */
+    return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD and SAQTD) between 2 16x16 blocks
+*
+* @par Description
+*   This functions computes SAD between 2 16x16 blocks.
+*   It also checks the SAQTD (sum of absolute transformed quantized
+*   difference) between the blocks, one 4x4 sub-block at a time.
+*   The distortion is forced to zero only when every sub-block passes its
+*   thresholds and the total is below 4 * thrsh[0];
+*   otherwise the sad is returned
+*   There is no provision for early exit
+*
+*   The transform done here is the transform for inter 16x16 blocks in H264
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] max_sad
+*  unused by this routine
+*
+* @param[in] thrsh
+*  Threshold for each element of transformed quantized block;
+*  entries [0] to [15] are read
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad
+*
+* @param[out] sig_nz_sad
+*  array of 17 bytes: [0] is 1 when the total sad reaches 4 * thrsh[0];
+*  [1] to [16] hold one flag per 4x4 sub-block (1 = a threshold was reached)
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_satqd_16x16_lumaintra(UWORD8 *pu1_src,
+                                         UWORD8 *pu1_est,
+                                         WORD32 src_strd,
+                                         WORD32 est_strd,
+                                         WORD32 max_sad,
+                                         UWORD16 *thrsh,
+                                         WORD32 *pi4_mb_distortion,
+                                         UWORD8 *sig_nz_sad)
+{
+    UWORD32 i,j;
+    WORD16 s1[4],s2[4],s3[4],s4[4],sad[4];
+    UWORD8 *pu1_src_lp,*pu1_est_lp;
+    UWORD8 *sig_sad_dc;
+    UWORD32 nz_sad_sig = 0;
+    UNUSED(max_sad);
+    *pi4_mb_distortion =0;
+
+    /* first byte is reserved for the DC flag, sub-block flags follow it */
+    sig_sad_dc = sig_nz_sad;
+    sig_nz_sad++;
+
+    for(i=0;i<4;i++)
+    {
+        /* first loop : partial sums of the four sub-blocks of this row */
+        for(j=0;j<4;j++)
+        {
+            pu1_src_lp = pu1_src + 4*j;
+            pu1_est_lp = pu1_est + 4*j;
+
+            /* row 0 : s1 = outer columns (0,3), s4 = inner columns (1,2) */
+            s1[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s4[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            pu1_src_lp += src_strd;
+            pu1_est_lp += est_strd;
+
+            /* row 1 */
+            s2[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s3[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            pu1_src_lp += src_strd;
+            pu1_est_lp += est_strd;
+
+            /* row 2 : added to the row 1 sums */
+            s2[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s3[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            pu1_src_lp += src_strd;
+            pu1_est_lp += est_strd;
+
+            /* row 3 : added to the row 0 sums */
+            s1[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s4[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            /* note : sad[j] holds TWICE the sad of the sub-block */
+            sad[j] = ((s1[j]+s2[j]+s3[j]+s4[j])<<1);
+        }
+
+        /* second loop : threshold test and flag for each sub-block */
+        for(j=0;j<4;j++)
+        {
+
+            if(
+                    //thrsh[0] > (sad[j] >> 1) &&Dc goes in the other part
+                    thrsh[1] > (sad[j] -(s2[j] + s3[j])) &&
+                    thrsh[2] > (sad[j]>>1) &&
+                    thrsh[3] > (sad[j] -(s1[j] + s4[j])) &&
+
+                    thrsh[4] > (sad[j] -(s3[j] + s4[j])) &&
+                    thrsh[5] > (sad[j] -(s3[j] - (s1[j]<<1))) &&
+                    thrsh[6] > (sad[j] -(s3[j] + s4[j])) &&
+                    thrsh[7] > (sad[j] -(s4[j] - (s2[j]<<1))) &&
+
+                    thrsh[8] > (sad[j]>>1) &&
+                    thrsh[9] > (sad[j] -(s2[j] + s3[j])) &&
+                    thrsh[10]> (sad[j]>>1) &&
+                    thrsh[11]> (sad[j] -(s1[j] + s4[j])) &&
+
+                    thrsh[12]> (sad[j] -(s1[j] + s2[j])) &&
+                    thrsh[13]> (sad[j] -(s2[j] - (s4[j]<<1))) &&
+                    thrsh[14]> (sad[j] -(s1[j] + s2[j])) &&
+                    thrsh[15]> (sad[j] -(s1[j] - (s3[j]<<1)))
+              )
+            {
+                //sad[j] = 0; /*set current sad to be zero*/
+                sig_nz_sad[j] = 0;/*Signal that the sad is zero*/
+            }
+            else
+            {
+                sig_nz_sad[j] = 1;/*signal that sad is non zero*/
+                nz_sad_sig = 1;
+            }
+
+            /* the plain (not doubled) sad is accumulated */
+            (*pi4_mb_distortion) += (sad[j]>>1);
+            //if((*pi4_mb_distortion) >= max_sad)return; /*return or some thing*/
+        }
+
+        /* next row of sub-blocks and next four flags */
+        sig_nz_sad += 4;
+        pu1_src += (src_strd *4);
+        pu1_est += (est_strd *4);
+    }
+
+    /* '<<' binds tighter than '<' : the total is compared with thrsh[0] * 4 */
+    if((*pi4_mb_distortion) < thrsh[0]<<2)
+    {
+        *sig_sad_dc = 0;
+        if(nz_sad_sig == 0)(*pi4_mb_distortion) = 0;
+    }
+    else *sig_sad_dc = 1;
+}
+
diff --git a/encoder/ime_distortion_metrics.h b/encoder/ime_distortion_metrics.h
new file mode 100755
index 0000000..a30e1fc
--- /dev/null
+++ b/encoder/ime_distortion_metrics.h
@@ -0,0 +1,170 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ime_distortion_metrics.h
+*
+* @brief
+*  This file contains declarations of routines that compute distortion
+*  between two macro/sub blocks of identical dimensions
+*
+* @author
+*  Ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IME_DISTORTION_METRICS_H_
+#define IME_DISTORTION_METRICS_H_
+
+
+/*****************************************************************************/
+/* Type definitions for function prototypes                                  */
+/*****************************************************************************/
+
+typedef void ime_compute_sad_ft(UWORD8 *pu1_src,
+                                UWORD8 *pu1_est,
+                                WORD32 src_strd,
+                                WORD32 est_strd,
+                                WORD32 i4_max_sad,
+                                WORD32 *pi4_mb_distortion);
+
+typedef void ime_compute_sad4_diamond(UWORD8 *pu1_ref,
+                                      UWORD8 *pu1_src,
+                                      WORD32 ref_strd,
+                                      WORD32 src_strd,
+                                      WORD32 *pi4_sad);
+
+typedef void ime_compute_sad3_diamond(UWORD8 *pu1_ref1,
+                                      UWORD8 *pu1_ref2,
+                                      UWORD8 *pu1_ref3,
+                                      UWORD8 *pu1_src,
+                                      WORD32 ref_strd,
+                                      WORD32 src_strd,
+                                      WORD32 *pi4_sad);
+
+typedef void ime_compute_sad2_diamond(UWORD8 *pu1_ref1,
+                                      UWORD8 *pu1_ref2,
+                                      UWORD8 *pu1_src,
+                                      WORD32 ref_strd,
+                                      WORD32 src_strd,
+                                      WORD32 *pi4_sad);
+
+typedef void ime_sub_pel_compute_sad_16x16_ft(UWORD8 *pu1_src,
+                                              UWORD8 *pu1_ref_half_x,
+                                              UWORD8 *pu1_ref_half_y,
+                                              UWORD8 *pu1_ref_half_xy,
+                                              WORD32 src_strd,
+                                              WORD32 ref_strd,
+                                              WORD32 *pi4_sad);
+
+typedef void ime_compute_sad_stat(UWORD8 *pu1_src,
+                                  UWORD8 *pu1_est,
+                                  WORD32 src_strd,
+                                  WORD32 est_strd,
+                                  UWORD16 *pu2_thrsh,
+                                  WORD32 *pi4_mb_distortion,
+                                  UWORD32 *pu4_is_zero);
+
+typedef void ime_compute_satqd_16x16_lumainter_ft(UWORD8 *pu1_src,
+                                                  UWORD8 *pu1_est,
+                                                  WORD32 src_strd,
+                                                  WORD32 est_strd,
+                                                  UWORD16 *pu2_thrsh,
+                                                  WORD32 *pi4_mb_distortion,
+                                                  UWORD32 *pu4_is_zero); /* named pu4_is_non_zero in the C definition */
+
+typedef void ime_compute_satqd_8x16_chroma_ft(UWORD8 *pu1_src,
+                                              UWORD8 *pu1_est,
+                                              WORD32 src_strd,
+                                              WORD32 est_strd,
+                                              WORD32 i4_max_sad,
+                                              UWORD16 *thrsh);
+
+typedef void ime_compute_satqd_16x16_lumaintra_ft(UWORD8 *pu1_src,
+                                                  UWORD8 *pu1_est,
+                                                  WORD32 src_strd,
+                                                  WORD32 est_strd,
+                                                  WORD32 i4_max_sad,
+                                                  UWORD16 *thrsh,
+                                                  WORD32 *pi4_mb_distortion,
+                                                  UWORD8 *sig_nz_sad);
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+ime_compute_sad_ft ime_compute_sad_16x16;
+ime_compute_sad_ft ime_compute_sad_16x16_fast;
+ime_compute_sad_ft ime_compute_sad_16x8;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8;
+ime_compute_sad_ft ime_compute_sad_8x8;
+ime_compute_sad_ft ime_compute_sad_4x4;
+ime_compute_sad4_diamond ime_calculate_sad4_prog;
+ime_compute_sad3_diamond ime_calculate_sad3_prog;
+ime_compute_sad2_diamond ime_calculate_sad2_prog;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16;
+ime_compute_sad_stat ime_compute_16x16_sad_stat;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter;
+ime_compute_satqd_8x16_chroma_ft ime_compute_satqd_8x16_chroma;
+ime_compute_satqd_16x16_lumaintra_ft ime_compute_satqd_16x16_lumaintra;
+
+/*SSE4.2 Declarations*/
+ime_compute_sad_ft ime_compute_sad_16x16_sse42;
+ime_compute_sad_ft ime_compute_sad_16x16_fast_sse42;
+ime_compute_sad_ft ime_compute_sad_16x8_sse42;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8_sse42;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_sse42;
+ime_compute_sad4_diamond ime_calculate_sad4_prog_sse42;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_sse42;
+
+/* assembly */
+ime_compute_sad_ft ime_compute_sad_16x16_a9q;
+ime_compute_sad_ft ime_compute_sad_16x16_fast_a9q;
+ime_compute_sad_ft ime_compute_sad_16x8_a9q;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8_a9q;
+ime_compute_sad4_diamond ime_calculate_sad4_prog_a9q;
+ime_compute_sad3_diamond ime_calculate_sad3_prog_a9q;
+ime_compute_sad2_diamond ime_calculate_sad2_prog_a9q;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_a9q;
+ime_compute_sad_stat ime_compute_16x16_sad_stat_a9;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_a9q;
+
+
+/* assembly - AV8 declarations */
+ime_compute_sad_ft ime_compute_sad_16x16_av8;
+ime_compute_sad_ft ime_compute_sad_16x16_fast_av8;
+ime_compute_sad_ft ime_compute_sad_16x8_av8;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8_av8;
+ime_compute_sad4_diamond ime_calculate_sad4_prog_av8;
+ime_compute_sad3_diamond ime_calculate_sad3_prog_av8;
+ime_compute_sad2_diamond ime_calculate_sad2_prog_av8;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_av8;
+ime_compute_sad_stat ime_compute_16x16_sad_stat_av8;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_av8;
+
+
+#endif /* IME_DISTORTION_METRICS_H_ */
+
+
diff --git a/encoder/ime_macros.h b/encoder/ime_macros.h
new file mode 100755
index 0000000..a7b8c65
--- /dev/null
+++ b/encoder/ime_macros.h
@@ -0,0 +1,44 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ime_macros.h +* +* @brief +* Generic helper macros used in the code +* +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _IME_MACROS_H_ +#define _IME_MACROS_H_ +/* Every macro argument is parenthesised; arguments may be evaluated more than once, so avoid side effects */ +#define ABS(x) ((x) < 0 ? (-(x)) : (x)) +#define MAX(a,b) (((a) > (b))?(a):(b)) +#define MIN(a,b) (((a) < (b))?(a):(b)) + +#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y))) +#define UNUSED(x) ((void)(x)) + +#endif /*_IME_MACROS_H_*/ diff --git a/encoder/ime_statistics.h b/encoder/ime_statistics.h new file mode 100755 index 0000000..eeacaf2 --- /dev/null +++ b/encoder/ime_statistics.h @@ -0,0 +1,86 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ime_statistics.h +* +* @brief +* Macros for gathering SAD early exit statistics and debug histograms +* +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _IME_STATISTICS_H_ +#define _IME_STATISTICS_H_ +#define DEBUG_HISTOGRAM_ENABLE 0 +#define SAD_EXIT_STATS 0 + + +#if SAD_EXIT_STATS + +/** +****************************************************************************** +* @brief While computing sad, if we want to do an early exit, how often we +* should check if the sad computed till now has exceeded min sad param is +* chosen statistically. +* ****************************************************************************** +*/ +extern UWORD32 gu4_16x16_sad_ee_stats[16+1]; +extern UWORD32 gu4_16x8_sad_ee_stats[8+1]; + +/** +****************************************************************************** +* @brief print sad early exit stats +****************************************************************************** +*/ +extern void print_sad_ee_stats(void); + +#define GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, i) \ + gu4_16x16_sad_ee_stats[i]++; +#define GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, i) \ + gu4_16x8_sad_ee_stats[i]++; + +#else + +#define GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, i) +#define GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, i) + +#endif + + +#if DEBUG_HISTOGRAM_ENABLE +#define DEBUG_HISTOGRAM_INIT() debug_histogram_init() +#define DEBUG_HISTOGRAM_DUMP(condition) if(condition) debug_histogram_dump() +#define DEBUG_MV_HISTOGRAM_ADD(mv_x, mv_y) debug_mv_histogram_add(mv_x, mv_y) +#define DEBUG_SAD_HISTOGRAM_ADD(sad, level) debug_sad_histogram_add(sad, level) +#else +#define DEBUG_HISTOGRAM_INIT() +#define DEBUG_HISTOGRAM_DUMP(condition) +#define DEBUG_MV_HISTOGRAM_ADD(mv_x, mv_y) +#define DEBUG_SAD_HISTOGRAM_ADD(sad, level) +#endif + + + +#endif /*_IME_STATISTICS_H_*/ diff 
--git a/encoder/ime_structs.h b/encoder/ime_structs.h new file mode 100755 index 0000000..7819b91 --- /dev/null +++ b/encoder/ime_structs.h @@ -0,0 +1,305 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ime_structs.h + * + * @brief + * Motion vector, MB partition and ME context structure definitions + * + * @author + * Ittiam + * + * @par List of Functions: + * - + * + * @remarks + * None + * + ******************************************************************************* + */ + +#ifndef _IME_STRUCTS_H_ +#define _IME_STRUCTS_H_ + +/** + * Motion vector + */ +typedef struct +{ + /** + * Horizontal Motion Vector + */ + WORD16 i2_mvx; + + /** + * Vertical Motion Vector + */ + WORD16 i2_mvy; +} ime_mv_t; + + +/** +************************************************************************** +* @brief mb_part_ctxt +* +* Structure that would hold the information for individual MB partitions +* gathered during the full pel ME stage +************************************************************************** +*/ +typedef struct +{ + /** + * best mvs + */ + ime_mv_t s_mv_curr; + + /** + * mv predictor + */ + ime_mv_t s_mv_pred; + + /** + * SAD associated with the MB partition + */ + WORD32 i4_mb_distortion; + + /** + * cost for the MB partition + */ + WORD32 i4_mb_cost; + + /** + * Search position for least cost among the list of candidates + */ + UWORD32 u4_srch_pos_idx; + + /** + * NOTE(review): presumably an early-exit indicator for the search - confirm against users + */ + UWORD32 u4_exit; + + /* + * Buffer corresponding to best half pel cost + */ + UWORD8 *pu1_best_hpel_buf; + +} mb_part_ctxt; + + +/** +************************************************************************** +* @brief me_ctxt_t +* +* Structure encapsulating the parameters used in the motion estimation +* context +************************************************************************** +*/ +typedef struct +{ + /** + * Ref pointer to current MB luma + */ + UWORD8 *pu1_ref_buf_luma; + + /** + * Src pointer to current MB luma + */ + UWORD8 *pu1_src_buf_luma; + + /** + * source stride + * (strides for luma and chroma are the same) + */ + WORD32 i4_src_strd; + + /** + * recon 
stride + * (strides for luma and chroma are the same) + */ + WORD32 i4_rec_strd; + + /** + * Offset for half pel x plane from the pic buf + */ + UWORD32 u4_half_x_offset; + + /** + * Offset for half pel y plane from half x plane + */ + UWORD32 u4_half_y_offset; + + /** + * Offset for half pel xy plane from half y plane + */ + UWORD32 u4_half_xy_offset; + + /** + * Search range in the X, Y axis in terms of pixels + */ + WORD32 ai2_srch_boundaries[2]; + + /** + * Search range in the north direction in terms of pixels + */ + WORD32 i4_srch_range_n; + + /** + * Search range in the south direction in terms of pixels + */ + WORD32 i4_srch_range_s; + + /** + * Search range in the east direction in terms of pixels + */ + WORD32 i4_srch_range_e; + + /** + * Search range in the west direction in terms of pixels + */ + WORD32 i4_srch_range_w; + + /** + * left mb motion vector + */ + ime_mv_t s_left_mv; + + /** + * top left mb motion vector + */ + ime_mv_t s_top_left_mv; + + /** + * Number of valid candidates for the Initial search position + */ + UWORD32 u4_num_candidates; + + /** + * Motion vector predictors derived from neighbouring + * blocks (NOTE(review): originally documented as six block partitions, but the array holds 5 entries - confirm) + */ + ime_mv_t as_mv_init_search[5]; + + /** + * mv bits + */ + UWORD8 *pu1_mv_bits; + + /** + * lambda (lagrange multiplier for cost computation) + */ + UWORD32 u4_lambda_motion; + + /** + * enable fast sad computation + */ + UWORD32 u4_enable_fast_sad; + + /* + * Enable SKIP block prediction based on SATQD + */ + UWORD32 u4_enable_stat_sad; + + /* + * Minimum distortion to search for + * */ + WORD32 i4_min_sad; + + /* + * Signal that minimum sad has been reached in ME + * */ + UWORD32 u4_min_sad_reached; + + /** + * Flag to enable/disable half pel motion estimation + */ + UWORD32 u4_enable_hpel; + + /** + * Diamond search Iteration Max Cnt + */ + UWORD32 u4_num_layers; + + /** + * encoder me speed + */ + UWORD32 u4_me_speed_preset; + + UWORD32 u4_left_is_intra; + + UWORD32 u4_left_is_skip; + + 
/** + * Structure to store the MB partition info + */ + mb_part_ctxt s_mb_part; + /* + * Threshold to compare the sad with + */ + UWORD16 *pu2_sad_thrsh; + + /** + * fn ptrs for compute sad routines + */ + ime_compute_sad_ft *pf_ime_compute_sad_16x16[2]; + ime_compute_sad_ft *pf_ime_compute_sad_16x8; + ime_compute_sad4_diamond *pf_ime_compute_sad4_diamond; + ime_compute_sad3_diamond *pf_ime_compute_sad3_diamond; + ime_compute_sad2_diamond *pf_ime_compute_sad2_diamond; + ime_sub_pel_compute_sad_16x16_ft *pf_ime_sub_pel_compute_sad_16x16; + + /* + * Function pointers for SATQD + */ + ime_compute_sad_stat *pf_ime_compute_sad_stat_luma_16x16; + + /** + * Qp + */ + UWORD8 u1_mb_qp; + + /* + * Buffers for holding half_x , half_y and half_xy + * values when halfpel generation + * for the entire plane is not enabled + */ + UWORD8 *pu1_half_x; + UWORD8 *pu1_half_y; + UWORD8 *pu1_half_xy; + + + /* + * Buffers to store the best halfpel plane* + */ + UWORD8 *pu1_hpel_buf; + + /* + * Stride for hpel buffer + */ + UWORD32 u4_hpel_buf_strd; + + WORD32 u4_hp_buf_strd; + +} me_ctxt_t; + + +#endif // _IME_STRUCTS_H_ + diff --git a/encoder/ime_typedefs.h b/encoder/ime_typedefs.h new file mode 100755 index 0000000..d36632d --- /dev/null +++ b/encoder/ime_typedefs.h @@ -0,0 +1,50 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ime_typedefs.h +* +* @brief +* Type definitions used in the code +* +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _IME_TYPEDEFS_H_ +#define _IME_TYPEDEFS_H_ + +/* 64-bit types use long long: plain long is only 32 bits wide on ILP32 and LLP64 targets */ +typedef unsigned char UWORD8; +typedef unsigned short UWORD16; +typedef unsigned int UWORD32; +typedef unsigned long long UWORD64; + +typedef signed char WORD8; +typedef short WORD16; +typedef int WORD32; +typedef long long WORD64; + +typedef char CHAR; + +#endif /*_IME_TYPEDEFS_H_*/ diff --git a/encoder/irc_bit_allocation.c b/encoder/irc_bit_allocation.c new file mode 100755 index 0000000..1dfd9de --- /dev/null +++ b/encoder/irc_bit_allocation.c @@ -0,0 +1,859 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** Includes */ +#include <stdio.h> +#include <string.h> +#include "irc_datatypes.h" +#include "irc_mem_req_and_acq.h" +#include "irc_common.h" +#include "irc_cntrl_param.h" +#include "irc_fixed_point_error_bits.h" +#include "irc_rd_model.h" +#include "irc_est_sad.h" +#include "irc_picture_type.h" +#include "irc_bit_allocation.h" +#include "irc_trace_support.h" + +/** Macros (expansion is fully parenthesised so it is safe inside larger expressions) **/ +#define MIN(x,y) (((x) < (y)) ? (x) : (y)) + +/* State structure for bit allocation */ +typedef struct +{ + /* using var_q number as it can cross 31 bits for large intra frameinterval */ + number_t vq_rem_bits_in_period; + + /* Storing inputs */ + WORD32 i4_tot_frms_in_gop; + + WORD32 i4_num_intra_frm_interval; + + WORD32 i4_bits_per_frm; + +} rem_bit_in_prd_t; + +typedef struct bit_allocation_t +{ + rem_bit_in_prd_t s_rbip; + + /* A universal constant giving the relative complexity between pictures */ + WORD32 i2_K[MAX_PIC_TYPE]; + + /* To get a estimate of the header bits consumed */ + WORD32 i4_prev_frm_header_bits[MAX_PIC_TYPE]; + + WORD32 i4_bits_per_frm; + + WORD32 i4_num_gops_in_period; + + /* Num gops as set by rate control module */ + WORD32 i4_actual_num_gops_in_period; + + number_t vq_saved_bits; + + WORD32 i4_max_bits_per_frm[MAX_NUM_DRAIN_RATES]; + + WORD32 i4_min_bits_per_frm; + + /* Error bits module */ + error_bits_handle ps_error_bits; + + /* Storing frame rate */ + WORD32 i4_frame_rate; + + WORD32 i4_bit_rate; + + WORD32 ai4_peak_bit_rate[MAX_NUM_DRAIN_RATES]; + +} bit_allocation_t; + +static WORD32 get_number_of_frms_in_a_gop(pic_handling_handle ps_pic_handling) +{ + WORD32 i4_tot_frms_in_gop = 0, i; + WORD32 ai4_frms_in_gop[MAX_PIC_TYPE]; + + /* Query the pic_handling struct for the rem frames in the period */ + irc_pic_type_get_frms_in_gop(ps_pic_handling, ai4_frms_in_gop); + + /* Get the total frms in the gop */ + i4_tot_frms_in_gop = 0; + for(i = 0; i < MAX_PIC_TYPE; i++) + { + i4_tot_frms_in_gop += ai4_frms_in_gop[i]; + } + return 
(i4_tot_frms_in_gop); +} + +static void init_rbip(rem_bit_in_prd_t *ps_rbip, + pic_handling_handle ps_pic_handling, + WORD32 i4_bits_per_frm, + WORD32 i4_num_intra_frm_interval) +{ + WORD32 i4_tot_frms_in_gop = get_number_of_frms_in_a_gop(ps_pic_handling); + + /* rem_bits_in_period = bits_per_frm * tot_frms_in_gop * num_intra_frm_interval */ + { + number_t vq_bits_per_frm, vq_tot_frms_in_gop, vq_num_intra_frm_interval; + number_t *pvq_rem_bits_in_period = &ps_rbip->vq_rem_bits_in_period; + + SET_VAR_Q(vq_bits_per_frm, i4_bits_per_frm, 0); + SET_VAR_Q(vq_tot_frms_in_gop, i4_tot_frms_in_gop, 0); + SET_VAR_Q(vq_num_intra_frm_interval, i4_num_intra_frm_interval, 0); + + /* rem_bits_in_period = bits_per_frm * tot_frms_in_gop */ + mult32_var_q(vq_bits_per_frm, vq_tot_frms_in_gop, + pvq_rem_bits_in_period); + + /* rem_bits_in_period *= num_intra_frm_interval */ + mult32_var_q(vq_num_intra_frm_interval, pvq_rem_bits_in_period[0], + pvq_rem_bits_in_period); + } + + /* + * Store the total number of frames in GOP value which is + * used from module A + */ + ps_rbip->i4_tot_frms_in_gop = i4_tot_frms_in_gop; + ps_rbip->i4_num_intra_frm_interval = i4_num_intra_frm_interval; + ps_rbip->i4_bits_per_frm = i4_bits_per_frm; +} + +static void check_update_rbip(rem_bit_in_prd_t *ps_rbip, + pic_handling_handle ps_pic_handling) +{ + /* + * NOTE: Intra frame interval changes after the first I frame that is + * encoded in a GOP + */ + WORD32 i4_new_tot_frms_in_gop = get_number_of_frms_in_a_gop( + ps_pic_handling); + + if(i4_new_tot_frms_in_gop != ps_rbip->i4_tot_frms_in_gop) + { + WORD32 i4_rem_frames_in_period = + ps_rbip->i4_num_intra_frm_interval + * (i4_new_tot_frms_in_gop + - ps_rbip->i4_tot_frms_in_gop); + + number_t vq_rem_frms_in_period, s_bits_per_frm, vq_delta_bits_in_period; + + SET_VAR_Q(vq_rem_frms_in_period, i4_rem_frames_in_period, 0); + SET_VAR_Q(s_bits_per_frm, ps_rbip->i4_bits_per_frm, 0); + + /* delta_bits_in_period = bits_per_frm * rem_frms_in_period */ + 
mult32_var_q(s_bits_per_frm, vq_rem_frms_in_period, + &vq_delta_bits_in_period); + + /* rem_bits_in_period += delta_bits_in_period */ + add32_var_q(vq_delta_bits_in_period, ps_rbip->vq_rem_bits_in_period, + &ps_rbip->vq_rem_bits_in_period); + } + /* Updated the new values */ + ps_rbip->i4_tot_frms_in_gop = i4_new_tot_frms_in_gop; +} + +static void irc_ba_update_rbip(rem_bit_in_prd_t *ps_rbip, + pic_handling_handle ps_pic_handling, + WORD32 i4_num_of_bits) +{ + number_t vq_num_bits; + + check_update_rbip(ps_rbip, ps_pic_handling); + + /* rem_bits_in_period += num_of_bits */ + SET_VAR_Q(vq_num_bits, i4_num_of_bits, 0); + add32_var_q(vq_num_bits, ps_rbip->vq_rem_bits_in_period, + &ps_rbip->vq_rem_bits_in_period); +} + +static void irc_ba_change_rbip(rem_bit_in_prd_t *ps_rbip, + pic_handling_handle ps_pic_handling, + WORD32 i4_new_bits_per_frm, + WORD32 i4_new_num_intra_frm_interval) +{ + WORD32 ai4_rem_frms_in_period[MAX_PIC_TYPE], i4_rem_frms_in_gop, i; + irc_pic_type_get_rem_frms_in_gop(ps_pic_handling, ai4_rem_frms_in_period); + + i4_rem_frms_in_gop = 0; + for(i = 0; i < MAX_PIC_TYPE; i++) + i4_rem_frms_in_gop += ai4_rem_frms_in_period[i]; + + if(i4_new_bits_per_frm != ps_rbip->i4_bits_per_frm) + { + WORD32 i4_rem_frms_in_period = (ps_rbip->i4_num_intra_frm_interval - 1) + * ps_rbip->i4_tot_frms_in_gop + i4_rem_frms_in_gop; + + number_t vq_rem_frms_in_period, vq_delta_bits_per_frm, + vq_delta_bits_in_period; + + /* delta_bits_per_frm = new_bits_per_frm - old_bits_per_frm */ + SET_VAR_Q(vq_delta_bits_per_frm, + (i4_new_bits_per_frm - ps_rbip->i4_bits_per_frm), 0); + + SET_VAR_Q(vq_rem_frms_in_period, i4_rem_frms_in_period, 0); + + /* delta_bits_in_period = delta_bits_per_frm * rem_frms_in_period */ + mult32_var_q(vq_delta_bits_per_frm, vq_rem_frms_in_period, + &vq_delta_bits_in_period); + + /* ps_rbip->rem_bits_in_period += delta_bits_in_period */ + add32_var_q(vq_delta_bits_in_period, ps_rbip->vq_rem_bits_in_period, + &ps_rbip->vq_rem_bits_in_period); + } + + 
if(i4_new_num_intra_frm_interval != ps_rbip->i4_num_intra_frm_interval) + { + WORD32 i4_rem_frms_in_period = ps_rbip->i4_tot_frms_in_gop + * (i4_new_num_intra_frm_interval + - ps_rbip->i4_num_intra_frm_interval); + + number_t vq_rem_frms_in_period, vq_new_bits_per_frm, + vq_delta_bits_in_period; + + /* new_bits_per_frm = new_new_bits_per_frm - old_new_bits_per_frm */ + SET_VAR_Q(vq_new_bits_per_frm, i4_new_bits_per_frm, 0); + + SET_VAR_Q(vq_rem_frms_in_period, i4_rem_frms_in_period, 0); + + /* delta_bits_in_period = new_bits_per_frm * rem_frms_in_period */ + mult32_var_q(vq_new_bits_per_frm, vq_rem_frms_in_period, + &vq_delta_bits_in_period); + + /* ps_rbip->rem_bits_in_period += delta_bits_in_period */ + add32_var_q(vq_delta_bits_in_period, ps_rbip->vq_rem_bits_in_period, + &ps_rbip->vq_rem_bits_in_period); + } + /* Update the new value */ + ps_rbip->i4_num_intra_frm_interval = i4_new_num_intra_frm_interval; + ps_rbip->i4_bits_per_frm = i4_new_bits_per_frm; +} + +WORD32 irc_ba_num_fill_use_free_memtab(bit_allocation_t **pps_bit_allocation, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type) +{ + WORD32 i4_mem_tab_idx = 0; + static bit_allocation_t s_bit_allocation_temp; + + /* + * Hack for all alloc, during which we don't have any state memory. 
+ * Dereferencing can cause issues + */ + if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB) + (*pps_bit_allocation) = &s_bit_allocation_temp; + + /*for src rate control state structure*/ + if(e_func_type != GET_NUM_MEMTAB) + { + fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(bit_allocation_t), + ALIGN_128_BYTE, PERSISTENT, DDR); + use_or_fill_base(&ps_memtab[0], (void**)pps_bit_allocation, + e_func_type); + } + i4_mem_tab_idx++; + + i4_mem_tab_idx += irc_error_bits_num_fill_use_free_memtab( + &pps_bit_allocation[0]->ps_error_bits, + &ps_memtab[i4_mem_tab_idx], e_func_type); + + return (i4_mem_tab_idx); +} + +/******************************************************************************* + Function Name : irc_ba_init_bit_allocation + Description : Initialize the bit_allocation structure. + ******************************************************************************/ +void irc_ba_init_bit_allocation(bit_allocation_t *ps_bit_allocation, + pic_handling_handle ps_pic_handling, + WORD32 i4_num_intra_frm_interval, + WORD32 i4_bit_rate, + WORD32 i4_frm_rate, + WORD32 *i4_peak_bit_rate, + WORD32 i4_min_bitrate) +{ + WORD32 i; + WORD32 i4_bits_per_frm, i4_max_bits_per_frm[MAX_NUM_DRAIN_RATES]; + + /* Calculate the bits per frame */ + X_PROD_Y_DIV_Z(i4_bit_rate, 1000, i4_frm_rate, i4_bits_per_frm); + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + X_PROD_Y_DIV_Z(i4_peak_bit_rate[i], 1000, i4_frm_rate, + i4_max_bits_per_frm[i]); + } + /* Initialize the bits_per_frame */ + ps_bit_allocation->i4_bits_per_frm = i4_bits_per_frm; + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + ps_bit_allocation->i4_max_bits_per_frm[i] = i4_max_bits_per_frm[i]; + } + X_PROD_Y_DIV_Z(i4_min_bitrate, 1000, i4_frm_rate, + ps_bit_allocation->i4_min_bits_per_frm); + + /* + * Initialize the rem_bits in period + * The first gop in case of an OPEN GOP may have fewer B_PICs, + * That condition is not taken care of + */ + init_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, i4_bits_per_frm, 
+ i4_num_intra_frm_interval); + + /* Initialize the num_gops_in_period */ + ps_bit_allocation->i4_num_gops_in_period = i4_num_intra_frm_interval; + ps_bit_allocation->i4_actual_num_gops_in_period = i4_num_intra_frm_interval; + + /* Relative complexity between I, P and B frames */ + ps_bit_allocation->i2_K[I_PIC] = (1 << K_Q); + ps_bit_allocation->i2_K[P_PIC] = I_TO_P_RATIO; + ps_bit_allocation->i2_K[B_PIC] = (P_TO_B_RATIO * I_TO_P_RATIO) >> K_Q; + + /* Initialize the saved bits to 0*/ + SET_VAR_Q(ps_bit_allocation->vq_saved_bits, 0, 0); + + /* Update the error bits module with average bits */ + irc_init_error_bits(ps_bit_allocation->ps_error_bits, i4_frm_rate, + i4_bit_rate); + /* Store the input for implementing change in values */ + ps_bit_allocation->i4_frame_rate = i4_frm_rate; + ps_bit_allocation->i4_bit_rate = i4_bit_rate; + + memset(ps_bit_allocation->i4_prev_frm_header_bits, 0, sizeof(ps_bit_allocation->i4_prev_frm_header_bits)); + for(i=0;i<MAX_NUM_DRAIN_RATES;i++) + ps_bit_allocation->ai4_peak_bit_rate[i] = i4_peak_bit_rate[i]; +} + +/******************************************************************************* + Function Name : irc_ba_get_cur_frm_est_texture_bits + Description : Based on remaining bits in period and rd_model + the number of bits required for the current frame is estimated. 
+ ******************************************************************************/ +WORD32 irc_ba_get_cur_frm_est_texture_bits(bit_allocation_t *ps_bit_allocation, + rc_rd_model_handle *pps_rd_model, + est_sad_handle ps_est_sad, + pic_handling_handle ps_pic_handling, + picture_type_e e_pic_type) +{ + WORD32 i, j; + WORD32 i4_est_texture_bits_for_frm; + number_t vq_rem_texture_bits; + number_t vq_complexity_estimate[MAX_PIC_TYPE]; + WORD32 i4_rem_frms_in_period[MAX_PIC_TYPE], i4_frms_in_period[MAX_PIC_TYPE]; + number_t vq_max_consumable_bits; + number_t vq_rem_frms_in_period[MAX_PIC_TYPE], vq_est_texture_bits_for_frm; + number_t vq_prev_hdr_bits[MAX_PIC_TYPE]; + + WORD32 complexity_est = 0; + + /* Get the rem_frms_in_gop & the frms_in_gop from the pic_type state struct */ + irc_pic_type_get_rem_frms_in_gop(ps_pic_handling, i4_rem_frms_in_period); + irc_pic_type_get_frms_in_gop(ps_pic_handling, i4_frms_in_period); + + /* Depending on the number of gops in a period, find the num_frms_in_prd */ + for(j = 0; j < MAX_PIC_TYPE; j++) + { + i4_rem_frms_in_period[j] += (i4_frms_in_period[j] + * (ps_bit_allocation->i4_num_gops_in_period - 1)); + i4_frms_in_period[j] *= ps_bit_allocation->i4_num_gops_in_period; + } + + /* Remove the header bits from the remaining bits to find how many bits you + can transfer.*/ + irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0); + for(i = 0; i < MAX_PIC_TYPE; i++) + { + SET_VAR_Q(vq_rem_frms_in_period[i], i4_rem_frms_in_period[i], 0); + SET_VAR_Q(vq_prev_hdr_bits[i], + ps_bit_allocation->i4_prev_frm_header_bits[i], 0); + } + { + /* + *rem_texture_bits = rem_bits_in_period - + *(rem_frms_in_period[I_PIC] * prev_frm_header_bits[I_PIC]) - + *(rem_frms_in_period[P_PIC] * prev_frm_header_bits[P_PIC]) - + *(rem_frms_in_period[B_PIC] * prev_frm_header_bits[B_PIC]); + */ + number_t vq_rem_hdr_bits; + vq_rem_texture_bits = ps_bit_allocation->s_rbip.vq_rem_bits_in_period; + + mult32_var_q(vq_prev_hdr_bits[I_PIC], 
vq_rem_frms_in_period[I_PIC], + &vq_rem_hdr_bits); + sub32_var_q(vq_rem_texture_bits, vq_rem_hdr_bits, &vq_rem_texture_bits); + + mult32_var_q(vq_prev_hdr_bits[P_PIC], vq_rem_frms_in_period[P_PIC], + &vq_rem_hdr_bits); + sub32_var_q(vq_rem_texture_bits, vq_rem_hdr_bits, &vq_rem_texture_bits); + + mult32_var_q(vq_prev_hdr_bits[B_PIC], vq_rem_frms_in_period[B_PIC], + &vq_rem_hdr_bits); + sub32_var_q(vq_rem_texture_bits, vq_rem_hdr_bits, &vq_rem_texture_bits); + } + { + /* max_consumable_bits = + *(frms_in_period[I_PIC] * max_bits_per_frm[0] ) + + *(frms_in_period[P_PIC] + frms_in_period[B_PIC] ) * max_bits_per_frm[1]; + */ + number_t vq_max_bits, vq_max_bits_per_frm[2]; + + SET_VAR_Q(vq_max_bits_per_frm[0], + ps_bit_allocation->i4_max_bits_per_frm[0], 0); + SET_VAR_Q(vq_max_bits_per_frm[1], + ps_bit_allocation->i4_max_bits_per_frm[1], 0); + + mult32_var_q(vq_rem_frms_in_period[I_PIC], vq_max_bits_per_frm[0], + &vq_max_bits); + vq_max_consumable_bits = vq_max_bits; + + mult32_var_q(vq_rem_frms_in_period[P_PIC], vq_max_bits_per_frm[1], + &vq_max_bits); + add32_var_q(vq_max_bits, vq_max_consumable_bits, + &vq_max_consumable_bits); + + mult32_var_q(vq_rem_frms_in_period[B_PIC], vq_max_bits_per_frm[1], + &vq_max_bits); + add32_var_q(vq_max_bits, vq_max_consumable_bits, + &vq_max_consumable_bits); + } + + /* rem_texture_bits = MIN(rem_texture_bits, max_consumable_bits) */ + MIN_VARQ(vq_max_consumable_bits, vq_rem_texture_bits, vq_rem_texture_bits); + + /* The bits are then allocated based on the relative complexity of the + current frame with respect to that of the rest of the frames in period */ + for(i = 0; i < MAX_PIC_TYPE; i++) + { + number_t vq_lin_mod_coeff, vq_est_sad, vq_K; + + /* Getting the linear model coefficient */ + vq_lin_mod_coeff = irc_get_linear_coefficient(pps_rd_model[i]); + + /* Getting the estimated SAD */ + SET_VAR_Q(vq_est_sad, irc_get_est_sad(ps_est_sad,i), 0); + + /* Making K factor a var Q format */ + SET_VAR_Q(vq_K, ps_bit_allocation->i2_K[i], 
K_Q); + + /* Complexity_estimate = [ (lin_mod_coeff * estimated_sad) / K factor ] */ + mult32_var_q(vq_lin_mod_coeff, vq_est_sad, &vq_lin_mod_coeff); + div32_var_q(vq_lin_mod_coeff, vq_K, &vq_complexity_estimate[i]); + } + + /* + * For simple cases, one of the complexities go to zero and in those cases + * distribute the bits evenly among frames based on I_TO_P_RATIO + */ + + /* Also check the B-pictures complexity only in case they are present*/ + if(i4_frms_in_period[B_PIC] == 0) + { + complexity_est = (vq_complexity_estimate[I_PIC] + && vq_complexity_estimate[P_PIC]); + } + else + { + complexity_est = (vq_complexity_estimate[I_PIC] + && vq_complexity_estimate[P_PIC] + && vq_complexity_estimate[B_PIC]); + } + + if(complexity_est) + { + /* + * Estimated texture bits = + * (remaining bits) * (cur frm complexity) + * --------------------------------------- + * (num_i_frm*i_frm_complexity) + (num_p_frm*pfrm_complexity) + * + (b_frm * b_frm_cm) + */ + mult32_var_q(vq_rem_texture_bits, vq_complexity_estimate[e_pic_type], + &vq_rem_texture_bits); + + for(i = 0; i < MAX_PIC_TYPE; i++) + { + mult32_var_q(vq_rem_frms_in_period[i], vq_complexity_estimate[i], + &vq_rem_frms_in_period[i]); + } + + add32_var_q(vq_rem_frms_in_period[I_PIC], vq_rem_frms_in_period[P_PIC], + &vq_rem_frms_in_period[I_PIC]); + + add32_var_q(vq_rem_frms_in_period[I_PIC], vq_rem_frms_in_period[B_PIC], + &vq_rem_frms_in_period[I_PIC]); + + div32_var_q(vq_rem_texture_bits, vq_rem_frms_in_period[I_PIC], + &vq_est_texture_bits_for_frm); + + number_t_to_word32(vq_est_texture_bits_for_frm, + &i4_est_texture_bits_for_frm); + } + else + { + number_t vq_i_to_p_bit_ratio, vq_rem_frms; + + SET_VAR_Q(vq_i_to_p_bit_ratio, I_TO_P_BIT_RATIO, 0); + + /* rem_frms = ((I_TO_P_BIT_RATIO * rem_frms_in_period[I_PIC]) + + * rem_frms_in_period[P_PIC] + rem_frms_in_period[B_PIC]); + */ + mult32_var_q(vq_rem_frms_in_period[I_PIC], vq_i_to_p_bit_ratio, + &vq_rem_frms); + add32_var_q(vq_rem_frms_in_period[P_PIC], vq_rem_frms, 
&vq_rem_frms); + add32_var_q(vq_rem_frms_in_period[B_PIC], vq_rem_frms, &vq_rem_frms); + + /* est_texture_bits_for_frm = rem_texture_bits / rem_frms */ + div32_var_q(vq_rem_texture_bits, vq_rem_frms, + &vq_est_texture_bits_for_frm); + number_t_to_word32(vq_est_texture_bits_for_frm, + &i4_est_texture_bits_for_frm); + + i4_est_texture_bits_for_frm = + (I_PIC == e_pic_type) ? + (i4_est_texture_bits_for_frm + * I_TO_P_BIT_RATIO) : + i4_est_texture_bits_for_frm; + } + + /* + * If the remaining bits in the period becomes negative then the estimated + * texture bits would also become negative. This would send a feedback to + * the model which may go for a toss. Thus sending the minimum possible + * value = 0 + */ + if(i4_est_texture_bits_for_frm < 0) + { + i4_est_texture_bits_for_frm = 0; + } + + return (i4_est_texture_bits_for_frm); +} + +/****************************************************************************** + Function Name : irc_ba_get_cur_frm_est_header_bits + Description : Returns the header bits stored for the previous frame of + the given picture type as the estimate for the current frame. + ******************************************************************************/ +WORD32 irc_ba_get_cur_frm_est_header_bits(bit_allocation_t *ps_bit_allocation, + picture_type_e e_pic_type) +{ + return (ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type]); +} + +WORD32 irc_ba_get_rem_bits_in_period(bit_allocation_t *ps_bit_allocation, + pic_handling_handle ps_pic_handling) +{ + WORD32 i4_rem_bits_in_gop = 0; + irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0); + number_t_to_word32(ps_bit_allocation->s_rbip.vq_rem_bits_in_period, + &i4_rem_bits_in_gop); + return (i4_rem_bits_in_gop); +} + +/******************************************************************************* + Function Name : irc_ba_update_cur_frm_consumed_bits + Description : Updates the remaining bits in period with the bits consumed by + the current frame and stores its header bits for later estimates. 
+ ******************************************************************************/ +void irc_ba_update_cur_frm_consumed_bits(bit_allocation_t *ps_bit_allocation, + pic_handling_handle ps_pic_handling, + WORD32 i4_total_frame_bits, + WORD32 i4_model_updation_hdr_bits, + picture_type_e e_pic_type, + UWORD8 u1_is_scd, + WORD32 i4_last_frm_in_gop) +{ + WORD32 i4_error_bits = irc_get_error_bits(ps_bit_allocation->ps_error_bits); + + /* Update the remaining bits in period */ + irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, + (-i4_total_frame_bits + i4_error_bits)); + + /* + * Update the header bits so that they can be used as an estimate for the + * next frame + */ + if(u1_is_scd) + { + /* + * In case of SCD, even though the frame type is P, it is equivalent to + * an I frame and so the I frame header bits are updated + */ + ps_bit_allocation->i4_prev_frm_header_bits[I_PIC] = + i4_model_updation_hdr_bits; + +#define MAX_NUM_GOPS_IN_PERIOD (3) + if(ps_bit_allocation->i4_num_gops_in_period < MAX_NUM_GOPS_IN_PERIOD) + { + /* + * Whenever there is a scene change increase the number of gops by + * 2 so that the number of bits allocated is not very constrained + */ + ps_bit_allocation->i4_num_gops_in_period += 2; + /* Add the extra bits in GOP to remaining bits in period */ + irc_ba_change_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, + ps_bit_allocation->i4_bits_per_frm, + ps_bit_allocation->i4_num_gops_in_period); + } + } + else + { + ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type] = + i4_model_updation_hdr_bits; + } + + if(i4_last_frm_in_gop) + { + WORD32 i4_num_bits_in_a_gop = get_number_of_frms_in_a_gop( + ps_pic_handling) * ps_bit_allocation->i4_bits_per_frm; + /* + * If the number of gops in period has been increased due to scene + * change, slowly bring it down, one gop at a time + */ + if(ps_bit_allocation->i4_num_gops_in_period + > ps_bit_allocation->i4_actual_num_gops_in_period) + { + ps_bit_allocation->i4_num_gops_in_period--; +
irc_ba_change_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, + ps_bit_allocation->i4_bits_per_frm, + ps_bit_allocation->i4_num_gops_in_period); + } + /* + * If rem_bits_in_period < 0 decrease the number of bits allocated for + * the next period else increase it + */ + irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, + i4_num_bits_in_a_gop); + } + /* Update the lower modules */ + irc_update_error_bits(ps_bit_allocation->ps_error_bits); +} + +void irc_ba_change_remaining_bits_in_period(bit_allocation_t *ps_bit_allocation, + pic_handling_handle ps_pic_handling, + WORD32 i4_bit_rate, + WORD32 i4_frame_rate, + WORD32 *i4_peak_bit_rate) +{ + WORD32 i4_new_avg_bits_per_frm; + WORD32 i4_new_peak_bits_per_frm[MAX_NUM_DRAIN_RATES]; + WORD32 i4_rem_frms_in_period[MAX_PIC_TYPE]; + int i; + + /* Calculate the new per frame bits */ + X_PROD_Y_DIV_Z(i4_bit_rate, 1000, i4_frame_rate, i4_new_avg_bits_per_frm); + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + X_PROD_Y_DIV_Z(i4_peak_bit_rate[i], 1000, i4_frame_rate, + i4_new_peak_bits_per_frm[i]); + } + + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + ps_bit_allocation->i4_max_bits_per_frm[i] = i4_new_peak_bits_per_frm[i]; + } + + /* + * Get the rem_frms_in_prd from the pic_type state struct. + * NOTE(review): the fetched array is not read again in this function + */ + irc_pic_type_get_rem_frms_in_gop(ps_pic_handling, i4_rem_frms_in_period); + + /* + * Re-derive the remaining bits in period from the new average bits per + * frame and the current number of gops in period + */ + irc_ba_change_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, + i4_new_avg_bits_per_frm, + ps_bit_allocation->i4_num_gops_in_period); + + /* Update the new average bits per frame */ + ps_bit_allocation->i4_bits_per_frm = i4_new_avg_bits_per_frm; + /* change the lower modules state */ + irc_change_bitrate_in_error_bits(ps_bit_allocation->ps_error_bits, + i4_bit_rate); + irc_change_frm_rate_in_error_bits(ps_bit_allocation->ps_error_bits, + i4_frame_rate); + + /* Store the
modified frame rate, bit rate and peak bit rates */ + ps_bit_allocation->i4_frame_rate = i4_frame_rate; + ps_bit_allocation->i4_bit_rate = i4_bit_rate; + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + ps_bit_allocation->ai4_peak_bit_rate[i] = i4_peak_bit_rate[i]; +} + +void irc_ba_change_ba_peak_bit_rate(bit_allocation_t *ps_bit_allocation, + WORD32 *ai4_peak_bit_rate) +{ + WORD32 i; + + /* Recalculate the max bits per frame from each new peak bit rate */ + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + X_PROD_Y_DIV_Z(ai4_peak_bit_rate[i], 1000, + ps_bit_allocation->i4_frame_rate, + ps_bit_allocation->i4_max_bits_per_frm[i]); + ps_bit_allocation->ai4_peak_bit_rate[i] = ai4_peak_bit_rate[i]; + } +} + +/****************************************************************************** + * @brief Modifies the remaining bits in period for the gop which has a forced + * I frame (fif). Since the fif causes a new gop to be created, the bits worth + * the number of frames reported for the fif GOP are added to the remaining + * bits in period + ******************************************************************************/ +void irc_ba_change_rem_bits_in_prd_at_force_I_frame(bit_allocation_t *ps_bit_allocation, + pic_handling_handle ps_pic_handling) +{ + WORD32 i4_frms_in_period; + i4_frms_in_period = irc_pic_type_get_frms_in_gop_force_I_frm( + ps_pic_handling); + irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, + ps_bit_allocation->i4_bits_per_frm * i4_frms_in_period); +} +/* Keeps rem_bits_in_period within [min_bits_for_period, max_drain_bits]: the excess or deficit is banked in vq_saved_bits, and when already in range the period is topped up from a positive saved-bits pool. i4_cur_buf_size, i4_max_buf_size and i4_tot_frame_bits are unused. */ +void irc_ba_check_and_update_bit_allocation(bit_allocation_t *ps_bit_allocation, + pic_handling_handle ps_pic_handling, + WORD32 i4_cur_buf_size, + WORD32 i4_max_buf_size, + WORD32 i4_max_bits_inflow_per_frm, + WORD32 i4_tot_frame_bits) +{ + + number_t vq_max_drain_bits, vq_extra_bits, vq_less_bits, + vq_allocated_saved_bits, vq_min_bits_for_period; + WORD32 i4_num_frms_in_period = get_number_of_frms_in_a_gop(ps_pic_handling); + number_t vq_rem_bits_in_period, vq_num_frms_in_period, vq_zero; + WORD32 b_rem_bits_gt_max_drain, b_rem_bits_lt_min_bits, + b_saved_bits_gt_zero; +
rem_bit_in_prd_t *ps_rbip = &ps_bit_allocation->s_rbip; + + UNUSED(i4_cur_buf_size); + UNUSED(i4_max_buf_size); + UNUSED(i4_tot_frame_bits); + + /* + * If the remaining bits are greater than what can be drained in that + * period, clip the remaining bits in period to the maximum that can be + * drained and accumulate the clipped excess as saved bits. + * Else if the remaining bits are less than the minimum promised for that + * period, raise the remaining bits in period to that minimum and deduct + * the difference from the saved bits. + * Else, if the saved bits pool is positive, provide extra bits from the + * "saved bits pool". + */ + /* + * max_drain_bits = num_gops_in_period * num_frms_in_period + * * max_bits_inflow_per_frm + */ + SET_VAR_Q(vq_num_frms_in_period, + (ps_bit_allocation->i4_num_gops_in_period * i4_num_frms_in_period), + 0); + SET_VAR_Q(vq_max_drain_bits, i4_max_bits_inflow_per_frm, 0); + SET_VAR_Q(vq_zero, 0, 0); + mult32_var_q(vq_max_drain_bits, vq_num_frms_in_period, &vq_max_drain_bits); + + /* + * min_bits_for_period = num_gops_in_period * num_frms_in_period * + * min_bits_per_frm + */ + SET_VAR_Q(vq_min_bits_for_period, ps_bit_allocation->i4_min_bits_per_frm, + 0); + mult32_var_q(vq_min_bits_for_period, vq_num_frms_in_period, + &vq_min_bits_for_period); + + vq_rem_bits_in_period = ps_rbip->vq_rem_bits_in_period; + + /* Evaluate rem_bits_in_period > max_drain_bits */ + VQ_A_GT_VQ_B(ps_rbip->vq_rem_bits_in_period, vq_max_drain_bits, + b_rem_bits_gt_max_drain); + + /* Evaluate rem_bits_in_period < min_bits_for_period */ + VQ_A_LT_VQ_B(ps_rbip->vq_rem_bits_in_period, vq_min_bits_for_period, + b_rem_bits_lt_min_bits); + + /* Evaluate saved_bits > 0; this needs the greater-than compare (a less-than compare would set the flag only for a negative pool) */ + VQ_A_GT_VQ_B(ps_bit_allocation->vq_saved_bits, vq_zero, + b_saved_bits_gt_zero); + + /* (i4_rem_bits_in_period > i4_max_drain_bits) */ + if(b_rem_bits_gt_max_drain) + { + /* extra_bits = rem_bits_in_period - max_drain_bits */ + sub32_var_q(ps_rbip->vq_rem_bits_in_period, vq_max_drain_bits, + &vq_extra_bits); +
+ /* saved_bits += extra_bits */ + add32_var_q(ps_bit_allocation->vq_saved_bits, vq_extra_bits, + &ps_bit_allocation->vq_saved_bits); + + /* rem_bits_in_period = vq_max_drain_bits */ + ps_rbip->vq_rem_bits_in_period = vq_max_drain_bits; + } + else if(b_rem_bits_lt_min_bits) + { + /* extra_bits(-ve) = rem_bits_in_period - i4_min_bits_for_period */ + sub32_var_q(ps_rbip->vq_rem_bits_in_period, vq_min_bits_for_period, + &vq_extra_bits); + + /* saved_bits += extra_bits(-ve) */ + add32_var_q(ps_bit_allocation->vq_saved_bits, vq_extra_bits, + &ps_bit_allocation->vq_saved_bits); + + /* rem_bits_in_period = min_bits_for_period */ + ps_rbip->vq_rem_bits_in_period = vq_min_bits_for_period; + } + else if(b_saved_bits_gt_zero) + { + /* less_bits = max_drain_bits - rem_bits_in_period */ + sub32_var_q(vq_max_drain_bits, vq_rem_bits_in_period, &vq_less_bits); + + /* allocated_saved_bits = MIN (less_bits, saved_bits) */ + MIN_VARQ(ps_bit_allocation->vq_saved_bits, vq_less_bits, + vq_allocated_saved_bits); + + /* rem_bits_in_period += allocated_saved_bits */ + add32_var_q(ps_rbip->vq_rem_bits_in_period, vq_allocated_saved_bits, + &ps_rbip->vq_rem_bits_in_period); + + /* saved_bits -= allocated_saved_bits */ + sub32_var_q(ps_bit_allocation->vq_saved_bits, vq_allocated_saved_bits, + &ps_bit_allocation->vq_saved_bits); + } + return; +} + +WORD32 irc_ba_get_frame_rate(bit_allocation_t *ps_bit_allocation) +{ + return (ps_bit_allocation->i4_frame_rate); +} + +WORD32 irc_ba_get_bit_rate(bit_allocation_t *ps_bit_allocation) +{ + return (ps_bit_allocation->i4_bit_rate); +} + +void irc_ba_get_peak_bit_rate(bit_allocation_t *ps_bit_allocation, + WORD32 *pi4_peak_bit_rate) +{ + WORD32 i; + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + pi4_peak_bit_rate[i] = ps_bit_allocation->ai4_peak_bit_rate[i]; + } +} diff --git a/encoder/irc_bit_allocation.h b/encoder/irc_bit_allocation.h new file mode 100755 index 0000000..19ba0df --- /dev/null +++ b/encoder/irc_bit_allocation.h @@ -0,0 +1,99 @@
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +#ifndef _BIT_ALLOCATION_H_ +#define _BIT_ALLOCATION_H_ +/* Opaque handle: the structure itself is not defined in this header */ +typedef struct bit_allocation_t *bit_allocation_handle; + +WORD32 irc_ba_num_fill_use_free_memtab(bit_allocation_handle *pps_bit_allocation, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type); + +void irc_ba_init_bit_allocation(bit_allocation_handle ps_bit_allocation, + pic_handling_handle ps_pic_handling, + WORD32 i4_num_intra_frm_interval, + WORD32 i4_bit_rate, + WORD32 i4_frm_rate, + WORD32 *u4_peak_bit_rate, + WORD32 i4_min_bitrate); + +/* Estimates the number of texture bits required by the current frame; the result is never negative */ +WORD32 irc_ba_get_cur_frm_est_texture_bits(bit_allocation_handle ps_bit_allocation, + rc_rd_model_handle *pps_rd_model, + est_sad_handle ps_est_sad, + pic_handling_handle ps_pic_handling, + picture_type_e e_pic_type); + +/* Returns the header bits recorded for the previous frame of the given picture type, used as the estimate for the current frame */ +WORD32 irc_ba_get_cur_frm_est_header_bits(bit_allocation_handle ps_bit_allocation, + picture_type_e e_pic_type); + +/* Get the remaining bits allocated in the period, as a 32 bit value */ +WORD32 irc_ba_get_rem_bits_in_period(bit_allocation_handle
ps_bit_allocation, + pic_handling_handle ps_pic_handling); +/* Returns the stored frame rate */ +WORD32 irc_ba_get_frame_rate(bit_allocation_handle ps_bit_allocation); +/* Returns the stored bit rate */ +WORD32 irc_ba_get_bit_rate(bit_allocation_handle ps_bit_allocation); +void irc_ba_get_peak_bit_rate(bit_allocation_handle ps_bit_allocation, + WORD32 *pi4_peak_bit_rate); + +/* Updates the bit allocation module with the actual encoded values */ +void irc_ba_update_cur_frm_consumed_bits(bit_allocation_handle ps_bit_allocation, + pic_handling_handle ps_pic_handling, + WORD32 i4_total_frame_bits, + WORD32 i4_model_updation_hdr_bits, + picture_type_e e_pic_type, + UWORD8 u1_is_scd, + WORD32 i4_last_frm_in_gop); +/* Keeps the remaining bits in period between the minimum for the period and the maximum drainable bits, using the saved bits pool; the two buffer size arguments and i4_tot_frame_bits are unused by the implementation */ +void irc_ba_check_and_update_bit_allocation(bit_allocation_handle ps_bit_allocation, + pic_handling_handle ps_pic_handling, + WORD32 i4_cur_buf_size, + WORD32 i4_max_buf_size, + WORD32 i4_max_bits_inflow_per_frm, + WORD32 i4_tot_frame_bits); + +/* Based on the change in frame/bit rate update the remaining bits in period; i4_peak_bit_rate must hold MAX_NUM_DRAIN_RATES entries */ +void irc_ba_change_remaining_bits_in_period(bit_allocation_handle ps_bit_allocation, + pic_handling_handle ps_pic_handling, + WORD32 i4_bit_rate, + WORD32 i4_frame_rate, + WORD32 *i4_peak_bit_rate); + +/* Change the gop size in the middle of a current gop */ +void change_gop_size(bit_allocation_handle ps_bit_allocation, + WORD32 i4_intra_frm_interval, + WORD32 i4_inter_frm_interval, + WORD32 i4_num_intra_frm_interval); + +void update_rem_frms_in_period(bit_allocation_handle ps_bit_allocation, + picture_type_e e_pic_type, + UWORD8 u1_is_first_frm, + WORD32 i4_intra_frm_interval, + WORD32 i4_num_intra_frm_interval); +/* Adds bits_per_frm times the frame count reported for the forced I frame gop to the remaining bits in period */ +void irc_ba_change_rem_bits_in_prd_at_force_I_frame(bit_allocation_handle ps_bit_allocation, + pic_handling_handle ps_pic_handling); +/* Stores the new peak bit rates and recomputes the max bits per frame from them; ai4_peak_bit_rate must hold MAX_NUM_DRAIN_RATES entries */ +void irc_ba_change_ba_peak_bit_rate(bit_allocation_handle ps_bit_allocation, + WORD32 *ai4_peak_bit_rate); +#endif diff --git a/encoder/irc_cbr_buffer_control.c b/encoder/irc_cbr_buffer_control.c new file mode 100755 index 0000000..c179a28 --- /dev/null +++
b/encoder/irc_cbr_buffer_control.c @@ -0,0 +1,653 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> + +/* User include files */ +#include "irc_datatypes.h" +#include "irc_cntrl_param.h" +#include "irc_common.h" +#include "irc_mem_req_and_acq.h" +#include "irc_fixed_point_error_bits.h" +#include "irc_cbr_buffer_control.h" +#include "irc_trace_support.h" + +typedef struct cbr_buffer_t +{ + /* Buffer size in bits: Delay * Bitrate in CBR mode, capped at u4_max_vbv_buf_size */ + WORD32 i4_buffer_size; + + /* Bits drained from the buffer per frame, one entry per drain rate */ + WORD32 i4_drain_bits_per_frame[MAX_NUM_DRAIN_RATES]; + + /* Encoder Buffer Fullness; starts at zero */ + WORD32 i4_ebf; + + /* Upper threshold of the Buffer, per picture type */ + WORD32 i4_upr_thr[MAX_PIC_TYPE]; + + /* Lower threshold of the Buffer, per picture type */ + WORD32 i4_low_thr[MAX_PIC_TYPE]; + + /* Stuffing threshold equal to error bits per second in the drain bits + * fixed point computation */ + WORD32 i4_stuffing_threshold; + + /* Error bits state for the bits per frame calculation, one per drain rate */ +
error_bits_handle aps_bpf_error_bits[MAX_NUM_DRAIN_RATES]; + + /* 1 when the buffer model is used for CBR, 0 for VBR streaming */ + WORD32 i4_is_cbr_mode; + + /* Input parameters stored at init for use by the change functions */ + WORD32 ai4_bit_rate[MAX_NUM_DRAIN_RATES]; + /* Buffer delay given at init; the buffer size computation treats it as milliseconds */ + WORD32 i4_max_delay; + /* Number of pictures of each type in the delay period; used for the VBR buffer size */ + WORD32 ai4_num_pics_in_delay_period[MAX_PIC_TYPE]; + /* Target frame rate given at init */ + WORD32 i4_tgt_frm_rate; + /* Upper bound applied to i4_buffer_size */ + UWORD32 u4_max_vbv_buf_size; + +} cbr_buffer_t; +/* Memtab helper for the CBR buffer state: for every mode except GET_NUM_MEMTAB it fills the memtab entry for cbr_buffer_t and calls use_or_fill_base on it, then lets the error bits module handle its own entries for each drain rate. Returns the number of memtab entries accounted for. */ +WORD32 irc_cbr_buffer_num_fill_use_free_memtab(cbr_buffer_t **pps_cbr_buffer, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type) +{ + WORD32 i4_mem_tab_idx = 0, i; + static cbr_buffer_t s_cbr_buffer_temp; + + /* + * For GET_NUM_MEMTAB and FILL_MEMTAB no state memory exists yet, so the + * handle is pointed at a static dummy to keep the dereferences below valid + */ + if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB) + (*pps_cbr_buffer) = &s_cbr_buffer_temp; + + if(e_func_type != GET_NUM_MEMTAB) + { + fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(cbr_buffer_t), + ALIGN_128_BYTE, PERSISTENT, DDR); + use_or_fill_base(&ps_memtab[0], (void**)pps_cbr_buffer, e_func_type); + } + i4_mem_tab_idx++; + + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + i4_mem_tab_idx += irc_error_bits_num_fill_use_free_memtab( + &pps_cbr_buffer[0]->aps_bpf_error_bits[i], + &ps_memtab[i4_mem_tab_idx], e_func_type); + } + return (i4_mem_tab_idx); +} + +/****************************************************************************** + * @brief Initialize the CBR VBV buffer state.
+ * This could however be used for VBR streaming VBV also + * + ******************************************************************************/ +void irc_init_cbr_buffer(cbr_buffer_t *ps_cbr_buffer, + WORD32 i4_buffer_delay, + WORD32 i4_tgt_frm_rate, + WORD32 *i4_bit_rate, + UWORD32 *u4_num_pics_in_delay_prd, + UWORD32 u4_vbv_buf_size) +{ + WORD32 i4_i, i4_bits_per_frm[MAX_NUM_DRAIN_RATES]; + int i; + + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + X_PROD_Y_DIV_Z(i4_bit_rate[i], 1000, i4_tgt_frm_rate, + i4_bits_per_frm[i]); + /* Drain rate = bitrate/(framerate/1000) */ + ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i]; + /* Initialize the bits per frame error bits calculation */ + irc_init_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i], + i4_tgt_frm_rate, i4_bit_rate[i]); + } + + /* Bitrate * delay = buffer size, divide by 1000 as delay is in ms*/ + /* This would mean CBR mode */ + if(i4_bit_rate[0] == i4_bit_rate[1]) + { + X_PROD_Y_DIV_Z(i4_bit_rate[0], i4_buffer_delay, 1000, + ps_cbr_buffer->i4_buffer_size); + ps_cbr_buffer->i4_is_cbr_mode = 1; + } + else + { + /* VBR streaming case which has different drain rates for I and P */ + ps_cbr_buffer->i4_buffer_size = u4_num_pics_in_delay_prd[0] + * ps_cbr_buffer->i4_drain_bits_per_frame[0] + + u4_num_pics_in_delay_prd[1] + * ps_cbr_buffer->i4_drain_bits_per_frame[1]; + + ps_cbr_buffer->i4_is_cbr_mode = 0; + } + + if(ps_cbr_buffer->i4_buffer_size > (WORD32)u4_vbv_buf_size) + { + ps_cbr_buffer->i4_buffer_size = u4_vbv_buf_size; + } + + /* Initially Encoder buffer fullness is zero */ + ps_cbr_buffer->i4_ebf = 0; + + /* tgt_frame_rate is divided by 1000 because, an approximate value is fine + * as this is just a threshold below which stuffing is done to avoid buffer + * underflow due to fixed point error in drain rate + */ + ps_cbr_buffer->i4_stuffing_threshold = (i4_bit_rate[0] + - (i4_bits_per_frm[0] * (i4_tgt_frm_rate / 1000))); + + for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++) + { + /* + * Upper 
threshold for + * I frame = 1 * bits per frame + * P Frame = 4 * bits per frame. + * The threshold for I frame is only 1 * bits per frame as the threshold + * should only account for error in estimated bits. + * In P frame it should account for difference bets bits consumed by + * I(Scene change) and P frame I to P complexity is assumed to be 5. + */ + WORD32 i4_index; + i4_index = i4_i > 0 ? 1 : 0; + ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size + - (ps_cbr_buffer->i4_buffer_size >> 3); + + /* + * For both I and P frame Lower threshold is equal to drain rate.Even if + * the encoder consumes zero bits it should have enough bits to drain + */ + ps_cbr_buffer->i4_low_thr[i4_i] = i4_bits_per_frm[i4_index]; + } + + /* Storing the input parameters for using it for change functions */ + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + ps_cbr_buffer->ai4_bit_rate[i] = i4_bit_rate[i]; + } + + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ps_cbr_buffer->ai4_num_pics_in_delay_period[i] = + u4_num_pics_in_delay_prd[i]; + } + ps_cbr_buffer->i4_tgt_frm_rate = i4_tgt_frm_rate; + ps_cbr_buffer->i4_max_delay = i4_buffer_delay; + ps_cbr_buffer->u4_max_vbv_buf_size = u4_vbv_buf_size; +} + +/****************************************************************************** + * @brief Condition check for constraining the number of bits allocated based on + * bufer size + ******************************************************************************/ +WORD32 irc_cbr_buffer_constraint_check(cbr_buffer_t *ps_cbr_buffer, + WORD32 i4_tgt_bits, + picture_type_e e_pic_type) +{ + WORD32 i4_max_tgt_bits, i4_min_tgt_bits; + WORD32 i4_drain_bits_per_frame = (e_pic_type == I_PIC) ? 
+ ps_cbr_buffer->i4_drain_bits_per_frame[0] : + ps_cbr_buffer->i4_drain_bits_per_frame[1]; + + /* Max tgt bits = Upper threshold - current encoder buffer fullness */ + i4_max_tgt_bits = ps_cbr_buffer->i4_upr_thr[e_pic_type] + - ps_cbr_buffer->i4_ebf; + /* Max tgt bits cannot be negative */ + if(i4_max_tgt_bits < 0) + i4_max_tgt_bits = 0; + + /* + * Min tgt bits , least number of bits in the Encoder after + * draining such that it is greater than lower threshold + */ + i4_min_tgt_bits = ps_cbr_buffer->i4_low_thr[e_pic_type] + - (ps_cbr_buffer->i4_ebf - i4_drain_bits_per_frame); + /* Min tgt bits cannot be negative */ + if(i4_min_tgt_bits < 0) + i4_min_tgt_bits = 0; + + /* Current tgt bits should be between max and min tgt bits */ + CLIP(i4_tgt_bits, i4_max_tgt_bits, i4_min_tgt_bits); + return i4_tgt_bits; +} + +/* ***************************************************************************** + * @brief constaints the bit allocation based on buffer size + * + ******************************************************************************/ +WORD32 irc_vbr_stream_buffer_constraint_check(cbr_buffer_t *ps_cbr_buffer, + WORD32 i4_tgt_bits, + picture_type_e e_pic_type) +{ + WORD32 i4_max_tgt_bits; + + /* Max tgt bits = Upper threshold - current encoder buffer fullness */ + i4_max_tgt_bits = ps_cbr_buffer->i4_upr_thr[e_pic_type] + - ps_cbr_buffer->i4_ebf; + + /* Max tgt bits cannot be negative */ + if(i4_max_tgt_bits < 0) + i4_max_tgt_bits = 0; + + if(i4_tgt_bits > i4_max_tgt_bits) + i4_tgt_bits = i4_max_tgt_bits; + + return i4_tgt_bits; +} + +/* ***************************************************************************** + * @brief Verifies the buffer state and returns whether it is overflowing, + * underflowing or normal + * + ******************************************************************************/ +vbv_buf_status_e irc_get_cbr_buffer_status(cbr_buffer_t *ps_cbr_buffer, + WORD32 i4_tot_consumed_bits, + WORD32 *pi4_num_bits_to_prevent_overflow, + picture_type_e 
e_pic_type) +{ + vbv_buf_status_e e_buf_status; + WORD32 i4_cur_enc_buf; + WORD32 i4_error_bits = (e_pic_type == I_PIC) ? + irc_get_error_bits(ps_cbr_buffer + ->aps_bpf_error_bits[0]) : + irc_get_error_bits(ps_cbr_buffer + ->aps_bpf_error_bits[1]); + + WORD32 i4_drain_bits_per_frame = (e_pic_type == I_PIC) ? + ps_cbr_buffer->i4_drain_bits_per_frame[0] : + ps_cbr_buffer->i4_drain_bits_per_frame[1]; + + /* Add the tot consumed bits to the Encoder Buffer*/ + i4_cur_enc_buf = ps_cbr_buffer->i4_ebf + i4_tot_consumed_bits; + + /* If the Encoder exceeds the Buffer Size signal an Overflow*/ + if(i4_cur_enc_buf > ps_cbr_buffer->i4_buffer_size) + { + e_buf_status = VBV_OVERFLOW; + i4_cur_enc_buf = ps_cbr_buffer->i4_buffer_size; + } + else + { + /* + * Subtract the constant drain bits and error bits due to fixed point + * implementation + */ + i4_cur_enc_buf -= (i4_drain_bits_per_frame + i4_error_bits); + + /* + * If the buffer is less than stuffing threshold an Underflow is + * signaled else its NORMAL + */ + if(i4_cur_enc_buf < ps_cbr_buffer->i4_stuffing_threshold) + { + e_buf_status = VBV_UNDERFLOW; + } + else + { + e_buf_status = VBV_NORMAL; + } + + if(i4_cur_enc_buf < 0) + i4_cur_enc_buf = 0; + } + + /* + * The RC lib models the encoder buffer, but the VBV buffer characterizes + * the decoder buffer + */ + if(e_buf_status == VBV_OVERFLOW) + { + e_buf_status = VBV_UNDERFLOW; + } + else if(e_buf_status == VBV_UNDERFLOW) + { + e_buf_status = VBV_OVERFLOW; + } + + pi4_num_bits_to_prevent_overflow[0] = (ps_cbr_buffer->i4_buffer_size + - i4_cur_enc_buf); + + return e_buf_status; +} + +/******************************************************************************* + * @brief Based on the bits consumed the buffer model is updated + ******************************************************************************/ +void irc_update_cbr_buffer(cbr_buffer_t *ps_cbr_buffer, + WORD32 i4_tot_consumed_bits, + picture_type_e e_pic_type) +{ + WORD32 i4_error_bits = (e_pic_type == I_PIC) ? 
+ irc_get_error_bits(ps_cbr_buffer-> + aps_bpf_error_bits[0]) : + irc_get_error_bits( ps_cbr_buffer-> + aps_bpf_error_bits[1]); + + WORD32 i4_drain_bits_per_frame = (e_pic_type == I_PIC) ? + ps_cbr_buffer->i4_drain_bits_per_frame[0] : + ps_cbr_buffer->i4_drain_bits_per_frame[1]; + + /* Update the Encoder buffer with the total consumed bits*/ + ps_cbr_buffer->i4_ebf += i4_tot_consumed_bits; + + /* + * Subtract the drain bits and error bits due to fixed point + * implementation + */ + ps_cbr_buffer->i4_ebf -= (i4_drain_bits_per_frame + i4_error_bits); + + if(ps_cbr_buffer->i4_ebf < 0) + ps_cbr_buffer->i4_ebf = 0; + + /*SS - Fix for lack of stuffing*/ + if(ps_cbr_buffer->i4_ebf > ps_cbr_buffer->i4_buffer_size) + { + trace_printf( + (const WORD8*)"Error: Should not be coming here with stuffing\n"); + ps_cbr_buffer->i4_ebf = ps_cbr_buffer->i4_buffer_size; + } +} + +/******************************************************************************* + * @brief If the buffer underflows then return the number of bits to prevent + * underflow + * + ******************************************************************************/ +WORD32 irc_get_cbr_bits_to_stuff(cbr_buffer_t *ps_cbr_buffer, + WORD32 i4_tot_consumed_bits, + picture_type_e e_pic_type) +{ + WORD32 i4_bits_to_stuff; + WORD32 i4_error_bits = (e_pic_type == I_PIC) ? + irc_get_error_bits(ps_cbr_buffer + ->aps_bpf_error_bits[0]) : + irc_get_error_bits(ps_cbr_buffer + ->aps_bpf_error_bits[1]); + + WORD32 i4_drain_bits_per_frame = (e_pic_type == I_PIC) ? 
+ ps_cbr_buffer->i4_drain_bits_per_frame[0] : + ps_cbr_buffer->i4_drain_bits_per_frame[1]; + + /* + * Stuffing bits got from the following equation + * Stuffing_threshold = ebf + tcb - drain bits - error bits + stuff_bits + */ + i4_bits_to_stuff = i4_drain_bits_per_frame + i4_error_bits + + ps_cbr_buffer->i4_stuffing_threshold + - (ps_cbr_buffer->i4_ebf + i4_tot_consumed_bits); + + return i4_bits_to_stuff; +} + +/******************************************************************************* + * @brief Update the state for change in number of pics in the delay period + * + ******************************************************************************/ +void irc_change_cbr_vbv_num_pics_in_delay_period(cbr_buffer_t *ps_cbr_buffer, + UWORD32 *u4_num_pics_in_delay_prd) +{ + WORD32 i; + + if(!ps_cbr_buffer->i4_is_cbr_mode) + { + ps_cbr_buffer->i4_buffer_size = + u4_num_pics_in_delay_prd[0] + * ps_cbr_buffer->i4_drain_bits_per_frame[0] + + u4_num_pics_in_delay_prd[1] + * ps_cbr_buffer->i4_drain_bits_per_frame[1]; + + if(ps_cbr_buffer->i4_buffer_size + > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size) + { + ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size; + } + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ps_cbr_buffer->i4_upr_thr[i] = ps_cbr_buffer->i4_buffer_size + - (ps_cbr_buffer->i4_buffer_size >> 3); + } + + /* Re-initialize the number of pics in delay period */ + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ps_cbr_buffer->ai4_num_pics_in_delay_period[i] = + u4_num_pics_in_delay_prd[i]; + } + } +} + +/****************************************************************************** + * @brief update the state for change in target frame rate + * + ******************************************************************************/ +void irc_change_cbr_vbv_tgt_frame_rate(cbr_buffer_t *ps_cbr_buffer, + WORD32 i4_tgt_frm_rate) +{ + WORD32 i4_i, i4_bits_per_frm[MAX_NUM_DRAIN_RATES]; + int i; + + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + 
X_PROD_Y_DIV_Z(ps_cbr_buffer->ai4_bit_rate[i], 1000, i4_tgt_frm_rate, + i4_bits_per_frm[i]); + /* Drain rate = bitrate/(framerate/1000) */ + ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i]; + /* Initialize the bits per frame error bits calculation */ + irc_change_frm_rate_in_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i], + i4_tgt_frm_rate); + } + + /* Bitrate * delay = buffer size, divide by 1000 as delay is in ms*/ + if(!ps_cbr_buffer->i4_is_cbr_mode) + { + /* VBR streaming case which has different drain rates for I and P */ + ps_cbr_buffer->i4_buffer_size = + ps_cbr_buffer->ai4_num_pics_in_delay_period[0] + * ps_cbr_buffer->i4_drain_bits_per_frame[0] + + ps_cbr_buffer->ai4_num_pics_in_delay_period[1] + * ps_cbr_buffer->i4_drain_bits_per_frame[1]; + } + + if(ps_cbr_buffer->i4_buffer_size + > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size) + { + ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size; + } + + /* + * Tgt_frame_rate is divided by 1000 because an approximate value is fine as + * this is just a threshold below which stuffing is done to avoid buffer + * underflow due to fixed point error in drain rate + */ + ps_cbr_buffer->i4_stuffing_threshold = (ps_cbr_buffer->ai4_bit_rate[0] + - (i4_bits_per_frm[0] * (i4_tgt_frm_rate / 1000))); + + for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++) + { + /* + * Upper threshold for + * I frame = 1 * bits per frame + * P Frame = 4 * bits per frame. + * The threshold for I frame is only 1 * bits per frame as the threshold should + * only account for error in estimated bits. + * In P frame it should account for difference bets bits consumed by I(Scene change) + * and P frame I to P complexity is assumed to be 5. + */ + WORD32 i4_index; + i4_index = i4_i > 0 ? 1 : 0; + ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size + - (ps_cbr_buffer->i4_buffer_size >> 3); + + /* + * For both I and P frame Lower threshold is equal to drain rate. 
+ * Even if the encoder consumes zero bits it should have enough bits to + * drain + */ + ps_cbr_buffer->i4_low_thr[i4_i] = i4_bits_per_frm[i4_index]; + } + + /* Storing the input parameters for using it for change functions */ + ps_cbr_buffer->i4_tgt_frm_rate = i4_tgt_frm_rate; +} + +/******************************************************************************* + * @brief Change the state for change in bit rate + * + ******************************************************************************/ +void irc_change_cbr_vbv_bit_rate(cbr_buffer_t *ps_cbr_buffer, + WORD32 *i4_bit_rate) +{ + WORD32 i4_i, i4_bits_per_frm[MAX_NUM_DRAIN_RATES]; + int i; + + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + X_PROD_Y_DIV_Z(i4_bit_rate[i], 1000, ps_cbr_buffer->i4_tgt_frm_rate, + i4_bits_per_frm[i]); + /* Drain rate = bitrate/(framerate/1000) */ + ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i]; + /* Initialize the bits per frame error bits calculation */ + irc_change_bitrate_in_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i], + i4_bit_rate[i]); + } + + /* Bitrate * delay = buffer size, divide by 1000 as delay is in ms*/ + if(i4_bit_rate[0] == i4_bit_rate[1]) /* This would mean CBR mode */ + { + X_PROD_Y_DIV_Z(i4_bit_rate[0], ps_cbr_buffer->i4_max_delay, 1000, + ps_cbr_buffer->i4_buffer_size); + ps_cbr_buffer->i4_is_cbr_mode = 1; + } + else + { + /* VBR streaming case which has different drain rates for I and P */ + ps_cbr_buffer->i4_buffer_size = + ps_cbr_buffer->ai4_num_pics_in_delay_period[0] + * ps_cbr_buffer->i4_drain_bits_per_frame[0] + + ps_cbr_buffer->ai4_num_pics_in_delay_period[1] + * ps_cbr_buffer->i4_drain_bits_per_frame[1]; + + ps_cbr_buffer->i4_is_cbr_mode = 0; + } + + if(ps_cbr_buffer->i4_buffer_size + > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size) + { + ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size; + } + + /* + * tgt_frame_rate is divided by 1000 because + * an approximate value is fine as this is just a threshold below which + 
* stuffing is done to avoid buffer underflow due to fixed point + * error in drain rate + */ + ps_cbr_buffer->i4_stuffing_threshold = (i4_bit_rate[0] + - (i4_bits_per_frm[0] + * (ps_cbr_buffer->i4_tgt_frm_rate / 1000))); + + for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++) + { + /* + * Upper threshold for + * I frame = 1 * bits per frame + * P Frame = 4 * bits per frame. + * The threshold for I frame is only 1 * bits per frame as the threshold + * should only account for error in estimated bits. + * In P frame it should account for difference bets bits consumed by + * I(Scene change) and P frame I to P complexity is assumed to be 5. + */ + + WORD32 i4_index; + i4_index = i4_i > 0 ? 1 : 0; + ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size + - (ps_cbr_buffer->i4_buffer_size >> 3); + + /* For both I and P frame Lower threshold is equal to drain rate. + * Even if the encoder consumes zero bits it should have enough bits to + * drain + */ + ps_cbr_buffer->i4_low_thr[i4_i] = i4_bits_per_frm[i4_index]; + } + + /* Storing the input parameters for using it for change functions */ + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + ps_cbr_buffer->ai4_bit_rate[i] = i4_bit_rate[i]; + } +} + +void irc_change_cbr_buffer_delay(cbr_buffer_t *ps_cbr_buffer, + WORD32 i4_buffer_delay) +{ + WORD32 i4_i; + + /* Bitrate * delay = buffer size, divide by 1000 as delay is in ms*/ + if(ps_cbr_buffer->i4_is_cbr_mode) + { + X_PROD_Y_DIV_Z(ps_cbr_buffer->ai4_bit_rate[0], i4_buffer_delay, 1000, + ps_cbr_buffer->i4_buffer_size); + } + + if(ps_cbr_buffer->i4_buffer_size + > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size) + { + ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size; + } + + for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++) + { + /* + * Upper threshold for + * I frame = 1 * bits per frame + * P Frame = 4 * bits per frame. + * The threshold for I frame is only 1 * bits per frame as the threshold + * should only account for error in estimated bits. 
+ * In P frame it should account for difference bets bits consumed by I + * (Scene change) and P frame I to P complexity is assumed to be 5. + */ + ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size + - (ps_cbr_buffer->i4_buffer_size >> 3); + } + + /* Storing the input parameters for using it for change functions */ + ps_cbr_buffer->i4_max_delay = i4_buffer_delay; +} + +WORD32 irc_get_cbr_buffer_delay(cbr_buffer_t *ps_cbr_buffer) +{ + return (ps_cbr_buffer->i4_max_delay); +} + +WORD32 irc_get_cbr_buffer_size(cbr_buffer_t *ps_cbr_buffer) +{ + return (ps_cbr_buffer->i4_buffer_size); +} diff --git a/encoder/irc_cbr_buffer_control.h b/encoder/irc_cbr_buffer_control.h new file mode 100755 index 0000000..2534961 --- /dev/null +++ b/encoder/irc_cbr_buffer_control.h @@ -0,0 +1,104 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* */ +/* File Name : irc_cbr_buffer_control.h */ +/* */ +/* Description : This file contains all the necessary declarations */ +/* for cbr_buffer_control functions */ +/* */ +/* */ +/* List of Functions : <List the functions defined in this file> */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 06 05 2008 Ittiam Draft */ +/* */ +/*****************************************************************************/ + +#ifndef CBR_BUFFER_CONTROL_H +#define CBR_BUFFER_CONTROL_H + +/* Macro for clipping a number between two extremes: Number is overwritten with Max when above it, else with Min when below it. It expands to a bare if / else-if that already ends in ';', so brace any if/else around a use of it */ +#define CLIP(Number,Max,Min) if((Number) > (Max)) (Number) = (Max); \ + else if((Number) < (Min)) (Number) = (Min); +/*****************************************************************************/ +/* Structure (opaque handle; struct cbr_buffer_t is not defined in this header) */ +/*****************************************************************************/ +typedef struct cbr_buffer_t *cbr_buffer_handle; + +WORD32 irc_cbr_buffer_num_fill_use_free_memtab(cbr_buffer_handle *pps_cbr_buffer, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type); + +/* Initialize the cbr Buffer*/ +void irc_init_cbr_buffer(cbr_buffer_handle ps_cbr_buffer, + WORD32 i4_buffer_delay, + WORD32 i4_tgt_frm_rate, + WORD32 *i4_bit_rate, + UWORD32 *u4_num_pics_in_delay_prd, + UWORD32 u4_vbv_buf_size); + +/* Check for tgt bits within CBR buffer*/ +WORD32 irc_cbr_buffer_constraint_check(cbr_buffer_handle ps_cbr_buffer, + WORD32 i4_tgt_bits, + picture_type_e e_pic_type); + +/* Get the buffer status with the current consumed bits*/ +vbv_buf_status_e irc_get_cbr_buffer_status(cbr_buffer_handle ps_cbr_buffer, + WORD32 i4_tot_consumed_bits, + WORD32 *pi4_num_bits_to_prevent_overflow, + picture_type_e e_pic_type); + +/* Update the CBR buffer at the end of the VOP*/ +void irc_update_cbr_buffer(cbr_buffer_handle ps_cbr_buffer, + WORD32 
i4_tot_consumed_bits, + picture_type_e e_pic_type); + +/*Get the bits needed to stuff in case of Underflow*/ +WORD32 irc_get_cbr_bits_to_stuff(cbr_buffer_handle ps_cbr_buffer, + WORD32 i4_tot_consumed_bits, + picture_type_e e_pic_type); + +WORD32 irc_get_cbr_buffer_delay(cbr_buffer_handle ps_cbr_buffer); /* returns the stored i4_max_delay */ + +WORD32 irc_get_cbr_buffer_size(cbr_buffer_handle ps_cbr_buffer); /* returns the stored i4_buffer_size */ + +WORD32 irc_vbr_stream_buffer_constraint_check(cbr_buffer_handle ps_cbr_buffer, + WORD32 i4_tgt_bits, + picture_type_e e_pic_type); + +void irc_change_cbr_vbv_bit_rate(cbr_buffer_handle ps_cbr_buffer, + WORD32 *i4_bit_rate); /* i4_bit_rate: array of MAX_NUM_DRAIN_RATES bit rates */ + +void irc_change_cbr_vbv_tgt_frame_rate(cbr_buffer_handle ps_cbr_buffer, + WORD32 i4_tgt_frm_rate); + +void irc_change_cbr_vbv_num_pics_in_delay_period(cbr_buffer_handle ps_cbr_buffer, + UWORD32 *u4_num_pics_in_delay_prd); + +void irc_change_cbr_buffer_delay(cbr_buffer_handle ps_cbr_buffer, + WORD32 i4_buffer_delay); /* i4_buffer_delay is in ms */ +#endif /* CBR_BUFFER_CONTROL_H */ + + diff --git a/encoder/irc_cntrl_param.h b/encoder/irc_cntrl_param.h new file mode 100755 index 0000000..82235f7 --- /dev/null +++ b/encoder/irc_cntrl_param.h @@ -0,0 +1,59 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +#ifndef _RC_CNTRL_PARAM_H_ +#define _RC_CNTRL_PARAM_H_ + +/* This file should contain only enumerations exported to codec by RC */ + +/* RC algo type */ +typedef enum +{ + VBR_STORAGE = 0, + VBR_STORAGE_DVD_COMP = 1, + VBR_STREAMING = 2, + CONST_QP = 3, + CBR_LDRC = 4, + CBR_NLDRC = 5 + +} rc_type_e; + +/* Picture type enumeration: I_PIC, P_PIC and B_PIC are 0, 1 and 2 and MAX_PIC_TYPE (3) is their count; BUF_PIC is -1, so it is not a valid index into an array sized by MAX_PIC_TYPE */ +typedef enum +{ + BUF_PIC = -1, I_PIC = 0, P_PIC, B_PIC, MAX_PIC_TYPE + +} picture_type_e; + +/* MB type enumeration; MAX_MB_TYPE is the number of MB types */ +typedef enum +{ + /* Adding an MB type before MAX_MB_TYPE grows every array sized by MAX_MB_TYPE */ + MB_TYPE_INTRA, MB_TYPE_INTER, MAX_MB_TYPE +} mb_type_e; + +typedef enum +{ + VBV_NORMAL, VBV_UNDERFLOW, VBV_OVERFLOW, VBR_CAUTION + +} vbv_buf_status_e; /* buffer status values; note the last enumerator is spelled VBR_CAUTION, not VBV_CAUTION */ + +#endif /* _RC_CNTRL_PARAM_H_ */ + + diff --git a/encoder/irc_common.h b/encoder/irc_common.h new file mode 100755 index 0000000..c341de4 --- /dev/null +++ b/encoder/irc_common.h @@ -0,0 +1,104 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +#ifndef _RC_COMMON_H_ +#define _RC_COMMON_H_ + +/**************************************************************************** + NOTE : Put only those things into this file which are common across many + files, say I_TO_P_BIT_RATIO macro is used across irc_bit_allocation.c + and irc_rate_control_api.c. If anything is exclusive only to one file, + define it in the same file + + This file is an RC private file. It should not be exported to Codec + + The *_var_q macros below do not parenthesize their arguments and most + are not wrapped in parentheses themselves, so pass only simple + identifiers (or already parenthesized expressions) to them. + ****************************************************************************/ + +#define UNUSED(x) ((void)(x)) + +typedef float number_t; + +#define mult32_var_q(a,b,c) *c = a * b + +#define div32_var_q(a,b,c) (*c = ((b == 0)? a : (a / b))) + +#define add32_var_q(a,b,c) *c = a + b + +#define sub32_var_q(a,b,c) *c = a - b + +#define sqrt32_var_q(a, c) *c = sqrt(a) + +#define number_t_to_word32(num_a, a) *a = (WORD32)num_a + +#define convert_float_to_fix(a_f, a) *a = (WORD32)a_f + +#define convert_fix_to_float(a, a_f) *a_f = (float) a + +#define SET_VAR_Q(a,b,c) {a = (float) b;} + + +/* Defines the maximum quantizer allowed in the stream.*/ +#define MAX_MPEG2_QP 255 /* 127*/ + +/* Bits ratio between I and P frame */ +#define I_TO_P_BIT_RATIO 5 + +/* Calculates P = (X*Y/Z) for integer X, Y, Z and P. The arithmetic is done in number_t (float) and the result is converted to WORD32 by a cast; when Z is 0 the division is skipped and P receives X*Y */ +#define X_PROD_Y_DIV_Z(X1,Y1,Z1,P1)\ +{\ + number_t vq_a,vq_b,vq_c;\ + SET_VAR_Q(vq_a,(X1),0);\ + SET_VAR_Q(vq_b,(Y1),0);\ + SET_VAR_Q(vq_c,(Z1),0);\ + mult32_var_q(vq_a,vq_b,&vq_a);\ + div32_var_q(vq_a,vq_c,&vq_a);\ + number_t_to_word32(vq_a,&(P1));\ +} +#define VQ_A_LT_VQ_B(A,B, Z) Z = A < B; +#define VQ_A_GT_VQ_B(A,B, Z) Z = A > B; + +/* Z=MAX(A,B) where A, B and Z are var_q variables */ +#define MAX_VARQ(A,B, Z)\ +{\ + WORD32 a_gt_b;\ + VQ_A_GT_VQ_B((A), (B), a_gt_b);\ + (Z) = (a_gt_b) ? (A) : (B);\ +} + +/* Z=MIN(A,B) where A, B and Z are var_q variables */ +#define MIN_VARQ(A,B, Z)\ +{\ + WORD32 a_lt_b;\ + VQ_A_LT_VQ_B((A), (B), a_lt_b);\ + (Z) = (a_lt_b) ? 
(A) : (B);\ +} + +/* Maximum number of drain-rates supported. Currently a maximum of only 2 + drain-rates supported. One for + I pictures and the other for P & B pictures */ +#define MAX_NUM_DRAIN_RATES 2 + +/* The ratios between I to P and P to B Qp are specified here */ +#define K_Q 4 +#define I_TO_P_RATIO (19) /* In K_Q Q factor */ +#define P_TO_B_RATIO (21) /* In K_Q Q factor */ +#define P_TO_I_RATIO (13) /* In K_Q Q factor */ + +#endif /* _RC_COMMON_H_ */ + + diff --git a/encoder/irc_datatypes.h b/encoder/irc_datatypes.h new file mode 100755 index 0000000..8e4685a --- /dev/null +++ b/encoder/irc_datatypes.h @@ -0,0 +1,64 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* irc_datatypes.h +* +* @brief +* Type definitions used in the code +* +* @author +* Ittiam +* +* @remarks +* The include guard is _IH264_TYPEDEFS_H_, so the body of this header is skipped whenever that macro is already defined by an earlier include +* +******************************************************************************* +*/ + +#ifndef _IH264_TYPEDEFS_H_ +#define _IH264_TYPEDEFS_H_ + + +/*****************************************************************************/ +/* Unsigned data types */ +/*****************************************************************************/ +typedef unsigned char UWORD8; +typedef unsigned short UWORD16; +typedef unsigned int UWORD32; +typedef unsigned long long UWORD64; + + +/*****************************************************************************/ +/* Signed data types (no 64-bit signed type is defined in this header) */ +/*****************************************************************************/ +typedef signed char WORD8; +typedef short WORD16; +typedef int WORD32; + + +/*****************************************************************************/ +/* Miscellaneous data types */ +/*****************************************************************************/ +typedef char CHAR; +typedef double DOUBLE; + +#endif /* _IH264_TYPEDEFS_H_ */ diff --git a/encoder/irc_est_sad.c b/encoder/irc_est_sad.c new file mode 100755 index 0000000..0d8abc2 --- /dev/null +++ b/encoder/irc_est_sad.c @@ -0,0 +1,260 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* Includes */ +/*****************************************************************************/ + +/* User include files */ +#include "irc_datatypes.h" +#include "irc_cntrl_param.h" +#include "irc_mem_req_and_acq.h" +#include "irc_est_sad.h" +#include "irc_common.h" + +typedef struct est_sad_t +{ + WORD32 i4_use_est_intra_sad; + + /* Previous frame SAD, indexed by picture type */ + UWORD32 au4_prev_frm_sad[MAX_PIC_TYPE]; + + /* Running sum of the non-I frame SADs of the current (nth) ifi; it is divided by the frame count to obtain the average */ + UWORD32 u4_n_p_frm_ifi_avg_sad; + + /* (n-1)th ifi average P frame SAD */ + UWORD32 u4_n_1_p_frm_ifi_avg_sad; + + /* (n-2)th ifi average P frame SAD */ + UWORD32 u4_n_2_p_frm_ifi_avg_sad; + + /* number of ifi encoded till now (the counter stops at 2) */ + WORD32 i4_num_ifi_encoded; + + /* number of non-I frames accumulated in the current IFI */ + WORD32 i4_num_p_frm_in_cur_ifi; + +} est_sad_t; + +WORD32 irc_est_sad_num_fill_use_free_memtab(est_sad_t **pps_est_sad, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type) +{ + WORD32 i4_mem_tab_idx = 0; + static est_sad_t s_est_sad; + + /* Hack for all alloc, during which we don't have any state memory. 
+ * Dereferencing can cause issues + */ + if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB) + (*pps_est_sad) = &s_est_sad; + + /* For the estimated SAD state structure */ + if(e_func_type != GET_NUM_MEMTAB) + { + fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(est_sad_t), + ALIGN_128_BYTE, PERSISTENT, DDR); + use_or_fill_base(&ps_memtab[0], (void**)pps_est_sad, e_func_type); + } + i4_mem_tab_idx++; + + return (i4_mem_tab_idx); +} + +void irc_init_est_sad(est_sad_t *ps_est_sad, WORD32 i4_use_est_intra_sad) +{ + WORD32 i; + ps_est_sad->i4_use_est_intra_sad = i4_use_est_intra_sad; + + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ps_est_sad->au4_prev_frm_sad[i] = 0; + } + + ps_est_sad->u4_n_p_frm_ifi_avg_sad = 0; + ps_est_sad->u4_n_1_p_frm_ifi_avg_sad = 0; + ps_est_sad->u4_n_2_p_frm_ifi_avg_sad = 0; + ps_est_sad->i4_num_ifi_encoded = 0; + ps_est_sad->i4_num_p_frm_in_cur_ifi = 0; +} + +void irc_reset_est_sad(est_sad_t *ps_est_sad) +{ + irc_init_est_sad(ps_est_sad, ps_est_sad->i4_use_est_intra_sad); +} + +/* + * Get estimated SAD can be called at any point. The various use cases are: + * 1) When a I frame is getting encoded, + * - get the estimated of P => No issues since we use the last coded P frame + * value + * - get estimated of I => This call for two cases: + * => a) if num_ifi_encoded is less than 2 + * then return the previous encoded I frame sad + * => b) if num_ifi_encoded is more than 2, then we scale + * the prev I sad by the ratio of (n-1) ifi P to n-2 ifi P + * 2) When P frame is getting encoded, + * - get the estimated of P => No issues since we use the last coded P frame value + * - get the estimated of I => Similar to I we have two cases. 
+ * To handle the b) case the running sum of the current IFI is averaged and + * used in place of the (n-1)th average once i4_num_p_frm_in_cur_ifi is non-zero + */ +UWORD32 irc_get_est_sad(est_sad_t *ps_est_sad, picture_type_e e_pic_type) +{ + if(ps_est_sad->i4_use_est_intra_sad) + { + UWORD32 u4_estimated_sad; + if(e_pic_type == P_PIC) + { + u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[P_PIC]; + } + else if(e_pic_type == B_PIC) + { + u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[B_PIC]; + } + else + { + if(ps_est_sad->i4_num_ifi_encoded < 2) + { + /* + * Only one IFI has been encoded and so use the previous I + * frames SAD + */ + u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[I_PIC]; + } + else + { + /* + * Since the n-1 'P' frame IFI would have just accumulated the + * frame sads we average it out here + */ + UWORD32 u4_n_1_p_frm_ifi_avg_sad, u4_n_2_p_frm_ifi_avg_sad; + number_t vq_n_1_p_frm_ifi_avg_sad, vq_n_2_p_frm_ifi_avg_sad; + number_t vq_prev_frm_sad_i; + + /* + * If there are frames in the current IFI start using it to + * estimate the I frame SAD + */ + if(ps_est_sad->i4_num_p_frm_in_cur_ifi) + { + u4_n_1_p_frm_ifi_avg_sad = + (ps_est_sad->u4_n_p_frm_ifi_avg_sad + / ps_est_sad->i4_num_p_frm_in_cur_ifi); + u4_n_2_p_frm_ifi_avg_sad = + ps_est_sad->u4_n_1_p_frm_ifi_avg_sad; + } + else + { + u4_n_1_p_frm_ifi_avg_sad = + ps_est_sad->u4_n_1_p_frm_ifi_avg_sad; + u4_n_2_p_frm_ifi_avg_sad = + ps_est_sad->u4_n_2_p_frm_ifi_avg_sad; + } + + /* + * If any of the previous p frame SADs are zeros we just return + * the previous I frame SAD + */ + if(u4_n_1_p_frm_ifi_avg_sad && u4_n_2_p_frm_ifi_avg_sad) + { + SET_VAR_Q(vq_prev_frm_sad_i, + ps_est_sad->au4_prev_frm_sad[I_PIC], 0); + SET_VAR_Q(vq_n_1_p_frm_ifi_avg_sad, + u4_n_1_p_frm_ifi_avg_sad, 0); + SET_VAR_Q(vq_n_2_p_frm_ifi_avg_sad, + u4_n_2_p_frm_ifi_avg_sad, 0); + /* + * Estimated SAD = + *(n-1)th intra frame interval(ifi) P frame Avg SAD * + *(prev I frame SAD / + *(prev (n-2)th intra frame interval(ifi) P frame Avg SAD) + */ + mult32_var_q(vq_prev_frm_sad_i, 
vq_n_1_p_frm_ifi_avg_sad, + &vq_prev_frm_sad_i); + div32_var_q(vq_prev_frm_sad_i, vq_n_2_p_frm_ifi_avg_sad, + &vq_prev_frm_sad_i); + number_t_to_word32(vq_prev_frm_sad_i, + (WORD32*)&u4_estimated_sad); + } + else + { + u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[I_PIC]; + } + } + } + return u4_estimated_sad; + } + else + { + return ps_est_sad->au4_prev_frm_sad[e_pic_type]; /* e_pic_type must be I_PIC, P_PIC or B_PIC here; BUF_PIC (-1) would index before the array */ + } +} + +void irc_update_actual_sad(est_sad_t *ps_est_sad, + UWORD32 u4_actual_sad, + picture_type_e e_pic_type) +{ + ps_est_sad->au4_prev_frm_sad[e_pic_type] = u4_actual_sad; /* e_pic_type must be I_PIC, P_PIC or B_PIC; BUF_PIC (-1) would write before the array */ + + if(ps_est_sad->i4_use_est_intra_sad) + { + if(e_pic_type == I_PIC) + { + /* The requirement is to have two IFI before estimating I frame SAD */ + if(ps_est_sad->i4_num_ifi_encoded < 2) + ps_est_sad->i4_num_ifi_encoded++; + + /* Calculate the average SAD */ + if(ps_est_sad->i4_num_p_frm_in_cur_ifi) + { + ps_est_sad->u4_n_p_frm_ifi_avg_sad /= + ps_est_sad->i4_num_p_frm_in_cur_ifi; + } + else + { + ps_est_sad->u4_n_p_frm_ifi_avg_sad = 0; + } + /* Push the (n-1)th average SAD to the (n-2)th average SAD */ + ps_est_sad->u4_n_2_p_frm_ifi_avg_sad = + ps_est_sad->u4_n_1_p_frm_ifi_avg_sad; + /* Push the nth average SAD to the (n-1)th average SAD */ + ps_est_sad->u4_n_1_p_frm_ifi_avg_sad = + ps_est_sad->u4_n_p_frm_ifi_avg_sad; + /* Reset SAD and number of P frames */ + ps_est_sad->u4_n_p_frm_ifi_avg_sad = 0; + ps_est_sad->i4_num_p_frm_in_cur_ifi = 0; + } + else + { + ps_est_sad->u4_n_p_frm_ifi_avg_sad += u4_actual_sad; + ps_est_sad->i4_num_p_frm_in_cur_ifi++; + } + } +} + +void irc_update_actual_sad_for_intra(est_sad_t *ps_est_sad, + WORD32 i4_intra_frm_cost) +{ + if(!(ps_est_sad->i4_use_est_intra_sad)) + { + irc_update_actual_sad(ps_est_sad, i4_intra_frm_cost, I_PIC); + } +} diff --git a/encoder/irc_est_sad.h b/encoder/irc_est_sad.h new file mode 100755 index 0000000..c8238c9 --- /dev/null +++ b/encoder/irc_est_sad.h @@ -0,0 +1,64 @@ +/****************************************************************************** + * + * 
Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +#ifndef _EST_SAD_H_ +#define _EST_SAD_H_ + +/* + * "est_sad_t->i4_use_est_intra_sad" Flag to control how the I frame SAD is estimated. + * If set to zero + * - it uses the Intra sad calculated by the previous P frame as + * the estimated sad for the current I frame + * else + * - it uses the ratio of P frame sads of the previous two GOPS and + * scales the I Frame sad with this ratio to estimate the current + * I frame SAD + */ + +/* Estimating the Average SAD for the current picture type is done by: + * 1) if picture_type is I + * - Estimated SAD = (n-1)th intra frame interval(ifi) P frame Avg SAD * + * ( prev I frame SAD / (n-2)th intra frame interval(ifi) P frame Avg SAD) + * - if only one IFI is encoded use the previous I frame SAD + * 2) if picture type is P + * - Estimate SAD is previous P frame SAD (a B picture likewise uses the previous B frame SAD) + * 3) The first P frame in a IFI could use a little better logic to decide the + * estimated SAD but currently we assume the last coded P frames SAD + */ + +typedef struct est_sad_t *est_sad_handle; + +WORD32 irc_est_sad_num_fill_use_free_memtab(est_sad_handle *est_sad, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type); + +void irc_init_est_sad(est_sad_handle est_sad, WORD32 
i4_use_est_frame_sad); + +UWORD32 irc_get_est_sad(est_sad_handle est_sad, picture_type_e e_pic_type); + +void irc_update_actual_sad(est_sad_handle est_sad, + UWORD32 u4_actual_sad, + picture_type_e e_pic_type); + +void irc_update_actual_sad_for_intra(est_sad_handle est_sad, + WORD32 i4_intra_frm_cost); + +void irc_reset_est_sad(est_sad_handle ps_est_sad); +#endif /* _EST_SAD_H_ */ diff --git a/encoder/irc_fixed_point_error_bits.c b/encoder/irc_fixed_point_error_bits.c new file mode 100755 index 0000000..42dcfc5 --- /dev/null +++ b/encoder/irc_fixed_point_error_bits.c @@ -0,0 +1,185 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> + +/* User include files */ +#include "irc_datatypes.h" +#include "irc_common.h" +#include "irc_cntrl_param.h" +#include "irc_mem_req_and_acq.h" +#include "irc_fixed_point_error_bits.h" + +typedef struct error_bits_t +{ + /* Max tgt frm rate so that dynamic change in frm rate can be handled */ + WORD32 i4_max_tgt_frm_rate; + + /* Accumulator advanced by i4_tgt_frm_rate_incr on every update; reduced by i4_max_tgt_frm_rate once it reaches that value */ + WORD32 i4_cur_tgt_frm_rate; + + /* tgt frame rate*/ + WORD32 i4_tgt_frm_rate; + + /* tgt frm rate increment */ + WORD32 i4_tgt_frm_rate_incr; + + /* flag to indicate 1 second is up */ + UWORD8 u1_compute_error_bits; + + /* Bitrate/frame rate value added over a period */ + WORD32 i4_accum_bitrate; + + /* bitrate */ + WORD32 i4_bitrate; + +} error_bits_t; + +WORD32 irc_error_bits_num_fill_use_free_memtab(error_bits_t **pps_error_bits, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type) +{ + WORD32 i4_mem_tab_idx = 0; + static error_bits_t s_error_bits_temp; + + /* + * Hack for all alloc, during which we don't have any state memory. 
+ * Dereferencing can cause issues + */ + if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB) + (*pps_error_bits) = &s_error_bits_temp; + + /* For the error bits state structure */ + if(e_func_type != GET_NUM_MEMTAB) + { + fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(error_bits_t), + ALIGN_128_BYTE, PERSISTENT, DDR); + use_or_fill_base(&ps_memtab[0], (void**)pps_error_bits, e_func_type); + } + i4_mem_tab_idx++; + + return (i4_mem_tab_idx); +} + +/******************************************************************************* + * @brief Initializes the state used to calculate the error bits due to fixed point divisions + ******************************************************************************/ +void irc_init_error_bits(error_bits_t *ps_error_bits, + WORD32 i4_max_tgt_frm_rate, + WORD32 i4_bitrate) +{ + /* Initializing the parameters*/ + ps_error_bits->i4_cur_tgt_frm_rate = 0; + ps_error_bits->i4_max_tgt_frm_rate = i4_max_tgt_frm_rate; + + /* Value by which i4_cur_tgt_frm_rate is incremented every VOP*/ + ps_error_bits->i4_tgt_frm_rate_incr = 1000; + + /*Compute error bits is set to 1 at the end of 1 second*/ + ps_error_bits->u1_compute_error_bits = 0; + ps_error_bits->i4_tgt_frm_rate = i4_max_tgt_frm_rate; + ps_error_bits->i4_accum_bitrate = 0; + ps_error_bits->i4_bitrate = i4_bitrate; +} + +/******************************************************************************* + * @brief Updates the error state + ******************************************************************************/ +void irc_update_error_bits(error_bits_t *ps_error_bits) +{ + WORD32 i4_bits_per_frame; + + X_PROD_Y_DIV_Z(ps_error_bits->i4_bitrate, 1000, + ps_error_bits->i4_tgt_frm_rate, i4_bits_per_frame); + + /* + * This value is incremented at the end of every VOP by + * i4_tgt_frm_rate_incr + */ + ps_error_bits->i4_cur_tgt_frm_rate += ps_error_bits->i4_tgt_frm_rate_incr; + if(ps_error_bits->u1_compute_error_bits == 1) + { + ps_error_bits->i4_accum_bitrate = 0; + } + 
ps_error_bits->i4_accum_bitrate += i4_bits_per_frame; + + /* + * When current tgt frm rate is equal or greater than max tgt frame rate + * 1 second is up , compute the error bits + */ + if(ps_error_bits->i4_cur_tgt_frm_rate >= ps_error_bits->i4_max_tgt_frm_rate) + { + ps_error_bits->i4_cur_tgt_frm_rate -= + ps_error_bits->i4_max_tgt_frm_rate; + ps_error_bits->u1_compute_error_bits = 1; + } + else + { + ps_error_bits->u1_compute_error_bits = 0; + } +} + +/******************************************************************************* + * @brief Returns the error bits for the current frame if there are any + * + ******************************************************************************/ +WORD32 irc_get_error_bits(error_bits_t *ps_error_bits) +{ + WORD32 i4_error_bits = 0; + + /*If 1s is up calculate error for the last 1s worth of frames*/ + if(ps_error_bits->u1_compute_error_bits == 1) + { + /*Error = Actual bitrate - bits_per_frame * num of frames*/ + i4_error_bits = ps_error_bits->i4_bitrate + - ps_error_bits->i4_accum_bitrate; + } + + return (i4_error_bits); +} + +/* ***************************************************************************** + * + * @brief Change the frame rate parameter for the error bits state + * + ******************************************************************************/ +void irc_change_frm_rate_in_error_bits(error_bits_t *ps_error_bits, + WORD32 i4_tgt_frm_rate) +{ + /* Value by which i4_cur_tgt_frm_rate is incremented every VOP; i4_tgt_frm_rate is used as a divisor and must be non-zero */ + ps_error_bits->i4_tgt_frm_rate_incr = (ps_error_bits->i4_max_tgt_frm_rate + * 1000) / i4_tgt_frm_rate; + ps_error_bits->i4_tgt_frm_rate = i4_tgt_frm_rate; +} + +/******************************************************************************* + * @brief Change the bitrate value for error bits module + ******************************************************************************/ +void irc_change_bitrate_in_error_bits(error_bits_t *ps_error_bits, + WORD32 i4_bitrate) +{ + ps_error_bits->i4_bitrate = 
i4_bitrate; +} + + diff --git a/encoder/irc_fixed_point_error_bits.h b/encoder/irc_fixed_point_error_bits.h new file mode 100755 index 0000000..4ddf1eb --- /dev/null +++ b/encoder/irc_fixed_point_error_bits.h @@ -0,0 +1,64 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* */ +/* File Name : irc_fixed_point_error_bits.h */ +/* */ +/* Description : This file contains all the necessary declarations */ +/* for the fixed point error bits functions */ +/* */ +/* */ +/* List of Functions : <List the functions defined in this file> */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 06 05 2008 Ittiam Draft */ +/* */ +/*****************************************************************************/ + +#ifndef FIXED_POINT_ERROR_BITS_H +#define FIXED_POINT_ERROR_BITS_H + +typedef struct error_bits_t *error_bits_handle; + +WORD32 irc_error_bits_num_fill_use_free_memtab(error_bits_handle *pps_error_bits, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type); + +void irc_init_error_bits(error_bits_handle ps_error_bits, + WORD32 i4_max_tgt_frm_rate, + WORD32 i4_bitrate); + +void irc_update_error_bits(error_bits_handle ps_error_bits); + +WORD32 irc_get_error_bits(error_bits_handle ps_error_bits); + +void irc_change_frm_rate_in_error_bits(error_bits_handle ps_error_bits, + WORD32 i4_tgt_frm_rate); + +void irc_change_bitrate_in_error_bits(error_bits_handle ps_error_bits, + WORD32 i4_bitrate); + +#endif /* FIXED_POINT_ERROR_BITS_H */ + + diff --git a/encoder/irc_frame_info_collector.c b/encoder/irc_frame_info_collector.c new file mode 100755 index 0000000..65f24c4 --- /dev/null +++ b/encoder/irc_frame_info_collector.c @@ -0,0 +1,177 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/******************************************************************************/ +/* File Includes */ +/******************************************************************************/ + +/* User include files */ +#include "irc_datatypes.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" + +void irc_init_frame_info(frame_info_t *frame_info) +{ + WORD32 i; + + for(i = 0; i < MAX_MB_TYPE; i++) + { + frame_info->mb_header_bits[i] = 0; + frame_info->tot_mb_sad[i] = 0; + frame_info->num_mbs[i] = 0; + frame_info->qp_sum[i] = 0; + frame_info->mb_texture_bits[i] = 0; + } + + frame_info->other_header_bits = 0; + frame_info->activity_sum = 0; + frame_info->intra_mb_cost_sum = 0; +} + +/****************************************************************************** + * GET Functions: Sending back collected information to the rate control module. The averaging getters return 0 when the relevant MB count is zero; the getters taking mb_type index the per-MB-type arrays with it without a range check + ******************************************************************************/ +WORD32 irc_fi_get_total_header_bits(frame_info_t *frame_info) +{ + WORD32 total_header_bits = 0, i; + + for(i = 0; i < MAX_MB_TYPE; i++) + { + total_header_bits += frame_info->mb_header_bits[i]; + } + total_header_bits += frame_info->other_header_bits; + + return (total_header_bits); +} + +WORD32 irc_fi_get_total_texture_bits(frame_info_t *frame_info) +{ + WORD32 total_texture_bits = 0, i; + + for(i = 0; i < MAX_MB_TYPE; i++) + { + total_texture_bits 
+= frame_info->mb_texture_bits[i]; + } + + return (total_texture_bits); +} + +WORD32 irc_fi_get_total_frame_sad(frame_info_t *frame_info) +{ + WORD32 total_sad = 0, i; + + for(i = 0; i < MAX_MB_TYPE; i++) + { + total_sad += frame_info->tot_mb_sad[i]; + } + + return (total_sad); +} + +WORD32 irc_fi_get_average_qp(frame_info_t *frame_info) +{ + WORD32 i, total_qp = 0, total_mbs = 0; + + for(i = 0; i < MAX_MB_TYPE; i++) + { + total_qp += frame_info->qp_sum[i]; + total_mbs += frame_info->num_mbs[i]; + } + + if(total_mbs) + { + return (total_qp / total_mbs); + } + else + { + return 0; + } +} + +WORD32 irc_fi_get_avg_mb_header(frame_info_t *frame_info, UWORD8 mb_type) +{ + if(frame_info->num_mbs[mb_type]) + { + return (frame_info->mb_header_bits[mb_type] + / frame_info->num_mbs[mb_type]); + } + else + { + return 0; + } +} + +WORD32 irc_fi_get_total_mb_texture_bits(frame_info_t *frame_info, + UWORD8 mb_type) +{ + return (frame_info->mb_texture_bits[mb_type]); +} + +WORD32 irc_fi_get_total_mb_sad(frame_info_t *frame_info, UWORD8 mb_type) +{ + return (frame_info->tot_mb_sad[mb_type]); +} + +WORD32 irc_fi_get_total_mb_qp(frame_info_t *frame_info, UWORD8 mb_type) +{ + if(frame_info->num_mbs[mb_type]) + { + return (frame_info->qp_sum[mb_type]); /* the QP sum for this MB type, not an average */ + } + else + { + return 0; + } +} + +WORD32 irc_fi_get_total_mb(frame_info_t *frame_info, UWORD8 mb_type) +{ + return (frame_info->num_mbs[mb_type]); +} + +WORD32 irc_fi_get_num_intra_mb(frame_info_t *frame_info) +{ + return (frame_info->num_mbs[MB_TYPE_INTRA]); +} + +WORD32 irc_fi_get_avg_activity(frame_info_t *frame_info) +{ + WORD32 i; + WORD32 i4_tot_mbs = 0; + + for(i = 0; i < MAX_MB_TYPE; i++) + { + i4_tot_mbs += frame_info->num_mbs[i]; + } + + if(i4_tot_mbs) + { + return (frame_info->activity_sum / i4_tot_mbs); + } + else + { + return 0; + } +} + +WORD32 irc_fi_get_total_intra_mb_cost(frame_info_t *frame_info) +{ + return (frame_info->intra_mb_cost_sum); +} diff --git a/encoder/irc_frame_info_collector.h 
b/encoder/irc_frame_info_collector.h new file mode 100755 index 0000000..58dc467 --- /dev/null +++ b/encoder/irc_frame_info_collector.h @@ -0,0 +1,109 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
b/encoder/irc_frame_info_collector.h new file mode 100755 index 0000000..58dc467 --- /dev/null +++ b/encoder/irc_frame_info_collector.h @@ -0,0 +1,109 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +#ifndef _FRAME_INFO_COLLECTOR_H_ +#define _FRAME_INFO_COLLECTOR_H_ + +typedef struct +{ + /* Number of MBs in each type */ + WORD32 num_mbs[MAX_MB_TYPE]; + + /* Sum of all MB SADs of each MB type */ + WORD32 tot_mb_sad[MAX_MB_TYPE]; + + /* Sum of QPs for each mb type */ + WORD32 qp_sum[MAX_MB_TYPE]; + + /* Header bits consumed other than MB headers */ + WORD32 other_header_bits; + + /* Header bits consumed for each type of MBs */ + WORD32 mb_header_bits[MAX_MB_TYPE]; + + /* Texture bits consumed for each type of MBs */ + WORD32 mb_texture_bits[MAX_MB_TYPE]; + + /* Sum of all MB activity */ + WORD32 activity_sum; + + /* Sum of all the Intra MB cost values for the entire frame */ + WORD32 intra_mb_cost_sum; + +} frame_info_t; + +void irc_init_frame_info(frame_info_t *frame_info); + +/* + * Update functions: Collecting information from encoder + */ +#define FI_UPDATE_OTHER_HEADER_BITS(frame_info,header_bits)\ + {(frame_info)->other_header_bits += (header_bits);} + +#define FI_UPDATE_MB_HEADER(frame_info,header_bits,mb_type)\ + {(frame_info)->mb_header_bits[(mb_type)] += (header_bits);} + +#define FI_UPDATE_MB_TEXTURE(frame_info,texture_bits,mb_type)\ + {(frame_info)->mb_texture_bits[(mb_type)] += (texture_bits);} + +#define FI_UPDATE_MB_SAD(frame_info,mb_sad,mb_type)\ + {(frame_info)->tot_mb_sad[(mb_type)] += (mb_sad);} + +#define FI_UPDATE_MB_QP(frame_info,qp,mb_type)\ + {(frame_info)->qp_sum[(mb_type)] += (qp);(frame_info)->num_mbs[(mb_type)]++;} + +#define FI_UPDATE_ACTIVITY(frame_info,mb_activity)\ + {(frame_info)->activity_sum += (mb_activity);} + +#define FI_UPDATE_INTRA_MB_COST(frame_info,intra_mb_cost)\ + {(frame_info)->intra_mb_cost_sum += (intra_mb_cost);} + +/* + * GET Functions: Sending back collected information to the rate control module + */ + +/* Frame Level Model Information */ +WORD32 irc_fi_get_total_header_bits(frame_info_t *frame_info); + +WORD32 irc_fi_get_total_texture_bits(frame_info_t *frame_info); + +WORD32 
irc_fi_get_average_qp(frame_info_t *frame_info); + +WORD32 irc_fi_get_total_frame_sad(frame_info_t *frame_info); + +WORD32 irc_fi_get_avg_activity(frame_info_t *frame_info); + +/* Number of Intra MBs for Scene Change Detection */ +WORD32 irc_fi_get_num_intra_mb(frame_info_t *frame_info); + +/* MB Level Model Information */ +WORD32 irc_fi_get_avg_mb_header(frame_info_t *frame_info, UWORD8 mb_type); + +WORD32 irc_fi_get_total_mb_texture_bits(frame_info_t *frame_info, + UWORD8 mb_type); + +WORD32 irc_fi_get_total_mb_sad(frame_info_t *frame_info, UWORD8 mb_type); + +WORD32 irc_fi_get_total_mb_qp(frame_info_t *frame_info, UWORD8 mb_type); + +WORD32 irc_fi_get_total_mb(frame_info_t *frame_info, UWORD8 mb_type); + +WORD32 irc_fi_get_total_intra_mb_cost(frame_info_t *frame_info); +#endif diff --git a/encoder/irc_mb_model_based.c b/encoder/irc_mb_model_based.c new file mode 100755 index 0000000..880ee19 --- /dev/null +++ b/encoder/irc_mb_model_based.c @@ -0,0 +1,157 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_mb_model_based.h"
+
+typedef struct mb_rate_control_t
+{
+    /* Frame Qp */
+    UWORD8 u1_frm_qp;
+
+    /*
+     * Estimated average activity for the current frame (updated with the
+     * previous frame activity since it is independent of picture type whether
+     * it is I or P)
+     */
+    WORD32 i4_avg_activity;
+
+} mb_rate_control_t;
+
+/******************************************************************************
+ Description : Memtab helper for the MB rate control state. Always returns
+               the number of memtabs used by this module (1). For every
+               e_func_type other than GET_NUM_MEMTAB it also fills the memtab
+               entry and hands it, together with the state handle, to
+               use_or_fill_base().
+ ******************************************************************************/
+WORD32 irc_mbrc_num_fill_use_free_memtab(mb_rate_control_t **pps_mb_rate_control,
+                                         itt_memtab_t *ps_memtab,
+                                         ITT_FUNC_TYPE_E e_func_type)
+{
+    WORD32 i4_mem_tab_idx = 0;
+    static mb_rate_control_t s_mb_rate_control_temp;
+
+    /*
+     * Hack for the alloc calls (GET_NUM_MEMTAB / FILL_MEMTAB), during which
+     * we don't have any state memory yet. Dereferencing the handle can cause
+     * issues, so point it at a static dummy instance.
+     */
+    if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+    {
+        (*pps_mb_rate_control) = &s_mb_rate_control_temp;
+    }
+
+    /* For src rate control state structure */
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(mb_rate_control_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        /* Same entry that was just filled (index is 0 at this point) */
+        use_or_fill_base(&ps_memtab[i4_mem_tab_idx],
+                         (void**)pps_mb_rate_control, e_func_type);
+    }
+    i4_mem_tab_idx++;
+
+    return (i4_mem_tab_idx);
+}
+
+/*******************************************************************************
+ MB LEVEL API FUNCTIONS
+ ******************************************************************************/
+
+/******************************************************************************
+ Description : Initialize the mb model and the average activity to default
+               values
+ ******************************************************************************/
+void irc_init_mb_level_rc(mb_rate_control_t *ps_mb_rate_control)
+{
+    /* Set values to default */
+    ps_mb_rate_control->i4_avg_activity = 0;
+}
+
+/******************************************************************************
+ Description : Initialize the mb state with frame level decisions
+ ******************************************************************************/
+void irc_mb_init_frame_level(mb_rate_control_t *ps_mb_rate_control,
+                             UWORD8 u1_frame_qp)
+{
+    /* Update frame level QP */
+    ps_mb_rate_control->u1_frm_qp = u1_frame_qp;
+}
+
+/******************************************************************************
+ Description : Reset the mb activity - Whenever there is SCD
+               the mb activity is reset
+ ******************************************************************************/
+void irc_reset_mb_activity(mb_rate_control_t *ps_mb_rate_control)
+{
+    ps_mb_rate_control->i4_avg_activity = 0;
+}
+
+/******************************************************************************
+ Description : Calculates the mb level qp
+               pi4_mb_qp[0] receives the frame level (model based) QP
+               pi4_mb_qp[1] receives the QP modulated by the MB activity
+
+               The modulated QP is
+                   round(qp * (2 * cur + avg) / (cur + 2 * avg))
+               clamped to at most 1.5 * frame QP. Modulation is skipped when
+               the average activity is 0, when the frame QP is 100 or more,
+               or when the divisor (cur + 2 * avg) is 0.
+ ******************************************************************************/
+void irc_get_mb_qp(mb_rate_control_t *ps_mb_rate_control,
+                   WORD32 i4_cur_mb_activity,
+                   WORD32 *pi4_mb_qp)
+{
+    WORD32 i4_qp;
+    WORD32 i4_avg_activity = ps_mb_rate_control->i4_avg_activity;
+
+    /* Initialize the mb level qp with the frame level qp */
+    i4_qp = ps_mb_rate_control->u1_frm_qp;
+
+    /*
+     * Store the model based QP - This is used for updating the rate control model
+     */
+    pi4_mb_qp[0] = i4_qp;
+
+    /* Modulate the Qp based on the activity */
+    if(i4_avg_activity && (i4_qp < 100))
+    {
+        WORD32 i4_denom = i4_cur_mb_activity + 2 * i4_avg_activity;
+
+        /* Guard the division: a zero divisor leaves the frame QP untouched */
+        if(i4_denom != 0)
+        {
+            WORD32 i4_max_qp = (3 * ps_mb_rate_control->u1_frm_qp) >> 1;
+
+            /* (i4_denom >> 1) rounds the quotient to the nearest integer */
+            i4_qp = ((2 * i4_cur_mb_activity + i4_avg_activity) * i4_qp
+                            + (i4_denom >> 1)) / i4_denom;
+
+            if(i4_qp > i4_max_qp)
+            {
+                i4_qp = i4_max_qp;
+            }
+        }
+    }
+
+    /* Store the qp modulated by mb activity - This is used for encoding the MB */
+    pi4_mb_qp[1] = i4_qp;
+}
+
+/******************************************************************************* + Description : Returns the stored frame level QP + ******************************************************************************/ +UWORD8 irc_get_frm_level_qp(mb_rate_control_t *ps_mb_rate_control) +{ + return (ps_mb_rate_control->u1_frm_qp); +} + +/******************************************************************************* + Description : Update the frame level info collected + ******************************************************************************/ +void irc_mb_update_frame_level(mb_rate_control_t *ps_mb_rate_control, + WORD32 i4_avg_activity) +{ + /* Update the Average Activity */ + ps_mb_rate_control->i4_avg_activity = i4_avg_activity; +} diff --git a/encoder/irc_mb_model_based.h b/encoder/irc_mb_model_based.h new file mode 100755 index 0000000..aad520a --- /dev/null +++ b/encoder/irc_mb_model_based.h @@ -0,0 +1,57 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +#ifndef _MB_MODEL_BASED_H_ +#define _MB_MODEL_BASED_H_ + +typedef struct mb_rate_control_t *mb_rate_control_handle; + +WORD32 irc_mbrc_num_fill_use_free_memtab(mb_rate_control_handle *pps_mb_rate_control, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type); + +/* Initializing the state structure */ +void irc_init_mb_level_rc(mb_rate_control_handle ps_mb_rate_control); + +/* MB parameters that are to be initialized at a frame level */ +void irc_mb_init_frame_level(mb_rate_control_handle ps_mb_rate_control, + UWORD8 u1_frame_qp); + +/* MB Level call to get the mb_level QP */ +void irc_get_mb_qp(mb_rate_control_handle ps_mb_rate_control, + WORD32 i4_cur_mb_activity, + WORD32 *pi4_mb_qp); + +/* MB Parameters that are to be updated at a frame level */ +void irc_mb_update_frame_level(mb_rate_control_handle ps_mb_rate_control, + WORD32 i4_avg_activity); + +/**************************************************************************** + CONTROL FUCNTIONS FROM FRAME LEVEL + ****************************************************************************/ + +/* Returns the stored frame level QP */ +UWORD8 irc_get_frm_level_qp(mb_rate_control_handle ps_mb_rate_control); + +/* Disables activity based qp modulation */ +void irc_reset_mb_activity(mb_rate_control_handle ps_mb_rate_control); + +#endif + diff --git a/encoder/irc_mem_req_and_acq.h b/encoder/irc_mem_req_and_acq.h new file mode 100755 index 0000000..a2946a7 --- /dev/null +++ b/encoder/irc_mem_req_and_acq.h @@ -0,0 +1,179 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file +* ih264e_rc_mem_interface.h +* +* @brief +* This file contains function declaration and structures for rate control +* memtabs +* +* @author +* ittiam +* +* @remarks +* The rate control library is a global library across various codecs. It +* anticipates certain structures definitions. Those definitions are to be +* imported from global workspace. Instead of that, the structures needed for +* rc library are copied in to this file and exported to rc library. If the +* structures / enums / ... in the global workspace change, this file also needs +* to be modified accordingly. 
+*
+******************************************************************************
+*/
+#ifndef IH264E_RC_MEM_INTERFACE_H_
+#define IH264E_RC_MEM_INTERFACE_H_
+
+
+/*****************************************************************************/
+/* Function Macros                                                           */
+/*****************************************************************************/
+
+/*
+ * Fills entry m_j of an array of iv_mem_rec_t records (fields u4_size,
+ * u4_mem_size, u4_mem_alignment and e_mem_type).
+ * NOTE: this function-like macro shares its name with the FILL_MEMTAB
+ * enumerator of ITT_FUNC_TYPE_E below; the macro only expands when the name
+ * is followed by '(' so both can coexist.
+ */
+#define FILL_MEMTAB(m_pv_mem_rec, m_j, m_mem_size, m_align, m_type)         \
+{                                                                           \
+    (m_pv_mem_rec)[(m_j)].u4_size = sizeof(iv_mem_rec_t);                   \
+    (m_pv_mem_rec)[(m_j)].u4_mem_size = (m_mem_size);                       \
+    (m_pv_mem_rec)[(m_j)].u4_mem_alignment = (m_align);                     \
+    (m_pv_mem_rec)[(m_j)].e_mem_type = (m_type);                            \
+}
+
+/*****************************************************************************/
+/* Enums                                                                     */
+/*****************************************************************************/
+
+/* Memory alignment in bytes */
+typedef enum
+{
+    ALIGN_BYTE = 1,
+    ALIGN_WORD16 = 2,
+    ALIGN_WORD32 = 4,
+    ALIGN_WORD64 = 8,
+    ALIGN_128_BYTE = 128
+}ITT_MEM_ALIGNMENT_TYPE_E;
+
+/* Memory usage type */
+typedef enum
+{
+    SCRATCH = 0,
+    PERSISTENT = 1,
+    WRITEONCE = 2
+}ITT_MEM_USAGE_TYPE_E;
+
+/* Memory region in which a buffer is to be placed */
+typedef enum
+{
+    L1D = 0,
+    SL2 = 1,
+    DDR = 3
+}ITT_MEM_REGION_E;
+
+/* Operation requested from the *_num_fill_use_free_memtab functions */
+typedef enum
+{
+    GET_NUM_MEMTAB = 0,
+    FILL_MEMTAB = 1,
+    USE_BASE = 2,
+    FILL_BASE = 3
+}ITT_FUNC_TYPE_E;
+
+
+/*****************************************************************************/
+/* Structures                                                                */
+/*****************************************************************************/
+
+/*NOTE : This should be an exact replica of IALG_MemRec, any change in IALG_MemRec
+ must be replicated here*/
+typedef struct
+{
+    /* Size in bytes */
+    UWORD32 u4_size;
+
+    /* Alignment in bytes */
+    WORD32 i4_alignment;
+
+    /* decides which memory region to be placed */
+    ITT_MEM_REGION_E e_mem_region;
+
+    /* memory is scratch or persistent */
+    ITT_MEM_USAGE_TYPE_E e_usage;
+
+    /* Base pointer for allocated memory */
+    void *pv_base;
+} itt_memtab_t;
+
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+*  @brief This function fills memory record attributes
+*
+*  @par   Description
+*  This function fills memory record attributes
+*
+*  @param[in] ps_mem_tab
+*  pointer to mem records
+*
+*  @param[in] u4_size
+*  size of the record
+*
+*  @param[in] i4_alignment
+*  memory alignment size
+*
+*  @param[in] e_usage
+*  usage
+*
+*  @param[in] e_mem_region
+*  mem region
+*
+*  @return      void
+*
+******************************************************************************
+*/
+void fill_memtab(itt_memtab_t *ps_mem_tab, WORD32 u4_size, WORD32 i4_alignment,
+                 ITT_MEM_USAGE_TYPE_E e_usage, ITT_MEM_REGION_E e_mem_region);
+
+/**
+******************************************************************************
+*
+*  @brief This function uses or fills the base pointer of a memory record
+*
+*  @par   Description
+*  NOTE(review): the definition is not part of this header. Presumably the
+*  base pointer is exchanged between the memory record and the handle
+*  depending on e_func_type (USE_BASE / FILL_BASE) - confirm against the
+*  definition.
+*
+*  @param[in] ps_mem_tab
+*  pointer to mem records
+*
+*  @param[in] ptr_to_be_filled
+*  handle to the memory record storage space
+*
+*  @param[in] e_func_type
+*  enum that dictates fill memory records or use memory records
+*
+*  @return      WORD32 (meaning not documented here - confirm against the
+*               definition)
+*
+******************************************************************************
+*/
+WORD32 use_or_fill_base(itt_memtab_t *ps_mem_tab, void **ptr_to_be_filled,
+                        ITT_FUNC_TYPE_E e_func_type);
+
+
+#endif // IH264E_RC_MEM_INTERFACE_H_
+
diff --git a/encoder/irc_picture_type.c b/encoder/irc_picture_type.c
new file mode 100755
index 0000000..186188c
--- /dev/null
+++ b/encoder/irc_picture_type.c
@@ -0,0 +1,1585 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include "stdio.h" +#include "string.h" + +/* User include files */ +#include "irc_datatypes.h" +#include "irc_cntrl_param.h" +#include "irc_mem_req_and_acq.h" +#include "irc_picture_type.h" +#include "irc_trace_support.h" + +#define MAX_INTER_FRM_INT 10 + +/******************************Pic_details ************************************/ +typedef struct +{ + /* The id sent by the codec */ + WORD32 i4_pic_id; + + /* The pics come in, in this order */ + WORD32 i4_pic_disp_order_no; + + /* I,P,B */ + picture_type_e e_pic_type; + +} pic_details_t; + +/**************************Pic_handling structure *****************************/ +typedef struct pic_handling_t +{ + /*************************************************************************** + * Inputs from the codec + **************************************************************************/ + + /* Number of frames after which an I frame will repeat in display order */ + WORD32 i4_intra_frm_int; + + /* (num_b_pics_in_subgop + 1) */ + WORD32 i4_inter_frm_int; + + /* After these many buffered frames, the pics are encoded */ + WORD32 i4_max_inter_frm_int; + + /* OPEN or CLOSED */ + WORD32 i4_is_gop_closed; + + /* The pic stack 
*/ + /* Stack used to store the input pics in encode order */ + pic_details_t as_pic_stack[MAX_INTER_FRM_INT + 2]; + + /*************************************************************************** + * Counters + **************************************************************************/ + + /* Decides whether a B or ref pic */ + WORD32 i4_buf_pic_no; + + /* Current pic's number in displayed, and gets reset after an I-frm */ + WORD32 i4_pic_disp_order_no; + + /* Number of P frms that have come, in the current gop, so far */ + WORD32 i4_p_count_in_gop; + + /* Number of B frms that have come, in the current gop, so far */ + WORD32 i4_b_count_in_gop; + + /* Number of B frms that have come, in the current subgop, so far */ + WORD32 i4_b_count_in_subgop; + + /*************************************************************************** + * Indices to the pic stack (Since we store the pics in the encode order, + * these vars are modified to meet that) + **************************************************************************/ + + /* B_PIC index */ + WORD32 i4_b_pic_idx; + + /* I,P PIC index */ + WORD32 i4_ref_pic_idx; + + /*************************************************************************** + * Variables operating on the input pics + **************************************************************************/ + + /* Flag denoting whether it's the first gop or not */ + WORD32 i4_is_first_gop; + + /* Number of B_PICs in an incomplete subgop */ + WORD32 i4_b_in_incomp_subgop; + + /* In CLOSED_GOPs, even if inter_frm_int > 1, there can be 2 continous + * P_PICs at the GOP end. 
This takes values of 0 or 1 */ + WORD32 i4_extra_p; + + /*************************************************************************** + * Arrays storing the number of frms in the gop + **************************************************************************/ + + /* In the steady state, what's the pic distribution in display order */ + WORD32 i4_frms_in_gop[MAX_PIC_TYPE]; + + /* + * In case of a change in inter frm int call, the pic distribution in + * that gop in display order + */ + WORD32 i4_frms_in_cur_gop[MAX_PIC_TYPE]; + + /* + * This is used to denote the number of frms remaining to be encoded in the + * current gop + */ + WORD32 i4_rem_frms_in_gop[MAX_PIC_TYPE]; + + /*************************************************************************** + * Variables operating on the output pics + **************************************************************************/ + + /* Counts the frms encoded in a gop */ + WORD32 i4_coded_pic_no; + + /* Counts from the start of stack to the end repeatedly */ + WORD32 i4_stack_count; + + /*************************************************************************** + * Tracking a change in the inputs from the codec + **************************************************************************/ + + /* A flag that is set when the codec calls for a change in inter_frm_int */ + WORD32 i4_change_in_inter_frm_int; + + /* + * When a change_in_inter_frm_int is called, this stores the new + * inter_frm_int + */ + WORD32 i4_new_inter_frm_int; + + /* + * When a change_in_inter_frm_int is called in the middle of a gop,this + * stores the B_PICs in the incomplete subgop of the mixed gop + */ + WORD32 i4_b_in_incomp_subgop_mix_gop; + + /* + * For a CLOSED GOP, when a change_in_inter_frm_int is called in the middle + * of a gop,this is a flag denoting if there is an extra P_PIC in the mixed + * gop + */ + WORD32 i4_extra_p_mix_gop; + + /* A flag that is set when the codec calls for a change in intra_frm_int */ + WORD32 
i4_change_in_intra_frm_int;
+
+    /*
+     * When a change_in_intra_frm_int is called, this stores the new
+     * intra_frm_int
+     */
+    WORD32 i4_new_intra_frm_int;
+
+    /***************************************************************************
+     * Previous pic_stack_indices & details
+     **************************************************************************/
+    pic_details_t s_prev_pic_details;
+
+    WORD32 i4_prev_b_pic_idx;
+
+    WORD32 i4_last_frm_in_gop;
+
+    WORD32 i4_first_gop_encoded;
+
+    /* NITT TBR */
+    picture_type_e e_previous_pic_type;
+
+    WORD32 i4_force_I_frame;
+
+    WORD32 i4_forced_I_frame_cur_frame;
+
+    /* Sum of i4_rem_frms_in_gop[] captured when a new gop is started */
+    WORD32 i4_sum_remaining_frm_in_gop;
+
+    WORD32 i4_mod_temp_ref_cnt;
+
+    /*
+     * Number of frames in the gop that is encoded till now, stored just
+     * before the Force I frame call is made
+     */
+    WORD32 i4_frames_in_fif_gop;
+
+    WORD32 i4_prev_intra_frame_interval;
+
+} pic_handling_t;
+
+static void irc_update_pic_distbn(pic_handling_t *ps_pic_handling,
+                                  WORD32 i4_intra_frm_int,
+                                  WORD32 i4_inter_frm_int,
+                                  WORD32 i4_gop_boundary);
+
+static void find_pic_distbn_in_gop(WORD32 i4_frms_in_gop[MAX_PIC_TYPE],
+                                   WORD32 i4_intra_frm_int,
+                                   WORD32 i4_inter_frm_int,
+                                   WORD32 i4_is_gop_closed,
+                                   WORD32 *pi4_b_in_incomp_subgop,
+                                   WORD32 *pi4_extra_p);
+
+/******************************************************************************
+ Description : Memtab helper for the pic handling state. Always returns the
+               number of memtabs used by this module (1). For every
+               e_func_type other than GET_NUM_MEMTAB it also fills the memtab
+               entry and hands it, together with the state handle, to
+               use_or_fill_base().
+ *****************************************************************************/
+WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_t **pps_pic_handling,
+                                                 itt_memtab_t *ps_memtab,
+                                                 ITT_FUNC_TYPE_E e_func_type)
+{
+    WORD32 i4_mem_tab_idx = 0;
+    static pic_handling_t s_pic_handling_temp;
+
+    /*
+     * Hack for the alloc calls (GET_NUM_MEMTAB / FILL_MEMTAB), during which
+     * we don't have any state memory yet. Dereferencing the handle can cause
+     * issues, so point it at a static dummy instance.
+     */
+    if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+    {
+        (*pps_pic_handling) = &s_pic_handling_temp;
+    }
+
+    /* For src rate control state structure */
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(pic_handling_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        /* Same entry that was just filled (index is 0 at this point) */
+        use_or_fill_base(&ps_memtab[i4_mem_tab_idx], (void**)pps_pic_handling,
+                         e_func_type);
+    }
+    i4_mem_tab_idx++;
+
+    return (i4_mem_tab_idx);
+}
+
+/******************************************************************************
+ Description : initializes the pic handling state struct
+               Both the current and the maximum inter frame interval are
+               set to i4_max_inter_frm_int.
+ *****************************************************************************/
+void irc_init_pic_handling(pic_handling_t *ps_pic_handling,
+                           WORD32 i4_intra_frm_int,
+                           WORD32 i4_max_inter_frm_int,
+                           WORD32 i4_is_gop_closed)
+{
+    /* Declarations */
+    WORD32 i;
+
+    /* Checks */
+    /* Codec Parameters */
+    ps_pic_handling->i4_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_inter_frm_int = i4_max_inter_frm_int;
+    ps_pic_handling->i4_max_inter_frm_int = i4_max_inter_frm_int;
+    ps_pic_handling->i4_is_gop_closed = i4_is_gop_closed;
+
+    /* Pic_stack */
+    memset(ps_pic_handling->as_pic_stack, 0,
+           sizeof(ps_pic_handling->as_pic_stack));
+    memset(&ps_pic_handling->s_prev_pic_details, 0,
+           sizeof(ps_pic_handling->s_prev_pic_details));
+
+    /* Counters */
+    ps_pic_handling->i4_buf_pic_no = 0;
+    ps_pic_handling->i4_pic_disp_order_no = 0;
+
+    /* Indices to the pic_stack */
+    ps_pic_handling->i4_ref_pic_idx = 0;
+    ps_pic_handling->i4_b_pic_idx = 2;
+    ps_pic_handling->i4_prev_b_pic_idx = 2;
+
+    /* Variables working on the input frames */
+    ps_pic_handling->i4_is_first_gop = 1;
+    ps_pic_handling->i4_p_count_in_gop = 0;
+    ps_pic_handling->i4_b_count_in_gop = 0;
+    ps_pic_handling->i4_b_count_in_subgop = 0;
+
+    /* Variables working on the output frames */
+    ps_pic_handling->i4_coded_pic_no = -1;
+    ps_pic_handling->i4_stack_count = -1;
+
+    /* Tracks the changes in the inter frame interval */
+    ps_pic_handling->i4_change_in_inter_frm_int = 0;
+    ps_pic_handling->i4_new_inter_frm_int = i4_max_inter_frm_int;
+
+    /* Tracks the changes in the intra frame interval */
+    ps_pic_handling->i4_change_in_intra_frm_int = 0;
+    ps_pic_handling->i4_new_intra_frm_int = i4_intra_frm_int;
+
+    /* Variables on which the bit allocation is dependent */
+    /* Get the pic distribution in the gop */
+    find_pic_distbn_in_gop(ps_pic_handling->i4_frms_in_gop, i4_intra_frm_int,
+                           i4_max_inter_frm_int, i4_is_gop_closed,
+                           &ps_pic_handling->i4_b_in_incomp_subgop,
+                           &ps_pic_handling->i4_extra_p);
+
+    /* The current and the remaining distribution start as the steady state */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_pic_handling->i4_frms_in_cur_gop[i] =
+                        ps_pic_handling->i4_frms_in_gop[i];
+        ps_pic_handling->i4_rem_frms_in_gop[i] =
+                        ps_pic_handling->i4_frms_in_gop[i];
+    }
+
+    ps_pic_handling->e_previous_pic_type = I_PIC;
+    ps_pic_handling->i4_prev_intra_frame_interval = i4_intra_frm_int;
+    ps_pic_handling->i4_force_I_frame = 0;
+    ps_pic_handling->i4_forced_I_frame_cur_frame = 0;
+    ps_pic_handling->i4_sum_remaining_frm_in_gop = 0;
+    ps_pic_handling->i4_mod_temp_ref_cnt = 0;
+
+    ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+                    ps_pic_handling->i4_b_in_incomp_subgop;
+    ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;
+
+    ps_pic_handling->i4_last_frm_in_gop = 0;
+    ps_pic_handling->i4_first_gop_encoded = 0;
+    ps_pic_handling->i4_frames_in_fif_gop = 0;
+
+}
+
+/*******************************************************************************
+ * @brief registers the new intra frame interval value
+ *        (only the change flag and the new value are stored here)
+ ******************************************************************************/
+void irc_pic_handling_register_new_int_frm_interval(pic_handling_t *ps_pic_handling,
+                                                    WORD32 i4_intra_frm_int)
+{
+    ps_pic_handling->i4_change_in_intra_frm_int = 1;
+    ps_pic_handling->i4_new_intra_frm_int = i4_intra_frm_int;
+}
+
+/*******************************************************************************
+ * @brief registers the new inter frame interval value
+ *        (only the change flag and the new value are stored here)
+ ******************************************************************************/
+void irc_pic_handling_register_new_inter_frm_interval(pic_handling_t *ps_pic_handling,
+                                                      WORD32 i4_inter_frm_int)
+{
+    /* Update the state structure with the latest values */
+    ps_pic_handling->i4_change_in_inter_frm_int = 1;
+    ps_pic_handling->i4_new_inter_frm_int = i4_inter_frm_int;
+}
+
+/*******************************************************************************
+ * @brief Performs the end of gop updates: resets the display order and
+ *        buffered pic counters, records how many frames the finished gop had
+ *        and how many were still pending, and reloads the current/remaining
+ *        pic distribution from the steady state distribution
+ ******************************************************************************/
+static void start_new_gop(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i;
+    WORD32 i4_sum_remaining_frm_in_gop = 0;
+
+    /* Now, the end of gop updates */
+    ps_pic_handling->i4_pic_disp_order_no = 0;
+    ps_pic_handling->i4_buf_pic_no = 0;
+    ps_pic_handling->i4_is_first_gop = 0;
+    ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;
+
+    if(ps_pic_handling->i4_is_gop_closed)
+    {
+        ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+                        ps_pic_handling->i4_b_in_incomp_subgop;
+    }
+    /*
+     * Store the number of frames in the gop that is encoded till now
+     * just before Force I frame call is made
+     */
+    ps_pic_handling->i4_frames_in_fif_gop = ps_pic_handling->i4_b_count_in_gop
+                    + ps_pic_handling->i4_p_count_in_gop + 1;
+    /* Must be summed before i4_rem_frms_in_gop[] is reloaded below */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        i4_sum_remaining_frm_in_gop += ps_pic_handling->i4_rem_frms_in_gop[i];
+    }
+    ps_pic_handling->i4_sum_remaining_frm_in_gop = i4_sum_remaining_frm_in_gop;
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_pic_handling->i4_frms_in_cur_gop[i] =
+                        ps_pic_handling->i4_frms_in_gop[i];
+        ps_pic_handling->i4_rem_frms_in_gop[i] =
+                        ps_pic_handling->i4_frms_in_cur_gop[i];
+    }
+}
+
+/*******************************************************************************
+ * @brief Fills the pic_stack with the incoming pics in encode order
+ ******************************************************************************/
+void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id)
+{
+    /* Declarations */
+    WORD32 i4_inter_frm_int, i4_max_inter_frm_int,
+                    i4_intra_frm_int, i4_new_inter_frm_int;
+    WORD32 i4_is_gop_closed;
+    WORD32 i4_buf_pic_no, i4_pic_disp_order_no;
+    WORD32 i4_b_pic_idx, i4_ref_pic_idx;
+    WORD32 i4_is_first_gop, i4_b_in_incomp_subgop, i4_p_count_in_gop,
+                    i4_b_count_in_gop, i4_b_count_in_subgop;
+    WORD32 i, i4_p_frms_in_prd,
/*******************************************************************************
 * @brief Fills the pic_stack with the incoming pics in encode order
 *
 * Outline of one call:
 *   1) If a forced I frame is pending, decide whether a new gop must be
 *      started right now (start_new_gop).
 *   2) Apply pending intra / inter frame interval changes through
 *      irc_update_pic_distbn when the picture sits on a suitable boundary.
 *   3) Classify the picture as B, I or P and store its details in
 *      as_pic_stack[] (B pictures at i4_b_pic_idx, reference pictures at
 *      i4_ref_pic_idx), then advance the corresponding index.
 *   4) Update the gop bookkeeping and write the modified locals back into
 *      the state structure.
 *
 * @param ps_pic_handling  picture handling state (read and updated)
 * @param i4_enc_pic_id    id of the incoming picture, stored in the stack slot
 ******************************************************************************/
void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id)
{
    /* Declarations */
    WORD32 i4_inter_frm_int, i4_max_inter_frm_int,
                    i4_intra_frm_int, i4_new_inter_frm_int;
    WORD32 i4_is_gop_closed;
    WORD32 i4_buf_pic_no, i4_pic_disp_order_no;
    WORD32 i4_b_pic_idx, i4_ref_pic_idx;
    WORD32 i4_is_first_gop, i4_b_in_incomp_subgop, i4_p_count_in_gop,
                    i4_b_count_in_gop, i4_b_count_in_subgop;
    WORD32 i, i4_p_frms_in_prd, i4_b_frms_in_prd,
                    i4_num_b_in_subgop, i4_extra_p;
    WORD32 i4_condn_for_change_in_inter_frm_int;
    picture_type_e e_previous_pic_type, e_cur_pic_type;
    WORD32 i4_force_I_frame;

    /*
     * Initialize the local vars with the state struct values needed by the
     * change calls
     */
    i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
    i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
    i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
    i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;

    i4_buf_pic_no = ps_pic_handling->i4_buf_pic_no;
    i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
    i4_b_count_in_gop = ps_pic_handling->i4_b_count_in_gop;
    i4_b_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[B_PIC];
    i4_is_first_gop = ps_pic_handling->i4_is_first_gop;
    i4_new_inter_frm_int = ps_pic_handling->i4_new_inter_frm_int;
    e_previous_pic_type = ps_pic_handling->e_previous_pic_type;
    i4_force_I_frame = ps_pic_handling->i4_force_I_frame;

    /* Force I frame :
     * Two different cases
     * 1) OPEN_GOP: a new GOP is started after the number of B pictures in the
     *    last sub gop of a gop, to mimic the GOP structure.
     * 2) CLOSED_GOP: wait for a P frame at the input; the frame after that P
     *    frame starts a new GOP, to mimic the GOP structure.
     */
    if(i4_force_I_frame)
    {
        WORD32 i4_temp_is_gop_closed;
        WORD32 i4_codn = 0;
        /* A special case of Open GOP where it behaves like Closed GOP */
        /* NOTE(review): i4_inter_frm_int is used as a modulus here and below;
         * a value of 0 would be a division by zero */
        if((i4_intra_frm_int % i4_inter_frm_int) == 1)
        {
            i4_temp_is_gop_closed = 1;
        }
        else
        {
            i4_temp_is_gop_closed = i4_is_gop_closed;
        }
        /* Get the current picture type to aid decision to force an I frame */
        /* (same B/I/P classification as the stack-filling code further down) */
        if((i4_buf_pic_no % i4_inter_frm_int)
                        && !(i4_is_gop_closed
                                        && (i4_b_count_in_gop == i4_b_frms_in_prd)))
        {
            e_cur_pic_type = B_PIC;
        }
        else
        {
            if(i4_pic_disp_order_no == 0)
            {
                e_cur_pic_type = I_PIC;
            }
            else
            {
                e_cur_pic_type = P_PIC;
            }
        }
        if((i4_intra_frm_int % i4_inter_frm_int) == 0)
        {
            i4_codn = (e_cur_pic_type == P_PIC);
        }
        else
        {
            i4_codn = (ps_pic_handling->i4_b_count_in_subgop
                            == ps_pic_handling->i4_b_in_incomp_subgop);
        }
        if(e_cur_pic_type == I_PIC)
        {
            /*
             * Don't do anything. Resetting the force I frame flag
             * since the current picture type is already a I frame
             */
            i4_force_I_frame = 0;
        }
        else if(i4_inter_frm_int == 1)
        {
            /* IPP case, force I frame immediately */
            start_new_gop(ps_pic_handling);
        }
        else if((!i4_temp_is_gop_closed) && i4_codn)
        {
            start_new_gop(ps_pic_handling);
            if(ps_pic_handling->i4_b_count_in_subgop)
            {
                ps_pic_handling->i4_b_pic_idx += 1;
                ps_pic_handling->i4_b_pic_idx %= (i4_max_inter_frm_int + 1);
            }
        }
        else if(i4_temp_is_gop_closed && (e_previous_pic_type == P_PIC)
                        && (e_cur_pic_type != P_PIC))
        {
            start_new_gop(ps_pic_handling);
            ps_pic_handling->i4_b_pic_idx++;
            ps_pic_handling->i4_b_pic_idx %= (i4_max_inter_frm_int + 1);
        }
        /* start_new_gop() clears i4_is_first_gop in the state; refresh the copy */
        i4_is_first_gop = ps_pic_handling->i4_is_first_gop;
    }


    /***********************CHANGE_INTRA_FRM_INTERVAL**************************
     *
     * Call the irc_update_pic_distbn if
     *      1)Change in intra frm interval flag is set
     *      2)It's the first B_PIC of a gop
     *
     * Note: the display order number tested here is the local copy read at
     * the top of the function, i.e. before any start_new_gop() reset.
     */
    if((ps_pic_handling->i4_change_in_intra_frm_int == 1)
                    && ((i4_pic_disp_order_no == 1)))
    {
        irc_update_pic_distbn(ps_pic_handling,
                              ps_pic_handling->i4_new_intra_frm_int,
                              ps_pic_handling->i4_inter_frm_int, 1);

        ps_pic_handling->i4_change_in_intra_frm_int = 0;

        if(ps_pic_handling->i4_new_intra_frm_int == 1)
        {
            ps_pic_handling->i4_pic_disp_order_no = 0;
        }
    }
    /*********************CHANGE_INTER_FRM_INTERVAL****************************/
    /* Call irc_update_pic_distbn if
     *      1)Change in inter frm interval flag is set
     *      2)It's the first B_PIC after gop/subgop start, and
     *      3)The new inter-frm-interval won't cross the intra_frm_interval
     */
    if((ps_pic_handling->i4_change_in_inter_frm_int == 1)
                    && ((i4_buf_pic_no % i4_inter_frm_int == 1)
                                    || (i4_pic_disp_order_no == 1)
                                    || (i4_inter_frm_int == 1)))
    {
        /*
         * Condition which checks if the new inter_frm_int will cross the
         * intra_frm_int
         */
        i4_condn_for_change_in_inter_frm_int = ((i4_pic_disp_order_no
                        + i4_new_inter_frm_int - 1) < i4_intra_frm_int);

        if(i4_condn_for_change_in_inter_frm_int)
        {
            /* If the inter_frm_int = 1, then the b_pic_idx needs to be modified */
            if(i4_inter_frm_int == 1)
            {
                ps_pic_handling->i4_b_pic_idx = (1
                                + ps_pic_handling->i4_ref_pic_idx)
                                % (i4_max_inter_frm_int + 1);
            }

            /*
             * Depending on the gop/subgop boundary, call the change_inter_frm_int
             *
             * TO DO: make a single call, change the name of the fxn to
             * update_state,
             * where state = frms_in_gop + b_incomp_subgop + extra_p
             */

            /* GOP boundary */
            if(i4_pic_disp_order_no == 1)
            {
                irc_update_pic_distbn(ps_pic_handling,
                                      ps_pic_handling->i4_intra_frm_int,
                                      ps_pic_handling->i4_new_inter_frm_int, 1);
            }
            /* Subgop boundary */
            else
            {
                irc_update_pic_distbn(ps_pic_handling,
                                      ps_pic_handling->i4_intra_frm_int,
                                      ps_pic_handling->i4_new_inter_frm_int, 0);
            }

            ps_pic_handling->i4_change_in_inter_frm_int = 0;
            ps_pic_handling->i4_new_inter_frm_int =
                            ps_pic_handling->i4_inter_frm_int;
        }

    }

    /*
     * Initialize the local vars with the state struct values. This second
     * read picks up whatever start_new_gop() / irc_update_pic_distbn() wrote
     * into the state above.
     */
    i4_buf_pic_no = ps_pic_handling->i4_buf_pic_no;
    i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
    i4_b_pic_idx = ps_pic_handling->i4_b_pic_idx;
    i4_ref_pic_idx = ps_pic_handling->i4_ref_pic_idx;
    i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
    i4_p_count_in_gop = ps_pic_handling->i4_p_count_in_gop;
    i4_b_count_in_gop = ps_pic_handling->i4_b_count_in_gop;
    i4_b_count_in_subgop = ps_pic_handling->i4_b_count_in_subgop;
    i4_p_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[P_PIC];
    i4_b_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[B_PIC];
    i4_extra_p = ps_pic_handling->i4_extra_p_mix_gop;
    i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
    i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;

    /* Initializing the prev_state vars */
    ps_pic_handling->i4_prev_b_pic_idx = ps_pic_handling->i4_b_pic_idx;

    i4_num_b_in_subgop = (i4_inter_frm_int - 1);

    /*********************** Fill the stack ***********************************/
    /* The next part of the code is organized as
     *
     * if(B_PIC conditions satisfied)
     * {
     *     Fill the pic_stack using the b_pic_index
     *     Update the b_pic_index and the other b_pic related vars for the
     *     next B_PIC
     * }
     * else
     * {
     *     if(I_PIC conditions are satisfied)
     *     {
     *         Fill the pic_stack using the ref_pic_index
     *         Update the ref_pic_index and the other ref_pic related vars for
     *         the next I_PIC/P_PIC
     *     }
     *     else
     *     {
     *         Fill the pic_stack using the ref_pic_index
     *         Update the ref_pic_index and the other ref_pic related vars for
     *         the next I_PIC/P_PIC
     *     }
     * }
     */
    /*
     * Condition for a B_PIC -
     * 1) Other than the first I_PIC and the periodically appearing P_PICs, after
     *    every inter_frm_int, rest all pics are B_PICs
     * 2) In case of CLOSED_GOP, the last frame of the gop has to be a P_PIC
     */

    if((i4_buf_pic_no % i4_inter_frm_int) && !(i4_is_gop_closed
                    && (i4_b_count_in_gop == i4_b_frms_in_prd))) /**** B_PIC ****/
    {
        /* Fill the pic_stack */
        ps_pic_handling->as_pic_stack[i4_b_pic_idx].i4_pic_id = i4_enc_pic_id;
        ps_pic_handling->as_pic_stack[i4_b_pic_idx].e_pic_type = B_PIC;
        ps_pic_handling->as_pic_stack[i4_b_pic_idx].i4_pic_disp_order_no =
                        i4_pic_disp_order_no;

        /* Store Pic type*/
        e_previous_pic_type = B_PIC;

        /* Update the prev_pic_details */
        memcpy(&ps_pic_handling->s_prev_pic_details,
               &ps_pic_handling->as_pic_stack[i4_b_pic_idx],
               sizeof(pic_details_t));

        i4_b_count_in_gop++;
        i4_b_count_in_subgop++;

        /* Update the i4_b_pic_idx */
        if(!i4_is_gop_closed)
        {
            /* If this B_PIC features in one of the complete subgops */
            if((i4_b_count_in_subgop < i4_num_b_in_subgop)
                            && !(i4_b_count_in_gop == i4_b_frms_in_prd))
            {
                i4_b_pic_idx++;
            }
            else /* Else if this B_PIC is the last one in a subgop or gop */
            {
                /*
                 * If this is the last B_PIC of a GOP, depending on the number
                 * of incomp B_pics in the subgop, there can be either only I
                 * or I,P pics between this and the next B_PIC
                 */
                if(i4_b_count_in_gop == i4_b_frms_in_prd)
                {
                    i4_b_pic_idx += (2 + (!i4_b_in_incomp_subgop)); /*Prev*/
                    i4_b_count_in_gop = 0;
                }
                /*
                 * For the last B_PIC of a subgop, there's always a P b/w
                 * this & the next B_PIC
                 */
                else
                {
                    i4_b_pic_idx += 2;
                }
                i4_b_count_in_subgop = 0;
            }
        }
        else
        {
            /* For the last B_PIC of a gop
             * Normally,there will be 3 pics (P,I,P) between this and the next
             * B_PIC for a CLOSED gop, except when
             *  1)Number of P_pics in the gop = 1
             *  2)There is an extra P at the end of the gop
             */
            if(i4_b_count_in_gop == i4_b_frms_in_prd)
            {
                i4_b_pic_idx += (3 + ((i4_b_in_incomp_subgop == 0)
                                && (i4_p_frms_in_prd > 1)
                                && (i4_pic_disp_order_no
                                                != (i4_p_frms_in_prd
                                                                + i4_b_frms_in_prd
                                                                - 1))));

                i4_b_count_in_subgop = 0;
            }
            /* For a B_PIC which is not the last one in a subgop */
            else if(i4_b_count_in_subgop < i4_num_b_in_subgop)
            {
                i4_b_pic_idx++;
            }
            else /* For the last B_PIC of a subgop */
            {
                i4_b_pic_idx += 2;
                i4_b_count_in_subgop = 0;
            }
        }
        /* Stack indices wrap modulo (i4_max_inter_frm_int + 1) */
        i4_b_pic_idx %= (i4_max_inter_frm_int + 1);
    }
    /*********** I or P pic *********/
    else
    {
        ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_id = i4_enc_pic_id;
        ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_disp_order_no =
                        i4_pic_disp_order_no;
        /* Store Pic type (overwritten with P_PIC in the P branch below) */
        e_previous_pic_type = I_PIC;

        /**** I_PIC ****/
        if(i4_pic_disp_order_no == 0)
        {
            ps_pic_handling->as_pic_stack[i4_ref_pic_idx].e_pic_type = I_PIC;

            /* Update the prev_pic_details */
            memcpy(&ps_pic_handling->s_prev_pic_details,
                   &ps_pic_handling->as_pic_stack[i4_ref_pic_idx],
                   sizeof(pic_details_t));
            /*
             * In case of an I-frame depending on OPEN or CLOSED gop,
             * the ref_pic_idx changes
             */
            if((!i4_is_gop_closed) && (i4_is_first_gop == 0))
            {
                if((i4_p_frms_in_prd <= 1) && (i4_b_in_incomp_subgop == 0))
                {
                    i4_ref_pic_idx++;
                }
                /*
                 * From the 2nd gop onwards, the I and first P frame are
                 * separated by the num_b_in_incomp_subgop
                 */
                else
                {
                    i4_ref_pic_idx += (i4_b_in_incomp_subgop + 1);
                }

                ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
                                ps_pic_handling->i4_b_in_incomp_subgop;
            }
            else
            {
                i4_ref_pic_idx++;
            }

            i4_b_count_in_gop = 0;
            i4_p_count_in_gop = 0;
            i4_b_count_in_subgop = 0;

        }
        /**** P_PIC ****/
        else
        {
            ps_pic_handling->as_pic_stack[i4_ref_pic_idx].e_pic_type = P_PIC;
            /* Store Pic type*/
            e_previous_pic_type = P_PIC;

            /* Update the prev_pic_details */
            memcpy(&ps_pic_handling->s_prev_pic_details,
                   &ps_pic_handling->as_pic_stack[i4_ref_pic_idx],
                   sizeof(pic_details_t));

            i4_p_count_in_gop++;
            ps_pic_handling->i4_prev_intra_frame_interval = i4_intra_frm_int;

            /*
             * In case of an P-frame depending on OPEN or CLOSED gop, the
             * ref_pic_idx changes
             */
            if(i4_is_gop_closed && (i4_p_count_in_gop == i4_p_frms_in_prd))
            {
                /*
                 * For the last P_PIC in a gop, if extra_p or incomp_b are
                 * present, the number of such pics between this and the next
                 * ref_pic is (i4_b_in_incomp_subgop + 1)
                 */
                if((i4_p_count_in_gop > 1)
                                && (i4_b_in_incomp_subgop || i4_extra_p))
                {
                    i4_ref_pic_idx += (i4_b_in_incomp_subgop + 1);
                }
                else
                {
                    i4_ref_pic_idx += i4_inter_frm_int;
                }
            }
            else
            {
                i4_ref_pic_idx += i4_inter_frm_int;
            }
        }

        /* Stack indices wrap modulo (i4_max_inter_frm_int + 1) */
        i4_ref_pic_idx %= (i4_max_inter_frm_int + 1);
    }

    /* Update those variables working on the input frames */
    i4_pic_disp_order_no++;
    i4_buf_pic_no++;

    /*
     * For any gop.
     * Note: this compares the state's i4_pic_disp_order_no, which has not yet
     * been overwritten with the incremented local (that happens at the end).
     */
    if(ps_pic_handling->i4_pic_disp_order_no
                    == (i4_max_inter_frm_int - 1 - ((!i4_is_gop_closed)
                                    * ps_pic_handling->i4_b_in_incomp_subgop_mix_gop)))
    {
        for(i = 0; i < MAX_PIC_TYPE; i++)
        {
            ps_pic_handling->i4_rem_frms_in_gop[i] =
                            ps_pic_handling->i4_frms_in_cur_gop[i];
        }

        if((!i4_is_gop_closed) && (i4_is_first_gop)
                        && (ps_pic_handling->i4_rem_frms_in_gop[B_PIC]
                                        > ps_pic_handling->i4_b_in_incomp_subgop_mix_gop))
        {
            ps_pic_handling->i4_rem_frms_in_gop[B_PIC] =
                            ps_pic_handling->i4_frms_in_cur_gop[B_PIC]
                                            - ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
        }
    }

    /* End of GOP updates */
    if(i4_pic_disp_order_no == (i4_p_frms_in_prd + i4_b_frms_in_prd + 1))
    {
        /* Now, the end of gop updates */
        i4_pic_disp_order_no = 0;
        i4_buf_pic_no = 0;
        i4_is_first_gop = 0;
        ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;

        if(i4_is_gop_closed)
        {
            ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
                            ps_pic_handling->i4_b_in_incomp_subgop;
        }

        for(i = 0; i < MAX_PIC_TYPE; i++)
        {
            ps_pic_handling->i4_frms_in_cur_gop[i] =
                            ps_pic_handling->i4_frms_in_gop[i];
        }
    }

    /* Updating the vars which work on the encoded pics */
    /* For the first gop (again tested against the not-yet-updated state values) */
    if(((ps_pic_handling->i4_is_first_gop)
                    && (ps_pic_handling->i4_pic_disp_order_no
                                    == (i4_max_inter_frm_int - 1)))
                    || (i4_intra_frm_int == 1))
    {
        ps_pic_handling->i4_coded_pic_no = 0;
        ps_pic_handling->i4_stack_count = 0;
    }

    /* Update the state struct with the modifiable local vars */
    ps_pic_handling->i4_buf_pic_no = i4_buf_pic_no;
    ps_pic_handling->i4_pic_disp_order_no = i4_pic_disp_order_no;
    ps_pic_handling->i4_b_pic_idx = i4_b_pic_idx;
    ps_pic_handling->i4_ref_pic_idx = i4_ref_pic_idx;
    ps_pic_handling->i4_is_first_gop = i4_is_first_gop;
    ps_pic_handling->i4_p_count_in_gop = i4_p_count_in_gop;
    ps_pic_handling->i4_b_count_in_gop = i4_b_count_in_gop;
    ps_pic_handling->i4_b_count_in_subgop = i4_b_count_in_subgop;
    ps_pic_handling->e_previous_pic_type = e_previous_pic_type;
    ps_pic_handling->i4_force_I_frame = i4_force_I_frame;
}
ps_pic_handling->i4_buf_pic_no = i4_buf_pic_no; + ps_pic_handling->i4_pic_disp_order_no = i4_pic_disp_order_no; + ps_pic_handling->i4_b_pic_idx = i4_b_pic_idx; + ps_pic_handling->i4_ref_pic_idx = i4_ref_pic_idx; + ps_pic_handling->i4_is_first_gop = i4_is_first_gop; + ps_pic_handling->i4_p_count_in_gop = i4_p_count_in_gop; + ps_pic_handling->i4_b_count_in_gop = i4_b_count_in_gop; + ps_pic_handling->i4_b_count_in_subgop = i4_b_count_in_subgop; + ps_pic_handling->e_previous_pic_type = e_previous_pic_type; + ps_pic_handling->i4_force_I_frame = i4_force_I_frame; +} + +/******************************************************************************* + * @brief Returns the picture type, ip and display order number for the frame to + * be encoded + ******************************************************************************/ +void irc_get_pic_from_stack(pic_handling_t *ps_pic_handling, + WORD32 *pi4_pic_id, + WORD32 *pi4_pic_disp_order_no, + picture_type_e *pe_pic_type) +{ + pic_details_t s_pic_details; + pic_details_t *ps_pic_details = &s_pic_details; + + if(ps_pic_handling->i4_stack_count < 0) + { + ps_pic_details->e_pic_type = BUF_PIC; + ps_pic_details->i4_pic_disp_order_no = -1; + ps_pic_details->i4_pic_id = -1; + } + else + { + memcpy(ps_pic_details, + &ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count], + sizeof(pic_details_t)); + + /* Force I frame updations */ + if((ps_pic_handling->i4_force_I_frame == 1) + && (ps_pic_details->e_pic_type == I_PIC)) + { + /* Flag to signal change in remaining bits*/ + ps_pic_handling->i4_forced_I_frame_cur_frame = 1; + ps_pic_handling->i4_force_I_frame = 0; + /* + * Indicates count for no. 
of Pictures whose temporal reference + * has to be modified + * in the new GOP + */ + ps_pic_handling->i4_mod_temp_ref_cnt = + ps_pic_handling->i4_b_in_incomp_subgop + 1; + ps_pic_handling->i4_first_gop_encoded = 1; + } + + /* + * In MPEG2, the temporal reference of the first displayed frame in a + * gop is 0.In case of an OPEN_GOP, the B_PICs of the last subgop in a + * gop, maybe coded as a part of the next gop. Hence, in such conditions + * the pic_disp_order needs to be modified so that it gives an + * indication of the temporal reference + */ + if((!ps_pic_handling->i4_is_gop_closed) + && (ps_pic_handling->i4_first_gop_encoded)) + { + if(!ps_pic_handling->i4_mod_temp_ref_cnt) + { + ps_pic_details->i4_pic_disp_order_no = + (ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_disp_order_no + + ps_pic_handling->i4_b_in_incomp_subgop) + % (ps_pic_handling->i4_prev_intra_frame_interval); + + } + else + { + /* + * due to force I frame First frame will have only + * ps_pic_handling->i4_frames_in_fif_gop number of frames + */ + ps_pic_details->i4_pic_disp_order_no = + (ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_disp_order_no + + ps_pic_handling->i4_b_in_incomp_subgop) + % (ps_pic_handling->i4_frames_in_fif_gop); + ps_pic_handling->i4_mod_temp_ref_cnt--; + } + } + } + + /* Giving this to the Codec */ + *pi4_pic_id = s_pic_details.i4_pic_id; + *pi4_pic_disp_order_no = s_pic_details.i4_pic_disp_order_no; + *pe_pic_type = s_pic_details.e_pic_type; +} + +/******************************************************************************* + * @brief Updates the picture handling state whenever there is changes in input + * parameter + * + ******************************************************************************/ +static void irc_update_pic_distbn(pic_handling_t *ps_pic_handling, + WORD32 i4_intra_frm_int, + WORD32 i4_inter_frm_int, + WORD32 i4_gop_boundary) +{ + /* Declarations */ + WORD32 i4_is_gop_closed; + WORD32 i, 
i4_prev_inter_frm_int, i4_max_inter_frm_int, i4_pic_disp_order_no; + WORD32 i4_b_in_incomp_subgop, i4_extra_p, + i4_b_in_incomp_subgop_mix_gop,i4_extra_p_mix_gop; + WORD32 i4_pb_frms_till_prev_p; + WORD32 ai4_diff_in_frms[MAX_PIC_TYPE]; + + /* Initialize the local vars from the state struct */ + i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed; + i4_prev_inter_frm_int = ps_pic_handling->i4_inter_frm_int; + i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int; + i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop; + i4_extra_p = ps_pic_handling->i4_extra_p; + i4_b_in_incomp_subgop_mix_gop = + ps_pic_handling->i4_b_in_incomp_subgop_mix_gop; + i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p_mix_gop; + i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no; + + i4_pb_frms_till_prev_p = (ps_pic_handling->i4_p_count_in_gop + * i4_prev_inter_frm_int); + + /* Check for the validity of the intra_frm_int */ + if(i4_intra_frm_int <= 0) + { + i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int; + } + /* Check for the validity of the inter_frm_int */ + if((i4_inter_frm_int > i4_max_inter_frm_int) || (i4_inter_frm_int < 0)) + { + i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int; + } + + /* Keep a copy of the older frms_in_gop */ + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ai4_diff_in_frms[i] = ps_pic_handling->i4_frms_in_cur_gop[i]; + } + + /* Update all the variables which are calculated from the inter_frm_int */ + + /* Get the new pic distribution in the gop */ + find_pic_distbn_in_gop(ps_pic_handling->i4_frms_in_gop, i4_intra_frm_int, + i4_inter_frm_int, i4_is_gop_closed, + &i4_b_in_incomp_subgop, &i4_extra_p); + + /* Find the other related variables */ + if(i4_gop_boundary == 0) + { + /* + * Since, the inter frame interval has changed between a gop the + * current gop will be a mixed gop. 
So, we need to find the values of + * the related variables + */ + find_pic_distbn_in_gop(ps_pic_handling->i4_frms_in_cur_gop, + (i4_intra_frm_int - i4_pb_frms_till_prev_p), + i4_inter_frm_int, i4_is_gop_closed, + &i4_b_in_incomp_subgop_mix_gop, + &i4_extra_p_mix_gop); + + ps_pic_handling->i4_frms_in_cur_gop[P_PIC] += + ps_pic_handling->i4_p_count_in_gop; + ps_pic_handling->i4_frms_in_cur_gop[B_PIC] += + ps_pic_handling->i4_b_count_in_gop; + } + else + { + /* + * Since, the inter_frm_interval has changed at a gop boundary, the + * new gop will have all the subgops with the new inter_frm_interval + */ + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ps_pic_handling->i4_frms_in_cur_gop[i] = + ps_pic_handling->i4_frms_in_gop[i]; + } + + i4_b_in_incomp_subgop_mix_gop = i4_b_in_incomp_subgop; + i4_extra_p_mix_gop = i4_extra_p; + } + + /* For bit-allocation the rem_frms_in_gop need to be updated */ + /* Checks needed: + 1) If the encoding is happening on the same gop as that of the buffering */ + if(ps_pic_handling->i4_pic_disp_order_no + >= (i4_max_inter_frm_int - 1- ((!i4_is_gop_closed) + * ps_pic_handling->i4_b_in_incomp_subgop_mix_gop))) + { + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ps_pic_handling->i4_rem_frms_in_gop[i] += + (ps_pic_handling->i4_frms_in_cur_gop[i] + - ai4_diff_in_frms[i]); + } + } + + /* Update the vars which will affect the proper filling of the pic_stack */ + if(i4_pic_disp_order_no == 0) /*Check if redundant*/ + { + ps_pic_handling->i4_buf_pic_no = 0; + } + else + { + ps_pic_handling->i4_buf_pic_no = 1; + } + + ps_pic_handling->i4_b_count_in_subgop = 0; + + /* Update the state struct with the new inter_frm_int */ + ps_pic_handling->i4_inter_frm_int = i4_inter_frm_int; + ps_pic_handling->i4_intra_frm_int = i4_intra_frm_int; + ps_pic_handling->i4_b_in_incomp_subgop = i4_b_in_incomp_subgop; + ps_pic_handling->i4_extra_p = i4_extra_p; + ps_pic_handling->i4_b_in_incomp_subgop_mix_gop = + i4_b_in_incomp_subgop_mix_gop; + ps_pic_handling->i4_extra_p_mix_gop = 
i4_extra_p_mix_gop; + +} + +/* ***************************************************************************** + * @brief Distributes the frames as I, P and B based on intra/inter frame interval. + * Along with it it fills the number of frames in sub-gop and extra p frame + * + ******************************************************************************/ +static void find_pic_distbn_in_gop(WORD32 i4_frms_in_gop[MAX_PIC_TYPE], + WORD32 i4_intra_frm_int, + WORD32 i4_inter_frm_int, + WORD32 i4_is_gop_closed, + WORD32 *pi4_b_in_incomp_subgop, + WORD32 *pi4_extra_p) +{ + /* + * Find the pic distribution in the gop depending on the inter and intra + * frm intervals + */ + i4_frms_in_gop[I_PIC] = 1; + + /* All I frames */ + if(i4_intra_frm_int == 1) + { + i4_frms_in_gop[P_PIC] = 0; + i4_frms_in_gop[B_PIC] = 0; + *pi4_b_in_incomp_subgop = 0; + *pi4_extra_p = 0; + } + else + { + if(i4_is_gop_closed) + { + i4_frms_in_gop[P_PIC] = ((i4_intra_frm_int - 2) / i4_inter_frm_int) + + 1; + + if((((i4_intra_frm_int - 2) / i4_inter_frm_int) * i4_inter_frm_int) + == (i4_intra_frm_int - 2)) + { + *pi4_extra_p = 1; + } + else + { + *pi4_extra_p = 0; + } + } + else + { + i4_frms_in_gop[P_PIC] = ((i4_intra_frm_int - 1) / i4_inter_frm_int); + + *pi4_extra_p = 0; + } + + i4_frms_in_gop[B_PIC] = (i4_intra_frm_int - 1 - i4_frms_in_gop[P_PIC]); + + *pi4_b_in_incomp_subgop = (i4_frms_in_gop[B_PIC] - (i4_inter_frm_int - 1) + * ((i4_intra_frm_int - 1)/ i4_inter_frm_int)); + } +} + +WORD32 irc_pic_type_get_intra_frame_interval(pic_handling_t *ps_pic_handling) +{ + + return (ps_pic_handling->i4_intra_frm_int); +} + +WORD32 irc_pic_type_get_inter_frame_interval(pic_handling_t *ps_pic_handling) +{ + return (ps_pic_handling->i4_inter_frm_int); +} + +void irc_pic_type_get_rem_frms_in_gop(pic_handling_t *ps_pic_handling, + WORD32 ai4_rem_frms_in_gop[MAX_PIC_TYPE]) +{ + memcpy(ai4_rem_frms_in_gop, ps_pic_handling->i4_rem_frms_in_gop, + sizeof(ps_pic_handling->i4_rem_frms_in_gop)); +} + +WORD32 
irc_pic_type_get_frms_in_gop_force_I_frm(pic_handling_t *ps_pic_handling) +{ + return (ps_pic_handling->i4_frames_in_fif_gop); +} + +void irc_pic_type_get_frms_in_gop(pic_handling_t *ps_pic_handling, + WORD32 ai4_frms_in_gop[MAX_PIC_TYPE]) +{ + memcpy(ai4_frms_in_gop, ps_pic_handling->i4_frms_in_cur_gop, + sizeof(ps_pic_handling->i4_frms_in_cur_gop)); +} + +WORD32 irc_pic_type_get_disp_order_no(pic_handling_t *ps_pic_handling) +{ + return (ps_pic_handling->i4_pic_disp_order_no); +} + +void irc_set_force_I_frame_flag(pic_handling_t *ps_pic_handling) +{ + ps_pic_handling->i4_force_I_frame = 1; +} +WORD32 irc_get_forced_I_frame_cur_frm_flag(pic_handling_t *ps_pic_handling) +{ + return (ps_pic_handling->i4_forced_I_frame_cur_frame); +} +void irc_reset_forced_I_frame_cur_frm_flag(pic_handling_t *ps_pic_handling) +{ + ps_pic_handling->i4_forced_I_frame_cur_frame = 0; +} + +/******************************************************************************/ +/* Functions that work on the encoded frames */ +/******************************************************************************/ + +/****************************************************************************** + Function Name : irc_update_pic_handling + Description : Will be called only for the frames to be encoded + *****************************************************************************/ +void irc_update_pic_handling(pic_handling_t *ps_pic_handling, + picture_type_e e_pic_type) +{ + + WORD32 i4_max_inter_frm_int; + WORD32 i; + + /* Initializing the local vars with that of the state struct */ + i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int; + + /* Update the variables working on the output frames */ + /* Update the stack count */ + ps_pic_handling->i4_stack_count++; + + if(ps_pic_handling->i4_stack_count == (i4_max_inter_frm_int + 1)) + { + ps_pic_handling->i4_stack_count = 0; + } + + /* Update the rem_frms_in_gop */ + ps_pic_handling->i4_rem_frms_in_gop[e_pic_type]--; + + /* Assumption : 
Rem_frms_in_gop needs to be taken care of, for every change in frms */ + ps_pic_handling->i4_last_frm_in_gop = 0; + if((ps_pic_handling->i4_rem_frms_in_gop[I_PIC] <= 0) + && (ps_pic_handling->i4_rem_frms_in_gop[P_PIC] <= 0) + && (ps_pic_handling->i4_rem_frms_in_gop[B_PIC] <= 0)) + { + /* Copy the cur_frms_in_gop to the rem_frm_in_gop */ + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ps_pic_handling->i4_rem_frms_in_gop[i] = + ps_pic_handling->i4_frms_in_cur_gop[i]; + } + + ps_pic_handling->i4_last_frm_in_gop = 1; + ps_pic_handling->i4_first_gop_encoded = 1; + } +} + +WORD32 irc_is_last_frame_in_gop(pic_handling_handle ps_pic_handling) +{ + return (ps_pic_handling->i4_last_frm_in_gop); +} + +/****************************************************************************** + Function Name : irc_skip_encoded_frame + Description : Needs to go to the current pic in the pic_stack. + If it's B_PIC don't do anything + If it's a reference picture, push all but the last B_PICs + in the current subgop one place down (i.e. 
just copy their + pic_details) and move the last B_PIC in that subgop to the + next slot of the skipped picture and convert it's pic_type + to that of the reference picture + *****************************************************************************/ +void irc_skip_encoded_frame(pic_handling_t *ps_pic_handling, + picture_type_e e_pic_type) +{ + pic_details_t s_pic_details; + WORD32 i4_stack_count, i4_next_ref_pic_idx, i4_pic_idx; + WORD32 i4_max_inter_frm_int, i4_last_b_pic_idx, i4_first_b_pic_idx; + WORD32 i4_next_pic_idx; + + /* State variables used to initialize the local vars (Not to be changed) */ + i4_stack_count = ps_pic_handling->i4_stack_count; + i4_next_ref_pic_idx = ps_pic_handling->i4_ref_pic_idx; + i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int; + + i4_next_pic_idx = ((i4_stack_count + 1) % (i4_max_inter_frm_int + 1)); + + /* + * Check what is the encoded frm_type + * Changing a B_PIC to a ref_pic is not reqd if + * there are no B_PICs referring from the skipped ref_pic + */ + if(((e_pic_type == P_PIC) || (e_pic_type == I_PIC)) + && (i4_next_pic_idx != i4_next_ref_pic_idx)) + { + /* Go to the last B_PIC before the next_ref_pic */ + if(i4_next_ref_pic_idx == 0) + { + i4_last_b_pic_idx = i4_max_inter_frm_int; + } + else + { + i4_last_b_pic_idx = (i4_next_ref_pic_idx - 1); + } + + /* Keep a copy of the last B_PIC pic_details */ + memcpy(&s_pic_details, + &ps_pic_handling->as_pic_stack[i4_last_b_pic_idx], + sizeof(pic_details_t)); + + i4_pic_idx = i4_last_b_pic_idx; + i4_first_b_pic_idx = (i4_stack_count + 1) % (i4_max_inter_frm_int + 1); + + /* + * All the B_PICs other than the last one, need to be shifted one place + * in the stack + */ + while((i4_pic_idx != i4_stack_count) + && (i4_first_b_pic_idx != i4_last_b_pic_idx)) + { + if(i4_pic_idx == 0) + { + i4_pic_idx = i4_max_inter_frm_int; + } + else + { + i4_pic_idx--; + } + + memcpy(&ps_pic_handling->as_pic_stack[(i4_pic_idx + 1) + % (i4_max_inter_frm_int + 1)], + 
&ps_pic_handling->as_pic_stack[i4_pic_idx], + sizeof(pic_details_t)); + + } + + /* + * Copy the last B_PIC pic_details to the first B_PIC place and change + * it's pic type to the ref_PIC + */ + /*e_ref_pic_type*/ + ps_pic_handling->as_pic_stack[i4_first_b_pic_idx].e_pic_type = P_PIC; + + ps_pic_handling->as_pic_stack[i4_first_b_pic_idx].i4_pic_disp_order_no = + s_pic_details.i4_pic_disp_order_no; + ps_pic_handling->as_pic_stack[i4_first_b_pic_idx].i4_pic_id = + s_pic_details.i4_pic_id; + + /* Change the rem_frms_in_prd so that the update works properly */ + if(ps_pic_handling->i4_rem_frms_in_gop[B_PIC] > 0) + { + ps_pic_handling->i4_rem_frms_in_gop[B_PIC]--; + ps_pic_handling->i4_rem_frms_in_gop[P_PIC]++; + } + } + +} + +/****************************************************************************** + Function Name : flush_frame + Description : Since when a flush frame is called, there will be no valid + frames after it, the last frame cannot be a B_PIC, as there + will be no reference frame for it (Input in display order) + + So,this fxn needs to go to the last added pic in the pic_stack. 
+ If it's reference pic don't do anything + If it's a B_PIC, copy it's pic_details and put it in the + place of the next reference pic, changing the pic_type to + P_PIC + *****************************************************************************/ +void irc_flush_frame_from_pic_stack(pic_handling_t *ps_pic_handling) +{ + + pic_details_t s_prev_pic_details; + + /* Get the last entered pic_details (not to be modified here) */ + WORD32 i4_prev_b_pic_idx = ps_pic_handling->i4_prev_b_pic_idx; + WORD32 i4_ref_pic_idx = ps_pic_handling->i4_ref_pic_idx; + WORD32 i4_b_pic_idx = ps_pic_handling->i4_b_pic_idx; + + memcpy(&s_prev_pic_details, &ps_pic_handling->s_prev_pic_details, + sizeof(pic_details_t)); + + if(s_prev_pic_details.e_pic_type == B_PIC) + { + /* Copy the last B_PIC details to the next reference pic in display order */ + ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_disp_order_no = + s_prev_pic_details.i4_pic_disp_order_no; + ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_id = + s_prev_pic_details.i4_pic_id; + ps_pic_handling->as_pic_stack[i4_ref_pic_idx].e_pic_type = P_PIC; + + /* + * Modify the last B_PIC pic_type, so that codec gets to know when + * all the buffered frames + * are flushed + */ + ps_pic_handling->as_pic_stack[i4_prev_b_pic_idx].e_pic_type = + MAX_PIC_TYPE; + ps_pic_handling->as_pic_stack[i4_prev_b_pic_idx].i4_pic_id = -1; + ps_pic_handling->as_pic_stack[i4_prev_b_pic_idx].i4_pic_disp_order_no = + -1; + } + else + { + /* + * Modify the next pic_type details in the stack, so that codec gets to + * know when all the + * buffered frames are flushed + */ + ps_pic_handling->as_pic_stack[i4_ref_pic_idx].e_pic_type = MAX_PIC_TYPE; + ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_id = -1; + ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_disp_order_no = -1; + + if(ps_pic_handling->i4_inter_frm_int != 1) + { + ps_pic_handling->as_pic_stack[i4_b_pic_idx].e_pic_type = + MAX_PIC_TYPE; + 
/******************************************************************************
 Function Name : irc_add_pic_to_stack_re_enc
 Description   : In case of a re-enc, we can assume the pictures to be coming
                 in the encode order.
                 In case of a re-encoder there are basically three problematic
                 cases:
                 1) Inter_frm_int is not known to start with
                 2) Inter_frm_int can keep changing
                 3) Intra_frm_int set by the application and the one actually
                    present in the decoded bitstream may be different

 Parameters    : ps_pic_handling - picture handling state (read and updated)
                 i4_enc_pic_id   - id of the incoming picture
                 e_pic_type      - picture type of the incoming picture

 Returns       : 0 on success, -1 when the number of consecutive B_PICs
                 exceeds i4_max_inter_frm_int (state is left as it was before
                 the B_PIC was counted, apart from any pending interval change
                 already applied)
 *****************************************************************************/
WORD32 irc_add_pic_to_stack_re_enc(pic_handling_t *ps_pic_handling,
                                   WORD32 i4_enc_pic_id,
                                   picture_type_e e_pic_type)
{
    WORD32 i4_b_count_in_subgop;
    WORD32 i4_max_inter_frm_int, i4_inter_frm_int, i4_intra_frm_int;
    WORD32 i4_pic_disp_order_no;
    WORD32 i4_is_gop_closed;
    picture_type_e e_out_pic_type;
    WORD32 i4_b_in_incomp_subgop;

    /* Check if a change in intra_frm_int call has been made */
    if(ps_pic_handling->i4_change_in_intra_frm_int == 1)
    {
        irc_update_pic_distbn(ps_pic_handling,
                              ps_pic_handling->i4_new_intra_frm_int,
                              ps_pic_handling->i4_inter_frm_int, 1);
        ps_pic_handling->i4_change_in_intra_frm_int = 0;
    }

    /* Check if a change in inter_frm_int call has been made */
    if(ps_pic_handling->i4_change_in_inter_frm_int == 1)
    {
        irc_update_pic_distbn(ps_pic_handling,
                              ps_pic_handling->i4_intra_frm_int,
                              ps_pic_handling->i4_new_inter_frm_int, 1);

        ps_pic_handling->i4_change_in_inter_frm_int = 0;
    }

    /*
     * Initialize the local vars with the state vars (read after the pending
     * interval changes above have been applied)
     */
    i4_b_count_in_subgop = ps_pic_handling->i4_b_count_in_subgop;
    i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
    i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
    i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
    i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
    i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
    i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop;

    e_out_pic_type = e_pic_type;

    /* Initially the rate_control assumes an IPP sequence */
    if(e_pic_type == B_PIC)
    {
        /* Update the number of B_PICs in a subgop */
        i4_b_count_in_subgop++;

        if(i4_b_count_in_subgop > i4_max_inter_frm_int)
        {
            return (-1);
        }

        /* If the number of B_PICs exceed the set inter_frm_int then
         change the inter_frm_int */
        if(i4_b_count_in_subgop > (i4_inter_frm_int - 1))
        {
            i4_inter_frm_int = (i4_b_count_in_subgop + 1);

            /*
             * Note: irc_update_pic_distbn zeroes the state's
             * i4_b_count_in_subgop; the local count is written back at the
             * end of this function and takes precedence.
             */
            irc_update_pic_distbn(ps_pic_handling, i4_intra_frm_int,
                                  i4_inter_frm_int, 0);
        }
    }
    else if((e_pic_type == I_PIC) || (e_pic_type == P_PIC))
    {
        /* If the B_PICs in the prev subgop were fewer than the current
         * (inter_frm_int-1) and none of these conditions occur, it'll mean the
         * decrease in the inter_frm_int
         *    1)End of a GOP
         *    2)Beginning of an OPEN_GOP
         */
        if((i4_b_count_in_subgop < (i4_inter_frm_int - 1))
                        && !((!i4_is_gop_closed)
                                        && (i4_b_count_in_subgop
                                                        >= i4_b_in_incomp_subgop))
                        && !((i4_pic_disp_order_no
                                        + (i4_inter_frm_int - 1
                                                        - i4_b_count_in_subgop))
                                        > i4_intra_frm_int))
        {
            i4_inter_frm_int = (i4_b_count_in_subgop + 1);

            irc_update_pic_distbn(ps_pic_handling, i4_intra_frm_int,
                                  i4_inter_frm_int, 0);
        }

        /* Reset the number of B_PICs in a subgop */
        i4_b_count_in_subgop = 0;
    }

    /* Updation of the frame level vars */
    i4_pic_disp_order_no++;

    /* End of gop condition
     * Two cases can arise :
     * 1) The intra_frm_int set by the application is greater than the actual
     *    bitstream intra_frm_int (i.e. we will get an I frame before
     *    pic_disp_order_no goes to intra_frm_int)
     * 2) The intra_frm_int set by the application is smaller than the actual
     *    bitstream intra_frm_int (i.e. we won't get an I_PIC even if
     *    pic_disp_order_no goes to intra_frm_int)
     * Constraints :
     * 1) I_PIC cannot be changed to B_PIC
     * 2) B_PIC cannot be changed to I_PIC
     */
    if(i4_pic_disp_order_no >= i4_intra_frm_int)
    {
        if(e_pic_type != B_PIC)
        {
            e_out_pic_type = I_PIC;
        }
        else
        {
            /* A B_PIC past the expected gop end stays a B_PIC; the gop's
             * B counts are grown by one to account for it */
            e_out_pic_type = B_PIC;
            ps_pic_handling->i4_rem_frms_in_gop[B_PIC]++;
            ps_pic_handling->i4_frms_in_cur_gop[B_PIC]++;
            ps_pic_handling->i4_frms_in_gop[B_PIC]++;
        }
    }
    else
    {
        if((e_pic_type == I_PIC) && (!ps_pic_handling->i4_is_first_gop))
        {
            /* An early I_PIC (outside the first gop) is reported as a P_PIC
             * and the gop's P counts are grown by one */
            e_out_pic_type = P_PIC;
            ps_pic_handling->i4_rem_frms_in_gop[P_PIC]++;
            ps_pic_handling->i4_frms_in_cur_gop[P_PIC]++;
            ps_pic_handling->i4_frms_in_gop[P_PIC]++;
        }
        else
        {
            e_out_pic_type = e_pic_type;
        }
    }

    /* Update the frm_vars at the end of the gop */
    if(i4_pic_disp_order_no
                    == (ps_pic_handling->i4_frms_in_cur_gop[P_PIC]
                                    + ps_pic_handling->i4_frms_in_cur_gop[B_PIC]
                                    + 1))
    {
        i4_pic_disp_order_no = 0;
        ps_pic_handling->i4_is_first_gop = 0;
    }

    /* Update the vars working on the encoded pics */
    if((ps_pic_handling->i4_is_first_gop)
                    && (ps_pic_handling->i4_stack_count == -1))
    {
        ps_pic_handling->i4_coded_pic_no = 0;
        ps_pic_handling->i4_stack_count = 0;
    }

    /*
     * Add the pic_details to the pic_stack.
     * The display order stored is the state's value, i.e. the number before
     * this call's increment (the local is written back just below).
     * NOTE(review): i4_stack_count is used as an index without a range check;
     * confirm it cannot still be -1 here when i4_is_first_gop is 0.
     */
    ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].e_pic_type =
                    e_out_pic_type;
    ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_disp_order_no =
                    ps_pic_handling->i4_pic_disp_order_no;
    ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_id =
                    i4_enc_pic_id;

    /* Writing back those values which need to be updated */
    ps_pic_handling->i4_inter_frm_int = i4_inter_frm_int;
    ps_pic_handling->i4_pic_disp_order_no = i4_pic_disp_order_no;
    ps_pic_handling->i4_b_count_in_subgop = i4_b_count_in_subgop;

    return (0);
}
--- /dev/null
+++ b/encoder/irc_picture_type.h
@@ -0,0 +1,95 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _PIC_HANDLING_H_
+#define _PIC_HANDLING_H_
+
+/*
+ * Basic Understanding:
+ * irc_add_pic_to_stack(_re_enc):
+ * This function converts the input (or display) order to encoding order
+ * */
+typedef struct pic_handling_t *pic_handling_handle;
+
+WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_handle *pps_pic_handling,
+                                                 itt_memtab_t *ps_memtab,
+                                                 ITT_FUNC_TYPE_E e_func_type);
+
+void irc_init_pic_handling(pic_handling_handle ps_pic_handling,
+                           WORD32 i4_intra_frm_int,
+                           WORD32 i4_max_inter_frm_int,
+                           WORD32 i4_is_gop_closed);
+
+void irc_add_pic_to_stack(pic_handling_handle ps_pic_handling,
+                          WORD32 i4_enc_pic_id);
+
+WORD32 irc_add_pic_to_stack_re_enc(pic_handling_handle ps_pic_handling,
+                                   WORD32 i4_enc_pic_id,
+                                   picture_type_e e_pic_type);
+
+void irc_get_pic_from_stack(pic_handling_handle ps_pic_handling,
+                            WORD32 *pi4_pic_id,
+                            WORD32 *pi4_pic_disp_order_no,
+                            picture_type_e *pe_pic_type);
+
+WORD32 irc_is_last_frame_in_gop(pic_handling_handle ps_pic_handling);
+
+void irc_flush_frame_from_pic_stack(pic_handling_handle ps_pic_handling);
+
+/* NITT TBR The below two functions should be made a single function */
+void irc_skip_encoded_frame(pic_handling_handle ps_pic_handling,
+                            picture_type_e e_pic_type);
+
+void irc_update_pic_handling(pic_handling_handle ps_pic_handling,
+                             picture_type_e e_pic_type);
+
+/*
+ * Function returns the number of frames that have been encoded in the GOP in
+ * which the force I frame takes impact
+ */
+WORD32 irc_pic_type_get_frms_in_gop_force_I_frm(pic_handling_handle ps_pic_handling);
+
+void irc_set_force_I_frame_flag(pic_handling_handle ps_pic_handling);
+
+WORD32 irc_get_forced_I_frame_cur_frm_flag(pic_handling_handle ps_pic_handling);
+
+void irc_reset_forced_I_frame_cur_frm_flag(pic_handling_handle ps_pic_handling);
+
+/* Normal get functions */
+WORD32 irc_pic_type_get_inter_frame_interval(pic_handling_handle ps_pic_handling);
+
+WORD32 irc_pic_type_get_intra_frame_interval(pic_handling_handle ps_pic_handling);
+
+WORD32 irc_pic_type_get_disp_order_no(pic_handling_handle ps_pic_handling);
+
+void irc_pic_handling_register_new_int_frm_interval(pic_handling_handle ps_pic_handling,
+                                                    WORD32 i4_intra_frm_int);
+
+void irc_pic_handling_register_new_inter_frm_interval(pic_handling_handle ps_pic_handling,
+                                                      WORD32 i4_inter_frm_int);
+
+void irc_pic_type_get_rem_frms_in_gop(pic_handling_handle ps_pic_handling,
+                                      WORD32 ai4_rem_frms_in_gop[MAX_PIC_TYPE]);
+
+void irc_pic_type_get_frms_in_gop(pic_handling_handle ps_pic_handling,
+                                  WORD32 ai4_frms_in_gop[MAX_PIC_TYPE]);
+
+#endif /* _PIC_HANDLING_H_ */
+
diff --git a/encoder/irc_rate_control_api.c b/encoder/irc_rate_control_api.c
new file mode 100755
index 0000000..6c6586e
--- /dev/null
+++ b/encoder/irc_rate_control_api.c
@@ -0,0 +1,1600 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_rd_model.h"
+#include "irc_est_sad.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_picture_type.h"
+#include "irc_bit_allocation.h"
+#include "irc_mb_model_based.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_vbr_str_prms.h"
+#include "irc_rate_control_api.h"
+#include "irc_rate_control_api_structs.h"
+#include "irc_trace_support.h"
+
+#define DEV_Q 4 /* Q format (shift) of the deviation range factors below */
+#define HI_DEV_FCTR 22 /* upper factor in Q4: 1.4*16 = 22.4, stored as 22 */
+#define LO_DEV_FCTR 12 /* lower factor in Q4: 0.75*16 = 12 */
+#define GET_HI_DEV_QP(Qprev) ((((WORD32)(Qprev)) * HI_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q) /* rounded Qprev*HI_DEV_FCTR/16 */
+#define GET_LO_DEV_QP(Qprev) ((((WORD32)(Qprev)) * LO_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q) /* rounded Qprev*LO_DEV_FCTR/16 */
+#define CLIP_QP(Qc, hi_d, lo_d) (((Qc) < (lo_d))?((lo_d)):(((Qc) > (hi_d))?(hi_d):(Qc)))
+
+/*****************************************************************************/
+/* Restricts the quantization parameter variation within delta */
+/*****************************************************************************/
+/* static WORD32 restrict_swing(WORD32 cur_qp, WORD32 prev_qp, WORD32 delta_qp)
+ {
+ if((cur_qp) - (prev_qp) > (delta_qp)) (cur_qp) = (prev_qp) + (delta_qp) ;
+ if((prev_qp) - (cur_qp) > (delta_qp)) (cur_qp) = (prev_qp) - (delta_qp) ;
+ return cur_qp;
+ }*/
+
+/*****************************************************************************
+ Function Name : irc_rate_control_num_fill_use_free_memtab
+ Description   : Walks the memtabs of the rate control api and of every lower
+                 module (bit allocation, cbr buffer, est sad, mb rate
+                 control, vbr storage vbv, one rd model per picture type,
+                 pic handling), forwarding e_func_type to each of them
+ Inputs        : pps_rate_control_api - pointer to RC api pointer
+                 ps_memtab            - Memtab pointer
+                 e_func_type          - operation to perform; for
+                                        GET_NUM_MEMTAB no memtab entry of
+                                        this module is touched
+ Returns       : number of memtab entries walked (own entry + lower modules)
+ *****************************************************************************/
+WORD32 irc_rate_control_num_fill_use_free_memtab(rate_control_handle *pps_rate_control_api,
+                                                 itt_memtab_t *ps_memtab,
+                                                 ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Stand-in state used while no real state memory exists yet */
+    static rate_control_api_t s_scratch_rc_api;
+    rate_control_handle ps_rc_api;
+    WORD32 i4_num_memtabs;
+    WORD32 i4_pic_type;
+
+    /*
+     * During GET_NUM_MEMTAB and FILL_MEMTAB there is no state memory, so the
+     * handle is pointed at the static stand-in; dereferencing it below would
+     * otherwise cause issues
+     */
+    if((GET_NUM_MEMTAB == e_func_type) || (FILL_MEMTAB == e_func_type))
+    {
+        *pps_rate_control_api = &s_scratch_rc_api;
+    }
+
+    /* Entry 0 belongs to the rate control state structure itself */
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        fill_memtab(&ps_memtab[0], sizeof(rate_control_api_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**)pps_rate_control_api,
+                         e_func_type);
+    }
+    i4_num_memtabs = 1;
+
+    /* Handle is final from here on; the lower modules get its members */
+    ps_rc_api = *pps_rate_control_api;
+
+    /* Get the memory requirement of lower modules */
+    i4_num_memtabs += irc_ba_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_bit_allocation,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    i4_num_memtabs += irc_cbr_buffer_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_cbr_buffer,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    i4_num_memtabs += irc_est_sad_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_est_sad,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    i4_num_memtabs += irc_mbrc_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_mb_rate_control,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    i4_num_memtabs += irc_vbr_vbv_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_vbr_storage_vbv,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    /* One rd model per picture type */
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+    {
+        i4_num_memtabs += irc_rd_model_num_fill_use_free_memtab(
+                        &ps_rc_api->aps_rd_model[i4_pic_type],
+                        &ps_memtab[i4_num_memtabs], e_func_type);
+    }
+
+    i4_num_memtabs += irc_pic_handling_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_pic_handling,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    return (i4_num_memtabs);
+}
+
+/*****************************************************************************
+ Function Name : irc_initialise_rate_control
+ Description : Initialise the rate control structure
+ Inputs : ps_rate_control_api - api struct
+ e_rate_control_type - VBR, CBR (NLDRC/LDRC), VBR_STREAMING
+ u1_is_mb_level_rc_on - enabling mb level RC
+ u4_avg_bit_rate - bit rate to achieved across
the entire
+                     file size
+                     pu4_peak_bit_rate - max possible drain rate(s); indices
+                                         0 and 1 are read here
+                     u4_frame_rate - number of frames in 1000 seconds
+                     u4_max_delay - maximum delay; combined with
+                                    u4_frame_rate / 1000000 to get frames in
+                                    the delay period (presumably
+                                    milliseconds - TODO confirm)
+                     u4_intra_frame_interval - num frames between two I frames
+                     pu1_init_qp - init_qp for I,P,B
+                     pu1_min_max_qp - {min, max} QP pair per picture type
+ Notes             : Products of rates are formed in 64 bits because
+                     u4_avg_bit_rate * 1000 does not fit in 32 bits above
+                     about 4.29 Mbps. A zero u4_frame_rate yields an initial
+                     per-frame estimate of 0 instead of dividing by zero.
+ *****************************************************************************/
+void irc_initialise_rate_control(rate_control_api_t *ps_rate_control_api,
+                                 rc_type_e e_rate_control_type,
+                                 UWORD8 u1_is_mb_level_rc_on,
+                                 UWORD32 u4_avg_bit_rate,
+                                 UWORD32 *pu4_peak_bit_rate,
+                                 UWORD32 u4_min_bit_rate,
+                                 UWORD32 u4_frame_rate,
+                                 UWORD32 u4_max_delay,
+                                 UWORD32 u4_intra_frame_interval,
+                                 UWORD8 *pu1_init_qp,
+                                 UWORD32 u4_max_vbv_buff_size,
+                                 WORD32 i4_max_inter_frm_int,
+                                 WORD32 i4_is_gop_closed,
+                                 UWORD8 *pu1_min_max_qp,
+                                 WORD32 i4_use_est_intra_sad,
+                                 UWORD32 u4_src_ticks,
+                                 UWORD32 u4_tgt_ticks)
+{
+    WORD32 i;
+    /* 64-bit product: frame_rate * max_delay can exceed 32 bits */
+    UWORD32 u4_frms_in_delay_prd =
+                    (UWORD32)(((unsigned long long)u4_frame_rate
+                                    * u4_max_delay) / 1000000);
+    ps_rate_control_api->e_rc_type = e_rate_control_type;
+    ps_rate_control_api->u1_is_mb_level_rc_on = u1_is_mb_level_rc_on;
+
+    trace_printf((const WORD8*)"RC type = %d\n", e_rate_control_type);
+
+    /* Set the avg_bitrate_changed flag for each pic_type to 0 */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_rate_control_api->au1_avg_bitrate_changed[i] = 0;
+    }
+
+    /* Initialize the pic_handling module */
+    irc_init_pic_handling(ps_rate_control_api->ps_pic_handling,
+                          (WORD32)u4_intra_frame_interval, i4_max_inter_frm_int,
+                          i4_is_gop_closed);
+
+    /*** Initialize the rate control modules ***/
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+
+        /* Initialize the model parameter structures */
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            irc_init_frm_rc_rd_model(ps_rate_control_api->aps_rd_model[i],
+                                     MAX_FRAMES_MODELLED);
+        }
+
+        /* Initialize the buffer mechanism */
+        if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                        || (ps_rate_control_api->e_rc_type
+                                        == VBR_STORAGE_DVD_COMP))
+        {
+            /* Assuming both the peak bit rates are same for a VBR_STORAGE and
+             VBR_STORAGE_DVD_COMP */
+            if(pu4_peak_bit_rate[0] != pu4_peak_bit_rate[1])
+            {
+                trace_printf((const WORD8*)"For VBR_STORAGE and VBR_STORAGE_DVD_COMP the peak bit rates should be same\n");
+            }
+            irc_init_vbr_vbv(ps_rate_control_api->ps_vbr_storage_vbv,
+                             (WORD32)pu4_peak_bit_rate[0],
+                             (WORD32)u4_frame_rate,
+                             (WORD32)u4_max_vbv_buff_size);
+        }
+        else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+        {
+            UWORD32 u4_avg_bit_rate_copy[MAX_NUM_DRAIN_RATES];
+            for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+            {
+                u4_avg_bit_rate_copy[i] = u4_avg_bit_rate;
+            }
+            /* In case of CBR the num pics in delay is ignored */
+            for(i = 0; i < MAX_PIC_TYPE; i++)
+            {
+                au4_num_pics_in_delay_prd[i] = 0;
+            }
+
+            irc_init_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                u4_max_delay, u4_frame_rate,
+                                (WORD32 *)u4_avg_bit_rate_copy,
+                                au4_num_pics_in_delay_prd,
+                                u4_max_vbv_buff_size);
+        }
+        else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+        {
+            irc_init_vbv_str_prms(&ps_rate_control_api->s_vbr_str_prms,
+                                  u4_intra_frame_interval, u4_src_ticks,
+                                  u4_tgt_ticks, u4_frms_in_delay_prd);
+
+            /* Get the number of pics of each type in delay period */
+            irc_get_vsp_num_pics_in_dly_prd(
+                            &ps_rate_control_api->s_vbr_str_prms,
+                            au4_num_pics_in_delay_prd);
+
+            irc_init_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                u4_max_delay, u4_frame_rate,
+                                (WORD32 *)pu4_peak_bit_rate,
+                                au4_num_pics_in_delay_prd,
+                                u4_max_vbv_buff_size);
+        }
+
+        /* Initialize the SAD estimation module */
+        irc_init_est_sad(ps_rate_control_api->ps_est_sad, i4_use_est_intra_sad);
+
+        /* Initialize the bit allocation module according to VBR or CBR */
+        if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                        || (ps_rate_control_api->e_rc_type == VBR_STREAMING)
+                        || (ps_rate_control_api->e_rc_type
+                                        == VBR_STORAGE_DVD_COMP))
+        {
+            irc_ba_init_bit_allocation(ps_rate_control_api->ps_bit_allocation,
+                                       ps_rate_control_api->ps_pic_handling,
+                                       VBR_BIT_ALLOC_PERIOD, u4_avg_bit_rate,
+                                       u4_frame_rate,
+                                       (WORD32 *)pu4_peak_bit_rate,
+                                       u4_min_bit_rate);
+        }
+        else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+        {
+            irc_ba_init_bit_allocation(ps_rate_control_api->ps_bit_allocation,
+                                       ps_rate_control_api->ps_pic_handling,
+                                       CBR_BIT_ALLOC_PERIOD, u4_avg_bit_rate,
+                                       u4_frame_rate,
+                                       (WORD32 *)pu4_peak_bit_rate,
+                                       u4_min_bit_rate);
+        }
+
+        /*
+         * u1_scd_detected will be initialized to 1 when a Scene change is
+         * detected
+         */
+        ps_rate_control_api->u1_scd_detected = 0;
+    }
+
+    /* Initialize the init_qp, prev_frm_qp and the {min, max} QP pairs */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_rate_control_api->au1_init_qp[i] = pu1_init_qp[i];
+        ps_rate_control_api->au1_prev_frm_qp[i] = pu1_init_qp[i];
+        ps_rate_control_api->au1_min_max_qp[(i << 1)] =
+                        pu1_min_max_qp[(i << 1)];
+        ps_rate_control_api->au1_min_max_qp[(i << 1) + 1] =
+                        pu1_min_max_qp[(i << 1) + 1];
+    }
+
+    /* Initialize the is_first_frm_encoded */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_rate_control_api->au1_is_first_frm_coded[i] = 0;
+    }
+    ps_rate_control_api->u1_is_first_frm = 1;
+
+    /*
+     * Control flag for delayed impact after a change in peak bitrate has been
+     * made
+     */
+    ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change = 0;
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        ps_rate_control_api->au4_new_peak_bit_rate[i] = pu4_peak_bit_rate[i];
+    }
+
+    /* Initialize the mb level rate control module */
+    irc_init_mb_level_rc(ps_rate_control_api->ps_mb_rate_control);
+
+    /*
+     * Initial estimate of bits per frame = avg_bit_rate * 1000 / frame_rate.
+     * The product is formed in 64 bits so that it cannot wrap, and a zero
+     * frame rate is not divided by.
+     */
+    if(u4_frame_rate != 0)
+    {
+        ps_rate_control_api->i4_prev_frm_est_bits =
+                        (WORD32)(((unsigned long long)u4_avg_bit_rate * 1000)
+                                        / u4_frame_rate);
+    }
+    else
+    {
+        ps_rate_control_api->i4_prev_frm_est_bits = 0;
+    }
+
+    ps_rate_control_api->prev_ref_pic_type = I_PIC;
+}
+
+/******************************************************************************
+ *Description : calls irc_add_pic_to_stack
+ ******************************************************************************/
+void irc_add_picture_to_stack(rate_control_api_t *rate_control_api,
+                              WORD32 i4_enc_pic_id)
+{
+    /* Call the routine to add the pic to stack in encode order */
+    irc_add_pic_to_stack(rate_control_api->ps_pic_handling, i4_enc_pic_id);
+}
+
+/******************************************************************************
+ *Description : calls irc_add_pic_to_stack_re_enc; its return value is
+ *              discarded
+ ******************************************************************************/
+void irc_add_picture_to_stack_re_enc(rate_control_api_t *rate_control_api,
+                                     WORD32 i4_enc_pic_id,
+                                     picture_type_e e_pic_type)
+{
+    /*
+     * In case of a re-encoder, the pics will come in the encode order itself.
+     * So, there is no need to buffer the pics up
+     */
+    irc_add_pic_to_stack_re_enc(rate_control_api->ps_pic_handling,
+                                i4_enc_pic_id, e_pic_type);
+}
+
+/*******************************************************************************
+ Description : Decides the picture type based on the state; forwards to
+               irc_get_pic_from_stack, which fills the three outputs
+ ******************************************************************************/
+void irc_get_picture_details(rate_control_handle rate_control_api,
+                             WORD32 *pi4_pic_id,
+                             WORD32 *pi4_pic_disp_order_no,
+                             picture_type_e *pe_pic_type)
+{
+    /* Call to get the pic_details */
+    irc_get_pic_from_stack(rate_control_api->ps_pic_handling, pi4_pic_id,
+                           pi4_pic_disp_order_no, pe_pic_type);
+}
+
+/*******************************************************************************
+ * Description : Gets the frame level qp for the given picture type
+ * Inputs      : ps_rate_control_api - api struct
+ *               e_pic_type          - type of the frame about to be coded
+ *               i4_ud_max_bits      - user-defined cap on the frame bits;
+ *                                     applied to non-I pictures only
+ * Returns     : frame QP; 0 for an rc type that is not handled here; the
+ *               init QP of the picture type for CONST_QP
+ ******************************************************************************/
+UWORD8 irc_get_frame_level_qp(rate_control_api_t *ps_rate_control_api,
+                              picture_type_e e_pic_type,
+                              WORD32 i4_ud_max_bits)
+{
+    UWORD8 u1_frame_qp;
+    WORD32 i;
+
+    if((ps_rate_control_api->e_rc_type != VBR_STORAGE)
+                    && (ps_rate_control_api->e_rc_type != VBR_STORAGE_DVD_COMP)
+                    && (ps_rate_control_api->e_rc_type != CBR_NLDRC)
+                    && (ps_rate_control_api->e_rc_type != CONST_QP)
+                    && (ps_rate_control_api->e_rc_type != VBR_STREAMING))
+    {
+        trace_printf((const WORD8*)" Only VBR,NLDRC and CONST QP supported for now \n");
+        return (0);
+    }
+
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        UWORD8 u1_is_first_frm_coded = 1;
+
+        /* Check whether at least one frame of a each picture type gets encoded*/
+        /* Check whether it is an IPP or IPB kind of encoding */
+        if((ps_rate_control_api->au1_is_first_frm_coded[I_PIC]
+                        && ps_rate_control_api->au1_is_first_frm_coded[P_PIC])
+                        || ((irc_pic_type_get_intra_frame_interval(
+                                        ps_rate_control_api->ps_pic_handling)
+                                        == 1)
+                                        && (ps_rate_control_api->au1_is_first_frm_coded[I_PIC])))
+        {
+            if(e_pic_type != B_PIC)
+                u1_is_first_frm_coded = 1;
+            else
+            {
+                for(i = 0; i < MAX_PIC_TYPE; i++)
+                {
+                    u1_is_first_frm_coded &=
+                                    ps_rate_control_api->au1_is_first_frm_coded[i];
+                }
+            }
+        }
+        else
+        {
+            u1_is_first_frm_coded = 0;
+        }
+
+        if(u1_is_first_frm_coded)
+        {
+            WORD32 i4_cur_est_texture_bits, i4_cur_est_header_bits;
+            WORD32 i4_cur_est_bits;
+            UWORD32 u4_estimated_sad;
+
+            /* Force I frame updation of rem_bits_in_frame*/
+            if(irc_get_forced_I_frame_cur_frm_flag(
+                            ps_rate_control_api->ps_pic_handling) == 1)
+            {
+                irc_ba_change_rem_bits_in_prd_at_force_I_frame(
+                                ps_rate_control_api->ps_bit_allocation,
+                                ps_rate_control_api->ps_pic_handling);
+                irc_reset_forced_I_frame_cur_frm_flag(
+                                ps_rate_control_api->ps_pic_handling);
+            }
+
+            /* Get the estimated texture bits allocated for the current frame*/
+            i4_cur_est_texture_bits = irc_ba_get_cur_frm_est_texture_bits(
+                            ps_rate_control_api->ps_bit_allocation,
+                            ps_rate_control_api->aps_rd_model,
+                            ps_rate_control_api->ps_est_sad,
+                            ps_rate_control_api->ps_pic_handling, e_pic_type);
+
+            /* Get the estimated header bits*/
+            i4_cur_est_header_bits = irc_ba_get_cur_frm_est_header_bits(
+                            ps_rate_control_api->ps_bit_allocation, e_pic_type);
+
+            /* Total estimated bits */
+            i4_cur_est_bits = i4_cur_est_header_bits + i4_cur_est_texture_bits;
+
+            trace_printf((const WORD8*)"ft %d, etb = %d, eb %d, ", e_pic_type,
+                         i4_cur_est_texture_bits, i4_cur_est_bits);
+
+            /* Threshold the estimated bits based on the buffer fullness*/
+            if(ps_rate_control_api->e_rc_type == VBR_STORAGE)
+            {
+                WORD32 i4_cur_frm_max_bit_possible;
+                i4_cur_frm_max_bit_possible = irc_get_max_target_bits(
+                                ps_rate_control_api->ps_vbr_storage_vbv);
+
+                if(i4_cur_est_bits > i4_cur_frm_max_bit_possible)
+                {
+                    /* Assuming header would consume the same amount of bits */
+                    i4_cur_est_texture_bits = i4_cur_frm_max_bit_possible
+                                    - i4_cur_est_header_bits;
+                }
+            }
+            else if(ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP)
+            {
+                WORD32 i4_rem_bits_in_gop, i4_rem_frms_in_gop;
+                WORD32 i4_cur_frm_max_bit_possible,
+                                ai4_rem_frms_in_gop[MAX_PIC_TYPE];
+                irc_pic_type_get_rem_frms_in_gop(
+                                ps_rate_control_api->ps_pic_handling,
+                                ai4_rem_frms_in_gop);
+                i4_rem_bits_in_gop = irc_get_rem_bits_in_period(
+                                ps_rate_control_api);
+                i4_rem_frms_in_gop = 0;
+                for(i = 0; i < MAX_PIC_TYPE; i++)
+                    i4_rem_frms_in_gop += ai4_rem_frms_in_gop[i];
+
+                /* Threshold the bits based on estimated buffer fullness */
+                i4_cur_frm_max_bit_possible = irc_get_max_tgt_bits_dvd_comp(
+                                ps_rate_control_api->ps_vbr_storage_vbv,
+                                i4_rem_bits_in_gop, i4_rem_frms_in_gop,
+                                e_pic_type);
+
+                if(i4_cur_est_bits > i4_cur_frm_max_bit_possible)
+                {
+                    /* Assuming header would consume the same amount of bits */
+                    i4_cur_est_texture_bits = i4_cur_frm_max_bit_possible
+                                    - i4_cur_est_header_bits;
+
+                }
+            }
+            else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+            {
+                WORD32 i4_cur_frm_bits_acc_buffer =
+                                irc_cbr_buffer_constraint_check(
+                                                ps_rate_control_api->ps_cbr_buffer,
+                                                i4_cur_est_bits, e_pic_type);
+
+                /* Assuming the header would consume the same amount of bits */
+                i4_cur_est_texture_bits = i4_cur_frm_bits_acc_buffer
+                                - i4_cur_est_header_bits;
+
+            }
+            else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+            {
+                WORD32 i4_cur_frm_bits_acc_buffer =
+                                irc_vbr_stream_buffer_constraint_check(
+                                                ps_rate_control_api->ps_cbr_buffer,
+                                                i4_cur_est_bits, e_pic_type);
+
+                /* Assuming the header would consume the same amount of bits */
+                i4_cur_est_texture_bits = i4_cur_frm_bits_acc_buffer
+                                - i4_cur_est_header_bits;
+            }
+
+            trace_printf((const WORD8*)"emtb = %d, ", i4_cur_est_texture_bits);
+
+            /*
+             * If the estimated texture bits go to values less than zero
+             * due to buffer underflow, make the estimated target bits to go
+             * to zero
+             */
+            if(i4_cur_est_texture_bits < 0)
+                i4_cur_est_texture_bits = 0;
+
+            ps_rate_control_api->i4_prev_frm_est_bits = (i4_cur_est_texture_bits
+                            + i4_cur_est_header_bits);
+
+            /* Clip est_texture_bits according to the user-defined max value */
+            if((i4_cur_est_texture_bits
+                            > (i4_ud_max_bits - i4_cur_est_header_bits))
+                            && (e_pic_type != I_PIC))
+            {
+                i4_cur_est_texture_bits = (i4_ud_max_bits
+                                - i4_cur_est_header_bits);
+                trace_printf((const WORD8*)"udcb = %d, ",
+                             i4_ud_max_bits - i4_cur_est_header_bits);
+            }
+
+            /* Calculate the estimated SAD for corresponding frame*/
+            u4_estimated_sad = irc_get_est_sad(ps_rate_control_api->ps_est_sad,
+                                               e_pic_type);
+
+            /* Query the model for the Qp for the corresponding frame*/
+
+            /*
+             * The check is because the model gives a negative QP when the
+             * i4_cur_est_texture_bits is less than or equal to 0
+             * [This is a bug in the model]. As a temporary fix, the frame QP
+             * is being set to the max QP allowed
+             */
+            if(i4_cur_est_texture_bits > 0)
+            {
+                u1_frame_qp = irc_find_qp_for_target_bits(
+                                ps_rate_control_api->aps_rd_model[e_pic_type],
+                                i4_cur_est_texture_bits,
+                                u4_estimated_sad,
+                                ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                << 1)],
+                                ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                << 1) + 1]);
+            }
+            else
+            {
+                u1_frame_qp = ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                << 1) + 1];
+            }
+
+            trace_printf((const WORD8*)"ehb %d, etb %d, fqp %d, es %d, eb %d, ",
+                         i4_cur_est_header_bits, i4_cur_est_texture_bits,
+                         u1_frame_qp, u4_estimated_sad, i4_cur_est_bits);
+
+            /*
+             * Restrict the QP swing only while the average bit rate has NOT
+             * changed for this picture type; after a change the QP is left
+             * free once and the flag is cleared (else branch below)
+             */
+            if(ps_rate_control_api->au1_avg_bitrate_changed[e_pic_type] == 0)
+            {
+                WORD32 prev_qp;
+                WORD32 hi_dev_qp, lo_dev_qp;
+                /* Restricting the qp swing */
+                prev_qp = ps_rate_control_api->au1_prev_frm_qp[ps_rate_control_api->prev_ref_pic_type];
+
+                if(ps_rate_control_api->prev_ref_pic_type != e_pic_type)
+                {
+                    if(e_pic_type == I_PIC)
+                    {
+                        /*
+                         * Constrain I-frame QP to be within specified limit of
+                         * prev_ref_qp/Kp
+                         */
+                        prev_qp = (P_TO_I_RATIO * prev_qp + (1 << (K_Q - 1)))
+                                        >> (K_Q);
+                    }
+                    else if(e_pic_type == P_PIC)
+                    {
+                        /*
+                         * Constrain P-frame QP to be within specified limit of
+                         * Kp*prev_ref_qp
+                         */
+                        prev_qp = (I_TO_P_RATIO * prev_qp + (1 << (K_Q - 1)))
+                                        >> (K_Q);
+                    }
+                    else if(ps_rate_control_api->prev_ref_pic_type == P_PIC)
+                    {
+                        /* current frame is B-pic */
+                        /* Constrain B-frame QP to be within specified limit of
+                         * prev_ref_qp/Kb
+                         */
+                        prev_qp = (P_TO_B_RATIO * prev_qp + (1 << (K_Q - 1)))
+                                        >> (K_Q);
+                    }
+                    else /* if(ps_rate_control_api->prev_ref_pic_type == I_PIC*/
+                    {
+                        /* current frame is B-pic */
+                        /*
+                         * Constrain B-frame QP to be within specified limit of
+                         * prev_ref_qp/Kb
+                         */
+                        prev_qp = (P_TO_B_RATIO * I_TO_P_RATIO * prev_qp
+                                        + (1 << (K_Q + K_Q - 1)))
+                                        >> (K_Q + K_Q);
+                    }
+                }
+
+                hi_dev_qp = GET_HI_DEV_QP(prev_qp);
+                /*
+                 * For lower QPs due to scale factor and fixed point arithmetic,
+                 * the hi_dev_qp can be same as that of the prev qp and in which
+                 * case it gets stuck in the lower most qp and thus not allowing
+                 * QPs not to change. To avoid this,for lower qps the hi_dev_qp
+                 * should be made slightly more than prev_qp
+                 */
+                if(prev_qp == hi_dev_qp)
+                {
+                    hi_dev_qp += 1;
+                }
+                lo_dev_qp = GET_LO_DEV_QP(prev_qp);
+                u1_frame_qp = (UWORD8)CLIP_QP((WORD32)u1_frame_qp, hi_dev_qp, lo_dev_qp);
+            }
+            else
+            {
+                ps_rate_control_api->au1_avg_bitrate_changed[e_pic_type] = 0;
+            }
+        }
+        else
+        {
+            /*
+             * The u1_is_first_frm_coded gets reset
+             * a) at start of sequence
+             * b) whenever there is a scene change.
+             * In both cases since we do not have any estimate about the
+             * current frame, we just send in the previous frame qp value.IN
+             * Scene change case the previous QP is incremented by 4 , This is
+             * done because the Scene changed VOP will have over consumed and
+             * chances of future frames skipping is very high. For the init
+             * case, the previous frame QP is initialized with the init qp
+             */
+            if((ps_rate_control_api->u1_scd_detected)
+                            && (ps_rate_control_api->e_rc_type != CONST_QP))
+            {
+                /*
+                 * If scene change is detected, I frame Qp would have been
+                 * updated
+                 */
+                /* Use a QP calculated in the prev update fxn */
+                u1_frame_qp = ps_rate_control_api->u1_frm_qp_after_scd;
+            }
+            else
+            {
+                u1_frame_qp = ps_rate_control_api->au1_prev_frm_qp[e_pic_type];
+            }
+        }
+    }
+    else
+    {
+        u1_frame_qp = ps_rate_control_api->au1_init_qp[e_pic_type];
+    }
+
+    trace_printf((const WORD8*)"fqp %d\n", u1_frame_qp);
+
+    return (u1_frame_qp);
+}
+
+/*******************************************************************************
+ *Function Name : irc_get_buffer_status
+ *Description : Gets the state of VBV buffer
+ *Outputs : 0 = normal, 1 = underflow, 2= overflow
+ *          pi4_num_bits_to_prevent_vbv_underflow is written for every rc
+ *          type handled below; for any other type (e.g. CONST_QP) it is
+ *          left untouched and VBV_NORMAL is returned
+ *Returns : vbv_buf_status_e
+ ******************************************************************************/
+vbv_buf_status_e irc_get_buffer_status(rate_control_api_t *ps_rate_control_api,
+                                       WORD32 i4_total_frame_bits,
+                                       picture_type_e e_pic_type,
+                                       WORD32 *pi4_num_bits_to_prevent_vbv_underflow)
+{
+    vbv_buf_status_e e_buf_status = VBV_NORMAL;
+
+    /* Get the buffer status for the current total consumed bits and error bits*/
+    if(ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP)
+    {
+        e_buf_status = irc_get_vbv_buffer_status(
+                        ps_rate_control_api->ps_vbr_storage_vbv,
+                        i4_total_frame_bits,
+                        pi4_num_bits_to_prevent_vbv_underflow);
+
+        trace_printf((const WORD8*)"e_buf_status = %d\n", e_buf_status);
+    }
+    else if(ps_rate_control_api->e_rc_type == VBR_STORAGE)
+    {
+        /* For VBR case since there is not underflow returning the max value */
+        pi4_num_bits_to_prevent_vbv_underflow[0] = irc_get_max_vbv_buf_size(
+                        ps_rate_control_api->ps_vbr_storage_vbv);
+        e_buf_status = VBV_NORMAL;
+    }
+    else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+    {
+        e_buf_status = irc_get_cbr_buffer_status(
+                        ps_rate_control_api->ps_cbr_buffer, i4_total_frame_bits,
+                        pi4_num_bits_to_prevent_vbv_underflow, e_pic_type);
+
+    }
+    else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+    {
+        /* For VBR_streaming, error bits are computed according to peak bitrate*/
+        e_buf_status = irc_get_cbr_buffer_status(
+                        ps_rate_control_api->ps_cbr_buffer, i4_total_frame_bits,
+                        pi4_num_bits_to_prevent_vbv_underflow, e_pic_type);
+    }
+    return e_buf_status;
+}
+
+/*******************************************************************************
+ Function Name : irc_update_pic_handling_state
+ Description : Forwards the picture type of the coded frame to the
+               pic_handling module (irc_update_pic_handling)
+ ******************************************************************************/
+void irc_update_pic_handling_state(rate_control_api_t *ps_rate_control_api,
+                                   picture_type_e e_pic_type)
+{
+    irc_update_pic_handling(ps_rate_control_api->ps_pic_handling, e_pic_type);
+}
+
+/******************************************************************************
+ Function Name : irc_update_frame_level_info
+ Description : Updates the frame level information into the rate control
+ structure
+ ******************************************************************************/
+void irc_update_frame_level_info(rate_control_api_t *ps_rate_control_api,
+                                 picture_type_e e_pic_type,
+                                 WORD32 *pi4_mb_type_sad,
+                                 WORD32 i4_total_frame_bits,
+                                 WORD32 i4_model_updation_hdr_bits,
+                                 WORD32 *pi4_mb_type_tex_bits,
+                                 WORD32 *pi4_tot_mb_type_qp,
+                                 WORD32 *pi4_tot_mb_in_type,
+                                 WORD32 i4_avg_activity,
+                                 UWORD8 u1_is_scd,
+                                 WORD32 i4_is_it_a_skip,
+                                 WORD32 i4_intra_frm_cost,
+                                 WORD32 i4_is_pic_handling_done)
+{
+    UWORD8 u1_num_skips = 0;
+    WORD32 i;
+    UWORD32 u4_frame_sad = 0;
+    WORD32 i4_tot_texture_bits = 0;
+    WORD32 i4_tot_mbs = 0;
+    WORD32 i4_avg_qp = 0;
+
+    /* SCD not supported in case of IPB encoder */
+    if(u1_is_scd && (irc_pic_type_get_inter_frame_interval(
+                    ps_rate_control_api->ps_pic_handling) > 1))
+    {
+        u1_is_scd = 0;
+    }
+    trace_printf((const WORD8*)"i4_total_frame_bits %d\n",
i4_total_frame_bits); + + if(!i4_is_it_a_skip && !i4_is_pic_handling_done) + { + /* Update the pic_handling struct */ + irc_update_pic_handling(ps_rate_control_api->ps_pic_handling, + e_pic_type); + } + + if(ps_rate_control_api->e_rc_type != CONST_QP) + { + if(!i4_is_it_a_skip) + { + WORD32 i4_new_period_flag; + /****************************************************************** + Calculate the total values from the individual values + ******************************************************************/ + for(i = 0; i < MAX_MB_TYPE; i++) + u4_frame_sad += pi4_mb_type_sad[i]; + for(i = 0; i < MAX_MB_TYPE; i++) + i4_tot_texture_bits += pi4_mb_type_tex_bits[i]; + for(i = 0; i < MAX_MB_TYPE; i++) + i4_avg_qp += pi4_tot_mb_type_qp[i]; + for(i = 0; i < MAX_MB_TYPE; i++) + i4_tot_mbs += pi4_tot_mb_in_type[i]; + /* + * Calculate the average QP. The MB counts come from the caller, so + * skip the divide when they sum to zero instead of dividing by zero; + * i4_avg_qp then keeps the plain QP sum. + */ + if(i4_tot_mbs != 0) + i4_avg_qp /= i4_tot_mbs; + + if(ps_rate_control_api->u1_is_mb_level_rc_on) + { + /* + * The model needs to take into consideration the average + * activity of the entire frame while estimating the QP. Thus + * the frame sad values are scaled by the average activity + * before updating it into the model. + */ + if(!i4_avg_activity) + i4_avg_activity = 1; + i4_intra_frm_cost *= i4_avg_activity; + u4_frame_sad *= i4_avg_activity; + } + + /****************************************************************** + Update the bit allocation module + NOTE: For bit allocation module, the pic_type should not be + modified to that of 'I', in case of a SCD. 
+ ******************************************************************/ + i4_new_period_flag = irc_is_last_frame_in_gop( + ps_rate_control_api->ps_pic_handling); + irc_ba_update_cur_frm_consumed_bits( + ps_rate_control_api->ps_bit_allocation, + ps_rate_control_api->ps_pic_handling, + i4_total_frame_bits, i4_model_updation_hdr_bits, + e_pic_type, u1_is_scd, i4_new_period_flag); + + if(1 == i4_new_period_flag + && ((ps_rate_control_api->e_rc_type == VBR_STORAGE) + || (ps_rate_control_api->e_rc_type + == VBR_STORAGE_DVD_COMP))) + { + irc_ba_check_and_update_bit_allocation( + ps_rate_control_api->ps_bit_allocation, + ps_rate_control_api->ps_pic_handling, + irc_get_cur_vbv_buf_size( + ps_rate_control_api->ps_vbr_storage_vbv), + irc_get_max_vbv_buf_size( + ps_rate_control_api->ps_vbr_storage_vbv), + irc_get_max_bits_per_tgt_frm( + ps_rate_control_api->ps_vbr_storage_vbv), + i4_total_frame_bits); + } + } + + /********************************************************************** + Update the buffer status + *********************************************************************/ + /* + * This update is done after overflow and underflow handling to + * account for the actual bits dumped + */ + if((ps_rate_control_api->e_rc_type == VBR_STORAGE) + || (ps_rate_control_api->e_rc_type + == VBR_STORAGE_DVD_COMP)) + { + irc_update_vbr_vbv(ps_rate_control_api->ps_vbr_storage_vbv, + i4_total_frame_bits); + } + else if(ps_rate_control_api->e_rc_type == CBR_NLDRC) + { + irc_update_cbr_buffer(ps_rate_control_api->ps_cbr_buffer, + i4_total_frame_bits, e_pic_type); + } + else if(ps_rate_control_api->e_rc_type == VBR_STREAMING) + { + UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE]; + + irc_get_vsp_num_pics_in_dly_prd( + &ps_rate_control_api->s_vbr_str_prms, + au4_num_pics_in_delay_prd); + + irc_update_cbr_buffer(ps_rate_control_api->ps_cbr_buffer, + i4_total_frame_bits, e_pic_type); + + irc_update_vbr_str_prms(&ps_rate_control_api->s_vbr_str_prms, + e_pic_type); + + 
irc_change_cbr_vbv_num_pics_in_delay_period( + ps_rate_control_api->ps_cbr_buffer, + au4_num_pics_in_delay_prd); + + /* + * If the change_in_peak_bitrate flag is set, after the delay period + * update the peak_bitrate and the buffer parameters + */ + if(!ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change) + { + irc_ba_change_ba_peak_bit_rate( + ps_rate_control_api->ps_bit_allocation, + (WORD32 *)&ps_rate_control_api->au4_new_peak_bit_rate[0]); + irc_change_cbr_vbv_bit_rate( + ps_rate_control_api->ps_cbr_buffer, + (WORD32 *)&ps_rate_control_api->au4_new_peak_bit_rate[0]); + } + if(ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change) + ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change--; + } + + if(!i4_is_it_a_skip) + { + /******************************************************************* + Handle the SCENE CHANGE DETECTED + 1) Make the picture type as I, so that updation happens as if it is + an I frame + 2) Reset model, SAD and flag to restart the estimation process + ******************************************************************/ + if(u1_is_scd) + { + WORD32 i4_frm_qp_after_scd; + UWORD32 u4_prev_I_frm_sad; + + e_pic_type = I_PIC; + + /* Scale scd qp based on SCD Frm sad and previous I Frm sad */ + /* frm_qp_after_scd = (avg_qp * cur_frm_sad)/prev_I_frm_sad */ + + /* + * QP for the next frame should take care of + * 1) due to scene change, the current picture has consumed more + * bits + * 2) relative complexity of the previous scene and the current + * scene + */ + + /* Get the intra SAD for the previous scene */ + u4_prev_I_frm_sad = irc_get_est_sad( + ps_rate_control_api->ps_est_sad, I_PIC); + + /* + * Scale the QP based on the SAD ratio of the current pic and + * previous scene intra SAD + */ + X_PROD_Y_DIV_Z(i4_avg_qp, u4_frame_sad, u4_prev_I_frm_sad, + i4_frm_qp_after_scd); + + /* Limit the next frame qp by 50% across both the sides */ + if(i4_frm_qp_after_scd > ((i4_avg_qp * 3) >> 1)) + { + 
i4_frm_qp_after_scd = (i4_avg_qp * 3) >> 1; + } + else if(i4_frm_qp_after_scd < (i4_avg_qp >> 1)) + { + i4_frm_qp_after_scd = (i4_avg_qp >> 1); + } + + /* + * Ensure that the next frame QP is within the min_max limit of + * QP allowed + */ + if(i4_frm_qp_after_scd + > ps_rate_control_api->au1_min_max_qp[(e_pic_type + << 1) + 1]) + { + i4_frm_qp_after_scd = + ps_rate_control_api->au1_min_max_qp[(e_pic_type + << 1) + 1]; + } + else if(i4_frm_qp_after_scd + < ps_rate_control_api->au1_min_max_qp[(e_pic_type + << 1)]) + { + i4_frm_qp_after_scd = + ps_rate_control_api->au1_min_max_qp[(e_pic_type + << 1)]; + } + + /* Update the state var */ + ps_rate_control_api->u1_frm_qp_after_scd = + (UWORD8)i4_frm_qp_after_scd; + + /* re-set model */ + for(i = 0; i < MAX_PIC_TYPE; i++) + { + irc_reset_frm_rc_rd_model( + ps_rate_control_api->aps_rd_model[i]); + } + + /* Reset the SAD estimation module */ + irc_reset_est_sad(ps_rate_control_api->ps_est_sad); + + /* Reset flag */ + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ps_rate_control_api->au1_is_first_frm_coded[i] = 0; + } + + /* Reset the MB Rate control */ + irc_init_mb_level_rc(ps_rate_control_api->ps_mb_rate_control); + + /*Set u1_scd_detected flag*/ + ps_rate_control_api->u1_scd_detected = 1; + + /* + * Adjust the average QP for the frame based on bits + * consumption + */ + /* + * Initialize the QP for each picture type according to the + * average QP of the SCD pic + */ + ps_rate_control_api->au1_prev_frm_qp[I_PIC] = (UWORD8)i4_avg_qp; + + trace_printf((const WORD8*)"SCD DETECTED\n"); + } + else + { + ps_rate_control_api->u1_scd_detected = 0; + /************************************************************** + Update the Qp used by the current frame + **************************************************************/ + ps_rate_control_api->au1_prev_frm_qp[e_pic_type] = + (UWORD8)i4_avg_qp; + } + + /******************************************************************** + Update the model of the correponding picture type + NOTE: For 
SCD, we force the frame type from 'P' to that of a 'I' + ******************************************************************/ + /* + * For very simple sequences no bits are consumed by texture. These + * frames do not add any information to the model and so not added + */ + if(i4_tot_texture_bits && u4_frame_sad) + { + irc_add_frame_to_rd_model( + ps_rate_control_api->aps_rd_model[e_pic_type], + i4_tot_texture_bits, (UWORD8)i4_avg_qp, + u4_frame_sad, u1_num_skips); + + /* + * At least one proper frame in added into the model. Until that + * keep using the initial QP + */ + ps_rate_control_api->au1_is_first_frm_coded[e_pic_type] = 1; + } + + if(i4_avg_activity) + { + /* Update the mb_level model */ + irc_mb_update_frame_level( + ps_rate_control_api->ps_mb_rate_control, + i4_avg_activity); + } + + /****************************************************************** + Update the sad estimation module + NOTE: For SCD, we force the frame type from 'P' to that of a 'I' + ******************************************************************/ + if(u4_frame_sad) + { + irc_update_actual_sad(ps_rate_control_api->ps_est_sad, + u4_frame_sad, e_pic_type); + + irc_update_actual_sad_for_intra(ps_rate_control_api->ps_est_sad, + i4_intra_frm_cost); + } + + /* + * Update the variable which denotes that a frame has been + * encountered + */ + ps_rate_control_api->u1_is_first_frm = 0; + + } + } + + /* Store the prev encoded picture type for restricting Qp swing */ + if((e_pic_type == I_PIC) || (e_pic_type == P_PIC)) + { + ps_rate_control_api->prev_ref_pic_type = e_pic_type; + } + + trace_printf((const WORD8*)"ft %d,hb %d,tb %d,qp %d,fs %d\n", e_pic_type, + i4_model_updation_hdr_bits, i4_tot_texture_bits, i4_avg_qp, + u4_frame_sad); + + return; +} + +/******************************************************************************* + MB Level API functions + ******************************************************************************/ + 
+/****************************************************************************** + Function Name : irc_init_mb_rc_frame_level + Description : Initialise the frame level details required for a mb level + ******************************************************************************/ + +void irc_init_mb_rc_frame_level(rate_control_api_t *ps_rate_control_api, + UWORD8 u1_frame_qp) +{ + irc_mb_init_frame_level(ps_rate_control_api->ps_mb_rate_control, + u1_frame_qp); +} + +/****************************************************************************** + Function Name : irc_get_mb_level_qp + Description : Get the mb level qp + *****************************************************************************/ +void irc_get_mb_level_qp(rate_control_api_t *ps_rate_control_api, + WORD32 i4_cur_mb_activity, + WORD32 *pi4_mb_qp, + picture_type_e e_pic_type) +{ + if(ps_rate_control_api->u1_is_mb_level_rc_on) + { + irc_get_mb_qp(ps_rate_control_api->ps_mb_rate_control, + i4_cur_mb_activity, pi4_mb_qp); + + /* Truncating the QP to the Max and Min Qp values possible */ + if(pi4_mb_qp[1] < ps_rate_control_api->au1_min_max_qp[e_pic_type << 1]) + { + pi4_mb_qp[1] = ps_rate_control_api->au1_min_max_qp[e_pic_type << 1]; + } + if(pi4_mb_qp[1] + > ps_rate_control_api->au1_min_max_qp[(e_pic_type << 1) + + 1]) + { + pi4_mb_qp[1] = ps_rate_control_api->au1_min_max_qp[(e_pic_type << 1) + + 1]; + } + } + else + { + WORD32 i4_qp; + i4_qp = irc_get_frm_level_qp(ps_rate_control_api->ps_mb_rate_control); + /* Both the qp are used for */ + pi4_mb_qp[0] = i4_qp; /* Used as feedback for the rate control */ + pi4_mb_qp[1] = i4_qp; /* Used for quantising the MB*/ + } +} + +/**************************************************************************** + Function Name : irc_get_bits_to_stuff + Description : Gets the bits to stuff to prevent Underflow of Encoder Buffer + *****************************************************************************/ +WORD32 irc_get_bits_to_stuff(rate_control_api_t 
*ps_rate_control_api, + WORD32 i4_tot_consumed_bits, + picture_type_e e_pic_type) +{ + WORD32 i4_bits_to_stuff; + /* Get the CBR bits to stuff*/ + i4_bits_to_stuff = irc_get_cbr_bits_to_stuff( + ps_rate_control_api->ps_cbr_buffer, i4_tot_consumed_bits, + e_pic_type); + return i4_bits_to_stuff; +} + +/**************************************************************************** + Function Name : irc_get_prev_frm_est_bits + Description : Returns previous frame estimated bits + *****************************************************************************/ +WORD32 irc_get_prev_frm_est_bits(rate_control_api_t *ps_rate_control_api) +{ + return (ps_rate_control_api->i4_prev_frm_est_bits); +} + +/****************************************************************************** + Control Level API functions + Logic: The control call sets the state structure of the rate control api + accordingly such that the next process call would implement the same. + ******************************************************************************/ + +void irc_change_inter_frm_int_call(rate_control_api_t *ps_rate_control_api, + WORD32 i4_inter_frm_int) +{ + irc_pic_handling_register_new_inter_frm_interval( + ps_rate_control_api->ps_pic_handling, i4_inter_frm_int); +} + +void irc_change_intra_frm_int_call(rate_control_api_t *ps_rate_control_api, + WORD32 i4_intra_frm_int) +{ + irc_pic_handling_register_new_int_frm_interval( + ps_rate_control_api->ps_pic_handling, i4_intra_frm_int); + + if(ps_rate_control_api->e_rc_type == VBR_STREAMING) + { + irc_change_vsp_ifi(&ps_rate_control_api->s_vbr_str_prms, + i4_intra_frm_int); + } +} + +/**************************************************************************** + Function Name : irc_change_avg_bit_rate + Description : Whenever the average bit rate changes, the excess bits is + between the changed bit rate and the old one is re-distributed + in the bit allocation module + *****************************************************************************/ 
+void irc_change_avg_bit_rate(rate_control_api_t *ps_rate_control_api, + UWORD32 u4_average_bit_rate) +{ + int i; + if(ps_rate_control_api->e_rc_type != CONST_QP) + { + /* + * Bit Allocation Module: distribute the excess/deficit bits between the + * old and the new frame rate to all the remaining frames + */ + irc_ba_change_remaining_bits_in_period( + ps_rate_control_api->ps_bit_allocation, + ps_rate_control_api->ps_pic_handling, + u4_average_bit_rate, + irc_ba_get_frame_rate( + ps_rate_control_api->ps_bit_allocation), + (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate)); + } + if(ps_rate_control_api->e_rc_type == CBR_NLDRC) + { + UWORD32 u4_average_bit_rate_copy[MAX_NUM_DRAIN_RATES]; + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + u4_average_bit_rate_copy[i] = u4_average_bit_rate; + } + irc_change_cbr_vbv_bit_rate(ps_rate_control_api->ps_cbr_buffer, + (WORD32 *)(u4_average_bit_rate_copy)); + } + + /* + * This is done only for average bitrate changing somewhere after the model + * stabilizes.Here it is assumed that user will not do this call after + * first few frames. If we dont have this check, what would happen is since + * the model has not stabilized, also bitrate has changed before the first + * frame, we dont restrict the qp. Qp can go to very bad values after init + * qp since if swing is disabled. 
+ * This check will become buggy if change bitrate is called say somewhere + * after first two frames.Bottom line - RC init is done during create and + * this call is done just before first process.And we want to differentiate + * between this call done before first process and the call which is done + * during run time + */ + if(ps_rate_control_api->u1_is_first_frm == 0) + { + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ps_rate_control_api->au1_avg_bitrate_changed[i] = 1; + } + } +} + +/**************************************************************************** + Function Name : irc_change_frame_rate + Description : Does the necessary changes whenever there is a change in + frame rate + *****************************************************************************/ +void irc_change_frame_rate(rate_control_api_t *ps_rate_control_api, + UWORD32 u4_frame_rate, + UWORD32 u4_src_ticks, + UWORD32 u4_tgt_ticks) +{ + + if(ps_rate_control_api->e_rc_type != CONST_QP) + { + UWORD32 u4_frms_in_delay_prd = ((u4_frame_rate + * irc_get_cbr_buffer_delay( + ps_rate_control_api->ps_cbr_buffer)) + / 1000000); + if((ps_rate_control_api->e_rc_type == VBR_STORAGE) + || (ps_rate_control_api->e_rc_type + == VBR_STORAGE_DVD_COMP)) + { + irc_change_vbr_vbv_frame_rate( + ps_rate_control_api->ps_vbr_storage_vbv, + u4_frame_rate); + } + else if(ps_rate_control_api->e_rc_type == CBR_NLDRC) + { + irc_change_cbr_vbv_tgt_frame_rate( + ps_rate_control_api->ps_cbr_buffer, u4_frame_rate); + } + else if(ps_rate_control_api->e_rc_type == VBR_STREAMING) + { + UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE]; + irc_change_vsp_tgt_ticks(&ps_rate_control_api->s_vbr_str_prms, + u4_tgt_ticks); + irc_change_vsp_src_ticks(&ps_rate_control_api->s_vbr_str_prms, + u4_src_ticks); + irc_change_vsp_fidp(&ps_rate_control_api->s_vbr_str_prms, + u4_frms_in_delay_prd); + + irc_get_vsp_num_pics_in_dly_prd( + &ps_rate_control_api->s_vbr_str_prms, + au4_num_pics_in_delay_prd); + irc_change_cbr_vbv_tgt_frame_rate( + 
ps_rate_control_api->ps_cbr_buffer, u4_frame_rate); + irc_change_cbr_vbv_num_pics_in_delay_period( + ps_rate_control_api->ps_cbr_buffer, + au4_num_pics_in_delay_prd); + } + + /* + * Bit Allocation Module: distribute the excess/deficit bits between the + * old and the new frame rate to all the remaining frames + */ + irc_ba_change_remaining_bits_in_period( + ps_rate_control_api->ps_bit_allocation, + ps_rate_control_api->ps_pic_handling, + irc_ba_get_bit_rate( + ps_rate_control_api->ps_bit_allocation), + u4_frame_rate, + (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate)); + } +} + +/**************************************************************************** + Function Name : irc_change_frm_rate_for_bit_alloc + Description : Does the necessary changes only in the bit_allocation module + there is a change in frame rate + *****************************************************************************/ +void irc_change_frm_rate_for_bit_alloc(rate_control_api_t *ps_rate_control_api, + UWORD32 u4_frame_rate) +{ + + if(ps_rate_control_api->e_rc_type != CONST_QP) + { + /* + * Bit Allocation Module: distribute the excess/deficit bits between the + * old and the new frame rate to all the remaining frames + */ + irc_ba_change_remaining_bits_in_period( + ps_rate_control_api->ps_bit_allocation, + ps_rate_control_api->ps_pic_handling, + irc_ba_get_bit_rate( + ps_rate_control_api->ps_bit_allocation), + u4_frame_rate, + (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate)); + + if(ps_rate_control_api->e_rc_type == VBR_STORAGE + || ps_rate_control_api->e_rc_type + == VBR_STORAGE_DVD_COMP) + { + irc_change_vbr_max_bits_per_tgt_frm( + ps_rate_control_api->ps_vbr_storage_vbv, + u4_frame_rate); + } + } +} + +void irc_change_init_qp(rate_control_api_t *ps_rate_control_api, + UWORD8 *pu1_init_qp) +{ + WORD32 i; + /* Initialize the init_qp */ + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ps_rate_control_api->au1_init_qp[i] = pu1_init_qp[i]; + ps_rate_control_api->au1_prev_frm_qp[i] = 
pu1_init_qp[i]; + } +} + +void irc_change_min_max_qp(rate_control_api_t *ps_rate_control_api, + UWORD8 *pu1_min_max_qp) +{ + WORD32 i; + for(i = 0; i < MAX_PIC_TYPE; i++) + { + ps_rate_control_api->au1_min_max_qp[(i << 1)] = + pu1_min_max_qp[(i << 1)]; + ps_rate_control_api->au1_min_max_qp[(i << 1) + 1] = pu1_min_max_qp[(i + << 1) + 1]; + } +} + +/**************************************************************************** + Function Name : irc_change_peak_bit_rate + Description : Does the necessary changes whenever there is a change in + peak bit rate + *****************************************************************************/ +WORD32 irc_change_peak_bit_rate(rate_control_api_t *ps_rate_control_api, + UWORD32 *pu4_peak_bit_rate) +{ + WORD32 i4_ret_val = RC_OK; + int i; + + /* + * Buffer Mechanism Module: Re-initialize the number of bits consumed per + * frame + */ + if(ps_rate_control_api->e_rc_type == VBR_STORAGE + || ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP) + { + /* Send the new peak bit rate and the old frame rate */ + irc_change_vbr_vbv_bit_rate(ps_rate_control_api->ps_vbr_storage_vbv, + pu4_peak_bit_rate[0]); + irc_ba_change_ba_peak_bit_rate(ps_rate_control_api->ps_bit_allocation, + (WORD32 *)pu4_peak_bit_rate); + + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + ps_rate_control_api->au4_new_peak_bit_rate[i] = + pu4_peak_bit_rate[i]; + } + } + else if(ps_rate_control_api->e_rc_type == VBR_STREAMING) + { + if(ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change) + { + /* + * Means that change in peak bit rate has been made twice before the + * previous change could take effect + */ + i4_ret_val = RC_BENIGN_ERR; + } + /* + * If the change happens before encoding the first frame make the + * effect immediately else delay the effect + */ + if(ps_rate_control_api->u1_is_first_frm) + { + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + ps_rate_control_api->au4_new_peak_bit_rate[i] = + pu4_peak_bit_rate[i]; + } + 
irc_ba_change_ba_peak_bit_rate( + ps_rate_control_api->ps_bit_allocation, + (WORD32 *)pu4_peak_bit_rate); + irc_change_cbr_vbv_bit_rate(ps_rate_control_api->ps_cbr_buffer, + (WORD32 *)pu4_peak_bit_rate); + } + else + { + /* + * Scratch output for irc_get_vsp_num_pics_in_dly_prd(); sized per + * picture type like every other caller of that helper in this + * file, so the helper cannot write past the end of the array + */ + UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE]; + /* + * Else store the number of frames after which the effect should + * happen and then update the peak bitrate + */ + ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change = + irc_get_vsp_num_pics_in_dly_prd( + &ps_rate_control_api->s_vbr_str_prms, + au4_num_pics_in_delay_prd); + for(i = 0; i < MAX_NUM_DRAIN_RATES; i++) + { + ps_rate_control_api->au4_new_peak_bit_rate[i] = + pu4_peak_bit_rate[i]; + } + } + } + + return (i4_ret_val); +} + +void irc_change_buffer_delay(rate_control_api_t *ps_rate_control_api, + UWORD32 u4_buffer_delay) +{ + UWORD32 u4_frms_in_delay_prd = ((irc_ba_get_frame_rate( + ps_rate_control_api->ps_bit_allocation) * u4_buffer_delay) + / 1000000); + + /* Initialize the rate control modules */ + if(ps_rate_control_api->e_rc_type == CBR_NLDRC) + { + irc_change_cbr_buffer_delay(ps_rate_control_api->ps_cbr_buffer, + u4_buffer_delay); + } + else if(ps_rate_control_api->e_rc_type == VBR_STORAGE + || ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP) + { + /* + * NOTE(review): every other use of s_vbr_str_prms visible in this + * file is under e_rc_type == VBR_STREAMING - confirm that testing + * for the VBR_STORAGE types here is intended + */ + UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE]; + + irc_change_vsp_fidp(&ps_rate_control_api->s_vbr_str_prms, + u4_frms_in_delay_prd); + + /* Get the number of pics of each type in delay period */ + irc_get_vsp_num_pics_in_dly_prd(&ps_rate_control_api->s_vbr_str_prms, + au4_num_pics_in_delay_prd); + + irc_change_cbr_vbv_num_pics_in_delay_period( + ps_rate_control_api->ps_cbr_buffer, + au4_num_pics_in_delay_prd); + } +} + +/* Getter functions to get the current rate control parameters */ +UWORD32 irc_get_frame_rate(rate_control_api_t *ps_rate_control_api) +{ + return (irc_ba_get_frame_rate(ps_rate_control_api->ps_bit_allocation)); +} + +UWORD32 irc_get_bit_rate(rate_control_api_t *ps_rate_control_api) +{ + 
return (irc_ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation)); +} + +UWORD32 irc_get_peak_bit_rate(rate_control_api_t *ps_rate_control_api, + WORD32 i4_index) +{ + return (ps_rate_control_api->au4_new_peak_bit_rate[i4_index]); +} + +UWORD32 irc_get_intra_frame_interval(rate_control_api_t *ps_rate_control_api) +{ + return (irc_pic_type_get_intra_frame_interval( + ps_rate_control_api->ps_pic_handling)); +} + +UWORD32 irc_get_inter_frame_interval(rate_control_api_t *ps_rate_control_api) +{ + return (irc_pic_type_get_inter_frame_interval( + ps_rate_control_api->ps_pic_handling)); +} + +rc_type_e irc_get_rc_type(rate_control_api_t *ps_rate_control_api) +{ + return (ps_rate_control_api->e_rc_type); +} + +WORD32 irc_get_bits_per_frame(rate_control_api_t *ps_rate_control_api) +{ + WORD32 i4_bits_per_frm; + + X_PROD_Y_DIV_Z(irc_ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation), + (UWORD32)1000, + irc_ba_get_frame_rate(ps_rate_control_api->ps_bit_allocation), + i4_bits_per_frm); + + return (i4_bits_per_frm); +} + +UWORD32 irc_get_max_delay(rate_control_api_t *ps_rate_control_api) +{ + return (irc_get_cbr_buffer_delay(ps_rate_control_api->ps_cbr_buffer)); +} + +UWORD32 irc_get_seq_no(rate_control_api_t *ps_rate_control_api) +{ + return (irc_pic_type_get_disp_order_no(ps_rate_control_api->ps_pic_handling)); +} + +UWORD32 irc_get_rem_frames_in_gop(rate_control_api_t *ps_rate_control_api) +{ + WORD32 ai4_rem_frms_in_period[MAX_PIC_TYPE]; + WORD32 j; + UWORD32 u4_rem_frms_in_period = 0; + + /* Get the rem_frms_in_gop & the frms_in_gop from the pic_type state struct */ + irc_pic_type_get_rem_frms_in_gop(ps_rate_control_api->ps_pic_handling, + ai4_rem_frms_in_period); + + /* Depending on the number of gops in a period, find the num_frms_in_prd */ + for(j = 0; j < MAX_PIC_TYPE; j++) + { + u4_rem_frms_in_period += ai4_rem_frms_in_period[j]; + } + + return (u4_rem_frms_in_period); +} + +/**************************************************************************** + 
Function Name : irc_flush_buf_frames + Description : API call to flush the buffered up frames + *****************************************************************************/ +void irc_flush_buf_frames(rate_control_api_t *ps_rate_control_api) +{ + irc_flush_frame_from_pic_stack(ps_rate_control_api->ps_pic_handling); +} + +/**************************************************************************** + Function Name : irc_post_encode_frame_skip + Description : API call that passes a post-encode skip of a frame of the + given picture type on to the picture handling module + (irc_skip_encoded_frame) + *****************************************************************************/ + +void irc_post_encode_frame_skip(rate_control_api_t *ps_rate_control_api, + picture_type_e e_pic_type) +{ + irc_skip_encoded_frame(ps_rate_control_api->ps_pic_handling, e_pic_type); +} + +/**************************************************************************** + Function Name : irc_force_I_frame + Description : API call to force an I frame + *****************************************************************************/ +void irc_force_I_frame(rate_control_api_t *ps_rate_control_api) +{ + irc_set_force_I_frame_flag(ps_rate_control_api->ps_pic_handling); +} + +/**************************************************************************** + * Function Name : irc_get_rem_bits_in_period + * Description : API call to get the remaining bits in the current period, + * as reported by the bit allocation module + * *****************************************************************************/ +WORD32 irc_get_rem_bits_in_period(rate_control_api_t *ps_rate_control_api) +{ + return (irc_ba_get_rem_bits_in_period( + ps_rate_control_api->ps_bit_allocation, + ps_rate_control_api->ps_pic_handling)); +} + +/**************************************************************************** + * Function Name : irc_get_vbv_buf_fullness + * Description : API call to get VBV buffer fullness + ******************************************************************************/ +WORD32 irc_get_vbv_buf_fullness(rate_control_api_t *ps_rate_control_api) +{ + return 
(irc_get_cur_vbv_buf_size(ps_rate_control_api->ps_vbr_storage_vbv)); +} + +WORD32 irc_get_vbv_buf_size(rate_control_api_t *ps_rate_control_api) +{ + if(ps_rate_control_api->e_rc_type == CBR_NLDRC + || ps_rate_control_api->e_rc_type == VBR_STREAMING) + { + return (irc_get_cbr_buffer_size(ps_rate_control_api->ps_cbr_buffer)); + } + else + { + return (irc_get_max_vbv_buf_size( + ps_rate_control_api->ps_vbr_storage_vbv)); + } +} + +WORD32 irc_get_vbv_fulness_with_cur_bits(rate_control_api_t *ps_rate_control_api, + UWORD32 u4_bits) +{ + return (irc_vbv_get_vbv_buf_fullness( + ps_rate_control_api->ps_vbr_storage_vbv, u4_bits)); +} + +void irc_set_avg_mb_act(rate_control_api_t *ps_rate_control_api, + WORD32 i4_avg_activity) +{ + irc_mb_update_frame_level(ps_rate_control_api->ps_mb_rate_control, + i4_avg_activity); + return; +} diff --git a/encoder/irc_rate_control_api.h b/encoder/irc_rate_control_api.h new file mode 100755 index 0000000..0173037 --- /dev/null +++ b/encoder/irc_rate_control_api.h @@ -0,0 +1,188 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +#ifndef _RATE_CONTROL_API_H_ +#define _RATE_CONTROL_API_H_ + +#define RC_OK 0 +#define RC_FAIL -1 +#define RC_BENIGN_ERR -2 + +/* This file should only contain RC API function declarations */ + +typedef struct rate_control_api_t *rate_control_handle; + +WORD32 irc_rate_control_num_fill_use_free_memtab(rate_control_handle *pps_rate_control_api, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type); + +void irc_initialise_rate_control(rate_control_handle ps_rate_control_api, + rc_type_e e_rate_control_type, + UWORD8 u1_is_mb_level_rc_on, + UWORD32 u4_avg_bit_rate, + UWORD32 *pu4_peak_bit_rate, + UWORD32 u4_min_bit_rate, + UWORD32 u4_frame_rate, + UWORD32 u4_max_delay, + UWORD32 u4_intra_frame_interval, + UWORD8 *pu1_init_qp, + UWORD32 u4_max_vbv_buff_size, + WORD32 i4_max_inter_frm_int, + WORD32 i4_is_gop_closed, + UWORD8 *pu1_min_max_qp, + WORD32 i4_use_est_intra_sad, + UWORD32 u4_src_ticks, + UWORD32 u4_tgt_ticks); + +/***************************************************************************** + Process level API fuctions (FRAME LEVEL) + *****************************************************************************/ +void irc_flush_buf_frames(rate_control_handle ps_rate_control_api); + +void irc_post_encode_frame_skip(rate_control_handle ps_rate_control_api, + picture_type_e e_pic_type); + +void irc_add_picture_to_stack(rate_control_handle rate_control_api, + WORD32 i4_enc_pic_id); + +void irc_add_picture_to_stack_re_enc(rate_control_handle rate_control_api, + WORD32 i4_enc_pic_id, + picture_type_e e_pic_type); + +void irc_get_picture_details(rate_control_handle rate_control_api, + WORD32 *pi4_pic_id, + WORD32 *pi4_pic_disp_order_no, + picture_type_e *pe_pic_type); + +/* Gets the frame level Qp */ +UWORD8 irc_get_frame_level_qp(rate_control_handle rate_control_api, + picture_type_e pic_type, + WORD32 i4_max_frm_bits); + +vbv_buf_status_e irc_get_buffer_status(rate_control_handle rate_control_api, + WORD32 i4_total_frame_bits, + 
picture_type_e e_pic_type, + WORD32 *pi4_num_bits_to_prevent_vbv_underflow); + +WORD32 irc_get_prev_frm_est_bits(rate_control_handle ps_rate_control_api); + +void irc_update_pic_handling_state(rate_control_handle ps_rate_control_api, + picture_type_e e_pic_type); + +void irc_update_frame_level_info(rate_control_handle ps_rate_control_api, + picture_type_e e_pic_type, + WORD32 *pi4_mb_type_sad, + WORD32 i4_total_frame_bits, + WORD32 i4_model_updation_hdr_bits, + WORD32 *pi4_mb_type_tex_bits, + WORD32 *pi4_tot_mb_type_qp, + WORD32 *pi4_tot_mb_in_type, + WORD32 i4_avg_activity, + UWORD8 u1_is_scd, + WORD32 i4_is_it_a_skip, + WORD32 i4_intra_frm_cost, + WORD32 i4_is_pic_handling_done); + +/***************************************************************************** + MB LEVEL API (just wrapper fucntions) + *****************************************************************************/ + +void irc_init_mb_rc_frame_level(rate_control_handle ps_rate_control_api, + UWORD8 u1_frame_qp);/* Current frame qp*/ + +void irc_get_mb_level_qp(rate_control_handle ps_rate_control_api, + WORD32 i4_cur_mb_activity, + WORD32 *pi4_mb_qp, + picture_type_e e_pic_type); + +WORD32 irc_get_bits_to_stuff(rate_control_handle ps_rate_control_api, + WORD32 i4_tot_consumed_bits, + picture_type_e e_pic_type); + +/****************************************************************************** + Control Level API functions + Logic: The control call sets the state structure of the rate control api + accordingly such that the next process call would implement the same. 
+ ******************************************************************************/ + +void irc_change_inter_frm_int_call(rate_control_handle ps_rate_control_api, + WORD32 i4_inter_frm_int); + +void irc_change_intra_frm_int_call(rate_control_handle ps_rate_control_api, + WORD32 i4_intra_frm_int); + +void irc_change_avg_bit_rate(rate_control_handle ps_rate_control_api, + UWORD32 u4_average_bit_rate); + +void irc_change_frame_rate(rate_control_handle ps_rate_control_api, + UWORD32 u4_frame_rate, + UWORD32 u4_src_ticks, + UWORD32 u4_target_ticks); + +void irc_change_frm_rate_for_bit_alloc(rate_control_handle ps_rate_control_api, + UWORD32 u4_frame_rate); + +void irc_change_init_qp(rate_control_handle ps_rate_control_api, + UWORD8 *init_qp); + +WORD32 irc_change_peak_bit_rate(rate_control_handle ps_rate_control_api, + UWORD32 *u4_peak_bit_rate); + +void irc_change_buffer_delay(rate_control_handle ps_rate_control_api, + UWORD32 u4_buffer_delay); + +void irc_force_I_frame(rate_control_handle ps_rate_control_api); + +void irc_change_min_max_qp(rate_control_handle ps_rate_control_api, + UWORD8 *u1_min_max_qp); + +/******************************************************************************** + Getter functions + For getting the current state of the rate control structures + ********************************************************************************/ + +UWORD32 irc_get_frame_rate(rate_control_handle ps_rate_control_api); + +UWORD32 irc_get_bit_rate(rate_control_handle ps_rate_control_api); + +UWORD32 irc_get_intra_frame_interval(rate_control_handle ps_rate_control_api); + +UWORD32 irc_get_inter_frame_interval(rate_control_handle ps_rate_control_api); + +rc_type_e irc_get_rc_type(rate_control_handle ps_rate_control_api); + +WORD32 irc_get_bits_per_frame(rate_control_handle ps_rate_control_api); + +UWORD32 irc_get_peak_bit_rate(rate_control_handle ps_rate_control_api, + WORD32 i4_index); + +UWORD32 irc_get_max_delay(rate_control_handle ps_rate_control_api); + +UWORD32 
irc_get_seq_no(rate_control_handle ps_rate_control_api); + +WORD32 irc_get_rem_bits_in_period(rate_control_handle ps_rate_control_api); + +WORD32 irc_get_vbv_buf_fullness(rate_control_handle ps_rate_control_api); + +WORD32 irc_get_vbv_buf_size(rate_control_handle ps_rate_control_api); + +WORD32 irc_get_vbv_fulness_with_cur_bits(rate_control_handle ps_rate_control_api, + UWORD32 u4_bits); +#endif diff --git a/encoder/irc_rate_control_api_structs.h b/encoder/irc_rate_control_api_structs.h new file mode 100755 index 0000000..ba39e7f --- /dev/null +++ b/encoder/irc_rate_control_api_structs.h @@ -0,0 +1,93 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +#ifndef _RATE_CONTROL_API_STRUCTS_H_ +#define _RATE_CONTROL_API_STRUCTS_H_ + +/* + * The following definitions were present in irc_cntrl_param.h, moved to this + * file as it is used by irc_rate_control_api.c + */ + +/* num_frm_in_period = BIT_ALLOC_PERIOD*intra_frame_interval */ +#define VBR_BIT_ALLOC_PERIOD 3 +#define CBR_BIT_ALLOC_PERIOD 1 + +/* Rate control state structure */ +typedef struct rate_control_api_t +{ + /* RC Algorithm */ + rc_type_e e_rc_type; + + /* Whether MB level rc is enabled or not */ + UWORD8 u1_is_mb_level_rc_on; + + /* Picture handling struct */ + pic_handling_handle ps_pic_handling; + + /* Model struct for I and P frms */ + rc_rd_model_handle aps_rd_model[MAX_PIC_TYPE]; + + /* VBR storage VBV structure */ + vbr_storage_vbv_handle ps_vbr_storage_vbv; + + /* Calculate the estimated SAD */ + est_sad_handle ps_est_sad; + + /* Allocation of bits for each frame */ + bit_allocation_handle ps_bit_allocation; + + /* Init Qp(also used for Const Qp scenarios) */ + UWORD8 au1_init_qp[MAX_PIC_TYPE]; + + /* MB Level rate control state structure */ + mb_rate_control_handle ps_mb_rate_control; + + UWORD8 au1_is_first_frm_coded[MAX_PIC_TYPE]; + + UWORD8 au1_prev_frm_qp[MAX_PIC_TYPE]; + + cbr_buffer_handle ps_cbr_buffer; + + UWORD8 u1_scd_detected; + + UWORD8 u1_frm_qp_after_scd; + + UWORD8 au1_avg_bitrate_changed[MAX_PIC_TYPE]; + + UWORD8 u1_is_first_frm; + + UWORD8 au1_min_max_qp[(MAX_PIC_TYPE << 1)]; + + WORD32 i4_prev_frm_est_bits; + + vbr_str_prms_t s_vbr_str_prms; + + /* Store the values which are to be impacted after a delay */ + UWORD32 u4_frms_in_delay_prd_for_peak_bit_rate_change; + + UWORD32 au4_new_peak_bit_rate[MAX_NUM_DRAIN_RATES]; + + picture_type_e prev_ref_pic_type; + +} rate_control_api_t; + +#endif/*_RATE_CONTROL_API_STRUCTS_H_*/ + diff --git a/encoder/irc_rd_model.c b/encoder/irc_rd_model.c new file mode 100755 index 0000000..f5c0737 --- /dev/null +++ b/encoder/irc_rd_model.c @@ -0,0 +1,565 @@ 
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/****************************************************************************/ +/* File Name : irc_rd_model.c */ +/* */ +/* Description : Implall the Functions to Model the */ +/* Rate Distortion Behaviour of the Codec over the Last */ +/* Few Frames. 
*/ +/* */ +/* List of Functions : irc_update_frame_rd_model */ +/* estimate_mpeg2_qp_for_resbits */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 21 06 2006 Sarat Initial Version */ +/****************************************************************************/ + +/* System include files */ +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "math.h" + +/* User include files */ +#include "irc_datatypes.h" +#include "irc_common.h" +#include "irc_mem_req_and_acq.h" +#include "irc_rd_model.h" +#include "irc_rd_model_struct.h" + + +WORD32 irc_rd_model_num_fill_use_free_memtab(rc_rd_model_t **pps_rc_rd_model, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type) +{ + WORD32 i4_mem_tab_idx = 0; + static rc_rd_model_t s_rc_rd_model_temp; + + /* + * Hack for al alloc, during which we don't have any state memory. + * Dereferencing can cause issues + */ + if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB) + (*pps_rc_rd_model) = &s_rc_rd_model_temp; + + /*for src rate control state structure*/ + if(e_func_type != GET_NUM_MEMTAB) + { + fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(rc_rd_model_t), + ALIGN_128_BYTE, PERSISTENT, DDR); + use_or_fill_base(&ps_memtab[0], (void**)pps_rc_rd_model, e_func_type); + } + i4_mem_tab_idx++; + + return (i4_mem_tab_idx); +} + +void irc_init_frm_rc_rd_model(rc_rd_model_t *ps_rd_model, + UWORD8 u1_max_frames_modelled) +{ + + ps_rd_model->u1_num_frms_in_model = 0; + ps_rd_model->u1_curr_frm_counter = 0; + ps_rd_model->u1_max_frms_to_model = u1_max_frames_modelled; + + ps_rd_model->model_coeff_a_lin_wo_int = 0; + ps_rd_model->model_coeff_b_lin_wo_int = 0; + ps_rd_model->model_coeff_c_lin_wo_int = 0; +} + +void irc_reset_frm_rc_rd_model(rc_rd_model_t *ps_rd_model) +{ + ps_rd_model->u1_num_frms_in_model = 0; + ps_rd_model->u1_curr_frm_counter = 0; + + ps_rd_model->model_coeff_a_lin_wo_int = 0; + 
ps_rd_model->model_coeff_b_lin_wo_int = 0; + ps_rd_model->model_coeff_c_lin_wo_int = 0; +} + +static UWORD8 find_model_coeffs(UWORD32 *pi4_res_bits, + UWORD32 *pi4_sad_h264, + UWORD8 *pu1_num_skips, + UWORD8 *pui_avg_mpeg2_qp, + UWORD8 u1_num_frms, + UWORD8 u1_model_used, + WORD8 *pi1_frame_index, + model_coeff *pmc_model_coeff, + model_coeff *pmc_model_coeff_lin, + model_coeff *pmc_model_coeff_lin_wo_int, + rc_rd_model_t *ps_rd_model) +{ + UWORD32 i; + UWORD8 u1_num_frms_used = 0; + UWORD8 u1_frm_indx; + +#if !(ENABLE_QUAD_RC_MODEL||ENABLE_LIN_MODEL_WITH_INTERCEPT) + UNUSED(pu1_num_skips); + UNUSED(pmc_model_coeff); + UNUSED(pmc_model_coeff_lin); +#endif + float sum_y = 0; + float sum_x_y = 0; + float sum_x2_y = 0; + float sum_x = 0; + float sum_x2 = 0; + float sum_x3 = 0; + float sum_x4 = 0; + + float x0, y0; + float model_coeff_a = 0.0, model_coeff_b = 0.0, model_coeff_c = 0.0; + + for(i = 0; i < u1_num_frms; i++) + { + if(-1 == pi1_frame_index[i]) + continue; + + u1_frm_indx = (UWORD8)pi1_frame_index[i]; + + y0 = (float)(pi4_res_bits[u1_frm_indx]); + x0 = (float)(pi4_sad_h264[u1_frm_indx] + / (float)pui_avg_mpeg2_qp[u1_frm_indx]); + + sum_y += y0; + sum_x_y += x0 * y0; + sum_x2_y += x0 * x0 * y0; + sum_x += x0; + sum_x2 += x0 * x0; + sum_x3 += x0 * x0 * x0; + sum_x4 += x0 * x0 * x0 * x0; + u1_num_frms_used++; + } + + sum_y /= u1_num_frms_used; + sum_x_y /= u1_num_frms_used; + sum_x2_y /= u1_num_frms_used; + sum_x /= u1_num_frms_used; + sum_x2 /= u1_num_frms_used; + sum_x3 /= u1_num_frms_used; + sum_x4 /= u1_num_frms_used; + + { + UWORD8 u1_curr_frame_index; + UWORD8 u1_avgqp_prvfrm; + UWORD32 u4_prevfrm_bits, u4_prevfrm_sad; + + u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter; + if(0 == u1_curr_frame_index) + u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1); + else + u1_curr_frame_index--; + + u1_avgqp_prvfrm = ps_rd_model->pu1_avg_qp[u1_curr_frame_index]; + u4_prevfrm_bits = ps_rd_model->pi4_res_bits[u1_curr_frame_index]; + u4_prevfrm_sad = 
ps_rd_model->pi4_sad[u1_curr_frame_index]; + + if(0 != u4_prevfrm_sad) + model_coeff_a = (float)(u4_prevfrm_bits * u1_avgqp_prvfrm) + / u4_prevfrm_sad; + else + model_coeff_a = 0; + + model_coeff_b = 0; + model_coeff_c = 0; + + pmc_model_coeff_lin_wo_int[0] = model_coeff_b; + pmc_model_coeff_lin_wo_int[1] = model_coeff_a; + pmc_model_coeff_lin_wo_int[2] = model_coeff_c; + } + + return u1_model_used; +} + +static void irc_update_frame_rd_model(rc_rd_model_t *ps_rd_model) +{ + WORD8 pi1_frame_index[MAX_FRAMES_MODELLED], + pi1_frame_index_initial[MAX_FRAMES_MODELLED]; + + UWORD8 u1_num_skips_temp; + UWORD8 u1_avg_mpeg2_qp_temp, u1_min_mpeg2_qp, u1_max_mpeg2_qp; + UWORD8 u1_num_frms_input, u1_num_active_frames, u1_reject_frame; + UWORD32 u4_num_skips; + + UWORD8 u1_min2_mpeg2_qp, u1_max2_mpeg2_qp; + UWORD8 u1_min_qp_frame_indx, u1_max_qp_frame_indx; + UWORD8 pu1_num_frames[MPEG2_QP_ELEM]; + model_coeff model_coeff_array[3], model_coeff_array_lin[3], + model_coeff_array_lin_wo_int[3]; + UWORD32 i; + UWORD8 u1_curr_frame_index; + + u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter; + + ps_rd_model->u1_model_used = PREV_FRAME_MODEL; + + if(0 == u1_curr_frame_index) + u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1); + else + u1_curr_frame_index--; + + /************************************************************************/ + /* Rearrange data to be fed into a Linear Regression Module */ + /* Module finds a,b,c such that */ + /* y = ax + bx^2 + c */ + /************************************************************************/ + u4_num_skips = 0; + u1_num_frms_input = 0; + memset(pu1_num_frames, 0, MPEG2_QP_ELEM); + memset(pi1_frame_index, -1, MAX_FRAMES_MODELLED); + u1_min_mpeg2_qp = MAX_MPEG2_QP; + u1_max_mpeg2_qp = 0; + + u1_num_active_frames = ps_rd_model->u1_num_frms_in_model; + if(u1_num_active_frames > MAX_ACTIVE_FRAMES) + { + u1_num_active_frames = MAX_ACTIVE_FRAMES; + } + + /************************************************************************/ + /* Choose 
the set of Points to be used for MSE fit of Quadratic model */ + /* Points chosen are spread across the Qp range. Max of 2 points are */ + /* chosen for a Qp. */ + /************************************************************************/ + for(i = 0; i < u1_num_active_frames; i++) + { + u1_reject_frame = 0; + u1_num_skips_temp = ps_rd_model->pu1_num_skips[u1_curr_frame_index]; + u1_avg_mpeg2_qp_temp = ps_rd_model->pu1_avg_qp[u1_curr_frame_index]; + + if((0 == u4_num_skips) && (0 != u1_num_skips_temp)) + u1_reject_frame = 1; + if((1 == u4_num_skips) && (u1_num_skips_temp > 1)) + u1_reject_frame = 1; + if(pu1_num_frames[u1_avg_mpeg2_qp_temp] >= 2) + u1_reject_frame = 1; + + if(0 == i) + u1_reject_frame = 0; + + if(0 == u1_reject_frame) + { + pi1_frame_index[u1_num_frms_input] = (WORD8)u1_curr_frame_index; + pu1_num_frames[u1_avg_mpeg2_qp_temp] += 1; + + if(u1_min_mpeg2_qp > u1_avg_mpeg2_qp_temp) + u1_min_mpeg2_qp = u1_avg_mpeg2_qp_temp; + if(u1_max_mpeg2_qp < u1_avg_mpeg2_qp_temp) + u1_max_mpeg2_qp = u1_avg_mpeg2_qp_temp; + + u1_num_frms_input++; + } + + if(0 == u1_curr_frame_index) + u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1); + else + u1_curr_frame_index--; + } + + /************************************************************************/ + /* Add Pivot Points to the Data set to be used for finding Quadratic */ + /* Model Coeffs. These will help in constraining the shape of Quadratic*/ + /* to adapt too much to the Local deviations. 
*/ + /************************************************************************/ + u1_min2_mpeg2_qp = u1_min_mpeg2_qp; + u1_max2_mpeg2_qp = u1_max_mpeg2_qp; + u1_min_qp_frame_indx = INVALID_FRAME_INDEX; + u1_max_qp_frame_indx = INVALID_FRAME_INDEX; + + /* Loop runnning over the Stored Frame Level Data + to find frames of MinQp and MaxQp */ + for(; i < ps_rd_model->u1_num_frms_in_model; i++) + { + u1_num_skips_temp = ps_rd_model->pu1_num_skips[u1_curr_frame_index]; + u1_avg_mpeg2_qp_temp = ps_rd_model->pu1_avg_qp[u1_curr_frame_index]; + + if(((0 == u4_num_skips) && (0 != u1_num_skips_temp)) + || ((1 == u4_num_skips) && (u1_num_skips_temp > 1))) + continue; + + if(u1_min2_mpeg2_qp > u1_avg_mpeg2_qp_temp) + { + u1_min2_mpeg2_qp = u1_avg_mpeg2_qp_temp; + u1_min_qp_frame_indx = u1_curr_frame_index; + } + if(u1_max2_mpeg2_qp < u1_avg_mpeg2_qp_temp) + { + u1_max2_mpeg2_qp = u1_avg_mpeg2_qp_temp; + u1_max_qp_frame_indx = u1_curr_frame_index; + } + if(0 == u1_curr_frame_index) + u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1); + else + u1_curr_frame_index--; + } + + /* Add the Chosen Points to the regression data set */ + if(INVALID_FRAME_INDEX != u1_min_qp_frame_indx) + { + pi1_frame_index[u1_num_frms_input] = (WORD8)u1_min_qp_frame_indx; + u1_num_frms_input++; + } + if(INVALID_FRAME_INDEX != u1_max_qp_frame_indx) + { + pi1_frame_index[u1_num_frms_input] = (WORD8)u1_max_qp_frame_indx; + u1_num_frms_input++; + } + memcpy(pi1_frame_index_initial, pi1_frame_index, MAX_FRAMES_MODELLED); + + /***** Call the Module to Return the Coeffs for the Fed Data *****/ + ps_rd_model->u1_model_used = find_model_coeffs(ps_rd_model->pi4_res_bits, + ps_rd_model->pi4_sad, + ps_rd_model->pu1_num_skips, + ps_rd_model->pu1_avg_qp, + u1_num_frms_input, + ps_rd_model->u1_model_used, + pi1_frame_index, + model_coeff_array, + model_coeff_array_lin, + model_coeff_array_lin_wo_int, + ps_rd_model); + + ps_rd_model->model_coeff_b_lin_wo_int = model_coeff_array_lin_wo_int[0]; + 
ps_rd_model->model_coeff_a_lin_wo_int = model_coeff_array_lin_wo_int[1]; + ps_rd_model->model_coeff_c_lin_wo_int = model_coeff_array_lin_wo_int[2]; +} + +UWORD32 irc_estimate_bits_for_qp(rc_rd_model_t *ps_rd_model, + UWORD32 u4_estimated_sad, + UWORD8 u1_avg_qp) +{ + float fl_num_bits = 0; + + fl_num_bits = ps_rd_model->model_coeff_a_lin_wo_int + * ((float)(u4_estimated_sad / u1_avg_qp)); + + return ((UWORD32)fl_num_bits); +} + +UWORD8 irc_find_qp_for_target_bits(rc_rd_model_t *ps_rd_model, + UWORD32 u4_target_res_bits, + UWORD32 u4_estimated_sad, + UWORD8 u1_min_qp, + UWORD8 u1_max_qp) +{ + UWORD8 u1_qp; + float x_value = 1.0, f_qp; + + ps_rd_model->u1_model_used = PREV_FRAME_MODEL; + + { + x_value = (float)u4_target_res_bits + / ps_rd_model->model_coeff_a_lin_wo_int; + } + + if(0 != x_value) + f_qp = u4_estimated_sad / x_value; + else + f_qp = 255; + + if(f_qp > 255) + f_qp = 255; + + /* Truncating the QP to the Max and Min Qp values possible */ + if(f_qp < u1_min_qp) + f_qp = u1_min_qp; + if(f_qp > u1_max_qp) + f_qp = u1_max_qp; + + u1_qp = (UWORD8)(f_qp + 0.5); + + return u1_qp; +} + +void irc_add_frame_to_rd_model(rc_rd_model_t *ps_rd_model, + UWORD32 i4_res_bits, + UWORD8 u1_avg_mp2qp, + UWORD32 i4_sad_h264, + UWORD8 u1_num_skips) +{ + UWORD8 u1_curr_frame_index; + u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter; + + /*Insert the Present Frame Data into the RD Model State Memory*/ + ps_rd_model->pi4_res_bits[u1_curr_frame_index] = i4_res_bits; + ps_rd_model->pi4_sad[u1_curr_frame_index] = i4_sad_h264; + ps_rd_model->pu1_num_skips[u1_curr_frame_index] = u1_num_skips; + ps_rd_model->pu1_avg_qp[u1_curr_frame_index] = u1_avg_mp2qp; + + ps_rd_model->u1_curr_frm_counter++; + if(MAX_FRAMES_MODELLED == ps_rd_model->u1_curr_frm_counter) + ps_rd_model->u1_curr_frm_counter = 0; + + if(ps_rd_model->u1_num_frms_in_model < ps_rd_model->u1_max_frms_to_model) + { + ps_rd_model->u1_num_frms_in_model++; + } + irc_update_frame_rd_model(ps_rd_model); +} + 
+/***************************************************************************** + *Function Name : irc_calc_per_frm_bits + *Description : + *Inputs : pu2_num_pics_of_a_pic_type + * - pointer to RC api pointer + * pu2_num_pics_of_a_pic_type + * - N1, N2,...Nk + * pu1_update_pic_type_model + * - flag which tells whether or not to update model + * coefficients of a particular pic-type + * u1_num_pic_types + * - value of k + * pu4_num_skip_of_a_pic_type + * - the number of skips of that pic-type. It "may" be used to + * update the model coefficients at a later point. Right now + * it is not being used at all. + * u1_base_pic_type + * - base pic type index wrt which alpha & beta are calculated + * pfl_gamma + * - gamma_i = beta_i / alpha_i + * pfl_eta + * - + * u1_curr_pic_type + * - the current pic-type for which the targetted bits need to + * be computed + * u4_bits_for_sub_gop + * - the number of bits to be consumed for the remaining part of + * sub-gop + * u4_curr_estimated_sad + * - + * pu1_curr_pic_type_qp + * - output of this function + *****************************************************************************/ + +WORD32 irc_calc_per_frm_bits(rc_rd_model_t *ps_rd_model, + UWORD16 *pu2_num_pics_of_a_pic_type, + UWORD8 *pu1_update_pic_type_model, + UWORD8 u1_num_pic_types, + UWORD32 *pu4_num_skip_of_a_pic_type, + UWORD8 u1_base_pic_type, + float *pfl_gamma, + float *pfl_eta, + UWORD8 u1_curr_pic_type, + UWORD32 u4_bits_for_sub_gop, + UWORD32 u4_curr_estimated_sad, + UWORD8 *pu1_curr_pic_type_qp) +{ + WORD32 i4_per_frm_bits_Ti; + UWORD8 u1_i; + rc_rd_model_t *ps_rd_model_of_pic_type; + + UNUSED(pu4_num_skip_of_a_pic_type); + UNUSED(u1_base_pic_type); + + /* First part of this function updates all the model coefficients */ + /*for all the pic-types */ + { + for(u1_i = 0; u1_i < u1_num_pic_types; u1_i++) + { + if((0 != pu2_num_pics_of_a_pic_type[u1_i]) + && (1 == pu1_update_pic_type_model[u1_i])) + { + irc_update_frame_rd_model(&ps_rd_model[u1_i]); + } + } + } + + 
/* + * The second part of this function deals with solving the + * equation using all the pic-types models + */ + { + UWORD8 u1_combined_model_used; + + /* solve the equation */ + { + model_coeff eff_A; + float fl_sad_by_qp_base; + float fl_sad_by_qp_curr_frm = 1.0; + float fl_qp_curr_frm; + float fl_bits_for_curr_frm = 0; + + + + /* If the combined chosen model is linear model without an intercept */ + + u1_combined_model_used = PREV_FRAME_MODEL; + { + eff_A = 0.0; + + for(u1_i = 0; u1_i < u1_num_pic_types; u1_i++) + { + ps_rd_model_of_pic_type = ps_rd_model + u1_i; + + eff_A += ((pfl_eta[u1_i] + + pu2_num_pics_of_a_pic_type[u1_i]- 1) + * ps_rd_model_of_pic_type->model_coeff_a_lin_wo_int + * pfl_gamma[u1_i]); + } + + fl_sad_by_qp_base = u4_bits_for_sub_gop / eff_A; + + fl_sad_by_qp_curr_frm = fl_sad_by_qp_base + * pfl_gamma[u1_curr_pic_type] + * pfl_eta[u1_curr_pic_type]; + + ps_rd_model_of_pic_type = ps_rd_model + u1_curr_pic_type; + + fl_bits_for_curr_frm = + ps_rd_model_of_pic_type->model_coeff_a_lin_wo_int + * fl_sad_by_qp_curr_frm; + } + + /* + * Store the model that was finally used to calculate Qp. + * This is so that the same model is used in further calculations + * for this picture. 
+ */ + ps_rd_model_of_pic_type = ps_rd_model + u1_curr_pic_type; + ps_rd_model_of_pic_type->u1_model_used = u1_combined_model_used; + + i4_per_frm_bits_Ti = (WORD32)(fl_bits_for_curr_frm + 0.5); + + if(fl_sad_by_qp_curr_frm > 0) + fl_qp_curr_frm = (float)u4_curr_estimated_sad + / fl_sad_by_qp_curr_frm; + else + fl_qp_curr_frm = 255; + + if(fl_qp_curr_frm > 255) + fl_qp_curr_frm = 255; + + *pu1_curr_pic_type_qp = (fl_qp_curr_frm + 0.5); + + } + } + return (i4_per_frm_bits_Ti); +} + +model_coeff irc_get_linear_coefficient(rc_rd_model_t *ps_rd_model) +{ + return (ps_rd_model->model_coeff_a_lin_wo_int); +} + + diff --git a/encoder/irc_rd_model.h b/encoder/irc_rd_model.h new file mode 100755 index 0000000..8be31c1 --- /dev/null +++ b/encoder/irc_rd_model.h @@ -0,0 +1,98 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* File Name : irc_rd_model.h */ +/* */ +/* Description : Implements all the Functions to Model the */ +/* Rate Distortion Behaviour of the Codec over the Last */ +/* Few Frames. 
*/ +/* */ +/* List of Functions : irc_update_frame_rd_model */ +/* estimate_mpeg2_qp_for_resbits */ +/* update_mb_rd_model */ +/* find_model_coeffs */ +/* refine_set_of_points */ +/* init_mb_rd_model */ +/* irc_add_frame_to_rd_model */ +/* irc_find_qp_for_target_bits */ +/* */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 21 06 2006 Sarat Initial Version */ +/*****************************************************************************/ + +#ifndef RC_RD_MODEL +#define RC_RD_MODEL + +#define MAX_FRAMES_MODELLED 16 + +typedef float model_coeff; +typedef struct rc_rd_model_t *rc_rd_model_handle; + +WORD32 irc_rd_model_num_fill_use_free_memtab(rc_rd_model_handle *pps_rc_rd_model, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type); +/* Interface Functions */ +/* Initialise the rate distortion model */ +void irc_init_frm_rc_rd_model(rc_rd_model_handle ps_rd_model, + UWORD8 u1_max_frames_modelled); + +/* Reset the rate distortion model */ +void irc_reset_frm_rc_rd_model(rc_rd_model_handle ps_rd_model); + +/* Returns the Qp to be used for the given bits and SAD */ +UWORD8 irc_find_qp_for_target_bits(rc_rd_model_handle ps_rd_model, + UWORD32 u4_target_res_bits, + UWORD32 u4_estimated_sad, + UWORD8 u1_max_qp, + UWORD8 u1_min_qp); + +/* Updates the frame level statistics after encoding a frame */ +void irc_add_frame_to_rd_model(rc_rd_model_handle ps_rd_model, + UWORD32 i4_res_bits, + UWORD8 u1_avg_mp2qp, + UWORD32 i4_sad_h264, + UWORD8 u1_num_skips); + +UWORD32 irc_estimate_bits_for_qp(rc_rd_model_handle ps_rd_model, + UWORD32 u4_estimated_sad, + UWORD8 u1_avg_qp); + +/* Get the Linear model coefficient */ +model_coeff irc_get_linear_coefficient(rc_rd_model_handle ps_rd_model); + +WORD32 irc_calc_per_frm_bits(rc_rd_model_handle ps_rd_model, + UWORD16 *pu2_num_pics_of_a_pic_type, + UWORD8 *pu1_update_pic_type_model, + UWORD8 u1_num_pic_types, + UWORD32 
*pu4_num_skip_of_a_pic_type, + UWORD8 u1_base_pic_type, + float *pfl_gamma, + float *pfl_eta, + UWORD8 u1_curr_pic_type, + UWORD32 u4_bits_for_sub_gop, + UWORD32 u4_curr_estimated_sad, + UWORD8 *pu1_curr_pic_type_qp); +#endif + diff --git a/encoder/irc_rd_model_struct.h b/encoder/irc_rd_model_struct.h new file mode 100755 index 0000000..dc4c0ea --- /dev/null +++ b/encoder/irc_rd_model_struct.h @@ -0,0 +1,75 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +#ifndef RC_RD_MODEL_STRUCT +#define RC_RD_MODEL_STRUCT + +/* Enable or disable the QUAD model */ +#define ENABLE_QUAD_RC_MODEL 0 +#define ENABLE_LIN_MODEL_WITH_INTERCEPT 0 + +/* Number of elements for QP */ +#define MPEG2_QP_ELEM (MAX_MPEG2_QP + 1) + + +#if ENABLE_QUAD_RC_MODEL +#define QUAD 1 +#define MIN_FRAMES_FOR_QUAD_MODEL 5 +#endif + +#define MAX_ACTIVE_FRAMES 16 +#define MIN_FRAMES_FOR_LIN_MODEL 3 +#define INVALID_FRAME_INDEX 255 + +#define UP_THR_SM 1 /* (1 / pow(2,4)) = 0.0625 */ +#define UP_THR_E 4 + +#define LO_THR_SM 368 /* (368.64 / pow(2,14)) = 0.0225 */ +#define LO_THR_E 14 + +#define LIN_DEV_THR_SM 1 /* (1 / pow(2,2)) = 0.25 */ +#define LIN_DEV_THR_E 2 + +/* Model id stored in u1_model_used by the code in irc_rd_model.c */ +#define PREV_FRAME_MODEL 2 + +/* Q Factors used for fixed point calculation */ +#define Q_FORMAT_GAMMA 8 +#define Q_FORMAT_ETA 8 + +/* Rate distortion model state: a history of per-frame statistics plus the model coefficients derived from it */ +typedef struct rc_rd_model_t +{ + /* Slot of the per-frame arrays that the next added frame is written to; wraps to 0 at MAX_FRAMES_MODELLED */ + UWORD8 u1_curr_frm_counter; + /* Number of frames held in the model; grows by one per added frame up to u1_max_frms_to_model */ + UWORD8 u1_num_frms_in_model; + /* Upper limit for u1_num_frms_in_model, set when the model is initialised */ + UWORD8 u1_max_frms_to_model; + /* Id of the model in use; irc_rd_model.c sets it to PREV_FRAME_MODEL */ + UWORD8 u1_model_used; + + /* Per-frame history written by irc_add_frame_to_rd_model: bits consumed and SAD */ + UWORD32 pi4_res_bits[MAX_FRAMES_MODELLED]; + UWORD32 pi4_sad[MAX_FRAMES_MODELLED]; + + /* Per-frame history written by irc_add_frame_to_rd_model: skip count and average Qp */ + UWORD8 pu1_num_skips[MAX_FRAMES_MODELLED]; + UWORD8 pu1_avg_qp[MAX_FRAMES_MODELLED]; + /* NOTE(review): not referenced by the functions of irc_rd_model.c; purpose to be confirmed */ + UWORD8 au1_num_frames[MPEG2_QP_ELEM]; + + /* Linear model without intercept: a is (bits * Qp / SAD) of the last stored frame; b and c are always written as 0 */ + model_coeff model_coeff_a_lin_wo_int; + model_coeff model_coeff_b_lin_wo_int; + model_coeff model_coeff_c_lin_wo_int; +} rc_rd_model_t; + +#endif /* RC_RD_MODEL_STRUCT */ diff --git a/encoder/irc_trace_support.h b/encoder/irc_trace_support.h new file mode 100755 index 0000000..c35bd4f --- /dev/null +++ b/encoder/irc_trace_support.h @@ -0,0 +1,61 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_trace_support.h +* +* @brief +* This file contains extern declarations of routines that could be helpful +* for debugging purposes. +* +* @author +* Harish +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef TRACE_SUPPORT_H_ +#define TRACE_SUPPORT_H_ + +/*****************************************************************************/ +/* Structures */ +/*****************************************************************************/ + +typedef struct +{ + WORD8 * pu1_buf; + WORD32 i4_offset; + WORD32 i4_max_size; +}trace_support_t; + +/*****************************************************************************/ +/* Extern function declarations */ +/*****************************************************************************/ + +void init_trace_support(WORD8 *pu1_buf, WORD32 i4_size); + +int trace_printf(const WORD8 *format, ...); + +#endif // TRACE_SUPPORT_H_ diff --git a/encoder/irc_vbr_storage_vbv.c b/encoder/irc_vbr_storage_vbv.c new file mode 100755 index 0000000..23e9959 --- /dev/null +++ b/encoder/irc_vbr_storage_vbv.c @@ -0,0 +1,368 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> + +/* User include files */ +#include "irc_datatypes.h" +#include "irc_common.h" +#include "irc_cntrl_param.h" +#include "irc_mem_req_and_acq.h" +#include "irc_fixed_point_error_bits.h" +#include "irc_vbr_storage_vbv.h" +#include "irc_trace_support.h" + +#define MAX(x, y) ((x) > (y) ? 
(x) : (y)) + +typedef struct vbr_storage_vbv_t +{ + WORD32 i4_max_buf_size; + WORD32 i4_cur_buf_size; + WORD32 i4_max_bits_inflow_per_frm_period; + WORD32 i4_max_bits_per_tgt_frm; + /* Storing input variables */ + WORD32 i4_max_bit_rate; + WORD32 i4_max_frame_rate; + /* Error bits calculation module */ + error_bits_handle ps_error_bits; + +} vbr_storage_vbv_t; + +static void overflow_avoided_summation(WORD32 *pi4_accumulator, WORD32 i4_input) +{ + if((pi4_accumulator[0] > 0) + && (((int)0x7fffffff - pi4_accumulator[0]) < i4_input)) + { + pi4_accumulator[0] = 0x7fffffff; + } + else if((pi4_accumulator[0] < 0) + && (((int)0x80000000 - pi4_accumulator[0]) > i4_input)) + { + pi4_accumulator[0] = 0x80000000; + } + else + { + pi4_accumulator[0] += i4_input; + } +} + +WORD32 irc_vbr_vbv_num_fill_use_free_memtab(vbr_storage_vbv_t **pps_vbr_storage_vbv, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type) +{ + WORD32 i4_mem_tab_idx = 0; + static vbr_storage_vbv_t s_vbr_storage_vbv_temp; + + /* + * Hack for al alloc, during which we don't have any state memory. 
+ * Dereferencing can cause issues + */ + if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB) + (*pps_vbr_storage_vbv) = &s_vbr_storage_vbv_temp; + + /*for src rate control state structure*/ + if(e_func_type != GET_NUM_MEMTAB) + { + fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(vbr_storage_vbv_t), + ALIGN_128_BYTE, PERSISTENT, DDR); + use_or_fill_base(&ps_memtab[0], (void**)pps_vbr_storage_vbv, + e_func_type); + } + i4_mem_tab_idx++; + + i4_mem_tab_idx += irc_error_bits_num_fill_use_free_memtab( + &pps_vbr_storage_vbv[0]->ps_error_bits, + &ps_memtab[i4_mem_tab_idx], e_func_type); + return (i4_mem_tab_idx); +} + +void irc_init_vbr_vbv(vbr_storage_vbv_t *ps_vbr_storage_vbv, + WORD32 i4_max_bit_rate, + WORD32 i4_frm_rate, + WORD32 i4_max_vbv_buff_size) +{ + ps_vbr_storage_vbv->i4_max_buf_size = i4_max_vbv_buff_size; + ps_vbr_storage_vbv->i4_cur_buf_size = i4_max_vbv_buff_size; + + /* + * Calculate the max number of bits that flow into the decoder + * in the interval of two frames + */ + X_PROD_Y_DIV_Z(i4_max_bit_rate, 1000, i4_frm_rate, + ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period); + + /* init error bits */ + irc_init_error_bits(ps_vbr_storage_vbv->ps_error_bits, i4_frm_rate, + i4_max_bit_rate); + + /* Storing the input values */ + ps_vbr_storage_vbv->i4_max_bits_per_tgt_frm = + ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period; + ps_vbr_storage_vbv->i4_max_bit_rate = i4_max_bit_rate; + ps_vbr_storage_vbv->i4_max_frame_rate = i4_frm_rate; +} + +void irc_update_vbr_vbv(vbr_storage_vbv_t *ps_vbr_storage_vbv, + WORD32 i4_total_bits_decoded) +{ + WORD32 i4_error_bits = irc_get_error_bits( + ps_vbr_storage_vbv->ps_error_bits); + /* + * In the time interval between two decoded frames the buffer would have been + * filled up by the max_bits_inflow_per_frm_period. 
+ */ + overflow_avoided_summation( + &ps_vbr_storage_vbv->i4_cur_buf_size, + (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period + + i4_error_bits)); + + if(ps_vbr_storage_vbv->i4_cur_buf_size + > ps_vbr_storage_vbv->i4_max_buf_size) + { + ps_vbr_storage_vbv->i4_cur_buf_size = + ps_vbr_storage_vbv->i4_max_buf_size; + } + + ps_vbr_storage_vbv->i4_cur_buf_size -= i4_total_bits_decoded; + + /* Update the error bits state */ + irc_update_error_bits(ps_vbr_storage_vbv->ps_error_bits); + +} + +WORD32 irc_get_max_target_bits(vbr_storage_vbv_t *ps_vbr_storage_vbv) +{ + WORD32 i4_cur_buf_size = ps_vbr_storage_vbv->i4_cur_buf_size; + WORD32 i4_error_bits = irc_get_error_bits( + ps_vbr_storage_vbv->ps_error_bits); + + /* The buffer size when the next frame is decoded */ + overflow_avoided_summation( + &i4_cur_buf_size, + (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period + + i4_error_bits)); + if(i4_cur_buf_size > ps_vbr_storage_vbv->i4_max_buf_size) + { + i4_cur_buf_size = ps_vbr_storage_vbv->i4_max_buf_size; + } + + /* + * Thus for the next frame the maximum number of bits the decoder can consume + * without underflow is i4_cur_buf_size + */ + return i4_cur_buf_size; +} + +/**************************************************************************** + Function Name : irc_get_buffer_status + Description : Gets the state of VBV buffer + Inputs : Rate control API , header and texture bits + Outputs : 0 = normal, 1 = underflow, 2= overflow + Returns : vbv_buf_status_e + *****************************************************************************/ +vbv_buf_status_e irc_get_vbv_buffer_status(vbr_storage_vbv_t *ps_vbr_storage_vbv, + WORD32 i4_total_frame_bits, + WORD32 *pi4_num_bits_to_prevent_vbv_underflow) +{ + vbv_buf_status_e e_buf_status; + WORD32 i4_cur_buf; + WORD32 i4_error_bits = irc_get_error_bits( + ps_vbr_storage_vbv->ps_error_bits); + + /* error bits due to fixed point computation of drain rate*/ + i4_cur_buf = ps_vbr_storage_vbv->i4_cur_buf_size; + 
overflow_avoided_summation( + &i4_cur_buf, + (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period + + i4_error_bits)); + + if(i4_cur_buf > ps_vbr_storage_vbv->i4_max_buf_size) + { + i4_cur_buf = ps_vbr_storage_vbv->i4_max_buf_size; + } + + pi4_num_bits_to_prevent_vbv_underflow[0] = i4_cur_buf; + + i4_cur_buf -= i4_total_frame_bits; + if(i4_cur_buf < 0) + { + e_buf_status = VBV_UNDERFLOW; + } + else if(i4_cur_buf > ps_vbr_storage_vbv->i4_max_buf_size) + { + e_buf_status = VBV_OVERFLOW; + } + else if(i4_cur_buf < (ps_vbr_storage_vbv->i4_max_buf_size >> 2)) + { + e_buf_status = VBR_CAUTION; + } + else + { + e_buf_status = VBV_NORMAL; + } + + return e_buf_status; +} + +UWORD8 irc_restrict_swing_dvd_comp(vbr_storage_vbv_t *ps_vbr_storage_vbv) +{ + UWORD8 u1_restrict_swing = 1; + + if(ps_vbr_storage_vbv->i4_cur_buf_size + < (ps_vbr_storage_vbv->i4_max_buf_size >> 1)) + { + u1_restrict_swing = 0; + } + + return (u1_restrict_swing); +} + +WORD32 irc_get_max_vbv_buf_size(vbr_storage_vbv_t *ps_vbr_storage_vbv) +{ + return (ps_vbr_storage_vbv->i4_max_buf_size); +} + +WORD32 irc_get_cur_vbv_buf_size(vbr_storage_vbv_t *ps_vbr_storage_vbv) +{ + return (ps_vbr_storage_vbv->i4_cur_buf_size); +} + +WORD32 irc_get_max_bits_inflow_per_frm_periode(vbr_storage_vbv_t *ps_vbr_storage_vbv) +{ + return (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period); +} + +WORD32 irc_get_max_bits_per_tgt_frm(vbr_storage_vbv_t *ps_vbr_storage_vbv) +{ + return (ps_vbr_storage_vbv->i4_max_bits_per_tgt_frm); +} + +WORD32 irc_vbv_get_vbv_buf_fullness(vbr_storage_vbv_t *ps_vbr_storage_vbv, + UWORD32 u4_bits) +{ + WORD32 i4_error_bits = irc_get_error_bits( + ps_vbr_storage_vbv->ps_error_bits); + WORD32 i4_cur_buf_size = ps_vbr_storage_vbv->i4_cur_buf_size; + + overflow_avoided_summation( + &i4_cur_buf_size, + (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period + + i4_error_bits)); + + if(i4_cur_buf_size > ps_vbr_storage_vbv->i4_max_buf_size) + { + i4_cur_buf_size = 
ps_vbr_storage_vbv->i4_max_buf_size; + } + + i4_cur_buf_size -= u4_bits; + + return (i4_cur_buf_size); +} + +WORD32 irc_get_max_tgt_bits_dvd_comp(vbr_storage_vbv_t *ps_vbr_storage_vbv, + WORD32 i4_rem_bits_in_gop, + WORD32 i4_rem_frms_in_gop, + picture_type_e e_pic_type) +{ + WORD32 i4_dbf_max, i4_dbf_min, i4_dbf_prev, i4_vbv_size, i4_dbf_desired; + WORD32 i4_max_tgt_bits; + + i4_vbv_size = ps_vbr_storage_vbv->i4_max_buf_size; + i4_dbf_max = 95 * i4_vbv_size / 100; + i4_dbf_min = 10 * i4_vbv_size / 100; + i4_dbf_prev = ps_vbr_storage_vbv->i4_cur_buf_size; + + if(i4_rem_bits_in_gop < 0) + i4_rem_bits_in_gop = 0; + if(i4_rem_frms_in_gop <= 0) + i4_rem_frms_in_gop = 1; + + if(e_pic_type == I_PIC) + { + i4_dbf_desired = i4_dbf_min; + } + else + { + i4_dbf_desired = (i4_dbf_max - i4_rem_bits_in_gop / i4_rem_frms_in_gop + - i4_dbf_prev) / i4_rem_frms_in_gop; + i4_dbf_desired += i4_dbf_prev; + } + + i4_dbf_prev += ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period; + if(i4_dbf_prev > ps_vbr_storage_vbv->i4_max_buf_size) + { + i4_dbf_prev = ps_vbr_storage_vbv->i4_max_buf_size; + } + + i4_max_tgt_bits = MAX(0, (i4_dbf_prev - i4_dbf_desired)); + return (i4_max_tgt_bits); +} + +void irc_change_vbr_vbv_frame_rate(vbr_storage_vbv_t *ps_vbr_storage_vbv, + WORD32 i4_frm_rate) +{ + /* + * Calculate the max number of bits that flow into the decoder + * in the interval of two frames + */ + X_PROD_Y_DIV_Z(ps_vbr_storage_vbv->i4_max_bit_rate, 1000, i4_frm_rate, + ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period); + + /* Update the lower modules */ + irc_change_frm_rate_in_error_bits(ps_vbr_storage_vbv->ps_error_bits, + i4_frm_rate); + /* Storing the input values */ + ps_vbr_storage_vbv->i4_max_frame_rate = i4_frm_rate; +} + +void irc_change_vbr_vbv_bit_rate(vbr_storage_vbv_t *ps_vbr_storage_vbv, + WORD32 i4_max_bit_rate) +{ + /* + * Calculate the max number of bits that flow into the decoder + * in the interval of two frames + */ + X_PROD_Y_DIV_Z(i4_max_bit_rate, 1000, 
/*
 * Recomputes i4_max_bits_per_tgt_frm for a new target frame rate.
 *
 *   ps_vbr_storage_vbv : VBV state; only i4_max_bits_per_tgt_frm is written
 *   i4_tgt_frm_rate    : new target frame rate, used as the divisor
 *
 * The stored maximum bit rate is reused. The per-frame-period inflow and
 * the stored frame rate are not touched by this function.
 */
void irc_change_vbr_max_bits_per_tgt_frm(vbr_storage_vbv_t *ps_vbr_storage_vbv,
                                         WORD32 i4_tgt_frm_rate)
{
    /*
     * Maximum number of bits available per target frame:
     * X_PROD_Y_DIV_Z(max_bit_rate, 1000, tgt_frm_rate).
     * NOTE(review): the macro is defined outside this file; presumably it
     * computes (x * y) / z into its last argument - confirm, including its
     * behaviour for a zero divisor.
     */
    X_PROD_Y_DIV_Z(ps_vbr_storage_vbv->i4_max_bit_rate, 1000, i4_tgt_frm_rate,
                   ps_vbr_storage_vbv->i4_max_bits_per_tgt_frm);

}
buffer size: Bmax (as specified by level and profile) +Current Buffer Level: Bcur +Frame Rate: F + +For a storage scenario, the initial buffer size is assumed to be max. For every +frame the Maximum bits filled in to the buffer is given by Rmaxfrm = Rmax/F. If +the buffer overflows then the buffer is thresholded to the max buffer size. + + (overflow) + B(0) /| +---|--------------/-|------------------------------ Bmax + | / | + | /|/ | + | /| / | + | / | /|/ | + |/ | / | /| + |/ |/ | + | + | +-----------------------|--------------------------- + |<->| | +(1/F)=>1/frame_rate (underflow) + + + B"(i) - Bits in buffer just before decoding a frame. + B'(i) - Bits in buffer just after decoding a frame. + + + B(0) (initBuffer size) = Bmax. + B'(i) = B"(i) - bits_decoded + B"(i) = Min( Bmax, B'(i-1) + Rmaxfrm) + +Overflow Scenario: In VBR case, since we have only a max filling rate (or input bit rate) +buffer overflow is not a issue (since the buffer filling rate can be reduced to any value +below this rate) + +Underflow Scenario: B'(i) should always be > 0. If not then, the buffer underflows. 
To +prevent this condition the number bits that needs to be decoded must be equal to B"(i) +which is equal to Min( Bmax, B'(i-1) + Rmaxfrm) +****************************************************************************************/ + +typedef struct vbr_storage_vbv_t* vbr_storage_vbv_handle; + +WORD32 irc_vbr_vbv_num_fill_use_free_memtab(vbr_storage_vbv_handle *pps_vbr_storage_vbv, + itt_memtab_t *ps_memtab, + ITT_FUNC_TYPE_E e_func_type); + +/* Initalises the vbv buffer status */ +void irc_init_vbr_vbv(vbr_storage_vbv_handle ps_vbr_storage_vbv, + WORD32 max_bit_rate, /* In bits/sec*/ + WORD32 max_frm_rate, /* In frames/1000 sec*/ + WORD32 i4_max_vbv_buff_size); /* in bits*/ + +/* Updates the buffer after decoding a frame */ +void irc_update_vbr_vbv(vbr_storage_vbv_handle ps_vbr_storage_vbv, + WORD32 i4_total_bits_decoded); + +/* gets the max_number of bits that can be decoded out of the VBV without underflow */ +WORD32 irc_get_max_target_bits(vbr_storage_vbv_handle ps_vbr_storage_vbv); + +WORD32 irc_get_max_bits_inflow_per_frm_periode(vbr_storage_vbv_handle ps_vbr_storage_vbv); + +WORD32 irc_get_max_bits_per_tgt_frm(vbr_storage_vbv_handle ps_vbr_storage_vbv); + +WORD32 irc_get_cur_vbv_buf_size(vbr_storage_vbv_handle ps_vbr_storage_vbv); + +/* Queries the VBV buffer for the buffer status */ +vbv_buf_status_e irc_get_vbv_buffer_status(vbr_storage_vbv_handle ps_vbr_storage_vbv, + WORD32 i4_total_frame_bits, + WORD32 *pi4_num_bits_to_prevent_vbv_underflow); + +UWORD8 irc_restrict_swing_dvd_comp(vbr_storage_vbv_handle ps_vbr_storage_vbv); + +WORD32 irc_get_max_vbv_buf_size(vbr_storage_vbv_handle ps_vbr_storage_vbv); + +WORD32 irc_vbv_get_vbv_buf_fullness(vbr_storage_vbv_handle ps_vbr_storage_vbv, + UWORD32 u4_bits); + +WORD32 irc_get_max_tgt_bits_dvd_comp(vbr_storage_vbv_handle ps_vbr_storage_vbv, + WORD32 i4_rem_bits_in_gop, + WORD32 i4_rem_frms_in_gop, + picture_type_e e_pic_type); + +/* Changing input values at run time */ +void 
irc_change_vbr_vbv_bit_rate(vbr_storage_vbv_handle ps_vbr_storage_vbv, + WORD32 i4_max_bit_rate); + +void irc_change_vbr_vbv_frame_rate(vbr_storage_vbv_handle ps_vbr_storage_vbv, + WORD32 i4_frm_rate); + +void irc_change_vbr_max_bits_per_tgt_frm(vbr_storage_vbv_handle ps_vbr_storage_vbv, + WORD32 i4_tgt_frm_rate); +#endif + diff --git a/encoder/irc_vbr_str_prms.c b/encoder/irc_vbr_str_prms.c new file mode 100755 index 0000000..29055c2 --- /dev/null +++ b/encoder/irc_vbr_str_prms.c @@ -0,0 +1,199 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/*****************************************************************************/ +/* Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> + +/* User include files */ +#include "irc_datatypes.h" +#include "irc_cntrl_param.h" +#include "irc_vbr_str_prms.h" + +/****************************************************************************** + Function Name : irc_init_vbv_str_prms + Description : Initializes and calculates the number of I frame and P frames + in the delay period + Return Values : void + *****************************************************************************/ +void irc_init_vbv_str_prms(vbr_str_prms_t *p_vbr_str_prms, + UWORD32 u4_intra_frm_interval, + UWORD32 u4_src_ticks, + UWORD32 u4_tgt_ticks, + UWORD32 u4_frms_in_delay_period) +{ + + UWORD32 i4_num_i_frms_in_delay_per, i4_num_p_frms_in_delay_per; + + p_vbr_str_prms->u4_frms_in_delay_prd = u4_frms_in_delay_period; + p_vbr_str_prms->u4_src_ticks = u4_src_ticks; + p_vbr_str_prms->u4_tgt_ticks = u4_tgt_ticks; + p_vbr_str_prms->u4_intra_frame_int = u4_intra_frm_interval; + + /* + * Finding the number of I frames and P frames in delay period. 
This + * value along with the drain rates for the corresponding picture types will + * be used to calculate the buffer sizes + */ + i4_num_i_frms_in_delay_per = ((u4_frms_in_delay_period * u4_src_ticks) + / (u4_intra_frm_interval * u4_tgt_ticks)); + + /* Ceiling the above result*/ + if((i4_num_i_frms_in_delay_per * u4_intra_frm_interval * u4_tgt_ticks) + < (u4_frms_in_delay_period * u4_src_ticks)) + { + i4_num_i_frms_in_delay_per++; + + } + i4_num_p_frms_in_delay_per = u4_frms_in_delay_period + - i4_num_i_frms_in_delay_per; + + p_vbr_str_prms->u4_num_pics_in_delay_prd[I_PIC] = + i4_num_i_frms_in_delay_per; + p_vbr_str_prms->u4_num_pics_in_delay_prd[P_PIC] = + i4_num_p_frms_in_delay_per; + p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks = (u4_intra_frm_interval + * (p_vbr_str_prms->u4_num_pics_in_delay_prd[I_PIC])) + * u4_tgt_ticks; + p_vbr_str_prms->u4_pic_num = 0; + p_vbr_str_prms->u4_cur_pos_in_src_ticks = 0; +} + +WORD32 irc_get_vsp_num_pics_in_dly_prd(vbr_str_prms_t *p_vbr_str_prms, + UWORD32 *pu4_num_pics_in_delay_prd) +{ + pu4_num_pics_in_delay_prd[I_PIC] = + p_vbr_str_prms->u4_num_pics_in_delay_prd[I_PIC]; + pu4_num_pics_in_delay_prd[P_PIC] = + p_vbr_str_prms->u4_num_pics_in_delay_prd[P_PIC]; + return (p_vbr_str_prms->u4_frms_in_delay_prd); +} + +/****************************************************************************** + Function Name : irc_update_vbr_str_prms + Description : update the number of I frames and P/B frames in the delay period + for buffer size calculations + *****************************************************************************/ +void irc_update_vbr_str_prms(vbr_str_prms_t *p_vbr_str_prms, + picture_type_e e_pic_type) +{ + /* + * Updating the number of I frames and P frames after encoding every + * picture. 
These values along with the drain rates for the corresponding + * picture types will be used to calculate the CBR buffer size every frame + */ + + if(e_pic_type == I_PIC) + { + p_vbr_str_prms->u4_num_pics_in_delay_prd[I_PIC]--; + } + else + { + p_vbr_str_prms->u4_num_pics_in_delay_prd[P_PIC]--; + } + + /* If the next I frame falls within the delay period, we need to increment + * the number of I frames in the period, else increment the number of P + * frames + */ + if((p_vbr_str_prms->u4_cur_pos_in_src_ticks + + (p_vbr_str_prms->u4_frms_in_delay_prd + * p_vbr_str_prms->u4_src_ticks)) + >= p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks) + { + p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks -= + p_vbr_str_prms->u4_cur_pos_in_src_ticks; + p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks += + p_vbr_str_prms->u4_intra_frame_int + * p_vbr_str_prms->u4_tgt_ticks; + p_vbr_str_prms->u4_num_pics_in_delay_prd[I_PIC]++; + p_vbr_str_prms->u4_pic_num = 0; + p_vbr_str_prms->u4_cur_pos_in_src_ticks = 0; + } + else + { + p_vbr_str_prms->u4_num_pics_in_delay_prd[P_PIC]++; + } + p_vbr_str_prms->u4_pic_num++; + p_vbr_str_prms->u4_cur_pos_in_src_ticks += p_vbr_str_prms->u4_src_ticks; +} + +void irc_get_vsp_src_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms, + UWORD32 *pu4_src_ticks, + UWORD32 *pu4_tgt_ticks) +{ + pu4_src_ticks[0] = p_vbr_str_prms->u4_src_ticks; + pu4_tgt_ticks[0] = p_vbr_str_prms->u4_tgt_ticks; +} + +/******************************************************************************* + Function Name : change_vbr_str_prms + Description : Takes in changes of Intra frame interval, source and target + ticks and recalculates the position of the next I frame + ******************************************************************************/ +void irc_change_vsp_ifi(vbr_str_prms_t *p_vbr_str_prms, + UWORD32 u4_intra_frame_int) +{ + irc_init_vbv_str_prms(p_vbr_str_prms, u4_intra_frame_int, + p_vbr_str_prms->u4_src_ticks, + p_vbr_str_prms->u4_tgt_ticks, + p_vbr_str_prms->u4_frms_in_delay_prd); 
+} + +void irc_change_vsp_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms, + UWORD32 u4_tgt_ticks) +{ + UWORD32 u4_rem_intra_per_scaled; + UWORD32 u4_prev_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks; + + /* + * If the target frame rate is changed, recalculate the position of the next + * I frame based on the new target frame rate + * LIMITATIONS : + * Currently no support is available for dynamic change in source frame rate + */ + + u4_rem_intra_per_scaled = ((p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks + - p_vbr_str_prms->u4_cur_pos_in_src_ticks) + / u4_prev_tgt_ticks) * u4_tgt_ticks; + + p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks = u4_rem_intra_per_scaled + + p_vbr_str_prms->u4_cur_pos_in_src_ticks; + +} + +void irc_change_vsp_src_ticks(vbr_str_prms_t *p_vbr_str_prms, + UWORD32 u4_src_ticks) +{ + irc_init_vbv_str_prms(p_vbr_str_prms, p_vbr_str_prms->u4_intra_frame_int, + u4_src_ticks, p_vbr_str_prms->u4_tgt_ticks, + p_vbr_str_prms->u4_frms_in_delay_prd); +} + +void irc_change_vsp_fidp(vbr_str_prms_t *p_vbr_str_prms, + UWORD32 u4_frms_in_delay_period) +{ + irc_init_vbv_str_prms(p_vbr_str_prms, p_vbr_str_prms->u4_intra_frame_int, + p_vbr_str_prms->u4_src_ticks, + p_vbr_str_prms->u4_tgt_ticks, + u4_frms_in_delay_period); +} diff --git a/encoder/irc_vbr_str_prms.h b/encoder/irc_vbr_str_prms.h new file mode 100755 index 0000000..34301d8 --- /dev/null +++ b/encoder/irc_vbr_str_prms.h @@ -0,0 +1,65 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/*
 * Stream parameters tracked for VBR buffer calculations: how many pictures
 * of each type lie in the delay period and where the next intra period
 * falls.
 */
typedef struct
{
    /* Number of pictures of each type in the delay period (I_PIC and P_PIC
     * entries are maintained by the init/update functions) */
    UWORD32 u4_num_pics_in_delay_prd[MAX_PIC_TYPE];
    /* Picture counter; reset to 0 at init and whenever a new intra period is
     * scheduled, incremented on every update */
    UWORD32 u4_pic_num;
    /* Position of the next intra period boundary */
    UWORD32 u4_intra_prd_pos_in_tgt_ticks;
    /* Current position; advanced by u4_src_ticks on every update */
    UWORD32 u4_cur_pos_in_src_ticks;
    /* Intra frame interval supplied at init */
    UWORD32 u4_intra_frame_int;
    /* Source ticks supplied at init (NOTE(review): units not visible here) */
    UWORD32 u4_src_ticks;
    /* Target ticks supplied at init (NOTE(review): units not visible here) */
    UWORD32 u4_tgt_ticks;
    /* Number of frames in the delay period */
    UWORD32 u4_frms_in_delay_prd;
} vbr_str_prms_t;
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : ithread.h */ +/* */ +/* Description : This file contains all the necessary structure and */ +/* enumeration definitions needed for the Application */ +/* Program Interface(API) of the */ +/* Thread Abstraction Layer */ +/* */ +/* List of Functions : ithread_get_handle_size() */ +/* ithread_get_mutex_lock_size() */ +/* ithread_create() */ +/* ithread_exit() */ +/* ithread_join() */ +/* ithread_get_mutex_struct_size() */ +/* ithread_mutex_init() */ +/* ithread_mutex_destroy() */ +/* ithread_mutex_lock() */ +/* ithread_mutex_unlock() */ +/* ithread_yield() */ +/* ithread_sleep() */ +/* ithread_msleep() */ +/* ithread_usleep() */ +/* ithread_get_sem_struct_size() */ +/* ithread_sem_init() */ +/* ithread_sem_post() */ +/* ithread_sem_wait() */ +/* ithread_sem_destroy() */ +/* ithread_set_affinity() */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 06 09 2012 Harish Initial Version */ +/* */ 
+/*****************************************************************************/ + +#ifndef _ITHREAD_H_ +#define _ITHREAD_H_ + +UWORD32 ithread_get_handle_size(void); + +UWORD32 ithread_get_mutex_lock_size(void); + +WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument); + +void ithread_exit(void *val_ptr); + +WORD32 ithread_join(void *thread_id, void ** val_ptr); + +WORD32 ithread_get_mutex_struct_size(void); + +WORD32 ithread_mutex_init(void *mutex); + +WORD32 ithread_mutex_destroy(void *mutex); + +WORD32 ithread_mutex_lock(void *mutex); + +WORD32 ithread_mutex_unlock(void *mutex); + +void ithread_yield(void); + +void ithread_sleep(UWORD32 u4_time); + +void ithread_msleep(UWORD32 u4_time_ms); + +void ithread_usleep(UWORD32 u4_time_us); + +UWORD32 ithread_get_sem_struct_size(void); + +WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value); + +WORD32 ithread_sem_post(void *sem); + +WORD32 ithread_sem_wait(void *sem); + +WORD32 ithread_sem_destroy(void *sem); + +WORD32 ithread_set_affinity(WORD32 core_id); +#endif /* _ITHREAD_H_ */ diff --git a/encoder/iv2.h b/encoder/iv2.h new file mode 100755 index 0000000..538bb1e --- /dev/null +++ b/encoder/iv2.h @@ -0,0 +1,386 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* iv2.h +* +* @brief +* This file contains all the necessary structure and enumeration +* definitions needed for the Application Program Interface(API) of the +* Ittiam Video codecs This is version 2 of Ittiam Video API +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _IV2_H_ +#define _IV2_H_ + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ +#define IV_MAX_RAW_COMPONENTS 4 + +/*****************************************************************************/ +/* Typedefs */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ + + +/** Function status */ +typedef enum{ + IV_STATUS_NA = 0x7FFFFFFF, + IV_SUCCESS = 0x0, + IV_FAIL = 0x1, +}IV_STATUS_T; + + +/** Defines the types of memory */ +typedef enum { + IV_NA_MEM_TYPE = 0x7FFFFFFF, + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM = 0x0, + IV_EXTERNAL_CACHEABLE_SCRATCH_MEM = 0x1, + IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x2, + IV_EXTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x3, + IV_INTERNAL_CACHEABLE_PERSISTENT_MEM = 0x10, + IV_INTERNAL_CACHEABLE_SCRATCH_MEM = 0x11, + IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x12, + IV_INTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x13, +}IV_MEM_TYPE_T; + +/* The color formats used in video/image codecs */ + +typedef enum { + IV_CHROMA_NA = 0x7FFFFFFF, + IV_YUV_420P = 0x0, + 
IV_YUV_420SP_UV = 0x1, + IV_YUV_420SP_VU = 0x2, + + IV_YUV_422P = 0x10, + IV_YUV_422IBE = 0x11, + IV_YUV_422ILE = 0x12, + + IV_YUV_444P = 0x20, + IV_YUV_411P = 0x21, + + IV_GRAY = 0x30, + + IV_RGB_565 = 0x31, + IV_RGB_24 = 0x32, + IV_RGBA_8888 = 0x33 +}IV_COLOR_FORMAT_T; + +/** Frame/Field coding types */ +typedef enum { + IV_NA_FRAME = 0x7FFFFFFF, + IV_I_FRAME = 0x0, + IV_P_FRAME = 0x1, + IV_B_FRAME = 0x2, + IV_IDR_FRAME = 0x3, + IV_II_FRAME = 0x4, + IV_IP_FRAME = 0x5, + IV_IB_FRAME = 0x6, + IV_PI_FRAME = 0x7, + IV_PP_FRAME = 0x8, + IV_PB_FRAME = 0x9, + IV_BI_FRAME = 0xa, + IV_BP_FRAME = 0xb, + IV_BB_FRAME = 0xc, + IV_MBAFF_I_FRAME = 0xd, + IV_MBAFF_P_FRAME = 0xe, + IV_MBAFF_B_FRAME = 0xf, + IV_MBAFF_IDR_FRAME = 0x10, + IV_NOT_CODED_FRAME = 0x11, + IV_FRAMETYPE_DEFAULT = IV_I_FRAME +}IV_PICTURE_CODING_TYPE_T; + +/** Field type */ +typedef enum { + IV_NA_FLD = 0x7FFFFFFF, + IV_TOP_FLD = 0x0, + IV_BOT_FLD = 0x1, + IV_FLD_TYPE_DEFAULT = IV_TOP_FLD +}IV_FLD_TYPE_T; + +/** Video content type progressive/interlaced etc */ +typedef enum { + IV_CONTENTTYPE_NA = 0x7FFFFFFF, + IV_PROGRESSIVE = 0x0, + IV_INTERLACED = 0x1, + IV_PROGRESSIVE_FRAME = 0x2, + IV_INTERLACED_FRAME = 0x3, + IV_INTERLACED_TOPFIELD = 0x4, + IV_INTERLACED_BOTTOMFIELD = 0x5, + IV_CONTENTTYPE_DEFAULT = IV_PROGRESSIVE, +}IV_CONTENT_TYPE_T; + +/** Profile */ +typedef enum +{ + IV_PROFILE_NA = 0x7FFFFFFF, + IV_PROFILE_BASE = 0x0, + IV_PROFILE_MAIN = 0x1, + IV_PROFILE_HIGH = 0x2, + + + IV_PROFILE_SIMPLE = 0x100, + IV_PROFILE_ADVSIMPLE = 0x101, + IV_PROFILE_DEFAULT = IV_PROFILE_BASE, +}IV_PROFILE_T; + + +/** Architecture Enumeration */ +typedef enum +{ + ARCH_NA = 0x7FFFFFFF, + ARCH_ARM_NONEON = 0x0, + ARCH_ARM_A9Q, + ARCH_ARM_A9A, + ARCH_ARM_A9, + ARCH_ARM_A7, + ARCH_ARM_A5, + ARCH_ARM_A15, + ARCH_ARM_NEONINTR, + ARCH_X86_GENERIC, + ARCH_X86_SSSE3, + ARCH_X86_SSE42, + ARCH_ARM_A53, + ARCH_ARM_A57, + ARCH_ARM_V8_NEON +}IV_ARCH_T; + +/** SOC Enumeration */ +typedef enum +{ + SOC_NA = 0x7FFFFFFF, + SOC_GENERIC = 
/** This structure defines the memory record holder which will be used by  *
 *  the codec to communicate its memory requirements to the application    *
 *  through appropriate API functions                                      */

typedef struct {
    /** Size of the structure                                           */
    UWORD32                                     u4_size;
    /** Pointer to the memory allocated by the application              */
    void                                        *pv_base;
    /** Size of the memory to be allocated                              */
    /*  NOTE(review): presumably in bytes - confirm                     */
    UWORD32                                     u4_mem_size;
    /** Alignment of the memory pointer                                 */
    UWORD32                                     u4_mem_alignment;
    /** Type of the memory to be allocated                              */
    IV_MEM_TYPE_T                               e_mem_type;
}iv_mem_rec_t;
size of the structure */ + UWORD32 u4_size; + + /** Pointer to buffer */ + void *pv_buf; + + /** Number of valid bytes in the buffer */ + UWORD32 u4_bytes; + + /** Allocated size of the buffer */ + UWORD32 u4_bufsize; + +}iv_bits_buf_t; +/*****************************************************************************/ +/* Get Number of Memory Records */ +/*****************************************************************************/ + +/** Input structure : Get number of memory records */ +typedef struct { + /** size of the structure */ + UWORD32 u4_size; + + /** Command type */ + IV_API_COMMAND_TYPE_T e_cmd; +}iv_num_mem_rec_ip_t; + +/** Output structure : Get number of memory records */ +typedef struct{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + + /** Number of memory records that will be used by the codec */ + UWORD32 u4_num_mem_rec; +}iv_num_mem_rec_op_t; + + +/*****************************************************************************/ +/* Fill Memory Records */ +/*****************************************************************************/ + +/** Input structure : Fill memory records */ + +typedef struct { + /** size of the structure */ + UWORD32 u4_size; + + /** Command type */ + IV_API_COMMAND_TYPE_T e_cmd; + + /** Number of memory records */ + UWORD32 u4_num_mem_rec; + + /** pointer to array of memrecords structures should be filled by codec + with details of memory resource requirements */ + iv_mem_rec_t *ps_mem_rec; + + /** maximum width for which codec should request memory requirements */ + UWORD32 u4_max_wd; + + /** maximum height for which codec should request memory requirements*/ + UWORD32 u4_max_ht; + + /** Maximum number of reference frames */ + UWORD32 u4_max_ref_cnt; + + /** Maximum number of reorder frames */ + UWORD32 u4_max_reorder_cnt; + + /** Maximum level supported */ + UWORD32 u4_max_level; + + /** Color format that codec supports for input/output */ + IV_COLOR_FORMAT_T 
e_color_format; + + /** Maximum search range to be used in X direction */ + UWORD32 u4_max_srch_rng_x; + + /** Maximum search range to be used in Y direction */ + UWORD32 u4_max_srch_rng_y; + +}iv_fill_mem_rec_ip_t; + + +/** Output structure : Fill memory records */ +typedef struct{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + + /** no of memory record structures which are filled by codec */ + UWORD32 u4_num_mem_rec; +}iv_fill_mem_rec_op_t; + + +/*****************************************************************************/ +/* Retrieve Memory Records */ +/*****************************************************************************/ + +/** Input structure : Retrieve memory records */ + +typedef struct { + /** size of the structure */ + UWORD32 u4_size; + + /** Command type */ + IV_API_COMMAND_TYPE_T e_cmd; + + /** array of structures where codec should fill with all memory requested earlier */ + iv_mem_rec_t *ps_mem_rec; +}iv_retrieve_mem_rec_ip_t; + + +typedef struct{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + + /** no of memory record structures which are filled by codec */ + UWORD32 u4_num_mem_rec_filled; +}iv_retrieve_mem_rec_op_t; + +#endif /* _IV2_H_ */ + diff --git a/encoder/ive2.h b/encoder/ive2.h new file mode 100755 index 0000000..8cb0fd1 --- /dev/null +++ b/encoder/ive2.h @@ -0,0 +1,1445 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ive2.h +* +* @brief +* This file contains all the necessary structure and enumeration +* definitions needed for the Application Program Interface(API) of the +* Ittiam Video Encoders This is version 2 +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _IVE2_H_ +#define _IVE2_H_ + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +/** Maximum number of components in I/O Buffers */ +#define IVE_MAX_IO_BUFFER_COMPONENTS 4 + +/** Maximum number of reference pictures */ +#define IVE_MAX_REF 16 + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ + +/** Slice modes */ +typedef enum +{ + IVE_SLICE_MODE_NA = 0x7FFFFFFF, + IVE_SLICE_MODE_NONE = 0x0, + + IVE_SLICE_MODE_BYTES = 0x1, + IVE_SLICE_MODE_BLOCKS = 0x2, +}IVE_SLICE_MODE_T; + +/** Adaptive Intra refresh modes */ +typedef enum +{ + IVE_AIR_MODE_NA = 0x7FFFFFFF, + IVE_AIR_MODE_NONE = 0x0, + IVE_AIR_MODE_CYCLIC = 0x1, + IVE_AIR_MODE_RANDOM = 0x2, + 
IVE_AIR_MODE_DISTORTION = 0x3, +}IVE_AIR_MODE_T; + +/** Rate control modes */ +typedef enum +{ + IVE_RC_NA = 0x7FFFFFFF, + IVE_RC_NONE = 0x0, + IVE_RC_STORAGE = 0x1, + IVE_RC_CBR_NON_LOW_DELAY = 0x2, + IVE_RC_CBR_LOW_DELAY = 0x3, + IVE_RC_TWOPASS = 0x4, + IVE_RC_RATECONTROLPRESET_DEFAULT = IVE_RC_STORAGE +}IVE_RC_MODE_T; + +/** Encoder mode */ +typedef enum +{ + IVE_ENC_MODE_NA = 0x7FFFFFFF, + IVE_ENC_MODE_HEADER = 0x1, + IVE_ENC_MODE_PICTURE = 0x0, + IVE_ENC_MODE_DEFAULT = IVE_ENC_MODE_PICTURE, +}IVE_ENC_MODE_T; + +/** Speed Config */ +typedef enum IVE_SPEED_CONFIG +{ + IVE_QUALITY_DUMMY = 0x7FFFFFFF, + IVE_CONFIG = 0, + IVE_SLOWEST = 1, + IVE_NORMAL = 2, + IVE_FAST = 3, + IVE_HIGH_SPEED = 4, + IVE_FASTEST = 5, +}IVE_SPEED_CONFIG; + +/** API command type */ +typedef enum +{ + IVE_CMD_VIDEO_NA = 0x7FFFFFFF, + IVE_CMD_VIDEO_CTL = IV_CMD_EXTENSIONS + 1, + IVE_CMD_VIDEO_ENCODE, + IVE_CMD_QUEUE_INPUT, + IVE_CMD_DEQUEUE_INPUT, + IVE_CMD_QUEUE_OUTPUT, + IVE_CMD_DEQUEUE_OUTPUT, + IVE_CMD_GET_RECON, +}IVE_API_COMMAND_TYPE_T; + +/** Video Control API command type */ +typedef enum +{ + IVE_CMD_CT_NA = 0x7FFFFFFF, + IVE_CMD_CTL_SETDEFAULT = 0x0, + IVE_CMD_CTL_SET_DIMENSIONS = 0x1, + IVE_CMD_CTL_SET_FRAMERATE = 0x2, + IVE_CMD_CTL_SET_BITRATE = 0x3, + IVE_CMD_CTL_SET_FRAMETYPE = 0x4, + IVE_CMD_CTL_SET_QP = 0x5, + IVE_CMD_CTL_SET_ENC_MODE = 0x6, + IVE_CMD_CTL_SET_VBV_PARAMS = 0x7, + IVE_CMD_CTL_SET_AIR_PARAMS = 0x8, + IVE_CMD_CTL_SET_ME_PARAMS = 0X9, + IVE_CMD_CTL_SET_GOP_PARAMS = 0XA, + IVE_CMD_CTL_SET_PROFILE_PARAMS = 0XB, + IVE_CMD_CTL_SET_DEBLOCK_PARAMS = 0XC, + IVE_CMD_CTL_SET_IPE_PARAMS = 0XD, + IVE_CMD_CTL_SET_NUM_CORES = 0x30, + IVE_CMD_CTL_RESET = 0xA0, + IVE_CMD_CTL_FLUSH = 0xB0, + IVE_CMD_CTL_GETBUFINFO = 0xC0, + IVE_CMD_CTL_GETVERSION = 0xC1, + IVE_CMD_CTL_CODEC_SUBCMD_START = 0x100, +}IVE_CONTROL_API_COMMAND_TYPE_T; + +/* IVE_ERROR_BITS_T: A UWORD32 container will be used for reporting the error*/ +/* code to the application. 
The first 8 bits starting from LSB have been */ +/* reserved for the codec to report internal error details. The rest of the */ +/* bits will be generic for all video encoders and each bit has an associated*/ +/* meaning as mentioned below. The unused bit fields are reserved for future */ +/* extensions and will be zero in the current implementation */ +typedef enum { + + /* Bit 8 - Unsupported input parameter or configuration. */ + IVE_UNSUPPORTEDPARAM = 0x8, + + /* Bit 9 - Fatal error (stop the codec). If there is an */ + /* error and this bit is not set, the error is a recoverable one. */ + IVE_FATALERROR = 0x9, + + IVE_ERROR_BITS_T_DUMMY_ELEMENT = 0x7FFFFFFF +}IVE_ERROR_BITS_T; + +/* IVE_ERROR_CODES_T: The list of error codes depicting the possible error */ +/* scenarios that can be encountered while encoding */ +typedef enum +{ + + IVE_ERR_NA = 0x7FFFFFFF, + IVE_ERR_NONE = 0x00, + IVE_ERR_INVALID_API_CMD = 0x01, + IVE_ERR_INVALID_API_SUB_CMD = 0x02, + IVE_ERR_IP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x03, + IVE_ERR_OP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x04, + IVE_ERR_IP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x05, + IVE_ERR_OP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x06, + IVE_ERR_IP_INIT_API_STRUCT_SIZE_INCORRECT = 0x07, + IVE_ERR_OP_INIT_API_STRUCT_SIZE_INCORRECT = 0x08, + IVE_ERR_IP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x09, + IVE_ERR_OP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x0A, + IVE_ERR_IP_ENCODE_API_STRUCT_SIZE_INCORRECT = 0x0B, + IVE_ERR_OP_ENCODE_API_STRUCT_SIZE_INCORRECT = 0x0C, + IVE_ERR_IP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT = 0x0D, + IVE_ERR_OP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT = 0x0E, + IVE_ERR_IP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT = 0x0F, + IVE_ERR_OP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT = 0x10, + IVE_ERR_IP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT = 0x11, + IVE_ERR_OP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT = 0x12, + IVE_ERR_IP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT = 0x13, + 
IVE_ERR_OP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT = 0x14, + IVE_ERR_IP_CTL_RESET_API_STRUCT_SIZE_INCORRECT = 0x15, + IVE_ERR_OP_CTL_RESET_API_STRUCT_SIZE_INCORRECT = 0x16, + IVE_ERR_IP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT = 0x17, + IVE_ERR_OP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT = 0x18, + IVE_ERR_IP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT = 0x19, + IVE_ERR_OP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT = 0x1A, + IVE_ERR_IP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT = 0x1B, + IVE_ERR_OP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT = 0x1C, + IVE_ERR_IP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT = 0x1D, + IVE_ERR_OP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT = 0x1E, + IVE_ERR_IP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT = 0x1F, + IVE_ERR_OP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT = 0x20, + IVE_ERR_IP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT = 0x21, + IVE_ERR_OP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT = 0x22, + IVE_ERR_IP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT = 0x23, + IVE_ERR_OP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT = 0x24, + IVE_ERR_IP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT = 0x25, + IVE_ERR_OP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT = 0x26, + IVE_ERR_IP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT = 0x27, + IVE_ERR_OP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT = 0x28, + IVE_ERR_IP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT = 0x29, + IVE_ERR_OP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT = 0x2A, + IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL = 0x2B, + IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT = 0x2C, + IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT = 0x2D, + IVE_ERR_MEM_REC_BASE_POINTER_NULL = 0x2E, + IVE_ERR_MEM_REC_OVERLAP_ERR = 0x2F, + IVE_ERR_MEM_REC_INSUFFICIENT_SIZE = 0x30, + IVE_ERR_MEM_REC_ALIGNMENT_ERR = 0x31, + IVE_ERR_MEM_REC_INCORRECT_TYPE = 0x32, + IVE_ERR_HANDLE_NULL = 0x33, + IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT = 0x34, + IVE_ERR_API_FUNCTION_PTR_NULL = 0x35, + IVE_ERR_INVALID_CODEC_HANDLE = 0x36, + IVE_ERR_CTL_GET_VERSION_BUFFER_IS_NULL = 0x37, + 
IVE_ERR_IP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT = 0x38, + IVE_ERR_OP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT = 0x39, + IVE_ERR_IP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT = 0x3A, + IVE_ERR_OP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT = 0x3B, + IVE_ERR_IP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT = 0x3C, + IVE_ERR_OP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT = 0x3D, + IVE_ERR_IP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT = 0x3E, + IVE_ERR_OP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT = 0x3F, + +}IVE_ERROR_CODES_T; + + +/*****************************************************************************/ +/* Initialize encoder */ +/*****************************************************************************/ + +/** Input structure : Initialize the encoder */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type */ + IV_API_COMMAND_TYPE_T e_cmd; + + /** Number of memory records */ + UWORD32 u4_num_mem_rec; + + /** pointer to array of memrecords structures should be filled by codec + with details of memory resource requirements */ + iv_mem_rec_t *ps_mem_rec; + + /** maximum width for which codec should request memory requirements */ + UWORD32 u4_max_wd; + + /** maximum height for which codec should request memory requirements */ + UWORD32 u4_max_ht; + + /** Maximum number of reference frames */ + UWORD32 u4_max_ref_cnt; + + /** Maximum number of reorder frames */ + UWORD32 u4_max_reorder_cnt; + + /** Maximum level supported */ + UWORD32 u4_max_level; + + /** Input color format */ + IV_COLOR_FORMAT_T e_inp_color_fmt; + + /** Flag to enable/disable - To be used only for debugging/testing */ + UWORD32 u4_enable_recon; + + /** Recon color format */ + IV_COLOR_FORMAT_T e_recon_color_fmt; + + /** Rate control mode */ + IVE_RC_MODE_T e_rc_mode; + + /** Maximum frame rate to be supported */ + UWORD32 u4_max_framerate; + + /** Maximum bitrate to be supported */ + UWORD32 u4_max_bitrate; + + /** Maximum number of consecutive B frames */ + 
UWORD32 u4_max_num_bframes; + + /** Content type Interlaced/Progressive */ + IV_CONTENT_TYPE_T e_content_type; + + /** Maximum search range to be used in X direction */ + UWORD32 u4_max_srch_rng_x; + + /** Maximum search range to be used in Y direction */ + UWORD32 u4_max_srch_rng_y; + + /** Slice Mode */ + IVE_SLICE_MODE_T e_slice_mode; + + /** Slice parameter */ + UWORD32 u4_slice_param; + + /** Processor architecture */ + IV_ARCH_T e_arch; + + /** SOC details */ + IV_SOC_T e_soc; + + +}ive_init_ip_t; + +/** Output structure : Initialize the encoder */ +typedef struct +{ + /** Size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_init_op_t; + + +/*****************************************************************************/ +/* Video Encode - Deprecated */ +/*****************************************************************************/ + +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Descriptor for input raw buffer */ + iv_raw_buf_t s_inp_buf; + + /** Buffer containing pic info if mb_info_type is non-zero */ + void *pv_bufs; + + /** Flag to indicate if mb info is sent along with input buffer */ + UWORD32 u4_mb_info_type; + + /** Buffer containing mb info if mb_info_type is non-zero */ + void *pv_mb_info; + + /** Flag to indicate if pic info is sent along with input buffer */ + UWORD32 u4_pic_info_type; + + /** Buffer containing pic info if mb_info_type is non-zero */ + void *pv_pic_info; + + /** Lower 32bits of input time stamp */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of input time stamp */ + UWORD32 u4_timestamp_high; + + /** Flag to indicate if this is the last input in the stream */ + UWORD32 u4_is_last; + + /** Descriptor for output bit-stream buffer */ + iv_bits_buf_t s_out_buf; + + /** Descriptor for recon buffer */ + iv_raw_buf_t s_recon_buf; + +}ive_video_encode_ip_t; + + +typedef struct +{ + /** size of the structure */ + UWORD32 
u4_size; + + /** error code */ + UWORD32 u4_error_code; + + /* Output present */ + WORD32 output_present; + + /* dump recon */ + WORD32 dump_recon; + + /* encoded frame type */ + UWORD32 u4_encoded_frame_type; + + /** Descriptor for input raw buffer freed from codec */ + iv_raw_buf_t s_inp_buf; + + /** Descriptor for output bit-stream buffer */ + iv_bits_buf_t s_out_buf; + + /** Descriptor for recon buffer */ + iv_raw_buf_t s_recon_buf; + +}ive_video_encode_op_t; + +/*****************************************************************************/ +/* Queue Input raw buffer - Send the YUV buffer to be encoded */ +/*****************************************************************************/ +/** Input structure : Queue input buffer to the encoder */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command : IVE_CMD_QUEUE_INPUT */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Descriptor for input raw buffer */ + iv_raw_buf_t s_inp_buf; + + /** Flag to indicate if mb info is sent along with input buffer */ + UWORD32 u4_mb_info_type; + + /** Flag to indicate the size of mb info structure */ + UWORD32 u4_mb_info_size; + + /** Buffer containing mb info if mb_info_type is non-zero */ + void *pv_mb_info; + + /** Flag to indicate if pic info is sent along with input buffer */ + UWORD32 u4_pic_info_type; + + /** Buffer containing pic info if pic_info_type is non-zero */ + void *pv_pic_info; + + /** Lower 32bits of input time stamp */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of input time stamp */ + UWORD32 u4_timestamp_high; + + + /** Flag to enable/disable blocking the current API call */ + UWORD32 u4_is_blocking; + + /** Flag to indicate if this is the last input in the stream */ + UWORD32 u4_is_last; + +}ive_queue_inp_ip_t; + +/** Output structure : Queue input buffer to the encoder */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_queue_inp_op_t; + 
+/*****************************************************************************/ +/* Dequeue Input raw buffer - Get free YUV buffer from the encoder */ +/*****************************************************************************/ +/** Input structure : Dequeue input buffer from the encoder */ + +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command: IVE_CMD_DEQUEUE_INPUT */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Flag to enable/disable blocking the current API call */ + UWORD32 u4_is_blocking; + +}ive_dequeue_inp_ip_t; + +/** Output structure : Dequeue input buffer from the encoder */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + + /** Buffer descriptor of the buffer returned from encoder */ + iv_raw_buf_t s_inp_buf; + + /** Flag to indicate if mb info is sent along with input buffer */ + UWORD32 u4_mb_info_type; + + /** Flag to indicate the size of mb info structure */ + UWORD32 u4_mb_info_size; + + /** Buffer containing mb info if mb_info_type is non-zero */ + void *pv_mb_info; + + /** Flag to indicate if pic info is sent along with input buffer */ + UWORD32 u4_pic_info_type; + + /** Buffer containing pic info if pic_info_type is non-zero */ + void *pv_pic_info; + + /** Lower 32bits of input time stamp */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of input time stamp */ + UWORD32 u4_timestamp_high; + + /** Flag to indicate if this is the last input in the stream */ + UWORD32 u4_is_last; + + +}ive_dequeue_inp_op_t; + +/*****************************************************************************/ +/* Queue Output bitstream buffer - Send the bitstream buffer to be filled */ +/*****************************************************************************/ +/** Input structure : Queue output buffer to the encoder */ + +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command : IVE_CMD_QUEUE_OUTPUT */ + IVE_API_COMMAND_TYPE_T 
e_cmd; + + /** Descriptor for output bit-stream buffer */ + iv_bits_buf_t s_out_buf; + + /** Flag to enable/disable blocking the current API call */ + UWORD32 u4_is_blocking; + + /** Flag to indicate if this is the last output in the stream */ + UWORD32 u4_is_last; + +}ive_queue_out_ip_t; + +/** Output structure : Queue output buffer to the encoder */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + +}ive_queue_out_op_t; + + +/*****************************************************************************/ +/* Dequeue Output bitstream buffer - Get the bitstream buffer filled */ +/*****************************************************************************/ +/** Input structure : Dequeue output buffer from the encoder */ + +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command : IVE_CMD_DEQUEUE_OUTPUT */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Flag to enable/disable blocking the current API call */ + UWORD32 u4_is_blocking; +}ive_dequeue_out_ip_t; + +/** Output structure : Dequeue output buffer from the encoder */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + + /** Descriptor for output bit-stream buffer */ + iv_bits_buf_t s_out_buf; + + /** Lower 32bits of timestamp corresponding to this buffer */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of timestamp corresponding to this buffer */ + UWORD32 u4_timestamp_high; + + /** Flag to indicate if this is the last output in the stream */ + UWORD32 u4_is_last; + +}ive_dequeue_out_op_t; + +/*****************************************************************************/ +/* Get Recon data - Get the reconstructed data from encoder */ +/*****************************************************************************/ +/** Input structure : Get recon data from the encoder */ + +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + 
+ /** Command : IVE_CMD_GET_RECON */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Flag to enable/disable blocking the current API call */ + UWORD32 u4_is_blocking; + + /** Descriptor for recon buffer */ + iv_raw_buf_t s_recon_buf; + + /** Flag to indicate if this is the last recon in the stream */ + UWORD32 u4_is_last; + +}ive_get_recon_ip_t; + +/** Output structure : Get recon data from the encoder */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + + /** Lower 32bits of time stamp corresponding to this buffer */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to this buffer */ + UWORD32 u4_timestamp_high; + + /** Flag to indicate if this is the last recon in the stream */ + UWORD32 u4_is_last; + +}ive_get_recon_op_t; + +/*****************************************************************************/ +/* Video control Flush */ +/*****************************************************************************/ + +/** Input structure : Flush all the buffers from the encoder */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_FLUSH */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; +}ive_ctl_flush_ip_t; + +/** Output structure : Flush all the buffers from the encoder */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_flush_op_t; + +/*****************************************************************************/ +/* Video control reset */ +/*****************************************************************************/ +/** Input structure : Reset the encoder */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_RESET */ + 
IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; +}ive_ctl_reset_ip_t; + +/** Output structure : Reset the encoder */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_reset_op_t; + +/*****************************************************************************/ +/* Video control:Get Buf Info */ +/*****************************************************************************/ + +/** Input structure : Get encoder buffer requirements */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_GETBUFINFO */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** maximum width for which codec should request memory requirements */ + UWORD32 u4_max_wd; + + /** maximum height for which codec should request memory requirements */ + UWORD32 u4_max_ht; + + /** Input color format */ + IV_COLOR_FORMAT_T e_inp_color_fmt; + +}ive_ctl_getbufinfo_ip_t; + +/** Output structure : Get encoder buffer requirements */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + + /** Minimum number of input buffers required for codec */ + UWORD32 u4_min_inp_bufs; + + /** Minimum number of output buffers required for codec */ + UWORD32 u4_min_out_bufs; + + /** Number of components in input buffers required for codec */ + UWORD32 u4_inp_comp_cnt; + + /** Number of components in output buffers required for codec */ + UWORD32 u4_out_comp_cnt; + + /** Minimum sizes of each component in input buffer required */ + UWORD32 au4_min_in_buf_size[IVE_MAX_IO_BUFFER_COMPONENTS]; + + /** Minimum sizes of each component in output buffer required */ + UWORD32 au4_min_out_buf_size[IVE_MAX_IO_BUFFER_COMPONENTS]; + +}ive_ctl_getbufinfo_op_t; + + + + +/*****************************************************************************/ +/* Video 
control:Get Version Info */ +/*****************************************************************************/ + +/** Input structure : Get encoder version information */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_GETVERSION */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Buffer where version info will be returned */ + UWORD8 *pu1_version; + + /** Size of the buffer allocated for version info */ + UWORD32 u4_version_bufsize; +}ive_ctl_getversioninfo_ip_t; + +/** Output structure : Get encoder version information */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_getversioninfo_op_t; + + +/*****************************************************************************/ +/* Video control:set default params */ +/*****************************************************************************/ +/** Input structure : Set default encoder parameters */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SETDEFAULT */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + +}ive_ctl_setdefault_ip_t; + +/** Output structure : Set default encoder parameters */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_setdefault_op_t; + +/*****************************************************************************/ +/* Video control Set Frame dimensions */ 
+/*****************************************************************************/ + +/** Input structure : Set frame dimensions */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_DIMENSIONS */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Input width */ + UWORD32 u4_wd; + + /** Input height */ + UWORD32 u4_ht; + + /** Input stride */ + UWORD32 u4_strd; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + +}ive_ctl_set_dimensions_ip_t; + +/** Output structure : Set frame dimensions */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_dimensions_op_t; + + +/*****************************************************************************/ +/* Video control Set Frame rates */ +/*****************************************************************************/ + +/** Input structure : Set frame rate */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_FRAMERATE */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Source frame rate */ + UWORD32 u4_src_frame_rate; + + /** Target frame rate */ + UWORD32 u4_tgt_frame_rate; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + +}ive_ctl_set_frame_rate_ip_t; + +/** Output structure : Set frame rate */ +typedef struct +{ + /** 
size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_frame_rate_op_t; + +/*****************************************************************************/ +/* Video control Set Bitrate */ +/*****************************************************************************/ + +/** Input structure : Set bitrate */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_BITRATE */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Target bitrate in kilobits per second */ + UWORD32 u4_target_bitrate; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + +}ive_ctl_set_bitrate_ip_t; + +/** Output structure : Set bitrate */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_bitrate_op_t; + +/*****************************************************************************/ +/* Video control Set Frame type */ +/*****************************************************************************/ + +/** Input structure : Set frametype */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_FRAMETYPE */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Force current frame type */ + IV_PICTURE_CODING_TYPE_T e_frame_type; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + 
UWORD32 u4_timestamp_high; + +}ive_ctl_set_frame_type_ip_t; + +/** Output structure : Set frametype */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_frame_type_op_t; + +/*****************************************************************************/ +/* Video control Set Encode mode */ +/*****************************************************************************/ + +/** Input structure : Set encode mode */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_ENC_MODE */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Encoder mode */ + IVE_ENC_MODE_T e_enc_mode; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + +}ive_ctl_set_enc_mode_ip_t; + +/** Output structure : Set encode mode */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; + +}ive_ctl_set_enc_mode_op_t; + +/*****************************************************************************/ +/* Video control Set QP */ +/*****************************************************************************/ + +/** Input structure : Set QP */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_QP */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Set initial Qp for I pictures */ + UWORD32 u4_i_qp; + + /** Set initial Qp for P pictures */ + UWORD32 u4_p_qp; + + /** Set initial Qp for B pictures */ + UWORD32 u4_b_qp; + + /** Set minimum Qp for I pictures */ + 
UWORD32 u4_i_qp_min; + + /** Set maximum Qp for I pictures */ + UWORD32 u4_i_qp_max; + + /** Set minimum Qp for P pictures */ + UWORD32 u4_p_qp_min; + + /** Set maximum Qp for P pictures */ + UWORD32 u4_p_qp_max; + + /** Set minimum Qp for B pictures */ + UWORD32 u4_b_qp_min; + + /** Set maximum Qp for B pictures */ + UWORD32 u4_b_qp_max; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + + +}ive_ctl_set_qp_ip_t; + +/** Output structure : Set QP */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_qp_op_t; + +/*****************************************************************************/ +/* Video control Set AIR params */ +/*****************************************************************************/ + +/** Input structure : Set AIR params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_AIR_PARAMS */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Adaptive intra refresh mode */ + IVE_AIR_MODE_T e_air_mode; + + /** Adaptive intra refresh period in frames */ + UWORD32 u4_air_refresh_period; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + + +}ive_ctl_set_air_params_ip_t; + +/** Output structure : Set AIR params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_air_params_op_t; + 
+/*****************************************************************************/ +/* Video control Set VBV params */ +/*****************************************************************************/ + +/** Input structure : Set VBV params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_VBV_PARAMS */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** VBV buffer delay */ + UWORD32 u4_vbv_buffer_delay; + + /** VBV buffer size */ + UWORD32 u4_vbv_buf_size; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + + +}ive_ctl_set_vbv_params_ip_t; + +/** Output structure : Set VBV params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_vbv_params_op_t; + + +/*****************************************************************************/ +/* Video control Set Processor Details */ +/*****************************************************************************/ + +/** Input structure : Set processor details */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_NUM_CORES */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Total number of cores to be used */ + UWORD32 u4_num_cores; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + +}ive_ctl_set_num_cores_ip_t; + +/** 
Output structure : Set processor details */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_num_cores_op_t; + +/*****************************************************************************/ +/* Video control Set Intra Prediction estimation params */ +/*****************************************************************************/ + +/** Input structure : Set IPE params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_IPE_PARAMS */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Flag to enable/disable intra 4x4 analysis */ + UWORD32 u4_enable_intra_4x4; + + /** Flag to enable/disable pre-enc stage of Intra Pred estimation */ + UWORD32 u4_pre_enc_ipe; + + /** Speed preset - Value between 0 (slowest) and 100 (fastest) */ + IVE_SPEED_CONFIG u4_enc_speed_preset; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + +}ive_ctl_set_ipe_params_ip_t; + +/** Output structure : Set IPE Params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_ipe_params_op_t; + +/*****************************************************************************/ +/* Video control Set Motion estimation params */ +/*****************************************************************************/ + +/** Input structure : Set ME Params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_ME_PARAMS */ + 
IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Flag to enable/disable pre-enc stage of Motion estimation */ + UWORD32 u4_pre_enc_me; + + /** Speed preset - Value between 0 (slowest) and 100 (fastest) */ + UWORD32 u4_me_speed_preset; + + /** Flag to enable/disable half pel motion estimation */ + UWORD32 u4_enable_hpel; + + /** Flag to enable/disable quarter pel motion estimation */ + UWORD32 u4_enable_qpel; + + /** Flag to enable/disable fast SAD approximation */ + UWORD32 u4_enable_fast_sad; + + /** Flag to enable/disable alternate reference frames */ + UWORD32 u4_enable_alt_ref; + + /** Maximum search range in X direction for farthest reference */ + UWORD32 u4_srch_rng_x; + + /** Maximum search range in Y direction for farthest reference */ + UWORD32 u4_srch_rng_y; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + +}ive_ctl_set_me_params_ip_t; + +/** Output structure : Set ME Params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_me_params_op_t; + +/*****************************************************************************/ +/* Video control Set GOP params */ +/*****************************************************************************/ + +/** Input structure : Set GOP Params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_GOP_PARAMS */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** I frame interval */ + UWORD32 u4_i_frm_interval; + + /** IDR frame interval */ + UWORD32 u4_idr_frm_interval; + + /** consecutive B frames */ + UWORD32 u4_num_b_frames; + + /** Lower 32bits of time stamp corresponding 
to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + +}ive_ctl_set_gop_params_ip_t; + +/** Output structure : Set GOP params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_gop_params_op_t; + +/*****************************************************************************/ +/* Video control Set Deblock params */ +/*****************************************************************************/ + +/** Input structure : Set Deblock Params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub command type : IVE_CMD_CTL_SET_DEBLOCK_PARAMS */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Disable deblock level (0: Enable completely, 3: Disable completely) */ + UWORD32 u4_disable_deblock_level; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + +}ive_ctl_set_deblock_params_ip_t; + +/** Output structure : Set Deblock Params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_deblock_params_op_t; + +/*****************************************************************************/ +/* Video control Set Profile params */ +/*****************************************************************************/ + +/** Input structure : Set Profile Params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Command type : IVE_CMD_VIDEO_CTL */ + IVE_API_COMMAND_TYPE_T e_cmd; + + /** Sub 
command type : IVE_CMD_CTL_SET_PROFILE_PARAMS */ + IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** Profile */ + IV_PROFILE_T e_profile; + + /** Lower 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of time stamp corresponding to input buffer, + * from which this command takes effect */ + UWORD32 u4_timestamp_high; + +}ive_ctl_set_profile_params_ip_t; + +/** Output structure : Set Profile Params */ +typedef struct +{ + /** size of the structure */ + UWORD32 u4_size; + + /** Return error code */ + UWORD32 u4_error_code; +}ive_ctl_set_profile_params_op_t; + + +#endif /* _IVE2_H_ */ + diff --git a/encoder/mips/ih264e_function_selector.c b/encoder/mips/ih264e_function_selector.c new file mode 100755 index 0000000..58ec4d0 --- /dev/null +++ b/encoder/mips/ih264e_function_selector.c @@ -0,0 +1,110 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in h264 +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include Files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include Files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264e_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" + +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264e_defs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_platform_macros.h" + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* 
+******************************************************************************* +*/ + +void ih264e_init_function_ptr(void *pv_codec) +{ + codec_t *ps_codec = (codec_t *)pv_codec; + ih264e_init_function_ptr_generic(ps_codec); +} + +IV_ARCH_T ih264e_default_arch(void) +{ + return ARCH_NA; +} + diff --git a/encoder/mips/ih264e_platform_macros.h b/encoder/mips/ih264e_platform_macros.h new file mode 100755 index 0000000..ed1edd4 --- /dev/null +++ b/encoder/mips/ih264e_platform_macros.h @@ -0,0 +1,135 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264e_platform_macros.h + * + * @brief + * Contains platform specific routines used for codec context initialization + * + * @author + * ittiam + * + * @remarks + * none + * + ******************************************************************************* + */ + + +#ifndef IH264E_PLATFORM_MACROS_H_ +#define IH264E_PLATFORM_MACROS_H_ + +#define DATA_SYNC() +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context based on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr_generic(codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context based on the architecture in use +* +* @param[in] pv_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr(void *pv_codec); + +/** +******************************************************************************* +* +* @brief Determine the architecture of the encoder executing environment +* +* @par Description: This routine returns the architecture of 
the enviro- +* ment in which the current encoder is being tested +* +* @param[in] void +* +* @returns IV_ARCH_T +* architecture +* +* @remarks none +* +******************************************************************************* +*/ +IV_ARCH_T ih264e_default_arch(void); + +/** +******************************************************************************* +* +* @brief Data Memory Barrier, Data Synchronization Barrier +* +* +* @par Description: These functions do nothing on x86 side. But on arm platforms, +* +* Data Memory Barrier acts as a memory barrier. It ensures that all explicit +* memory accesses that appear in program order before the DMB instruction are +* observed before any explicit memory accesses that appear in program order +* after the DMB instruction. It does not affect the ordering of any other +* instructions executing on the processor +* +* Data Synchronization Barrier acts as a special kind of memory barrier. No +* instruction in program order after this instruction executes until this instruction +* completes. This instruction completes when: +* 1. All explicit memory accesses before this instruction complete. +* 2. All Cache, Branch predictor and TLB maintenance operations before +* this instruction complete. +* +* @param[in] void +* +* @returns void +* +* @remarks none +* +******************************************************************************* +*/ + +#endif /* IH264E_PLATFORM_MACROS_H_ */ diff --git a/encoder/mips/ime_platform_macros.h b/encoder/mips/ime_platform_macros.h new file mode 100755 index 0000000..18e2e8f --- /dev/null +++ b/encoder/mips/ime_platform_macros.h @@ -0,0 +1,52 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ime_platform_macros.h +* +* @brief +* Platform specific Macro definitions used in the codec +* +* @author +* Ittiam +* +* @remarks +* None +* +******************************************************************************* +*/ + + +#ifndef _IME_PLATFORM_MACROS_H_ +#define _IME_PLATFORM_MACROS_H_ + +/*****************************************************************************/ +/* Function macro definitions */ +/*****************************************************************************/ +/* USADA8: accumulates ABS(src[i]-est[i]) for i = 0..3 into sad; arguments are evaluated more than once */ +#define USADA8(src,est,sad) \ + (sad) += ABS((src)[0]-(est)[0]) + \ + ABS((src)[1]-(est)[1]) + \ + ABS((src)[2]-(est)[2]) + \ + ABS((src)[3]-(est)[3]) + + +#endif /* _IME_PLATFORM_MACROS_H_ */ diff --git a/encoder/x86/ih264e_function_selector.c b/encoder/x86/ih264e_function_selector.c new file mode 100755 index 0000000..429cdab --- /dev/null +++ b/encoder/x86/ih264e_function_selector.c @@ -0,0 +1,141 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in h264 +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System Include Files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include Files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264e_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" + +#include "ih264_macros.h" +#include "ih264_platform_macros.h" +#include "ih264e_defs.h" +#include "irc_cntrl_param.h" +#include 
"irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_platform_macros.h" + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context based on the architecture in use +* +* @param[in] pv_codec +* Codec context pointer (cast to codec_t * internally) +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr(void *pv_codec) +{ + codec_t *ps_codec = (codec_t *)pv_codec; + ih264e_init_function_ptr_generic(ps_codec); /* baseline for every arch */ + switch(ps_codec->s_cfg.e_arch) + { + case ARCH_X86_GENERIC: + /* generic pointers were already installed above */ + break; + case ARCH_X86_SSSE3: + ih264e_init_function_ptr_ssse3(ps_codec); + break; + case ARCH_X86_SSE42: + default: /* unrecognised values are treated as SSE4.2 */ + ih264e_init_function_ptr_ssse3(ps_codec); + ih264e_init_function_ptr_sse42(ps_codec); /* applied after the SSSE3 set */ + break; + } +} + +/** +******************************************************************************* +* +* @brief Determine the architecture of the encoder executing environment +* +* @par Description: This routine returns the architecture of the +* environment in which the current encoder is being tested +* +* @param[in] void +* +* @returns IV_ARCH_T +* architecture +* +* @remarks none +* +******************************************************************************* +*/ +IV_ARCH_T ih264e_default_arch(void) +{ + return ARCH_X86_SSE42; +} + + diff --git a/encoder/x86/ih264e_function_selector_sse42.c b/encoder/x86/ih264e_function_selector_sse42.c new file mode 100755 index 0000000..6fa6308 --- /dev/null +++ b/encoder/x86/ih264e_function_selector_sse42.c @@ -0,0 +1,146 @@ 
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source 
Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264e_function_selector_sse42.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* Ittiam +* +* @par List of Functions: +* - ih264e_init_function_ptr_sse42 +* +* @remarks +* None +* +******************************************************************************* +*/ + + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + + +/* System Include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264e_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include 
"ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" + +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_platform_macros.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264e_defs.h" +#include "ih264e_structs.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264e_core_coding.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_cavlc.h" +#include "ih264_padding.h" +#include "ih264e_intra_modes_eval.h" +#include "ih264_mem_fns.h" +#include "ih264e_fmt_conv.h" +#include "ih264e_half_pel.h" + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr_sse42(codec_t *ps_codec) +{ + WORD32 i; + process_ctxt_t *ps_proc = NULL; + me_ctxt_t *ps_me_ctxt = NULL; + printf("Enabling SSE42 functions\n"); + + /* Init luma forward transform fn ptr */ + ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_sse42; + ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_sse42; + ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_sse42; + ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_sse42; + + /* Init inverse transform fn ptr */ + ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_sse42; + ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_sse42; + ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_sse42; + + /* sad me level functions */ + 
ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_sse42; + ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_sse42; + ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_sse42; + + /* sad me level functions */ + for(i = 0; i < (MAX_PROCESS_CTXT); i++) + { + ps_proc = &ps_codec->as_process[i]; + + ps_me_ctxt = &ps_proc->s_me_ctxt; + ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_sse42; + ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_sse42; + ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_sse42; + ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_sse42; + ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_sse42; + ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_sse42; + } +} diff --git a/encoder/x86/ih264e_function_selector_ssse3.c b/encoder/x86/ih264e_function_selector_ssse3.c new file mode 100755 index 0000000..7401e53 --- /dev/null +++ b/encoder/x86/ih264e_function_selector_ssse3.c @@ -0,0 +1,190 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264e_function_selector_ssse3.c +* +* @brief +* Contains functions to initialize function pointers of codec context +* +* @author +* Ittiam +* +* @par List of Functions: +* - ih264e_init_function_ptr_ssse3 +* +* @remarks +* None +* +******************************************************************************* +*/ + + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + + +/* System Include files */ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User Include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_size_defs.h" +#include "ih264e_defs.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_defs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" + +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_platform_macros.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264e_defs.h" +#include "ih264e_structs.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264e_core_coding.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_cavlc.h" +#include "ih264_padding.h" +#include "ih264e_intra_modes_eval.h" +#include "ih264_mem_fns.h" +#include "ih264e_fmt_conv.h" +#include "ih264e_half_pel.h" + +/** 
+******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr_ssse3(codec_t *ps_codec) +{ + printf("Enabling SSSE3 functions\n"); + + /* Init function pointers for intra pred leaf level functions luma + * Intra 16x16 */ + ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_ssse3; + ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_ssse3; + ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_ssse3; + ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_ssse3; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 4x4 */ + ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_ssse3; + ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_ssse3; + ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_ssse3; + ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3; + ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3; + ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_ssse3; + ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_ssse3; + ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_ssse3; + ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_ssse3; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 8x8 */ + ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_ssse3; + 
ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_ssse3; + ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3; + ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3; + ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_ssse3; + ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_ssse3; + ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_ssse3; + ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_ssse3; + + /* Init function pointers for intra pred leaf level functions chroma + * Intra 8x8 */ + ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_ssse3; + ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_ssse3; + ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_ssse3; + + /* Init inverse transform fn ptr */ + ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8_ssse3; + ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_ssse3; + ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3; + + /* Init fn ptr luma deblocking */ + ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_ssse3; + ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_ssse3; + ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_ssse3; + ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_ssse3; + /* Init fn ptr chroma deblocking */ + ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_ssse3; + ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_ssse3; + ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_ssse3; + ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_ssse3; + + /* Padding Functions */ + ps_codec->pf_pad_left_luma = ih264_pad_left_luma_ssse3; + ps_codec->pf_pad_left_chroma = 
ih264_pad_left_chroma_ssse3; + ps_codec->pf_pad_right_luma = ih264_pad_right_luma_ssse3; + ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_ssse3; + + /* Inter pred leaf level functions */ + ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_ssse3; + ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_ssse3; + ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_ssse3; + ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_ssse3; + + /* memory handling operations */ + ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_ssse3; + ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_ssse3; + + /*intra mode eval -encoder level function*/ + ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_ssse3; + ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_ssse3; + ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_ssse3; + + /* Halp pel generation function - encoder level*/ + ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_ssse3; + ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_ssse3; +} diff --git a/encoder/x86/ih264e_half_pel_ssse3.c b/encoder/x86/ih264e_half_pel_ssse3.c new file mode 100755 index 0000000..42580fa --- /dev/null +++ b/encoder/x86/ih264e_half_pel_ssse3.c @@ -0,0 +1,487 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264e_half_pel_ssse3.c + * + * @brief + * Contains the x86 intrinsic function definitions for 6-tap vertical filter + * and cascaded 2D filter used in motion estimation in H264 encoder. + * + * @author + * Ittiam + * + * @par List of Functions: + * ih264e_sixtapfilter_horz_ssse3 + * ih264e_sixtap_filter_2dvh_vert_ssse3 + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <assert.h> +#include <limits.h> + +/* User include files */ +#include "ih264_typedefs.h" +#include "ithread.h" +#include "ih264_platform_macros.h" +#include "ih264_defs.h" +#include "ih264e_half_pel.h" +#include "ih264_macros.h" +#include "ih264e_half_pel.h" +#include "ih264e_debug.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ +/* +******************************************************************************* +* +* @brief +* Interprediction luma filter for horizontal input(Filter run for width = 17 +* and height =16) +* +* @par Description: +* Applies a 6 tap horizontal filter .The output is clipped to 8 
bits sec. +* 8.4.2.2.1 titled "Luma sample interpolation process" +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @returns +* None +* +* @remarks +* None +* +******************************************************************************* +*/ +void ih264e_sixtapfilter_horz_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 dst_strd) +{ + WORD32 ht; + WORD32 tmp; + + __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b; + __m128i src_r0_t1_16x8b, src_r1_t1_16x8b; + + __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b; + __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b; + + __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b; + __m128i const_val16_8x16b; + + ht = 16; + pu1_src -= 2; // the filter input starts from x[-2] (till x[3]) + + coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 + //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20 + const_val16_8x16b = _mm_set1_epi16(16); + + //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... + //Row0 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... + //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels. 
+ + do + { + src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 + src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 + + res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1 + //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1 + res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1 + //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10 + + res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3 + //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3 + res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3 + //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3 + + src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); 
//a4 a5 a6 a7 a8 a9....a15 0 0 0 0 + src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0 + + src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0 + src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0 + + src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12 + src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12 + + res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5 + //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5 + res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5 + //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5 + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b); + res_r0_t3_8x16b = _mm_add_epi16(res_r0_t3_8x16b, const_val16_8x16b); + res_r1_t3_8x16b = _mm_add_epi16(res_r1_t3_8x16b, const_val16_8x16b); + res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b); + res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b); + + tmp = ((pu1_src[18] + pu1_src[19]) << 2) - pu1_src[17] - pu1_src[20]; + tmp = pu1_src[16] + pu1_src[21] + (tmp << 2) + tmp; + + res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5); //shifting right by 5 bits. 
+ res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5); + tmp = (tmp + 16) >> 5; + + src_r0_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r1_t1_8x16b); + pu1_dst[16] = CLIP_U8(tmp); + + _mm_storeu_si128((__m128i *)pu1_dst, src_r0_16x8b); + + ht--; + pu1_src += src_strd; + pu1_dst += dst_strd; + } + while(ht > 0); +} + +/* +******************************************************************************* +* +* @brief +* This function implements a two stage cascaded six tap filter. It +* applies the six tap filter in the vertical direction on the +* predictor values, followed by applying the same filter in the +* horizontal direction on the output of the first stage. The six tap +* filtering operation is described in sec 8.4.2.2.1 titled "Luma sample +* interpolation process" (Filter run for width = 17 and height =17) +* +* @par Description: +* The function interpolates the predictors first in the vertical direction +* and then in the horizontal direction to output the (1/2,1/2). The output +* of the first stage of the filter is stored in the buffer pointed to by +* pi16_pred1(only in C) in 16 bit precision. 
+* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[out] pu1_dst1 +* UWORD8 pointer to the destination(Vertical filtered output) +* +* @param[out] pu1_dst2 +* UWORD8 pointer to the destination(out put after applying horizontal filter +* to the intermediate vertical output) +* +* @param[in] src_strd +* integer source stride + +* @param[in] dst_strd +* integer destination stride of pu1_dst +* +* @param[in]pi16_pred1 +* Pointer to 16bit intermediate buffer(used only in c) +* +* @param[in] pi16_pred1_strd +* integer destination stride of pi16_pred1 +* +* @returns +* None +* +* @remarks +* None +* +******************************************************************************* +*/ +void ih264e_sixtap_filter_2dvh_vert_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_dst1, + UWORD8 *pu1_dst2, + WORD32 src_strd, + WORD32 dst_strd, + WORD32 *pi4_pred1, + WORD32 pred1_strd) +{ + WORD32 ht; + WORD16 *pi2_pred1; + + ht = 17; + pi2_pred1 = (WORD16 *)pi4_pred1; + pred1_strd = pred1_strd << 1; + + // Vertical 6-tap filter + { + __m128i src1_r0_16x8b, src1_r1_16x8b, src1_r2_16x8b; + __m128i src1_r3_16x8b, src1_r4_16x8b, src1_r5_16x8b; + __m128i src2_r0_16x8b, src2_r1_16x8b, src2_r2_16x8b; + __m128i src2_r3_16x8b, src2_r4_16x8b, src2_r5_16x8b; + + __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b; + + __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b; + __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b; + + coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 + //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20 + + pu1_src -= 2; + pu1_src -= src_strd << 1; // the filter input starts from x[-2] (till x[3]) + + // Loading first five rows to start first row processing. + // 22 values loaded in each row. 
+ src1_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src2_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14)); + pu1_src += src_strd; + + src1_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src2_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14)); + pu1_src += src_strd; + + src1_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src2_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14)); + pu1_src += src_strd; + + src1_r3_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src2_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14)); + pu1_src += src_strd; + + src1_r4_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src2_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14)); + pu1_src += src_strd; + + do + { + src1_r5_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + src2_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14)); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src1_r0_16x8b, src1_r1_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src1_r2_16x8b, src1_r3_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src1_r4_16x8b, src1_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b); + + _mm_storeu_si128((__m128i *)pi2_pred1, res_t1_8x16b); + + src_r0r1_16x8b = _mm_unpackhi_epi8(src1_r0_16x8b, src1_r1_16x8b); + src_r2r3_16x8b = _mm_unpackhi_epi8(src1_r2_16x8b, src1_r3_16x8b); + src_r4r5_16x8b = _mm_unpackhi_epi8(src1_r4_16x8b, src1_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b); + + _mm_storeu_si128((__m128i 
*)(pi2_pred1 + 8), res_t1_8x16b); + + src_r0r1_16x8b = _mm_unpacklo_epi8(src2_r0_16x8b, src2_r1_16x8b); + src_r2r3_16x8b = _mm_unpacklo_epi8(src2_r2_16x8b, src2_r3_16x8b); + src_r4r5_16x8b = _mm_unpacklo_epi8(src2_r4_16x8b, src2_r5_16x8b); + + res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); + res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b); + res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b); + + res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b); + res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b); + + _mm_storeu_si128((__m128i *)(pi2_pred1 + 14), res_t1_8x16b); + + src1_r0_16x8b = src1_r1_16x8b; + src1_r1_16x8b = src1_r2_16x8b; + src1_r2_16x8b = src1_r3_16x8b; + src1_r3_16x8b = src1_r4_16x8b; + src1_r4_16x8b = src1_r5_16x8b; + + src2_r0_16x8b = src2_r1_16x8b; + src2_r1_16x8b = src2_r2_16x8b; + src2_r2_16x8b = src2_r3_16x8b; + src2_r3_16x8b = src2_r4_16x8b; + src2_r4_16x8b = src2_r5_16x8b; + + ht--; + pu1_src += src_strd; + pi2_pred1 += pred1_strd; + } + while(ht > 0); + } + + ht = 17; + pi2_pred1 = (WORD16 *)pi4_pred1; + + // Horizontal 6-tap filter + { + WORD32 temp; + + __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b; + __m128i src_r4_8x16b, src_r5_8x16b; + __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b; + __m128i res_vert1_8x16b, res_vert2_8x16b, res_16x8b; + + __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b; + __m128i res_c0_8x16b, res_c1_8x16b; + + __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b; + __m128i const_val512_4x32b, const_val16_8x16b; + + coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001); //c0 c1 c0 c1 c0 c1 c0 c1 + coeff2_3_8x16b = _mm_set1_epi32(0x00140014); //c2 c3 c2 c3 c2 c3 c2 c3 + coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB); //c4 c5 c4 c5 c4 c5 c4 c5 + //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20 + const_val512_4x32b = _mm_set1_epi32(512); + const_val16_8x16b = _mm_set1_epi16(16); + + do + { + src_r0_8x16b = _mm_loadu_si128((__m128i 
*)(pi2_pred1)); + src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 1)); + src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 2)); + src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 3)); + src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 4)); + src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 5)); + + res_vert1_8x16b = _mm_add_epi16(src_r2_8x16b, const_val16_8x16b); + res_vert1_8x16b = _mm_srai_epi16(res_vert1_8x16b, 5); //shifting right by 5 bits. + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_c0_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b); + + src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8)); + src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 1)); + src_r2_8x16b = _mm_loadu_si128((__m128i 
*)(pi2_pred1 + 8 + 2)); + src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 3)); + src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 4)); + src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 5)); + + res_vert2_8x16b = _mm_add_epi16(src_r2_8x16b, const_val16_8x16b); + res_vert2_8x16b = _mm_srai_epi16(res_vert2_8x16b, 5); //shifting right by 5 bits. + + src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b ,10); + + src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b); + src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b); + src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b); + + res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b); + res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b); + res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b); + + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b); + res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b); + res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b); + res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10); + + res_c1_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b); + + res_16x8b = _mm_packus_epi16(res_vert1_8x16b, res_vert2_8x16b); + _mm_storeu_si128((__m128i *)pu1_dst1, res_16x8b); + pu1_dst1[16] = CLIP_U8((pi2_pred1[18] + 16) >> 5); + + res_16x8b = _mm_packus_epi16(res_c0_8x16b, res_c1_8x16b); + _mm_storeu_si128((__m128i *)pu1_dst2, 
res_16x8b); + temp = ((pi2_pred1[18] + pi2_pred1[19]) << 2) - pi2_pred1[17] - pi2_pred1[20]; + temp = pi2_pred1[16] + pi2_pred1[21] + (temp << 2) + temp; + pu1_dst2[16] = CLIP_U8((temp + 512) >> 10); + + ht--; + pi2_pred1 += pred1_strd; + pu1_dst1 += dst_strd; + pu1_dst2 += dst_strd; + } + while(ht > 0); + } +} diff --git a/encoder/x86/ih264e_intra_modes_eval_ssse3.c b/encoder/x86/ih264e_intra_modes_eval_ssse3.c new file mode 100755 index 0000000..657921f --- /dev/null +++ b/encoder/x86/ih264e_intra_modes_eval_ssse3.c @@ -0,0 +1,1259 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ih264e_intra_modes_eval_ssse3.c +* +* @brief +* This file contains definitions of routines that perform rate distortion +* analysis on a macroblock if they are to be coded as intra. 
+* +* @author +* Ittiam +* +* @par List of Functions: +* ih264e_evaluate_intra16x16_modes_ssse3 +* ih264e_evaluate_intra_4x4_modes_ssse3 +* ih264e_evaluate_intra_chroma_modes_ssse3 +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <assert.h> +#include <immintrin.h> + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "ih264e_defs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_debug.h" +#include "ih264_defs.h" +#include "ih264_macros.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_structs.h" +#include "ih264_common_tables.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ime_distortion_metrics.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_structs.h" + +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" + +#include "ih264e_structs.h" +#include "ih264e_intra_modes_eval.h" +#include "ih264e_globals.h" +#include "ime_platform_macros.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ +/** +****************************************************************************** +* +* @brief +* evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the +* prediction. 
+* +* @par Description +* This function evaluates first three 16x16 modes and compute corresponding +* SAD and returns the buffer predicted with best mode. +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_ngbr_pels_i16 +* UWORD8 pointer to neighbouring pels +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels +* +* @param[in] u4_intra_mode +* pointer to the variable in which best mode is returned +* +* @param[in] pu4_sadmin +* pointer to the variable in which minimum sad is returned +* +* @param[in] u4_valid_intra_modes +* says what all modes are valid +* +* @return +* None +* +****************************************************************************** +*/ +void ih264e_evaluate_intra16x16_modes_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_ngbr_pels_i16, + UWORD8 *pu1_dst, + UWORD32 src_strd, + UWORD32 dst_strd, + WORD32 n_avblty, + UWORD32 *u4_intra_mode, + WORD32 *pu4_sadmin, + UWORD32 u4_valid_intra_modes) +{ + UWORD8 *pu1_src_temp; + + WORD32 left, top, horz_flag, vert_flag, dc_flag; + WORD32 sad_vert, sad_horz, sad_dc, min_sad; + + WORD32 cnt, dcval; + WORD32 src_strd2, src_strd3, src_strd4; + WORD32 dst_strd2, dst_strd3, dst_strd4; + + __m128i src1_16x8b, src2_16x8b, src3_16x8b, src4_16x8b; + __m128i val1_16x8b, val2_16x8b, val3_16x8b, val4_16x8b; + __m128i sad1_8x16b, sad2_8x16b, sad3_8x16b, sad4_8x16b; + + __m128i sad_8x16b, val_16x8b, zero_vector; + + sad_vert = INT_MAX; + sad_horz = INT_MAX; + sad_dc = INT_MAX; + + src_strd2 = src_strd << 1; + src_strd4 = src_strd << 2; + src_strd3 = src_strd + src_strd2; + + dst_strd2 = dst_strd << 1; + dst_strd4 = dst_strd << 2; + dst_strd3 = dst_strd + dst_strd2; + + left = (n_avblty & LEFT_MB_AVAILABLE_MASK); + top = (n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; + + zero_vector = _mm_setzero_si128(); + + horz_flag = 
left && ((u4_valid_intra_modes & 02) != 0); + vert_flag = top && ((u4_valid_intra_modes & 01) != 0); + dc_flag = (u4_valid_intra_modes & 04) != 0; + + if(horz_flag) + { + pu1_src_temp = pu1_src; + + val1_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[15]); + val2_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[14]); + val3_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[13]); + val4_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[12]); + + src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp); + src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd)); + src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2)); + src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3)); + + sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b); + sad2_8x16b = _mm_sad_epu8(val2_16x8b, src2_16x8b); + sad3_8x16b = _mm_sad_epu8(val3_16x8b, src3_16x8b); + sad4_8x16b = _mm_sad_epu8(val4_16x8b, src4_16x8b); + + sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b); + sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b); + + cnt = 11; + sad_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b); + do + { + pu1_src_temp += src_strd4; + + val1_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt]); + val2_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 1]); + val3_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 2]); + val4_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 3]); + + src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp); + src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd)); + src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2)); + src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3)); + + sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b); + sad2_8x16b = _mm_sad_epu8(val2_16x8b, src2_16x8b); + sad3_8x16b = _mm_sad_epu8(val3_16x8b, src3_16x8b); + sad4_8x16b = _mm_sad_epu8(val4_16x8b, src4_16x8b); + + sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b); + sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b); + sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b); 
+ + cnt -= 4; + sad_8x16b = _mm_add_epi16(sad_8x16b, sad1_8x16b); + } + while(cnt >= 0); + + sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b); + sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b); + sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b); + + sad_horz = _mm_extract_epi16(sad_8x16b, 0); + } + + if(vert_flag) + { + pu1_src_temp = pu1_src; + + val1_16x8b = _mm_loadu_si128((__m128i *)(pu1_ngbr_pels_i16 + 17)); + + src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp); + src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd)); + src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2)); + src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3)); + + sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b); + sad2_8x16b = _mm_sad_epu8(val1_16x8b, src2_16x8b); + sad3_8x16b = _mm_sad_epu8(val1_16x8b, src3_16x8b); + sad4_8x16b = _mm_sad_epu8(val1_16x8b, src4_16x8b); + + sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b); + sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b); + + cnt = 11; + sad_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b); + do + { + pu1_src_temp += src_strd4; + + src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp); + src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd)); + src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2)); + src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3)); + + sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b); + sad2_8x16b = _mm_sad_epu8(val1_16x8b, src2_16x8b); + sad3_8x16b = _mm_sad_epu8(val1_16x8b, src3_16x8b); + sad4_8x16b = _mm_sad_epu8(val1_16x8b, src4_16x8b); + + sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b); + sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b); + sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b); + + cnt -= 4; + sad_8x16b = _mm_add_epi16(sad_8x16b, sad1_8x16b); + } + while(cnt >= 0); + + sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b); + sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b); + sad_8x16b = 
_mm_hadd_epi16(sad_8x16b, sad_8x16b); + + sad_vert = _mm_extract_epi16(sad_8x16b, 0); + } + + dcval = 0; + + if(left) + { + val_16x8b = _mm_loadu_si128((__m128i *)pu1_ngbr_pels_i16); + dcval += 8; + + sad1_8x16b = _mm_sad_epu8(val_16x8b, zero_vector); + dcval += _mm_extract_epi16(sad1_8x16b, 0); + dcval += _mm_extract_epi16(sad1_8x16b, 4); + } + if(top) + { + val_16x8b = _mm_loadu_si128((__m128i *)(pu1_ngbr_pels_i16 + 17)); + dcval += 8; + + sad1_8x16b = _mm_sad_epu8(val_16x8b, zero_vector); + dcval += _mm_extract_epi16(sad1_8x16b, 0); + dcval += _mm_extract_epi16(sad1_8x16b, 4); + } + dcval = dcval >> (3 + left + top); + dcval += ((left == 0) & (top == 0)) << 7; + + if(dc_flag) + { + pu1_src_temp = pu1_src; + val1_16x8b = _mm_set1_epi8(dcval); + + src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp); + src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd)); + src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2)); + src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3)); + + sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b); + sad2_8x16b = _mm_sad_epu8(val1_16x8b, src2_16x8b); + sad3_8x16b = _mm_sad_epu8(val1_16x8b, src3_16x8b); + sad4_8x16b = _mm_sad_epu8(val1_16x8b, src4_16x8b); + + sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b); + sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b); + + cnt = 12; + sad_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b); + do + { + pu1_src_temp += src_strd4; + + src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp); + src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd)); + src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2)); + src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3)); + + sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b); + sad2_8x16b = _mm_sad_epu8(val1_16x8b, src2_16x8b); + sad3_8x16b = _mm_sad_epu8(val1_16x8b, src3_16x8b); + sad4_8x16b = _mm_sad_epu8(val1_16x8b, src4_16x8b); + + sad1_8x16b = _mm_packs_epi32(sad1_8x16b, 
sad2_8x16b); + sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b); + sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b); + + cnt -= 4; + sad_8x16b = _mm_add_epi16(sad_8x16b, sad1_8x16b); + } + while(cnt > 0); + + sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b); + sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b); + sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b); + + sad_dc = _mm_extract_epi16(sad_8x16b, 0); + } + + // Doing prediction for minimum SAD + min_sad = MIN3(sad_horz, sad_vert, sad_dc); + if(min_sad < *pu4_sadmin) + { + *pu4_sadmin = min_sad; + if(min_sad == sad_vert) + { + *u4_intra_mode = VERT_I16x16; + val1_16x8b = _mm_loadu_si128((__m128i *)(pu1_ngbr_pels_i16 + 17)); + cnt = 15; + do + { + _mm_storeu_si128((__m128i *)pu1_dst, val1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), val1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), val1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), val1_16x8b); + + cnt -= 4; + pu1_dst += dst_strd4; + } + while(cnt > 0); + } + else if(min_sad == sad_horz) + { + *u4_intra_mode = HORZ_I16x16; + cnt = 15; + do + { + val1_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt]); + val2_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 1]); + val3_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 2]); + val4_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 3]); + + _mm_storeu_si128((__m128i *)pu1_dst, val1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), val2_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), val3_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), val4_16x8b); + + cnt -= 4; + pu1_dst += dst_strd4; + } + while(cnt >= 0); + } + else + { + *u4_intra_mode = DC_I16x16; + val1_16x8b = _mm_set1_epi8(dcval); + cnt = 15; + do + { + _mm_storeu_si128((__m128i *)pu1_dst, val1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), val1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), val1_16x8b); + _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), 
val1_16x8b); + + cnt -= 4; + pu1_dst += dst_strd4; + } + while(cnt > 0); + } + } +} + +/** +****************************************************************************** +* +* @brief Evaluate best intra 4x4 mode and do the prediction. +* +* @par Description +* This function evaluates intra 4x4 modes, computes corresponding sad +* and returns the buffer predicted with best mode. +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_ngbr_pels +* UWORD8 pointer to neighbouring pels +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels +* +* @param[out] u4_intra_mode +* Pointer to the variable in which best mode is returned +* +* @param[out] pu4_sadmin +* Pointer to the variable in which minimum cost is returned +* +* @param[in] u4_valid_intra_modes +* Says what all modes are valid +* +* @param[in] u4_lambda +* Lambda value for computing cost from SAD +* +* @param[in] u4_predictd_mode +* Predicted mode for cost computation +* +* @return none +* +****************************************************************************** +*/ +void ih264e_evaluate_intra_4x4_modes_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_ngbr_pels, + UWORD8 *pu1_dst, + UWORD32 src_strd, + UWORD32 dst_strd, + WORD32 u4_n_avblty, + UWORD32 *u4_intra_mode, + WORD32 *pu4_sadmin, + UWORD32 u4_valid_intra_modes, + UWORD32 u4_lambda, + UWORD32 u4_predictd_mode) +{ + WORD32 left, top; + WORD32 sad[MAX_I4x4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, + INT_MAX, INT_MAX, INT_MAX, INT_MAX }; + WORD32 cost[MAX_I4x4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, + INT_MAX, INT_MAX, INT_MAX, INT_MAX }; + + WORD32 min_cost; + WORD32 lambda4 = u4_lambda << 2; + WORD32 dst_strd2, dst_strd3; + + __m128i left_top_16x8b, src_16x8b, pred0_16x8b, sad_8x16b; + __m128i pred1_16x8b, pred2_16x8b, pred3_16x8b, 
pred4_16x8b; + __m128i pred5_16x8b, pred6_16x8b, pred7_16x8b, pred8_16x8b; + __m128i shuffle_16x8b, zero_vector, mask_low_32b; + + left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); + top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; + + dst_strd2 = dst_strd << 1; + dst_strd3 = dst_strd + dst_strd2; + + // loading the 4x4 source block and neighbouring pixels + { + __m128i row1_16x8b, row2_16x8b; + + row1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + row2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); + left_top_16x8b = _mm_loadu_si128((__m128i *)pu1_ngbr_pels); + + pu1_src += src_strd << 1; + src_16x8b = _mm_unpacklo_epi32(row1_16x8b, row2_16x8b); + + row1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); + row2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); + zero_vector = _mm_setzero_si128(); + + row1_16x8b = _mm_unpacklo_epi32(row1_16x8b, row2_16x8b); + src_16x8b = _mm_unpacklo_epi64(src_16x8b, row1_16x8b); + } + + /* Computing SADs*/ + if(u4_valid_intra_modes & 1)/* VERT mode valid ????*/ + { + pred0_16x8b = _mm_srli_si128(left_top_16x8b, 5); + pred0_16x8b = _mm_shuffle_epi32(pred0_16x8b, 0); + sad_8x16b = _mm_sad_epu8(src_16x8b, pred0_16x8b); + + sad[VERT_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + cost[VERT_I4x4] = sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? u4_lambda: lambda4); + } + + if(u4_valid_intra_modes & 2)/* HORZ mode valid ????*/ + { + shuffle_16x8b = _mm_setr_epi8(3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0); + pred1_16x8b = _mm_shuffle_epi8(left_top_16x8b, shuffle_16x8b); + + sad_8x16b = _mm_sad_epu8(src_16x8b, pred1_16x8b); + + sad[HORZ_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + cost[HORZ_I4x4] = sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? 
u4_lambda: lambda4); + } + + if(u4_valid_intra_modes & 4)/* DC mode valid ????*/ + { + if(top + left) + { + WORD32 shft = 1, dcval = 0; + + __m128i val_16x8b, temp_16x8b, temp_8x16b; + + val_16x8b = _mm_setzero_si128(); + + if(top) + { + temp_16x8b = _mm_srli_si128(left_top_16x8b, 5); + val_16x8b = _mm_alignr_epi8(temp_16x8b, val_16x8b, 4); + shft ++; + dcval += 2; + } + if(left) + { + val_16x8b = _mm_alignr_epi8(left_top_16x8b, val_16x8b, 4); + shft++; + dcval += 2; + } + + temp_8x16b = _mm_sad_epu8(val_16x8b, zero_vector); + dcval += _mm_extract_epi16(temp_8x16b, 4); + dcval = dcval >> shft; + pred2_16x8b = _mm_set1_epi8(dcval); + } + else + pred2_16x8b = _mm_set1_epi8(128); + + sad_8x16b = _mm_sad_epu8(src_16x8b, pred2_16x8b); + + sad[DC_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + cost[DC_I4x4] = sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? u4_lambda: lambda4); + } + + if(u4_valid_intra_modes > 7)/* if modes other than VERT, HORZ and DC are valid ????*/ + { + __m128i w11_16x8b, w121_16x8b; + __m128i temp1_16x8b, temp2_16x8b; + + /* Performing FILT121 and FILT11 operation for all neighbour values*/ + { + __m128i temp1_8x16b, temp2_8x16b, temp3_8x16b; + __m128i const_2_8x16b; + + const_2_8x16b = _mm_set1_epi16(2); + + temp1_8x16b = _mm_unpacklo_epi8(left_top_16x8b, zero_vector); //l3 l2 l1 l0 tl t0 t1 t2 + temp2_8x16b = _mm_slli_si128(temp1_8x16b, 2); // 0 l3 l2 l1 l0 tl t0 t1 + temp2_8x16b = _mm_shufflelo_epi16(temp2_8x16b, 0xe5); //l3 l3 l2 l1 l0 tl t0 t1 + + temp1_8x16b = _mm_add_epi16(temp1_8x16b, temp2_8x16b); //l3+l3 l3+l2 l2+l1... t1+t2 + temp2_8x16b = _mm_slli_si128(temp1_8x16b, 2); //l3+l3 l3+l3 l3+l2... t0+t1 + temp2_8x16b = _mm_shufflelo_epi16(temp2_8x16b, 0xe5); + temp1_8x16b = _mm_add_epi16(temp1_8x16b, temp2_8x16b); //4*l3 l3+2*l3+l2 l3+2*l2+l1... t0+2*t1+t2 + + temp1_8x16b = _mm_add_epi16(const_2_8x16b, temp1_8x16b); //4*l3+2 3*l3+l2+2 l3+2*l2+l1+2.. 
t0+2*t1+t2+2 + temp1_8x16b = _mm_srli_epi16(temp1_8x16b, 2); + + temp1_16x8b = _mm_srli_si128(left_top_16x8b, 1); + w11_16x8b = _mm_avg_epu8(left_top_16x8b, temp1_16x8b); + + temp2_16x8b = _mm_srli_si128(left_top_16x8b, 6); + temp2_8x16b = _mm_unpacklo_epi8(temp2_16x8b, zero_vector); //t1 t2 t3 t4 t5 t6 t7 0 + temp3_8x16b = _mm_srli_si128(temp2_8x16b, 2); //t2 t3 t4 t5 t6 t7 0 0 + temp3_8x16b = _mm_shufflehi_epi16(temp3_8x16b, 0xd4); //t2 t3 t4 t5 t6 t7 t7 0 + + temp2_8x16b = _mm_add_epi16(temp2_8x16b, temp3_8x16b); //t1+t2 t2+t3... t6+t7 t7+t7 0 + temp3_8x16b = _mm_srli_si128(temp2_8x16b, 2); //t2+t3 t3+t4... t7+t7 0 0 + temp2_8x16b = _mm_add_epi16(temp2_8x16b, temp3_8x16b); //t1+2*t2+t3 t2+2*t3+t4.. t6+2*t7+t7 t7+t7 0 + + temp2_8x16b = _mm_add_epi16(const_2_8x16b, temp2_8x16b); //t1+2*t2+t3+2 t2+2*t3+t4+2 t3+2*t4+t5+2... t6+2*t7+t7+2 t7+t7+2 2 + temp2_8x16b = _mm_srli_epi16(temp2_8x16b, 2); + + w121_16x8b = _mm_packus_epi16(temp1_8x16b, temp2_8x16b); + } + + if(u4_valid_intra_modes & 8)/* DIAG_DL */ + { + shuffle_16x8b = _mm_setr_epi8( 7, 8, 9, 10, + 8, 9, 10, 11, + 9, 10, 11, 12, + 10, 11, 12, 13); + pred3_16x8b = _mm_shuffle_epi8(w121_16x8b, shuffle_16x8b); + sad_8x16b = _mm_sad_epu8(src_16x8b, pred3_16x8b); + + sad[DIAG_DL_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + cost[DIAG_DL_I4x4] = sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ? u4_lambda: lambda4); + } + + if(u4_valid_intra_modes & 16)/* DIAG_DR */ + { + shuffle_16x8b = _mm_setr_epi8(5, 6, 7, 8, + 4, 5, 6, 7, + 3, 4, 5, 6, + 2, 3, 4, 5); + pred4_16x8b = _mm_shuffle_epi8(w121_16x8b, shuffle_16x8b); + sad_8x16b = _mm_sad_epu8(src_16x8b, pred4_16x8b); + + sad[DIAG_DR_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + cost[DIAG_DR_I4x4] = sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ? 
u4_lambda: lambda4); + } + + if(u4_valid_intra_modes & 32)/* VERT_R mode valid ????*/ + { + temp1_16x8b = _mm_srli_si128(w121_16x8b, 1); + temp1_16x8b = _mm_unpacklo_epi64(temp1_16x8b, w11_16x8b); + shuffle_16x8b = _mm_setr_epi8(12, 13, 14, 15, + 4, 5, 6, 7, + 3, 12, 13, 14, + 2, 4, 5, 6); + pred5_16x8b = _mm_shuffle_epi8(temp1_16x8b, shuffle_16x8b); + sad_8x16b = _mm_sad_epu8(src_16x8b, pred5_16x8b); + + sad[VERT_R_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + cost[VERT_R_I4x4] = sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ? u4_lambda: lambda4); + } + + if(u4_valid_intra_modes & 64)/* HORZ_D mode valid ????*/ + { + temp1_16x8b = _mm_unpacklo_epi64(w121_16x8b, w11_16x8b); + shuffle_16x8b = _mm_setr_epi8(11, 5, 6, 7, + 10, 4, 11, 5, + 9, 3, 10, 4, + 8, 2, 9, 3); + pred6_16x8b = _mm_shuffle_epi8(temp1_16x8b, shuffle_16x8b); + sad_8x16b = _mm_sad_epu8(src_16x8b, pred6_16x8b); + + sad[HORZ_D_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + cost[HORZ_D_I4x4] = sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ? u4_lambda: lambda4); + } + + if(u4_valid_intra_modes & 128)/* VERT_L mode valid ????*/ + { + temp1_16x8b = _mm_srli_si128(w121_16x8b, 5); + temp2_16x8b = _mm_srli_si128(w11_16x8b, 5); + temp1_16x8b = _mm_unpacklo_epi64(temp1_16x8b, temp2_16x8b); + shuffle_16x8b = _mm_setr_epi8(8, 9, 10, 11, + 2, 3, 4, 5, + 9, 10, 11, 12, + 3, 4, 5, 6); + pred7_16x8b = _mm_shuffle_epi8(temp1_16x8b, shuffle_16x8b); + sad_8x16b = _mm_sad_epu8(src_16x8b, pred7_16x8b); + + sad[VERT_L_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + cost[VERT_L_I4x4] = sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ? 
u4_lambda: lambda4); + } + + if(u4_valid_intra_modes & 256)/* HORZ_U mode valid ????*/ + { + temp1_16x8b = _mm_unpacklo_epi64(w121_16x8b, w11_16x8b); + shuffle_16x8b = _mm_setr_epi8(10, 3, 9, 2, + 9, 2, 8, 1, + 8, 1, 0, 0, + 0, 0, 0, 0); + pred8_16x8b = _mm_shuffle_epi8(temp1_16x8b, shuffle_16x8b); + sad_8x16b = _mm_sad_epu8(src_16x8b, pred8_16x8b); + + sad[HORZ_U_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + cost[HORZ_U_I4x4] = sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ? u4_lambda: lambda4); + } + + min_cost = MIN3(MIN3(cost[0], cost[1], cost[2]), + MIN3(cost[3], cost[4], cost[5]), + MIN3(cost[6], cost[7], cost[8])); + } + else + { /*Only first three modes valid*/ + min_cost = MIN3(cost[0], cost[1], cost[2]); + } + + *pu4_sadmin = min_cost; + + if(min_cost == cost[0]) + { + *u4_intra_mode = VERT_I4x4; + } + else if(min_cost == cost[1]) + { + *u4_intra_mode = HORZ_I4x4; + pred0_16x8b = pred1_16x8b; + } + else if(min_cost == cost[2]) + { + *u4_intra_mode = DC_I4x4; + pred0_16x8b = pred2_16x8b; + } + else if(min_cost == cost[3]) + { + *u4_intra_mode = DIAG_DL_I4x4; + pred0_16x8b = pred3_16x8b; + } + else if(min_cost == cost[4]) + { + *u4_intra_mode = DIAG_DR_I4x4; + pred0_16x8b = pred4_16x8b; + } + else if(min_cost == cost[5]) + { + *u4_intra_mode = VERT_R_I4x4; + pred0_16x8b = pred5_16x8b; + } + else if(min_cost == cost[6]) + { + *u4_intra_mode = HORZ_D_I4x4; + pred0_16x8b = pred6_16x8b; + } + else if(min_cost == cost[7]) + { + *u4_intra_mode = VERT_L_I4x4; + pred0_16x8b = pred7_16x8b; + } + else if(min_cost == cost[8]) + { + *u4_intra_mode = HORZ_U_I4x4; + pred0_16x8b = pred8_16x8b; + } + + mask_low_32b = _mm_set1_epi8(0xff); + mask_low_32b = _mm_srli_si128(mask_low_32b, 12); + + _mm_maskmoveu_si128(pred0_16x8b, mask_low_32b, (char*)pu1_dst); + pred0_16x8b = _mm_srli_si128(pred0_16x8b, 4); + _mm_maskmoveu_si128(pred0_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd)); + pred0_16x8b = _mm_srli_si128(pred0_16x8b, 4); + 
_mm_maskmoveu_si128(pred0_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2)); + pred0_16x8b = _mm_srli_si128(pred0_16x8b, 4); + _mm_maskmoveu_si128(pred0_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); + +} + +/** +****************************************************************************** +* +* @brief +* Evaluate best intra chroma mode (among VERT, HORZ and DC) and do the prediction. +* +* @par Description +* This function evaluates the first three intra chroma modes, computes the corresponding sad +* and returns the buffer predicted with best mode. +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_ngbr_pels +* UWORD8 pointer to neighbouring pels +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] u4_n_avblty +* availability of neighbouring pixels +* +* @param[out] u4_intra_mode +* pointer to the variable in which best mode is returned +* +* @param[in,out] pu4_sadmin +* pointer to the variable in which minimum sad is returned +* +* @param[in] u4_valid_intra_modes +* says what all modes are valid +* +* @return +* none +* +****************************************************************************** +*/ + +void ih264e_evaluate_intra_chroma_modes_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_ngbr_pels, + UWORD8 *pu1_dst, + UWORD32 src_strd, + UWORD32 dst_strd, + WORD32 u4_n_avblty, + UWORD32 *u4_intra_mode, + WORD32 *pu4_sadmin, + UWORD32 u4_valid_intra_modes) +{ + WORD32 left, top; + WORD32 sad_vert = INT_MAX, sad_horz = INT_MAX, sad_dc = INT_MAX, min_sad; + + __m128i src1_16x8b, src2_16x8b, src3_16x8b, src4_16x8b; + __m128i src5_16x8b, src6_16x8b, src7_16x8b, src8_16x8b; + + __m128i top_16x8b, left_16x8b; + __m128i pred1_16x8b, pred2_16x8b; + __m128i tmp1_8x16b, tmp2_8x16b, sad_8x16b; + + left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); + top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; + + //Loading source + { + 
src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src2_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src3_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src4_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src5_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src6_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src7_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + pu1_src += src_strd; + src8_16x8b = _mm_loadu_si128((__m128i *)pu1_src); + } + + if(left) + { + left_16x8b = _mm_loadu_si128((__m128i *)pu1_ngbr_pels); + + if(u4_valid_intra_modes & 02) //If HORZ mode is valid + { + __m128i left_tmp_16x8b, left_sh_16x8b; + __m128i const_14_15_16x8b; + + const_14_15_16x8b = _mm_set1_epi16(0x0f0e); + left_sh_16x8b = _mm_slli_si128(left_16x8b, 2); + + pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 1 + pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 2 + tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred2_16x8b); + + left_tmp_16x8b = _mm_slli_si128(left_16x8b, 4); + left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4); + sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b); + + pred1_16x8b = _mm_shuffle_epi8(left_tmp_16x8b, const_14_15_16x8b); //row 3 + pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 4 + tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred2_16x8b); + + left_tmp_16x8b = _mm_slli_si128(left_tmp_16x8b, 4); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + pred1_16x8b = _mm_shuffle_epi8(left_tmp_16x8b, const_14_15_16x8b); //row 5 + pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 6 + tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred1_16x8b); + tmp2_8x16b = 
_mm_sad_epu8(src6_16x8b, pred2_16x8b); + + left_tmp_16x8b = _mm_slli_si128(left_tmp_16x8b, 4); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + pred1_16x8b = _mm_shuffle_epi8(left_tmp_16x8b, const_14_15_16x8b); //row 7 + pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 8 + tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred2_16x8b); + + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + sad_horz = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + } + } + + if(top) + { + UWORD8 *pu1_top; + + pu1_top = pu1_ngbr_pels + 2 * BLK8x8SIZE + 2; + top_16x8b = _mm_loadu_si128((__m128i *)pu1_top); + + if(u4_valid_intra_modes & 04) //If VERT mode is valid + { + tmp1_8x16b = _mm_sad_epu8(src1_16x8b, top_16x8b); + tmp2_8x16b = _mm_sad_epu8(src2_16x8b, top_16x8b); + sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src3_16x8b, top_16x8b); + tmp2_8x16b = _mm_sad_epu8(src4_16x8b, top_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src5_16x8b, top_16x8b); + tmp2_8x16b = _mm_sad_epu8(src6_16x8b, top_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src7_16x8b, top_16x8b); + tmp2_8x16b = _mm_sad_epu8(src8_16x8b, top_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + sad_vert = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + } + } + + if(u4_valid_intra_modes & 01) //If DC mode is valid + { + if(left && top) + { + WORD32 left_up_u, left_down_u, left_up_v, left_down_v; + WORD32 top_left_u, top_right_u, top_left_v, top_right_v; + WORD32 dc_1u, 
dc_1v, dc_2u, dc_2v; + + __m128i val_sh_16x8b; + __m128i intrlv_mask_8x16b, zero_vector; + + intrlv_mask_8x16b = _mm_set1_epi16(0x00ff); + zero_vector = _mm_setzero_si128(); + + val_sh_16x8b = _mm_srli_si128(left_16x8b, 1); + + tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, left_16x8b); + tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, val_sh_16x8b); + tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b); + tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b); + + left_up_u = _mm_extract_epi16(tmp1_8x16b, 4); + left_up_v = _mm_extract_epi16(tmp2_8x16b, 4); + left_down_u = _mm_extract_epi16(tmp1_8x16b, 0); + left_down_v = _mm_extract_epi16(tmp2_8x16b, 0); + + val_sh_16x8b = _mm_srli_si128(top_16x8b, 1); + + tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, top_16x8b); + tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, val_sh_16x8b); + tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b); + tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b); + + top_left_u = _mm_extract_epi16(tmp1_8x16b, 0); + top_left_v = _mm_extract_epi16(tmp2_8x16b, 0); + top_right_u = _mm_extract_epi16(tmp1_8x16b, 4); + top_right_v = _mm_extract_epi16(tmp2_8x16b, 4); + + // First four rows + dc_1u = (left_up_u + top_left_u + 4) >> 3; + dc_1v = (left_up_v + top_left_v + 4) >> 3; + dc_2u = (top_right_u + 2) >> 2; + dc_2v = (top_right_v + 2) >> 2; + + pred1_16x8b = _mm_setr_epi8(dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, + dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v); + + tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b); + sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + // Second four rows + dc_1u = (left_down_u + 2) >> 2; + dc_1v = (left_down_v + 2) >> 2; + dc_2u = (left_down_u + top_right_u + 4) >> 3; + dc_2v = 
(left_down_v + top_right_v + 4) >> 3; + + pred2_16x8b = _mm_setr_epi8(dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, + dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v); + + tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred2_16x8b); + tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred2_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred2_16x8b); + tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred2_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + } + else if(left) + { + WORD32 left_up_u, left_down_u, left_up_v, left_down_v; + WORD32 dc_u, dc_v; + + __m128i left_sh_16x8b; + __m128i intrlv_mask_8x16b, zero_vector; + + intrlv_mask_8x16b = _mm_set1_epi16(0x00ff); + zero_vector = _mm_setzero_si128(); + + left_sh_16x8b = _mm_srli_si128(left_16x8b, 1); + + tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, left_16x8b); + tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, left_sh_16x8b); + tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b); + tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b); + + left_up_u = _mm_extract_epi16(tmp1_8x16b, 4); + left_up_v = _mm_extract_epi16(tmp2_8x16b, 4); + left_down_u = _mm_extract_epi16(tmp1_8x16b, 0); + left_down_v = _mm_extract_epi16(tmp2_8x16b, 0); + + // First four rows + dc_u = (left_up_u + 2) >> 2; + dc_v = (left_up_v + 2) >> 2; + + pred1_16x8b = _mm_set1_epi16(dc_u | (dc_v << 8)); + + tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b); + sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + // Second four rows + dc_u = (left_down_u + 2) 
>> 2; + dc_v = (left_down_v + 2) >> 2; + + pred2_16x8b = _mm_set1_epi16(dc_u | (dc_v << 8)); + + tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred2_16x8b); + tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred2_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred2_16x8b); + tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred2_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + } + else if(top) + { + WORD32 top_left_u, top_right_u, top_left_v, top_right_v; + WORD32 dc_1u, dc_1v, dc_2u, dc_2v; + + __m128i top_sh_16x8b; + __m128i intrlv_mask_8x16b, zero_vector; + + intrlv_mask_8x16b = _mm_set1_epi16(0x00ff); + zero_vector = _mm_setzero_si128(); + + top_sh_16x8b = _mm_srli_si128(top_16x8b, 1); + + tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, top_16x8b); + tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, top_sh_16x8b); + tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b); + tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b); + + top_left_u = _mm_extract_epi16(tmp1_8x16b, 0); + top_left_v = _mm_extract_epi16(tmp2_8x16b, 0); + top_right_u = _mm_extract_epi16(tmp1_8x16b, 4); + top_right_v = _mm_extract_epi16(tmp2_8x16b, 4); + + dc_1u = (top_left_u + 2) >> 2; + dc_1v = (top_left_v + 2) >> 2; + dc_2u = (top_right_u + 2) >> 2; + dc_2v = (top_right_v + 2) >> 2; + + pred1_16x8b = _mm_setr_epi8(dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, + dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v); + + tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b); + sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = 
_mm_add_epi16(sad_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred1_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred1_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + } + else + { + pred1_16x8b = _mm_set1_epi8(128); + + tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b); + sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred1_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred1_16x8b); + tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred1_16x8b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b); + sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b); + + sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); + } + } + + min_sad = MIN3(sad_horz, sad_vert, sad_dc); + + /* Finding minimum SAD and doing corresponding prediction*/ + if(min_sad < *pu4_sadmin) + { + *pu4_sadmin = min_sad; + + if(min_sad == sad_dc) + { + *u4_intra_mode = DC_CH_I8x8; + + if(!left) + pred2_16x8b = pred1_16x8b; + + _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b); + pu1_dst += dst_strd; + 
_mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b); + pu1_dst += dst_strd; + + _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b); + } + else if(min_sad == sad_horz) + { + __m128i left_sh_16x8b, const_14_15_16x8b; + + *u4_intra_mode = HORZ_CH_I8x8; + + const_14_15_16x8b = _mm_set1_epi16(0x0f0e); + + left_sh_16x8b = _mm_slli_si128(left_16x8b, 2); + pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 1 + pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 2 + + _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b); + + left_16x8b = _mm_slli_si128(left_16x8b, 4); + left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4); + pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 3 + pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 4 + + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b); + + left_16x8b = _mm_slli_si128(left_16x8b, 4); + left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4); + pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 5 + pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 6 + + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b); + + left_16x8b = _mm_slli_si128(left_16x8b, 4); + left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4); + pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 7 + pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 8 + + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b); + pu1_dst += 
dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b); + } + else + { + *u4_intra_mode = VERT_CH_I8x8; + + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + pu1_dst += dst_strd; + _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); + } + } +} diff --git a/encoder/x86/ih264e_platform_macros.h b/encoder/x86/ih264e_platform_macros.h new file mode 100755 index 0000000..b4dfadd --- /dev/null +++ b/encoder/x86/ih264e_platform_macros.h @@ -0,0 +1,154 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** + ******************************************************************************* + * @file + * ih264e_platform_macros.h + * + * @brief + * Contains platform specific routines used for codec context initialization + * + * @author + * ittiam + * + * @remarks + * none + * + ******************************************************************************* + */ + + +#ifndef IH264E_PLATFORM_MACROS_H_ +#define IH264E_PLATFORM_MACROS_H_ + +/*****************************************************************************/ +/* Extern Function Declarations */ +/*****************************************************************************/ + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context based on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr_generic(codec_t *ps_codec); +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* @par Description: the current routine initializes the function pointers of +* codec context based on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr_ssse3(codec_t *ps_codec); +void ih264e_init_function_ptr_sse42(codec_t *ps_codec); + +/** +******************************************************************************* +* +* @brief Initialize the intra/inter/transform/deblk function pointers of +* codec context +* +* 
@par Description: the current routine initializes the function pointers of +* codec context basing on the architecture in use +* +* @param[in] ps_codec +* Codec context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ih264e_init_function_ptr(void *pv_codec); + +/** +******************************************************************************* +* +* @brief Determine the architecture of the encoder executing environment +* +* @par Description: This routine returns the architecture of the enviro- +* ment in which the current encoder is being tested +* +* @param[in] void +* +* @returns IV_ARCH_T +* architecture +* +* @remarks none +* +******************************************************************************* +*/ +IV_ARCH_T ih264e_default_arch(void); + +/** +******************************************************************************* +* +* @brief Data Memory Barrier, Data Synchronization Barrier +* +* +* @par Description: These functions do nothing on x86 side. But on arm platforms, +* +* Data Memory Barrier acts as a memory barrier. It ensures that all explicit +* memory accesses that appear in program order before the DMB instruction are +* observed before any explicit memory accesses that appear in program order +* after the DMB instruction. It does not affect the ordering of any other +* instructions executing on the processor +* +* Data Synchronization Barrier acts as a special kind of memory barrier. No +* instruction in program order after this instruction executes until this instruction +* completes. This instruction completes when: +* 1. All explicit memory accesses before this instruction complete. +* 2. All Cache, Branch predictor and TLB maintenance operations before +* this instruction complete. 
+* +* @param[in] void +* +* @returns void +* +* @remarks none +* +******************************************************************************* +*/ + +#endif /* IH264E_PLATFORM_MACROS_H_ */ diff --git a/encoder/x86/ime_distortion_metrics_sse42.c b/encoder/x86/ime_distortion_metrics_sse42.c new file mode 100755 index 0000000..0876788 --- /dev/null +++ b/encoder/x86/ime_distortion_metrics_sse42.c @@ -0,0 +1,1940 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +****************************************************************************** +* @file ime_distortion_metrics_sse42.c +* +* @brief +* This file contains definitions of routines that compute distortion +* between two macro/sub blocks of identical dimensions +* +* @author +* Ittiam +* +* @par List of Functions: +* - ime_compute_sad_16x16_sse42() +* - ime_compute_sad_16x16_fast_sse42() +* - ime_compute_sad_16x16_ea8_sse42() +* - ime_compute_sad_16x8_sse42() +* - ime_calculate_sad4_prog_sse42() +* - ime_sub_pel_compute_sad_16x16_sse42() +* - ime_compute_satqd_16x16_lumainter_sse42() +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* User include files */ +#include "ime_typedefs.h" +#include "ime_defs.h" +#include "ime_macros.h" +#include "ime_statistics.h" +#include "ime_platform_macros.h" +#include "ime_distortion_metrics.h" +#include <immintrin.h> + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** +****************************************************************************** +* +* @brief computes distortion (SAD) between 2 16x16 blocks +* +* @par Description +* This function computes the SAD between two 16x16 blocks. All 16 rows +* are always accumulated: i4_max_sad is accepted but never read, so this +* variant performs no early exit.
+* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_est +* UWORD8 pointer to the block compared against the source +* +* @param[in] src_strd +* integer source stride +* +* @param[in] est_strd +* integer stride of the pu1_est block +* +* @param[in] i4_max_sad +* integer maximum allowed distortion (not read by this function) +* +* @param[out] pi4_mb_distortion +* integer evaluated sad +* +* @remarks each row is read with an unaligned 16-byte load +* +****************************************************************************** +*/ +void ime_compute_sad_16x16_sse42(UWORD8 *pu1_src, + UWORD8 *pu1_est, + WORD32 src_strd, + WORD32 est_strd, + WORD32 i4_max_sad, + WORD32 *pi4_mb_distortion) +{ + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i est_r0, est_r1, est_r2, est_r3; + __m128i res_r0, res_r1, res_r2, res_r3; + __m128i sad_val; + int val1, val2; + + // Row 0-3 sad calculation + src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); + src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd)); + src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd)); + src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd)); + + est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); + est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + est_strd)); + est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd)); + est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd)); + + res_r0 = _mm_sad_epu8(src_r0, est_r0); + res_r1 = _mm_sad_epu8(src_r1, est_r1); + res_r2 = _mm_sad_epu8(src_r2, est_r2); + res_r3 = _mm_sad_epu8(src_r3, est_r3); + + sad_val = _mm_add_epi64(res_r0, res_r1); + sad_val = _mm_add_epi64(sad_val, res_r2); + sad_val = _mm_add_epi64(sad_val, res_r3); + + // Row 4-7 sad calculation + pu1_src += 4*src_strd; + pu1_est += 4*est_strd; + + src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); + src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd)); + src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd)); + src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd)); + + est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); + est_r1 =
_mm_loadu_si128((__m128i *) (pu1_est + est_strd)); + est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd)); + est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd)); + + res_r0 = _mm_sad_epu8(src_r0, est_r0); + res_r1 = _mm_sad_epu8(src_r1, est_r1); + res_r2 = _mm_sad_epu8(src_r2, est_r2); + res_r3 = _mm_sad_epu8(src_r3, est_r3); + + sad_val = _mm_add_epi64(sad_val, res_r0); + sad_val = _mm_add_epi64(sad_val, res_r1); + sad_val = _mm_add_epi64(sad_val, res_r2); + sad_val = _mm_add_epi64(sad_val, res_r3); + + // Row 8-11 sad calculation + pu1_src += 4*src_strd; + pu1_est += 4*est_strd; + src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); + src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd)); + src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd)); + src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd)); + + est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); + est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + est_strd)); + est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd)); + est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd)); + + res_r0 = _mm_sad_epu8(src_r0, est_r0); + res_r1 = _mm_sad_epu8(src_r1, est_r1); + res_r2 = _mm_sad_epu8(src_r2, est_r2); + res_r3 = _mm_sad_epu8(src_r3, est_r3); + + sad_val = _mm_add_epi64(sad_val, res_r0); + sad_val = _mm_add_epi64(sad_val, res_r1); + sad_val = _mm_add_epi64(sad_val, res_r2); + sad_val = _mm_add_epi64(sad_val, res_r3); + + // Row 12-15 sad calculation + pu1_src += 4*src_strd; + pu1_est += 4*est_strd; + src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); + src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd)); + src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd)); + src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd)); + + est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); + est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + est_strd)); + est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd)); + est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd));
+ + res_r0 = _mm_sad_epu8(src_r0, est_r0); + res_r1 = _mm_sad_epu8(src_r1, est_r1); + res_r2 = _mm_sad_epu8(src_r2, est_r2); + res_r3 = _mm_sad_epu8(src_r3, est_r3); + + sad_val = _mm_add_epi64(sad_val, res_r0); + sad_val = _mm_add_epi64(sad_val, res_r1); + sad_val = _mm_add_epi64(sad_val, res_r2); + sad_val = _mm_add_epi64(sad_val, res_r3); + + val1 = _mm_extract_epi32(sad_val,0); /* SAD of byte columns 0-7 */ + val2 = _mm_extract_epi32(sad_val, 2); /* SAD of byte columns 8-15 */ + *pi4_mb_distortion = (val1+val2); + + return; +} + +/** +****************************************************************************** +* +* @brief computes distortion (SAD) between 2 16x8 blocks +* +* +* @par Description +* This function computes the SAD between two blocks that are 16 bytes +* wide and 8 rows high. All 8 rows are always accumulated: i4_max_sad is +* accepted but never read, so this variant performs no early exit. +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_est +* UWORD8 pointer to the block compared against the source +* +* @param[in] src_strd +* integer source stride +* +* @param[in] est_strd +* integer stride of the pu1_est block +* +* @param[in] i4_max_sad +* integer maximum allowed distortion (not read by this function) +* +* @param[out] pi4_mb_distortion +* integer evaluated sad +* +* @remarks each row is read with an unaligned 16-byte load +* +****************************************************************************** +*/ +void ime_compute_sad_16x8_sse42(UWORD8 *pu1_src, + UWORD8 *pu1_est, + WORD32 src_strd, + WORD32 est_strd, + WORD32 i4_max_sad, + WORD32 *pi4_mb_distortion) +{ + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i est_r0, est_r1, est_r2, est_r3; + __m128i res_r0, res_r1, res_r2, res_r3; + __m128i sad_val; + int val1, val2; + + // Row 0-3 sad calculation + src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); + src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd)); + src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd)); + src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd)); + + est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); + est_r1 =
_mm_loadu_si128((__m128i *) (pu1_est + est_strd)); + est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd)); + est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd)); + + res_r0 = _mm_sad_epu8(src_r0, est_r0); + res_r1 = _mm_sad_epu8(src_r1, est_r1); + res_r2 = _mm_sad_epu8(src_r2, est_r2); + res_r3 = _mm_sad_epu8(src_r3, est_r3); + + sad_val = _mm_add_epi64(res_r0, res_r1); + sad_val = _mm_add_epi64(sad_val, res_r2); + sad_val = _mm_add_epi64(sad_val, res_r3); + + // Row 4-7 sad calculation + pu1_src += 4*src_strd; + pu1_est += 4*est_strd; + + src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); + src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd)); + src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd)); + src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd)); + + est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); + est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + est_strd)); + est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd)); + est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd)); + + res_r0 = _mm_sad_epu8(src_r0, est_r0); + res_r1 = _mm_sad_epu8(src_r1, est_r1); + res_r2 = _mm_sad_epu8(src_r2, est_r2); + res_r3 = _mm_sad_epu8(src_r3, est_r3); + + sad_val = _mm_add_epi64(sad_val, res_r0); + sad_val = _mm_add_epi64(sad_val, res_r1); + sad_val = _mm_add_epi64(sad_val, res_r2); + sad_val = _mm_add_epi64(sad_val, res_r3); + + val1 = _mm_extract_epi32(sad_val,0); /* SAD of byte columns 0-7 */ + val2 = _mm_extract_epi32(sad_val, 2); /* SAD of byte columns 8-15 */ + *pi4_mb_distortion = (val1+val2); + return; +} + +/** +****************************************************************************** +* +* @brief computes distortion (SAD) between 2 16x16 blocks, with early exit +* +* @par Description +* This function accumulates the SAD of the 8 even rows first. If i4_max_sad +* is less than that partial SAD, the partial SAD is returned; otherwise the +* 8 odd rows are added and the SAD of all 16 rows is returned.
+* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] i4_max_sad +* integer maximum allowed distortion +* +* @param[out] pi4_mb_distortion +* integer evaluated sad +* +* @remarks +* +****************************************************************************** +*/ +void ime_compute_sad_16x16_ea8_sse42(UWORD8 *pu1_src, + UWORD8 *pu1_est, + WORD32 src_strd, + WORD32 est_strd, + WORD32 i4_max_sad, + WORD32 *pi4_mb_distortion) +{ + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i est_r0, est_r1, est_r2, est_r3; + __m128i res_r0, res_r1, res_r2, res_r3; + __m128i sad_val; + WORD32 val1, val2; + WORD32 i4_sad; + UWORD8 *pu1_src_temp = pu1_src + src_strd; /* row 1 of pu1_src */ + UWORD8 *pu1_est_temp = pu1_est + est_strd; /* row 1 of pu1_est */ + + // Row 0,2,4,6 sad calculation + src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); + src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd)); + src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4*src_strd)); + src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6*src_strd)); + + est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); + est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd)); + est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4*est_strd)); + est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6*est_strd)); + + res_r0 = _mm_sad_epu8(src_r0, est_r0); + res_r1 = _mm_sad_epu8(src_r1, est_r1); + res_r2 = _mm_sad_epu8(src_r2, est_r2); + res_r3 = _mm_sad_epu8(src_r3, est_r3); + + sad_val = _mm_add_epi64(res_r0, res_r1); + sad_val = _mm_add_epi64(sad_val, res_r2); + sad_val = _mm_add_epi64(sad_val, res_r3); + + // Row 8,10,12,14 sad calculation + pu1_src += 8*src_strd; + pu1_est += 8*est_strd; + + src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); + src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd)); + src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4*src_strd)); + src_r3 =
_mm_loadu_si128((__m128i *) (pu1_src + 6*src_strd)); + + est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); + est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd)); + est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4*est_strd)); + est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6*est_strd)); + + res_r0 = _mm_sad_epu8(src_r0, est_r0); + res_r1 = _mm_sad_epu8(src_r1, est_r1); + res_r2 = _mm_sad_epu8(src_r2, est_r2); + res_r3 = _mm_sad_epu8(src_r3, est_r3); + + sad_val = _mm_add_epi64(sad_val, res_r0); + sad_val = _mm_add_epi64(sad_val, res_r1); + sad_val = _mm_add_epi64(sad_val, res_r2); + sad_val = _mm_add_epi64(sad_val, res_r3); + + pu1_src = pu1_src_temp; /* restart from row 1 for the odd rows */ + pu1_est = pu1_est_temp; + + val1 = _mm_extract_epi32(sad_val, 0); + val2 = _mm_extract_epi32(sad_val, 2); + + i4_sad = val1 + val2; /* SAD of the 8 even rows */ + if (i4_max_sad < i4_sad) /* early exit with the even-row SAD only */ + { + *pi4_mb_distortion = i4_sad; + return ; + } + // Row 1,3,5,7 sad calculation + src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); + src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd)); + src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4*src_strd)); + src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6*src_strd)); + + est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); + est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd)); + est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4*est_strd)); + est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6*est_strd)); + + res_r0 = _mm_sad_epu8(src_r0, est_r0); + res_r1 = _mm_sad_epu8(src_r1, est_r1); + res_r2 = _mm_sad_epu8(src_r2, est_r2); + res_r3 = _mm_sad_epu8(src_r3, est_r3); + + sad_val = _mm_add_epi64(sad_val, res_r0); + sad_val = _mm_add_epi64(sad_val, res_r1); + sad_val = _mm_add_epi64(sad_val, res_r2); + sad_val = _mm_add_epi64(sad_val, res_r3); + + // Row 9,11,13,15 sad calculation + pu1_src += 8*src_strd; + pu1_est += 8*est_strd; + src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); + src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd)); + src_r2 = _mm_loadu_si128((__m128i *) (pu1_src +
4*src_strd)); + src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6*src_strd)); + + est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); + est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd)); + est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4*est_strd)); + est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6*est_strd)); + + res_r0 = _mm_sad_epu8(src_r0, est_r0); + res_r1 = _mm_sad_epu8(src_r1, est_r1); + res_r2 = _mm_sad_epu8(src_r2, est_r2); + res_r3 = _mm_sad_epu8(src_r3, est_r3); + + sad_val = _mm_add_epi64(sad_val, res_r0); + sad_val = _mm_add_epi64(sad_val, res_r1); + sad_val = _mm_add_epi64(sad_val, res_r2); + sad_val = _mm_add_epi64(sad_val, res_r3); + + val1 = _mm_extract_epi32(sad_val, 0); /* SAD of byte columns 0-7 */ + val2 = _mm_extract_epi32(sad_val, 2); /* SAD of byte columns 8-15 */ + *pi4_mb_distortion = (val1+val2); + + return; +} + +/** +****************************************************************************** +* +* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode) +* +* @par Description +* This function estimates the SAD between two 16x16 blocks from the 8 even +* rows only (fast mode). The SAD of the whole block is assumed to be about +* twice the SAD of those rows, so the even-row SAD is doubled on output.
+* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] pu1_est +* UWORD8 pointer to the block compared against the source +* +* @param[in] src_strd +* integer source stride +* +* @param[in] est_strd +* integer stride of the pu1_est block +* +* @param[in] i4_max_sad +* integer maximum allowed distortion (not read by this function) +* +* @param[out] pi4_mb_distortion +* integer evaluated sad (twice the sad of the even rows) +* +* @remarks each row is read with an unaligned 16-byte load +* +****************************************************************************** +*/ +void ime_compute_sad_16x16_fast_sse42(UWORD8 *pu1_src, + UWORD8 *pu1_est, + WORD32 src_strd, + WORD32 est_strd, + WORD32 i4_max_sad, + WORD32 *pi4_mb_distortion) +{ + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i est_r0, est_r1, est_r2, est_r3; + __m128i res_r0, res_r1, res_r2, res_r3; + __m128i sad_val; + WORD32 val1, val2; + WORD32 i4_sad; + UWORD8 *pu1_src_temp = pu1_src + src_strd; + UWORD8 *pu1_est_temp = pu1_est + est_strd; + + // Row 0,2,4,6 sad calculation + src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); + src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2 * src_strd)); + src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4 * src_strd)); + src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6 * src_strd)); + + est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); + est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2 * est_strd)); + est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4 * est_strd)); + est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6 * est_strd)); + + res_r0 = _mm_sad_epu8(src_r0, est_r0); + res_r1 = _mm_sad_epu8(src_r1, est_r1); + res_r2 = _mm_sad_epu8(src_r2, est_r2); + res_r3 = _mm_sad_epu8(src_r3, est_r3); + + sad_val = _mm_add_epi64(res_r0, res_r1); + sad_val = _mm_add_epi64(sad_val, res_r2); + sad_val = _mm_add_epi64(sad_val, res_r3); + + // Row 8,10,12,14 sad calculation + pu1_src += 8 * src_strd; + pu1_est += 8 * est_strd; + + src_r0 = _mm_loadu_si128((__m128i *) (pu1_src)); + src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2 * src_strd)); + src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4 *
src_strd)); + src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6 * src_strd)); + + est_r0 = _mm_loadu_si128((__m128i *) (pu1_est)); + est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2 * est_strd)); + est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4 * est_strd)); + est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6 * est_strd)); + + res_r0 = _mm_sad_epu8(src_r0, est_r0); + res_r1 = _mm_sad_epu8(src_r1, est_r1); + res_r2 = _mm_sad_epu8(src_r2, est_r2); + res_r3 = _mm_sad_epu8(src_r3, est_r3); + + sad_val = _mm_add_epi64(sad_val, res_r0); + sad_val = _mm_add_epi64(sad_val, res_r1); + sad_val = _mm_add_epi64(sad_val, res_r2); + sad_val = _mm_add_epi64(sad_val, res_r3); + + pu1_src = pu1_src_temp; /* NOTE(review): not read again below */ + pu1_est = pu1_est_temp; /* NOTE(review): not read again below */ + + val1 = _mm_extract_epi32(sad_val, 0); + val2 = _mm_extract_epi32(sad_val, 2); + + i4_sad = val1 + val2; /* SAD of the 8 even rows */ + *pi4_mb_distortion = (i4_sad<<1); /* doubled to approximate all 16 rows */ + return; +} + +/** +******************************************************************************* +* +* @brief compute sad at the four unit-distance diamond positions +* +* @par Description: This function computes the sad of a 16x16 source block +* against the reference shifted by one byte left/right and one row up/down.
+* +* @param[in] pu1_ref +* UWORD8 pointer to the reference +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[in] ref_strd +* integer reference stride +* +* @param[in] src_strd +* integer source stride +* +* @param[out] pi4_sad +* array of 4 sads: [0] left, [1] right, [2] top, [3] bottom +* +* @returns none; the four sads are written to pi4_sad +* +* @remarks none +* +******************************************************************************* +*/ +void ime_calculate_sad4_prog_sse42(UWORD8 *pu1_ref, + UWORD8 *pu1_src, + WORD32 ref_strd, + WORD32 src_strd, + WORD32 *pi4_sad) +{ + /* reference ptrs at unit 1 distance in diamond pattern centered at pu1_ref */ + UWORD8 *left_ptr = pu1_ref - 1; + UWORD8 *right_ptr = pu1_ref + 1; + UWORD8 *top_ptr = pu1_ref - ref_strd; + UWORD8 *bot_ptr = pu1_ref + ref_strd; + + WORD32 val1, val2; + __m128i src, ref_left, ref_right, ref_top, ref_bot; + __m128i res_r0, res_r1, res_r2, res_r3; + __m128i sad_r0, sad_r1, sad_r2, sad_r3; + + // Row 0 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + sad_r0 = _mm_sad_epu8(src, ref_left); + sad_r1 = _mm_sad_epu8(src, ref_right); + sad_r2 = _mm_sad_epu8(src, ref_top); + sad_r3 = _mm_sad_epu8(src, ref_bot); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 1 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + +
sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 2 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 3 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 4 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *)
(top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 5 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 6 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd;
+ + // Row 7 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 8 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 9 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 =
_mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 10 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 11 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 12 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i
*) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 13 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 14 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + pu1_src += src_strd; + left_ptr += ref_strd; + right_ptr += ref_strd; + top_ptr += ref_strd; + bot_ptr += ref_strd; + + // Row 15 sad calculation + src =
_mm_loadu_si128((__m128i *) (pu1_src)); + ref_left = _mm_loadu_si128((__m128i *) (left_ptr)); + ref_right = _mm_loadu_si128((__m128i *) (right_ptr)); + ref_top = _mm_loadu_si128((__m128i *) (top_ptr)); + ref_bot = _mm_loadu_si128((__m128i *) (bot_ptr)); + + res_r0 = _mm_sad_epu8(src, ref_left); + res_r1 = _mm_sad_epu8(src, ref_right); + res_r2 = _mm_sad_epu8(src, ref_top); + res_r3 = _mm_sad_epu8(src, ref_bot); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + + val1 = _mm_extract_epi32(sad_r0, 0); + val2 = _mm_extract_epi32(sad_r0, 2); + pi4_sad[0] = (val1 + val2); /* left (pu1_ref - 1) */ + + val1 = _mm_extract_epi32(sad_r1, 0); + val2 = _mm_extract_epi32(sad_r1, 2); + pi4_sad[1] = (val1 + val2); /* right (pu1_ref + 1) */ + + val1 = _mm_extract_epi32(sad_r2, 0); + val2 = _mm_extract_epi32(sad_r2, 2); + pi4_sad[2] = (val1 + val2); /* top (pu1_ref - ref_strd) */ + + val1 = _mm_extract_epi32(sad_r3, 0); + val2 = _mm_extract_epi32(sad_r3, 2); + pi4_sad[3] = (val1 + val2); /* bottom (pu1_ref + ref_strd) */ +} + +/** +****************************************************************************** +* +* @brief computes distortion (SAD) at all subpel points about the src location +* +* @par Description +* This function computes SAD at all points at a subpel distance from the +* current source location.
+* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[out] pu1_ref_half_x +* UWORD8 pointer to half pel buffer +* +* @param[out] pu1_ref_half_y +* UWORD8 pointer to half pel buffer +* +* @param[out] pu1_ref_half_xy +* UWORD8 pointer to half pel buffer +* +* @param[in] src_strd +* integer source stride +* +* @param[in] ref_strd +* integer ref stride +* +* @param[out] pi4_sad +* integer evaluated sad +* pi4_sad[0] - half x +* pi4_sad[1] - half x - 1 +* pi4_sad[2] - half y +* pi4_sad[3] - half y - 1 +* pi4_sad[4] - half xy +* pi4_sad[5] - half xy - 1 +* pi4_sad[6] - half xy - strd +* pi4_sad[7] - half xy - 1 - strd +* +* @remarks +* +****************************************************************************** +*/ +void ime_sub_pel_compute_sad_16x16_sse42(UWORD8 *pu1_src, + UWORD8 *pu1_ref_half_x, + UWORD8 *pu1_ref_half_y, + UWORD8 *pu1_ref_half_xy, + WORD32 src_strd, + WORD32 ref_strd, + WORD32 *pi4_sad) +{ + UWORD8 *pu1_ref_half_x_left = pu1_ref_half_x - 1; + UWORD8 *pu1_ref_half_y_top = pu1_ref_half_y - ref_strd; + UWORD8 *pu1_ref_half_xy_left = pu1_ref_half_xy - 1; + UWORD8 *pu1_ref_half_xy_top = pu1_ref_half_xy - ref_strd; + UWORD8 *pu1_ref_half_xy_top_left = pu1_ref_half_xy - ref_strd - 1; + WORD32 val1, val2; + + __m128i src, ref_half_x, ref_half_y, ref_half_xy; + __m128i ref_half_x_left, ref_half_y_top, ref_half_xy_left, ref_half_xy_top, ref_half_xy_top_left; + __m128i res_r0, res_r1, res_r2, res_r3, res_r4, res_r5, res_r6, res_r7; + __m128i sad_r0, sad_r1, sad_r2, sad_r3, sad_r4, sad_r5, sad_r6, sad_r7; + // Row 0 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = 
_mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + sad_r0 = _mm_sad_epu8(src, ref_half_x); + sad_r1 = _mm_sad_epu8(src, ref_half_x_left); + sad_r2 = _mm_sad_epu8(src, ref_half_y); + sad_r3 = _mm_sad_epu8(src, ref_half_y_top); + sad_r4 = _mm_sad_epu8(src, ref_half_xy); + sad_r5 = _mm_sad_epu8(src, ref_half_xy_left); + sad_r6 = _mm_sad_epu8(src, ref_half_xy_top); + sad_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 1 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, 
res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 2 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top 
+= ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 3 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 4 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + 
ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + + // Row 5 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + 
ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 6 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = 
_mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 7 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = 
_mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 8 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += 
ref_strd; + + // Row 9 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 10 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + 
ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 11 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = 
_mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 12 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + 
sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 13 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + 
pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 14 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + pu1_src += src_strd; + pu1_ref_half_x += ref_strd; + pu1_ref_half_x_left += ref_strd; + pu1_ref_half_y += ref_strd; + pu1_ref_half_y_top += ref_strd; + pu1_ref_half_xy += ref_strd; + pu1_ref_half_xy_left += ref_strd; + pu1_ref_half_xy_top += ref_strd; + pu1_ref_half_xy_top_left += ref_strd; + + // Row 15 sad calculation + src = _mm_loadu_si128((__m128i *) (pu1_src)); + ref_half_x = _mm_loadu_si128((__m128i *) 
(pu1_ref_half_x)); + ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y)); + ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy)); + ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left)); + ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top)); + ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left)); + ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top)); + ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left)); + + res_r0 = _mm_sad_epu8(src, ref_half_x); + res_r1 = _mm_sad_epu8(src, ref_half_x_left); + res_r2 = _mm_sad_epu8(src, ref_half_y); + res_r3 = _mm_sad_epu8(src, ref_half_y_top); + res_r4 = _mm_sad_epu8(src, ref_half_xy); + res_r5 = _mm_sad_epu8(src, ref_half_xy_left); + res_r6 = _mm_sad_epu8(src, ref_half_xy_top); + res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left); + + sad_r0 = _mm_add_epi64(sad_r0, res_r0); + sad_r1 = _mm_add_epi64(sad_r1, res_r1); + sad_r2 = _mm_add_epi64(sad_r2, res_r2); + sad_r3 = _mm_add_epi64(sad_r3, res_r3); + sad_r4 = _mm_add_epi64(sad_r4, res_r4); + sad_r5 = _mm_add_epi64(sad_r5, res_r5); + sad_r6 = _mm_add_epi64(sad_r6, res_r6); + sad_r7 = _mm_add_epi64(sad_r7, res_r7); + + val1 = _mm_extract_epi32(sad_r0, 0); + val2 = _mm_extract_epi32(sad_r0, 2); + pi4_sad[0] = (val1 + val2); + + val1 = _mm_extract_epi32(sad_r1, 0); + val2 = _mm_extract_epi32(sad_r1, 2); + pi4_sad[1] = (val1 + val2); + + val1 = _mm_extract_epi32(sad_r2, 0); + val2 = _mm_extract_epi32(sad_r2, 2); + pi4_sad[2] = (val1 + val2); + + val1 = _mm_extract_epi32(sad_r3, 0); + val2 = _mm_extract_epi32(sad_r3, 2); + pi4_sad[3] = (val1 + val2); + + val1 = _mm_extract_epi32(sad_r4, 0); + val2 = _mm_extract_epi32(sad_r4, 2); + pi4_sad[4] = (val1 + val2); + + val1 = _mm_extract_epi32(sad_r5, 0); + val2 = _mm_extract_epi32(sad_r5, 2); + pi4_sad[5] = (val1 + val2); + + val1 = _mm_extract_epi32(sad_r6, 0); + val2 = _mm_extract_epi32(sad_r6, 2); + pi4_sad[6] = (val1 + val2); + + 
val1 = _mm_extract_epi32(sad_r7, 0); + val2 = _mm_extract_epi32(sad_r7, 2); + pi4_sad[7] = (val1 + val2); + + return; +} +/* +* +* @brief This function computes SAD between two 16x16 blocks +* It also computes if the block will be zero after H264 transform and quant for +* Intra 16x16 blocks +* +* @param[in] pu1_src +* UWORD8 pointer to the source +* +* @param[out] pu1_dst +* UWORD8 pointer to the destination +* +* @param[in] src_strd +* integer source stride +* +* @param[in] dst_strd +* integer destination stride +* +* @param[in] pu2_thrsh +* Threshold for each element of transofrmed quantized block +* +* @param[out] pi4_mb_distortion +* integer evaluated sad +* +* @param[out] pu4_is_zero +* Poitner to store if the block is zero after transform and quantization +* +* @remarks +* +****************************************************************************** +*/ +void ime_compute_satqd_16x16_lumainter_sse42(UWORD8 *pu1_src, + UWORD8 *pu1_est, + WORD32 src_strd, + WORD32 est_strd, + UWORD16 *pu2_thrsh, + WORD32 *pi4_mb_distortion, + UWORD32 *pu4_is_zero) +{ + __m128i src_r0, src_r1, src_r2, src_r3; + __m128i est_r0, est_r1, est_r2, est_r3; + __m128i temp0, temp1, temp2, temp3, temp4; + __m128i zero = _mm_setzero_si128(); // all bits reset to zero + __m128i all_one = _mm_set1_epi8(0xFF); + __m128i sad_b1, sad_b2, threshold; + WORD16 sad_1, sad_2; + WORD32 i; + UWORD32 flag = 0; + WORD32 test1, test2; + threshold = _mm_loadu_si128((__m128i *) pu2_thrsh); + (*pi4_mb_distortion) = 0; + + for (i=0; i<4; i++) + { + src_r0 = _mm_loadl_epi64((__m128i *) pu1_src); //Row 0 - Block1 and 2 + src_r1 = _mm_loadl_epi64((__m128i *) (pu1_src + src_strd)); //Row 1 - Block1 and 2 + src_r2 = _mm_loadl_epi64((__m128i *) (pu1_src + 2 * src_strd)); //Row 2 - Block1 and 2 + src_r3 = _mm_loadl_epi64((__m128i *) (pu1_src + 3 * src_strd)); //Row 3 - Block1 and 2 + + src_r0 = _mm_cvtepu8_epi16(src_r0); + src_r1 = _mm_cvtepu8_epi16(src_r1); + src_r2 = _mm_cvtepu8_epi16(src_r2); + src_r3 = 
_mm_cvtepu8_epi16(src_r3); + + est_r0 = _mm_loadl_epi64((__m128i *) pu1_est); + est_r1 = _mm_loadl_epi64((__m128i *) (pu1_est + est_strd)); + est_r2 = _mm_loadl_epi64((__m128i *) (pu1_est + 2 * est_strd)); + est_r3 = _mm_loadl_epi64((__m128i *) (pu1_est + 3 * est_strd)); + + est_r0 = _mm_cvtepu8_epi16(est_r0); + est_r1 = _mm_cvtepu8_epi16(est_r1); + est_r2 = _mm_cvtepu8_epi16(est_r2); + est_r3 = _mm_cvtepu8_epi16(est_r3); + + src_r0 = _mm_sub_epi16(src_r0, est_r0); + src_r1 = _mm_sub_epi16(src_r1, est_r1); + src_r2 = _mm_sub_epi16(src_r2, est_r2); + src_r3 = _mm_sub_epi16(src_r3, est_r3); + + src_r0 = _mm_abs_epi16(src_r0); + src_r1 = _mm_abs_epi16(src_r1); + src_r2 = _mm_abs_epi16(src_r2); + src_r3 = _mm_abs_epi16(src_r3); + + src_r0 = _mm_add_epi16(src_r0, src_r3); //s1 s4 s4 s1 a1 a4 a4 a1 + src_r1 = _mm_add_epi16(src_r1, src_r2); //s2 s3 s3 s2 a2 a3 a3 a2 + + //SAD calculation + temp0 = _mm_add_epi16(src_r0, src_r1); //s1+s2 s4+s3 s4+s3 s1+s2 a1+a2 a4+a3 a4+a3 a1+a2 + temp0 = _mm_hadd_epi16(temp0, zero); + temp0 = _mm_hadd_epi16(temp0, zero); //sad1, sad2 - 16bit values + + sad_1 = _mm_extract_epi16(temp0, 0); + sad_2 = _mm_extract_epi16(temp0, 1); + + (*pi4_mb_distortion) += sad_1 + sad_2; + + if (flag == 0) { + sad_b1 = _mm_set1_epi16((sad_1 << 1)); + sad_b2 = _mm_set1_epi16((sad_2 << 1)); + + src_r0 = _mm_shufflelo_epi16(src_r0, 0x9c); //Block 0 s1 s1 s4 s4 a1 a4 a4 a1 + src_r0 = _mm_shufflehi_epi16(src_r0, 0x9c); //Block 1 s1 s1 s4 s4 a1 a1 a4 a4 + + src_r1 = _mm_shufflelo_epi16(src_r1, 0x9c); //Block 0 s2 s2 s3 s3 a2 a3 a3 a2 + src_r1 = _mm_shufflehi_epi16(src_r1, 0x9c); //Block 1 s2 s2 s3 s3 a2 a2 a3 a3 + + src_r0 = _mm_hadd_epi16(src_r0, zero); //s1 s4 a1 a4 0 0 0 0 + src_r1 = _mm_hadd_epi16(src_r1, zero); //s2 s3 a2 a3 0 0 0 0 + + temp0 = _mm_slli_epi16(src_r0, 1);//s1<<1 s4<<1 a1<<1 a4<<1 0 0 0 0 + temp1 = _mm_slli_epi16(src_r1, 1);//s2<<1 s3<<1 a2<<1 a3<<1 0 0 0 0 + + temp0 = _mm_shufflelo_epi16(temp0, 0xb1);//s4<<1 s1<<1 a4<<1 a1<<1 0 0 0 0 + temp1 = 
_mm_shufflelo_epi16(temp1, 0xb1);//s3<<1 s2<<1 a3<<1 a2<<1 0 0 0 0 + + temp2 = _mm_sub_epi16(src_r0, temp1);//(s1-s3<<1) (s4-s2<<1) (a1-a3<<1) (a4-a2<<1) 0 0 0 0 + temp3 = _mm_sub_epi16(src_r1, temp0);//(s2-s4<<1) (s3-s1<<1) (a2-a4<<1) (a3-a1<<1) 0 0 0 0 + + temp4 = _mm_add_epi16(src_r0, src_r1);//s1+s2 s4+s3 a1+a2 a4+a3 0 0 0 0 + + temp0 = _mm_hadd_epi16(src_r0, zero); //s1+s4 a1+a4 0 0 0 0 0 0 + temp1 = _mm_hadd_epi16(src_r1, zero); //s2+s3 a2+a3 0 0 0 0 0 0 + + temp0 = _mm_unpacklo_epi16(temp0, temp1);//s1+s4 s2+s3 a1+a4 a2+a3 0 0 0 0 + + temp0 = _mm_unpacklo_epi32(temp0, temp2);//s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1) + temp1 = _mm_unpacklo_epi32(temp4, temp3);//s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1) a1+a2 a4+a3 (a2-a4<<1) (a3-a1<<1) + + temp2 = _mm_unpacklo_epi64(temp0, temp1);//s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1) + temp3 = _mm_unpackhi_epi64(temp0, temp1); //a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1) a1+a2 a4+a3 (s2-s4<<1) (s3-s1<<1) + + sad_b1 = _mm_sub_epi16(sad_b1, temp2); //lsi values Block0 + sad_b2 = _mm_sub_epi16(sad_b2, temp3); //lsi values Block1 + + temp0 = _mm_cmpgt_epi16(threshold, sad_b1); //if any threshold[i]>ls[i], corresponding 16-bit value in temp becomes 0xffff + + temp1 = _mm_cmpgt_epi16(threshold, sad_b2); + + temp0 = _mm_xor_si128(temp0, all_one); //Xor with 1 => NOT operation + temp1 = _mm_xor_si128(temp1, all_one); + + test1 = _mm_test_all_zeros(temp0, all_one); + test2 = _mm_test_all_zeros(temp1, all_one); + + if (test1 == 0 || test2 == 0 || pu2_thrsh[8] <= sad_1 + || pu2_thrsh[8] <= sad_2) + flag = 1; + } + + pu1_src += 8; + pu1_est += 8; + + src_r0 = _mm_loadl_epi64((__m128i *) pu1_src); //Row 0 - Block1 and 2 + src_r1 = _mm_loadl_epi64((__m128i *) (pu1_src + src_strd)); //Row 1 - Block1 and 2 + src_r2 = _mm_loadl_epi64((__m128i *) (pu1_src + 2 * src_strd)); //Row 2 - Block1 and 2 + src_r3 = _mm_loadl_epi64((__m128i *) (pu1_src + 3 * src_strd)); //Row 3 - Block1 and 2 + + src_r0 = 
_mm_cvtepu8_epi16(src_r0); + src_r1 = _mm_cvtepu8_epi16(src_r1); + src_r2 = _mm_cvtepu8_epi16(src_r2); + src_r3 = _mm_cvtepu8_epi16(src_r3); + + est_r0 = _mm_loadl_epi64((__m128i *) pu1_est); + est_r1 = _mm_loadl_epi64((__m128i *) (pu1_est + est_strd)); + est_r2 = _mm_loadl_epi64((__m128i *) (pu1_est + 2 * est_strd)); + est_r3 = _mm_loadl_epi64((__m128i *) (pu1_est + 3 * est_strd)); + + est_r0 = _mm_cvtepu8_epi16(est_r0); + est_r1 = _mm_cvtepu8_epi16(est_r1); + est_r2 = _mm_cvtepu8_epi16(est_r2); + est_r3 = _mm_cvtepu8_epi16(est_r3); + + src_r0 = _mm_sub_epi16(src_r0, est_r0); + src_r1 = _mm_sub_epi16(src_r1, est_r1); + src_r2 = _mm_sub_epi16(src_r2, est_r2); + src_r3 = _mm_sub_epi16(src_r3, est_r3); + + src_r0 = _mm_abs_epi16(src_r0); + src_r1 = _mm_abs_epi16(src_r1); + src_r2 = _mm_abs_epi16(src_r2); + src_r3 = _mm_abs_epi16(src_r3); + + src_r0 = _mm_add_epi16(src_r0, src_r3); //s1 s4 s4 s1 a1 a4 a4 a1 + src_r1 = _mm_add_epi16(src_r1, src_r2); //s2 s3 s3 s2 a2 a3 a3 a2 + + //SAD calculation + temp0 = _mm_add_epi16(src_r0, src_r1); + temp0 = _mm_hadd_epi16(temp0, zero); + temp0 = _mm_hadd_epi16(temp0, zero); //sad1, sad2 - 16bit values + + sad_1 = _mm_extract_epi16(temp0, 0); + sad_2 = _mm_extract_epi16(temp0, 1); + + (*pi4_mb_distortion) += sad_1 + sad_2; + + if (flag == 0) { + sad_b1 = _mm_set1_epi16((sad_1 << 1)); + sad_b2 = _mm_set1_epi16((sad_2 << 1)); + + src_r0 = _mm_shufflelo_epi16(src_r0, 0x9c); //Block 0 s1 s1 s4 s4 a1 a4 a4 a1 + src_r0 = _mm_shufflehi_epi16(src_r0, 0x9c); //Block 1 s1 s1 s4 s4 a1 a1 a4 a4 + + src_r1 = _mm_shufflelo_epi16(src_r1, 0x9c); //Block 0 s2 s2 s3 s3 a2 a3 a3 a2 + src_r1 = _mm_shufflehi_epi16(src_r1, 0x9c); //Block 1 s2 s2 s3 s3 a2 a2 a3 a3 + + src_r0 = _mm_hadd_epi16(src_r0, zero); //s1 s4 a1 a4 0 0 0 0 + src_r1 = _mm_hadd_epi16(src_r1, zero); //s2 s3 a2 a3 0 0 0 0 + + temp0 = _mm_slli_epi16(src_r0, 1);//s1<<1 s4<<1 a1<<1 a4<<1 0 0 0 0 + temp1 = _mm_slli_epi16(src_r1, 1);//s2<<1 s3<<1 a2<<1 a3<<1 0 0 0 0 + + temp0 = 
_mm_shufflelo_epi16(temp0, 0xb1);//s4<<1 s1<<1 a4<<1 a1<<1 0 0 0 0 + temp1 = _mm_shufflelo_epi16(temp1, 0xb1);//s3<<1 s2<<1 a3<<1 a2<<1 0 0 0 0 + + temp2 = _mm_sub_epi16(src_r0, temp1);//(s1-s3<<1) (s4-s2<<1) (a1-a3<<1) (a4-a2<<1) 0 0 0 0 + temp3 = _mm_sub_epi16(src_r1, temp0);//(s2-s4<<1) (s3-s1<<1) (a2-a4<<1) (a3-a1<<1) 0 0 0 0 + + temp4 = _mm_add_epi16(src_r0, src_r1);//s1+s2 s4+s3 a1+a2 a4+a3 0 0 0 0 + + temp0 = _mm_hadd_epi16(src_r0, zero); //s1+s4 a1+a4 0 0 0 0 0 0 + temp1 = _mm_hadd_epi16(src_r1, zero); //s2+s3 a2+a3 0 0 0 0 0 0 + + temp0 = _mm_unpacklo_epi16(temp0, temp1);//s1+s4 s2+s3 a1+a4 a2+a3 0 0 0 0 + + temp0 = _mm_unpacklo_epi32(temp0, temp2);//s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1) + temp1 = _mm_unpacklo_epi32(temp4, temp3);//s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1) a1+a2 a4+a3 (a2-a4<<1) (a3-a1<<1) + + temp2 = _mm_unpacklo_epi64(temp0, temp1);//s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1) + temp3 = _mm_unpackhi_epi64(temp0, temp1); //a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1) a1+a2 a4+a3 (s2-s4<<1) (s3-s1<<1) + + sad_b1 = _mm_sub_epi16(sad_b1, temp2); //lsi values Block0 + sad_b2 = _mm_sub_epi16(sad_b2, temp3); //lsi values Block1 + + temp0 = _mm_cmpgt_epi16(threshold, sad_b1); //if any threshold[i]>ls[i], corresponding 16-bit value in temp becomes 0xffff + + temp1 = _mm_cmpgt_epi16(threshold, sad_b2); + + temp0 = _mm_xor_si128(temp0, all_one); //Xor with 1 => NOT operation + temp1 = _mm_xor_si128(temp1, all_one); + + test1 = _mm_test_all_zeros(temp0, all_one); + test2 = _mm_test_all_zeros(temp1, all_one); + + if (test1 == 0 || test2 == 0 || pu2_thrsh[8] <= sad_1 + || pu2_thrsh[8] <= sad_2) + flag = 1; + } + + pu1_src += 4*src_strd - 8; + pu1_est += 4*est_strd - 8; + } + + *pu4_is_zero = flag; +} diff --git a/encoder/x86/ime_platform_macros.h b/encoder/x86/ime_platform_macros.h new file mode 100755 index 0000000..18e2e8f --- /dev/null +++ b/encoder/x86/ime_platform_macros.h @@ -0,0 +1,52 @@ 
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ime_platform_macros.h +* +* @brief +* Platform specific Macro definitions used in the codec +* +* @author +* Ittiam +* +* @remarks +* USADA8(src,est,sad) adds ABS(src[i]-est[i]) for i = 0..3 to sad +* +******************************************************************************* +*/ + + +#ifndef _IME_PLATFORM_MACROS_H_ +#define _IME_PLATFORM_MACROS_H_ + +/*****************************************************************************/ +/* Function macro definitions */ +/*****************************************************************************/ + +#define USADA8(src,est,sad) \ + sad += ABS(src[0]-est[0]) + \ + ABS(src[1]-est[1]) + \ + ABS(src[2]-est[2]) + \ + ABS(src[3]-est[3]) + + +#endif /* _IME_PLATFORM_MACROS_H_ */ diff --git a/test/Android.mk b/test/Android.mk new file mode 100755 index 0000000..0085832 --- /dev/null +++ b/test/Android.mk @@ -0,0 +1,8 @@ +LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +# encoder +include $(LOCAL_PATH)/encoder.mk + +# decoder +include $(LOCAL_PATH)/decoder.mk diff --git a/test/decoder.mk b/test/decoder.mk new file mode
100755 index 0000000..1a49a92 --- /dev/null +++ b/test/decoder.mk @@ -0,0 +1,13 @@ +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_MODULE := avcdec +LOCAL_MODULE_TAGS := optional + +LOCAL_CFLAGS := -DPROFILE_ENABLE -DARM -DMD5_DISABLE -fPIC +LOCAL_C_INCLUDES += $(LOCAL_PATH)/../decoder $(LOCAL_PATH)/../common $(LOCAL_PATH)/decoder/ +LOCAL_SRC_FILES := decoder/main.c +LOCAL_STATIC_LIBRARIES := libavcdec + +include $(BUILD_EXECUTABLE) diff --git a/test/decoder/main.c b/test/decoder/main.c new file mode 100755 index 0000000..0076ce9 --- /dev/null +++ b/test/decoder/main.c @@ -0,0 +1,3196 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : main.c */ +/* */ +/* Description : Contains an application that demonstrates use of H264*/ +/* decoder API */ +/* */ +/* List of Functions : */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 Harish Initial Version */ +/*****************************************************************************/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#ifdef X86_MINGW +#include <signal.h> +#endif + +#ifndef IOS +#include <malloc.h> +#endif +#ifdef IOS_DISPLAY +#include "cast_types.h" +#else +#include "ih264_typedefs.h" +#endif + +#include "iv.h" +#include "ivd.h" +#include "ih264d.h" +#include "ithread.h" + +#ifdef WINDOWS_TIMER +#include <windows.h> +#else +#include <sys/time.h> +#endif + +#define ALIGN8(x) ((((x) + 7) >> 3) << 3) +#define NUM_DISPLAY_BUFFERS 4 +#define DEFAULT_FPS 30 + +#define ENABLE_DEGRADE 0 +#define MAX_DISP_BUFFERS 64 +#define EXTRA_DISP_BUFFERS 8 +#define STRLENGTH 1000 + +//#define TEST_FLUSH +#define FLUSH_FRM_CNT 100 +//#define APP_EXTRA_BUFS 1 + +#ifdef IOS +#define PATHLENMAX 500 +char filename_with_path[PATHLENMAX]; +#endif + +#ifdef PROFILE_ENABLE + #ifdef WINDOWS_TIMER + typedef LARGE_INTEGER TIMER; + #else + //#ifdef GCC_TIMER + typedef struct timeval TIMER; + //#endif + #endif +#else + typedef WORD32 TIMER; +#endif + +#ifdef PROFILE_ENABLE + #ifdef WINDOWS_TIMER + #define GETTIME(timer) QueryPerformanceCounter(timer); + #else + //#ifdef GCC_TIMER + #define GETTIME(timer) gettimeofday(timer,NULL); + //#endif + #endif + + #ifdef WINDOWS_TIMER + #define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) \ + { \ + TIMER 
s_temp_time; \ + s_temp_time.LowPart = s_end_timer.LowPart - s_start_timer.LowPart ; \ + s_elapsed_time = (UWORD32) ( ((DOUBLE)s_temp_time.LowPart / (DOUBLE)frequency.LowPart ) * 1000000); \ + } + #else + //#ifdef GCC_TIMER + #define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) \ + s_elapsed_time = ((s_end_timer.tv_sec - s_start_timer.tv_sec) * 1000000) + (s_end_timer.tv_usec - s_start_timer.tv_usec); + //#endif + #endif + +#else + #define GETTIME(timer) + #define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) +#endif + + +/* Function declarations */ +#ifndef MD5_DISABLE +void calc_md5_cksum(UWORD8 *pu1_inbuf,UWORD32 u4_stride,UWORD32 u4_width,UWORD32 u4_height,UWORD8 *pu1_cksum_p ); +#else +#define calc_md5_cksum(a, b, c, d, e) +#endif +#ifdef SDL_DISPLAY +void* sdl_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *); +void sdl_alloc_disp_buffers(void *); +void sdl_display(void *, WORD32 ); +void sdl_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **); +void sdl_disp_deinit(void *); +void sdl_disp_usleep(UWORD32); +IV_COLOR_FORMAT_T sdl_get_color_fmt(void); +UWORD32 sdl_get_stride(void); +#endif + +#ifdef INTEL_CE5300 +void* gdl_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *); +void gdl_alloc_disp_buffers(void *); +void gdl_display(void *, WORD32 ); +void gdl_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **); +void gdl_disp_deinit(void *); +void gdl_disp_usleep(UWORD32); +IV_COLOR_FORMAT_T gdl_get_color_fmt(void); +UWORD32 gdl_get_stride(void); +#endif + +#ifdef FBDEV_DISPLAY +void* fbd_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *); +void fbd_alloc_disp_buffers(void *); +void fbd_display(void *, WORD32 ); +void fbd_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **); +void fbd_disp_deinit(void *); +void fbd_disp_usleep(UWORD32); +IV_COLOR_FORMAT_T 
fbd_get_color_fmt(void); +UWORD32 fbd_get_stride(void); +#endif + +#ifdef IOS_DISPLAY +void* ios_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *); +void ios_alloc_disp_buffers(void *); +void ios_display(void *, WORD32 ); +void ios_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **); +void ios_disp_deinit(void *); +void ios_disp_usleep(UWORD32); +IV_COLOR_FORMAT_T ios_get_color_fmt(void); +UWORD32 ios_get_stride(void); +#endif + +typedef struct +{ + UWORD32 u4_piclen_flag; + UWORD32 u4_file_save_flag; + UWORD32 u4_chksum_save_flag; + UWORD32 u4_max_frm_ts; + IV_COLOR_FORMAT_T e_output_chroma_format; + IVD_ARCH_T e_arch; + IVD_SOC_T e_soc; + UWORD32 dump_q_rd_idx; + UWORD32 dump_q_wr_idx; + WORD32 disp_q_wr_idx; + WORD32 disp_q_rd_idx; + + void *cocodec_obj; + UWORD32 u4_share_disp_buf; + UWORD32 num_disp_buf; + UWORD32 b_pic_present; + UWORD32 u4_disable_dblk_level; + WORD32 i4_degrade_type; + WORD32 i4_degrade_pics; + UWORD32 u4_num_cores; + UWORD32 disp_delay; + WORD32 trace_enable; + CHAR ac_trace_fname[STRLENGTH]; + CHAR ac_piclen_fname[STRLENGTH]; + CHAR ac_ip_fname[STRLENGTH]; + CHAR ac_op_fname[STRLENGTH]; + CHAR ac_op_chksum_fname[STRLENGTH]; + ivd_out_bufdesc_t s_disp_buffers[MAX_DISP_BUFFERS]; + iv_yuv_buf_t s_disp_frm_queue[MAX_DISP_BUFFERS]; + UWORD32 s_disp_frm_id_queue[MAX_DISP_BUFFERS]; + UWORD32 loopback; + UWORD32 display; + UWORD32 full_screen; + UWORD32 fps; + UWORD32 max_wd; + UWORD32 max_ht; + UWORD32 max_level; + + UWORD32 u4_strd; + + /* For signalling to display thread */ + UWORD32 u4_pic_wd; + UWORD32 u4_pic_ht; + + /* For IOS diplay */ + WORD32 i4_screen_wd; + WORD32 i4_screen_ht; + + //UWORD32 u4_output_present; + WORD32 quit; + WORD32 paused; + + + void *pv_disp_ctx; + void *display_thread_handle; + WORD32 display_thread_created; + volatile WORD32 display_init_done; + volatile WORD32 display_deinit_flag; + + void *(*disp_init)(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, 
WORD32 *, WORD32 *); + void (*alloc_disp_buffers)(void *); + void (*display_buffer)(void *, WORD32); + void (*set_disp_buffers)(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **); + void (*disp_deinit)(void *); + void (*disp_usleep)(UWORD32); + IV_COLOR_FORMAT_T (*get_color_fmt)(void); + UWORD32 (*get_stride)(void); +} vid_dec_ctx_t; + + + +typedef enum +{ + INVALID, + HELP, + VERSION, + INPUT_FILE, + OUTPUT, + CHKSUM, + SAVE_OUTPUT, + SAVE_CHKSUM, + CHROMA_FORMAT, + NUM_FRAMES, + NUM_CORES, + DISABLE_DEBLOCK_LEVEL, + SHARE_DISPLAY_BUF, + LOOPBACK, + DISPLAY, + FULLSCREEN, + FPS, + TRACE, + MAX_WD, + MAX_HT, + MAX_LEVEL, + CONFIG, + + DEGRADE_TYPE, + DEGRADE_PICS, + ARCH, + SOC, + PICLEN, + PICLEN_FILE, +} ARGUMENT_T; + +typedef struct +{ + CHAR argument_shortname[4]; + CHAR argument_name[128]; + ARGUMENT_T argument; + CHAR description[512]; +} argument_t; + +static const argument_t argument_mapping[] = +{ + {"-h", "--help", HELP, + "Print this help\n"}, + { "-c", "--config", CONFIG, + "config file (Default: test.cfg)\n" }, + + {"-v", "--version", VERSION, + "Version information\n"}, + {"-i", "--input", INPUT_FILE, + "Input file\n"}, + {"-o", "--output", OUTPUT, + "Output file\n"}, + {"--", "--piclen", PICLEN, + "Flag to signal if the decoder has to use a file containing number of bytes in each picture to be fed in each call\n"}, + {"--", "--piclen_file", PICLEN_FILE, + "File containing number of bytes in each picture - each line containing one i4_size\n"}, + {"--", "--chksum", CHKSUM, + "Output MD5 Checksum file\n"}, + { "-s", "--save_output", SAVE_OUTPUT, + "Save Output file\n" }, + { "--", "--save_chksum", SAVE_CHKSUM, + "Save Check sum file\n" }, + {"--", "--chroma_format", CHROMA_FORMAT, + "Output Chroma format Supported values YUV_420P, YUV_422ILE, RGB_565, YUV_420SP_UV, YUV_420SP_VU\n" }, + { "-n", "--num_frames", NUM_FRAMES, + "Number of frames to be decoded\n" }, + { "--", "--num_cores", NUM_CORES, + "Number of cores to be used\n" }, + { "--", 
"--share_display_buf", SHARE_DISPLAY_BUF, + "Enable shared display buffer mode\n" }, + {"--", "--disable_deblock_level", DISABLE_DEBLOCK_LEVEL, + "Disable deblocking level : 0 to 4 - 0 Enable deblocking 4 Disable deblocking completely\n"}, + { "--", "--loopback", LOOPBACK, + "Enable playback in a loop\n" }, + { "--", "--display", DISPLAY, + "Enable display (uses SDL)\n" }, + { "--", "--fullscreen", FULLSCREEN, + "Enable full screen (Only for GDL and SDL)\n" }, + { "--", "--fps", FPS, + "FPS to be used for display \n" }, + {"-i", "--trace", TRACE, + "Trace file\n"}, + { "--", "--max_wd", MAX_WD, + "Maximum width (Default: 2560) \n" }, + { "--", "--max_ht", MAX_HT, + "Maximum height (Default: 1600)\n" }, + + { "--", "--max_level", MAX_LEVEL, + "Maximum Decoder Level (Default: 50)\n" }, + + {"--", "--degrade_type", DEGRADE_TYPE, + "Degrade type : 0: No degrade 0th bit set : Disable SAO 1st bit set : Disable deblocking 2nd bit set : Faster inter prediction filters 3rd bit set : Fastest inter prediction filters\n" }, + {"--", "--degrade_pics", DEGRADE_PICS, + "Degrade pics : 0 : No degrade 1 : Only on non-reference frames 2 : Do not degrade every 4th or key frames 3 : All non-key frames 4 : All frames"}, + + {"--", "--arch", ARCH, + "Set Architecture. Supported values ARM_NONEON, ARM_A9Q, ARM_A7, ARM_A5, ARM_NEONINTR,ARMV8_GENERIC, X86_GENERIC, X86_SSSE3, X86_SSE4 \n" }, + {"--", "--soc", SOC, + "Set SOC. 
Supported values GENERIC, HISI_37X \n" }, + +}; + +#define PEAK_WINDOW_SIZE 8 +#define MAX_FRAME_WIDTH 2560 +#define MAX_FRAME_HEIGHT 1600 +#define MAX_LEVEL_SUPPORTED 50 +#define MAX_REF_FRAMES 16 +#define MAX_REORDER_FRAMES 16 +#define DEFAULT_SHARE_DISPLAY_BUF 0 +#define STRIDE 0 +#define DEFAULT_NUM_CORES 1 + + +#define DUMP_SINGLE_BUF 0 +#define IV_ISFATALERROR(x) (((x) >> IVD_FATALERROR) & 0x1) + +#define ivd_api_function ih264d_api_function + +#ifdef IOS +char filename_trace[PATHLENMAX]; +#endif + +#if ANDROID_NDK +/*****************************************************************************/ +/* */ +/* Function Name : raise */ +/* */ +/* Description : Needed as a workaround when the application is built in */ +/* Android NDK. This is an exception to be called for divide*/ +/* by zero error */ +/* */ +/* Inputs : a */ +/* Globals : */ +/* Processing : None */ +/* */ +/* Outputs : */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ +int raise(int a) +{ + printf("Divide by zero\n"); + return 0; +} +#endif + +#ifdef _WIN32 +/*****************************************************************************/ +/* Function to print library calls */ +/*****************************************************************************/ +/*****************************************************************************/ +/* */ +/* Function Name : memalign */ +/* */ +/* Description : Returns malloc data. 
Ideally should return aligned memory*/ +/* support alignment will be added later */ +/* */ +/* Inputs : alignment - requested alignment in bytes */ +/* i4_size - number of bytes to allocate */ +/* Globals : */ +/* Processing : _WIN32: _aligned_malloc; IOS: malloc (alignment unused); else: memalign */ +/* */ +/* Outputs : */ +/* Returns : Pointer returned by the underlying allocator */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +void * ih264a_aligned_malloc(WORD32 alignment, WORD32 i4_size) +{ + return (void *)_aligned_malloc(i4_size, alignment); +} + +void ih264a_aligned_free(void *pv_buf) +{ + _aligned_free(pv_buf); + return; +} +#endif + +#if IOS +void * ih264a_aligned_malloc(WORD32 alignment, WORD32 i4_size) +{ + return malloc(i4_size); +} + +void ih264a_aligned_free(void *pv_buf) +{ + free(pv_buf); + return; +} +#endif + +#if (!defined(IOS)) && (!defined(_WIN32)) +void * ih264a_aligned_malloc(WORD32 alignment, WORD32 i4_size) +{ + return memalign(alignment, i4_size); +} + +void ih264a_aligned_free(void *pv_buf) +{ + free(pv_buf); + return; +} +#endif +/*****************************************************************************/ +/* */ +/* Function Name : set_degrade */ +/* */ +/* Description : Control call to set degrade level */ +/* */ +/* */ +/* Inputs : codec_obj - Codec Handle */ +/* type - degrade level value between 0 to 4 */ +/* 0 : No degrade */ +/* 1st bit : Disable SAO */ +/* 2nd bit : Disable Deblock */ +/* 3rd bit : Faster MC for non-ref */ +/* 4th bit : Fastest MC for non-ref */ +/* pics - Pictures that are degraded */ +/* 0 : No degrade */ +/* 1 : Non-ref pictures */ +/* 2 : Pictures at given interval are not degraded */ +/* 3 : All non-key pictures */ +/* 4 : All pictures */ +/* Globals : */ +/* Processing : Calls degrade control to the codec */ +/* */ +/* Outputs : */ +/* Returns : Control call return i4_status */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189
Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T set_degrade(void *codec_obj, UWORD32 type, WORD32 pics) +{ + ih264d_ctl_degrade_ip_t s_ctl_ip; + ih264d_ctl_degrade_op_t s_ctl_op; + void *pv_api_ip, *pv_api_op; + IV_API_CALL_STATUS_T e_dec_status; + + s_ctl_ip.u4_size = sizeof(ih264d_ctl_degrade_ip_t); + s_ctl_ip.i4_degrade_type = type; + s_ctl_ip.i4_nondegrade_interval = 4; + s_ctl_ip.i4_degrade_pics = pics; + + s_ctl_op.u4_size = sizeof(ih264d_ctl_degrade_op_t); + + pv_api_ip = (void *)&s_ctl_ip; + pv_api_op = (void *)&s_ctl_op; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T) IH264D_CMD_CTL_DEGRADE; + + e_dec_status = ivd_api_function((iv_obj_t *)codec_obj, pv_api_ip, pv_api_op); + + if(IV_SUCCESS != e_dec_status) + { + printf("Error in setting degrade level \n"); + } + return (e_dec_status); + +} + + + +/*****************************************************************************/ +/* */ +/* Function Name : enable_skipb_frames */ +/* */ +/* Description : Control call to enable skipping of b frames */ +/* */ +/* */ +/* Inputs : codec_obj : Codec handle */ +/* Globals : */ +/* Processing : Calls enable skip B frames control */ +/* */ +/* Outputs : */ +/* Returns : Control call return i4_status */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T enable_skipb_frames(void *codec_obj, + vid_dec_ctx_t *ps_app_ctx) +{ + ivd_ctl_set_config_ip_t s_ctl_ip; + ivd_ctl_set_config_op_t s_ctl_op; + IV_API_CALL_STATUS_T e_dec_status; + + s_ctl_ip.u4_disp_wd = ps_app_ctx->u4_strd; + s_ctl_ip.e_frm_skip_mode = IVD_SKIP_B; + + s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; + s_ctl_ip.e_vid_dec_mode = IVD_DECODE_FRAME; + s_ctl_ip.e_cmd = 
IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t); + + e_dec_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + + if(IV_SUCCESS != e_dec_status) + { + printf("Error in Enable SkipB frames \n"); + } + + return e_dec_status; +} +/*****************************************************************************/ +/* */ +/* Function Name : disable_skipb_frames */ +/* */ +/* Description : Control call to disable skipping of b frames */ +/* */ +/* */ +/* Inputs : codec_obj : Codec handle */ +/* Globals : */ +/* Processing : Calls disable B frame skip control */ +/* */ +/* Outputs : */ +/* Returns : Control call return i4_status */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T disable_skipb_frames(void *codec_obj, + vid_dec_ctx_t *ps_app_ctx) +{ + ivd_ctl_set_config_ip_t s_ctl_ip; + ivd_ctl_set_config_op_t s_ctl_op; + IV_API_CALL_STATUS_T e_dec_status; + + s_ctl_ip.u4_disp_wd = ps_app_ctx->u4_strd; + s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE; + + s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; + s_ctl_ip.e_vid_dec_mode = IVD_DECODE_FRAME; + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t); + + e_dec_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + + if(IV_SUCCESS != e_dec_status) + { + printf("Error in Disable SkipB frames\n"); + } + + return e_dec_status; +} + +/*****************************************************************************/ +/* */ +/* Function Name : enable_skippb_frames */ +/* */ +/* Description : Control call to 
enable skipping of P & B frames */ +/* */ +/* */ +/* Inputs : codec_obj : Codec handle */ +/* Globals : */ +/* Processing : Calls enable skip P and B frames control */ +/* */ +/* Outputs : */ +/* Returns : Control call return i4_status */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T enable_skippb_frames(void *codec_obj, + vid_dec_ctx_t *ps_app_ctx) +{ + ivd_ctl_set_config_ip_t s_ctl_ip; + ivd_ctl_set_config_op_t s_ctl_op; + IV_API_CALL_STATUS_T e_dec_status; + + s_ctl_ip.u4_disp_wd = ps_app_ctx->u4_strd; + s_ctl_ip.e_frm_skip_mode = IVD_SKIP_PB; + + s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; + s_ctl_ip.e_vid_dec_mode = IVD_DECODE_FRAME; + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t); + + e_dec_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + if(IV_SUCCESS != e_dec_status) + { + printf("Error in Enable SkipPB frames\n"); + } + + return e_dec_status; +} + +/*****************************************************************************/ +/* */ +/* Function Name : disable_skippb_frames */ +/* */ +/* Description : Control call to disable skipping of P and B frames */ +/* */ +/* */ +/* Inputs : codec_obj : Codec handle */ +/* Globals : */ +/* Processing : Calls disable P and B frame skip control */ +/* */ +/* Outputs : */ +/* Returns : Control call return i4_status */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T disable_skippb_frames(void *codec_obj, + vid_dec_ctx_t *ps_app_ctx) 
+{ + ivd_ctl_set_config_ip_t s_ctl_ip; + ivd_ctl_set_config_op_t s_ctl_op; + IV_API_CALL_STATUS_T e_dec_status; + + s_ctl_ip.u4_disp_wd = ps_app_ctx->u4_strd; + s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE; + + s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; + s_ctl_ip.e_vid_dec_mode = IVD_DECODE_FRAME; + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t); + + e_dec_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + if(IV_SUCCESS != e_dec_status) + { + printf("Error in Disable SkipPB frames\n"); + } + + return e_dec_status; +} + +/*****************************************************************************/ +/* */ +/* Function Name : release_disp_frame */ +/* */ +/* Description : Calls release display control - Used to signal to the */ +/* decoder that this particular buffer has been displayed */ +/* and that the codec is now free to write to this buffer */ +/* */ +/* */ +/* Inputs : codec_obj : Codec Handle */ +/* buf_id : Buffer Id of the buffer to be released */ +/* This id would have been returned earlier by */ +/* the codec */ +/* Globals : */ +/* Processing : Calls Release Display call */ +/* */ +/* Outputs : */ +/* Returns : Status of release display call */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T release_disp_frame(void *codec_obj, UWORD32 buf_id) +{ + ivd_rel_display_frame_ip_t s_video_rel_disp_ip; + ivd_rel_display_frame_op_t s_video_rel_disp_op; + IV_API_CALL_STATUS_T e_dec_status; + + s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME; + s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t); + s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t); 
+ s_video_rel_disp_ip.u4_disp_buf_id = buf_id; + + e_dec_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_rel_disp_ip, + (void *)&s_video_rel_disp_op); + if(IV_SUCCESS != e_dec_status) + { + printf("Error in Release Disp frame\n"); + } + + + return (e_dec_status); +} + +/*****************************************************************************/ +/* */ +/* Function Name : get_version */ +/* */ +/* Description : Control call to get codec version */ +/* */ +/* */ +/* Inputs : codec_obj : Codec handle */ +/* Globals : */ +/* Processing : Calls get version control (IVD_CMD_CTL_GETVERSION) */ +/* */ +/* Outputs : Prints the version string, or the error code on failure */ +/* Returns : Control call return i4_status */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T get_version(void *codec_obj) +{ + ivd_ctl_getversioninfo_ip_t ps_ctl_ip; + ivd_ctl_getversioninfo_op_t ps_ctl_op; + UWORD8 au1_buf[512]; + IV_API_CALL_STATUS_T i4_status; + ps_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + ps_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETVERSION; + ps_ctl_ip.u4_size = sizeof(ivd_ctl_getversioninfo_ip_t); + ps_ctl_op.u4_size = sizeof(ivd_ctl_getversioninfo_op_t); + ps_ctl_ip.pv_version_buffer = au1_buf; + ps_ctl_ip.u4_version_buffer_size = sizeof(au1_buf); + + i4_status = ivd_api_function((iv_obj_t *)codec_obj, + (void *)&(ps_ctl_ip), + (void *)&(ps_ctl_op)); + + if(i4_status != IV_SUCCESS) + { + printf("Error in Getting Version number e_dec_status = %d u4_error_code = %x\n", + i4_status, ps_ctl_op.u4_error_code); + } + else + { + printf("Ittiam Decoder Version number: %s\n", + (char *)ps_ctl_ip.pv_version_buffer); + } + return i4_status; +} +/*****************************************************************************/ +/* */ +/* Function Name : codec_exit */ +/* */ +/* Description : handles unrecoverable errors */ +/* Inputs : Error message */ +/*
Globals : None */ +/* Processing : Prints error message to console and exits. */ +/* Outputs : Error mesage to the console */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 07 06 2006 Sankar Creation */ +/* */ +/*****************************************************************************/ +void codec_exit(CHAR *pc_err_message) +{ + printf("%s\n", pc_err_message); + exit(-1); +} + +/*****************************************************************************/ +/* */ +/* Function Name : dump_output */ +/* */ +/* Description : Used to dump output YUV */ +/* Inputs : App context, disp output desc, File pointer */ +/* Globals : None */ +/* Processing : Dumps to a file */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 07 06 2006 Sankar Creation */ +/* */ +/*****************************************************************************/ +void dump_output(vid_dec_ctx_t *ps_app_ctx, + iv_yuv_buf_t *ps_disp_frm_buf, + UWORD32 u4_disp_frm_id, + FILE *ps_op_file, + FILE *ps_op_chksum_file, + WORD32 i4_op_frm_ts, + UWORD32 file_save, + UWORD32 chksum_save) + +{ + + UWORD32 i; + iv_yuv_buf_t s_dump_disp_frm_buf; + UWORD32 u4_disp_id; + + memset(&s_dump_disp_frm_buf, 0, sizeof(iv_yuv_buf_t)); + + if(ps_app_ctx->u4_share_disp_buf) + { + if(ps_app_ctx->dump_q_wr_idx == MAX_DISP_BUFFERS) + ps_app_ctx->dump_q_wr_idx = 0; + + if(ps_app_ctx->dump_q_rd_idx == MAX_DISP_BUFFERS) + ps_app_ctx->dump_q_rd_idx = 0; + + ps_app_ctx->s_disp_frm_queue[ps_app_ctx->dump_q_wr_idx] = + *ps_disp_frm_buf; + ps_app_ctx->s_disp_frm_id_queue[ps_app_ctx->dump_q_wr_idx] = + u4_disp_frm_id; + ps_app_ctx->dump_q_wr_idx++; + + if((WORD32)i4_op_frm_ts >= (WORD32)(ps_app_ctx->disp_delay - 1)) + { + s_dump_disp_frm_buf = + ps_app_ctx->s_disp_frm_queue[ps_app_ctx->dump_q_rd_idx]; + u4_disp_id = + 
ps_app_ctx->s_disp_frm_id_queue[ps_app_ctx->dump_q_rd_idx]; + ps_app_ctx->dump_q_rd_idx++; + } + else + { + return; + } + } + else + { + s_dump_disp_frm_buf = *ps_disp_frm_buf; + u4_disp_id = u4_disp_frm_id; + } + + release_disp_frame(ps_app_ctx->cocodec_obj, u4_disp_id); + + if(0 == file_save && 0 == chksum_save) + return; + + if(NULL == s_dump_disp_frm_buf.pv_y_buf) + return; + + if(ps_app_ctx->e_output_chroma_format == IV_YUV_420P) + { +#if DUMP_SINGLE_BUF + { + UWORD8 *buf = s_dump_disp_frm_buf.pv_y_buf - 80 - (s_dump_disp_frm_buf.u4_y_strd * 80); + + UWORD32 i4_size = s_dump_disp_frm_buf.u4_y_strd * ((s_dump_disp_frm_buf.u4_y_ht + 160) + (s_dump_disp_frm_buf.u4_u_ht + 80)); + fwrite(buf, 1, i4_size ,ps_op_file); + + } +#else + if(0 != file_save) + { + UWORD8 *buf; + + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd, ps_op_file); + buf += s_dump_disp_frm_buf.u4_y_strd; + } + + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_u_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_u_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_u_wd, ps_op_file); + buf += s_dump_disp_frm_buf.u4_u_strd; + } + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_v_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_v_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_v_wd, ps_op_file); + buf += s_dump_disp_frm_buf.u4_v_strd; + } + + } + + if(0 != chksum_save) + { + UWORD8 au1_y_chksum[16]; + UWORD8 au1_u_chksum[16]; + UWORD8 au1_v_chksum[16]; + calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_y_buf, + s_dump_disp_frm_buf.u4_y_strd, + s_dump_disp_frm_buf.u4_y_wd, + s_dump_disp_frm_buf.u4_y_ht, + au1_y_chksum); + calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_u_buf, + s_dump_disp_frm_buf.u4_u_strd, + s_dump_disp_frm_buf.u4_u_wd, + s_dump_disp_frm_buf.u4_u_ht, + au1_u_chksum); + calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_v_buf, + s_dump_disp_frm_buf.u4_v_strd, + s_dump_disp_frm_buf.u4_v_wd, + 
s_dump_disp_frm_buf.u4_v_ht, + au1_v_chksum); + + fwrite(au1_y_chksum, sizeof(UWORD8), 16, ps_op_chksum_file); + fwrite(au1_u_chksum, sizeof(UWORD8), 16, ps_op_chksum_file); + fwrite(au1_v_chksum, sizeof(UWORD8), 16, ps_op_chksum_file); + } +#endif + } + else if((ps_app_ctx->e_output_chroma_format == IV_YUV_420SP_UV) + || (ps_app_ctx->e_output_chroma_format == IV_YUV_420SP_VU)) + { +#if DUMP_SINGLE_BUF + { + + UWORD8 *buf = s_dump_disp_frm_buf.pv_y_buf - 24 - (s_dump_disp_frm_buf.u4_y_strd * 40); + + UWORD32 i4_size = s_dump_disp_frm_buf.u4_y_strd * ((s_dump_disp_frm_buf.u4_y_ht + 80) + (s_dump_disp_frm_buf.u4_u_ht + 40)); + fwrite(buf, 1, i4_size ,ps_op_file); + } +#else + { + UWORD8 *buf; + + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd, ps_op_file); + buf += s_dump_disp_frm_buf.u4_y_strd; + } + + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_u_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_u_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_u_wd, ps_op_file); + buf += s_dump_disp_frm_buf.u4_u_strd; + } + } +#endif + } + else if(ps_app_ctx->e_output_chroma_format == IV_RGBA_8888) + { + UWORD8 *buf; + + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd * 4, ps_op_file); + buf += s_dump_disp_frm_buf.u4_y_strd * 4; + } + } + else + { + UWORD8 *buf; + + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_strd * 2, ps_op_file); + buf += s_dump_disp_frm_buf.u4_y_strd * 2; + } + } + + fflush(ps_op_file); + fflush(ps_op_chksum_file); + +} + + +/*****************************************************************************/ +/* */ +/* Function Name : print_usage */ +/* */ +/* Description : Prints argument format */ +/* */ +/* */ +/* Inputs : */ +/* Globals : */ +/* Processing : Prints argument 
format */ +/* */ +/* Outputs : */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +void print_usage(void) +{ + WORD32 i = 0; + WORD32 num_entries = sizeof(argument_mapping) / sizeof(argument_t); + printf("\nUsage:\n"); + while(i < num_entries) + { + printf("%-32s\t %s", argument_mapping[i].argument_name, + argument_mapping[i].description); + i++; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : get_argument */ +/* */ +/* Description : Gets argument for a given string */ +/* */ +/* */ +/* Inputs : name */ +/* Globals : */ +/* Processing : Searches the given string in the array and returns */ +/* appropriate argument ID */ +/* */ +/* Outputs : Argument ID */ +/* Returns : Argument ID */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +ARGUMENT_T get_argument(CHAR *name) +{ + WORD32 i = 0; + WORD32 num_entries = sizeof(argument_mapping) / sizeof(argument_t); + while(i < num_entries) + { + if((0 == strcmp(argument_mapping[i].argument_name, name)) || + ((0 == strcmp(argument_mapping[i].argument_shortname, name)) && + (0 != strcmp(argument_mapping[i].argument_shortname, "--")))) + { + return argument_mapping[i].argument; + } + i++; + } + return INVALID; +} + +/*****************************************************************************/ +/* */ +/* Function Name : get_argument */ +/* */ +/* Description : Gets argument for a given string */ +/* */ +/* */ +/* Inputs : name */ +/* Globals : */ +/* Processing : Searches the given string in the array and returns */ +/* appropriate argument ID */ +/* */ +/* Outputs : Argument ID */ +/* 
Returns : Argument ID */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +void parse_argument(vid_dec_ctx_t *ps_app_ctx, CHAR *argument, CHAR *value) +{ + ARGUMENT_T arg; + + arg = get_argument(argument); + switch(arg) + { + case HELP: + print_usage(); + exit(-1); + case VERSION: + break; + case INPUT_FILE: + sscanf(value, "%s", ps_app_ctx->ac_ip_fname); + //input_passed = 1; + break; + + case OUTPUT: + sscanf(value, "%s", ps_app_ctx->ac_op_fname); + break; + + case CHKSUM: + sscanf(value, "%s", ps_app_ctx->ac_op_chksum_fname); + break; + + case SAVE_OUTPUT: + sscanf(value, "%d", &ps_app_ctx->u4_file_save_flag); + break; + + case SAVE_CHKSUM: + sscanf(value, "%d", &ps_app_ctx->u4_chksum_save_flag); + break; + + case CHROMA_FORMAT: + if((strcmp(value, "YUV_420P")) == 0) + ps_app_ctx->e_output_chroma_format = IV_YUV_420P; + else if((strcmp(value, "YUV_422ILE")) == 0) + ps_app_ctx->e_output_chroma_format = IV_YUV_422ILE; + else if((strcmp(value, "RGB_565")) == 0) + ps_app_ctx->e_output_chroma_format = IV_RGB_565; + else if((strcmp(value, "RGBA_8888")) == 0) + ps_app_ctx->e_output_chroma_format = IV_RGBA_8888; + else if((strcmp(value, "YUV_420SP_UV")) == 0) + ps_app_ctx->e_output_chroma_format = IV_YUV_420SP_UV; + else if((strcmp(value, "YUV_420SP_VU")) == 0) + ps_app_ctx->e_output_chroma_format = IV_YUV_420SP_VU; + else + { + printf("\nInvalid colour format setting it to IV_YUV_420P\n"); + ps_app_ctx->e_output_chroma_format = IV_YUV_420P; + } + + break; + case NUM_FRAMES: + sscanf(value, "%d", &ps_app_ctx->u4_max_frm_ts); + break; + + case NUM_CORES: + sscanf(value, "%d", &ps_app_ctx->u4_num_cores); + break; + case DEGRADE_PICS: + sscanf(value, "%d", &ps_app_ctx->i4_degrade_pics); + break; + case DEGRADE_TYPE: + sscanf(value, "%d", &ps_app_ctx->i4_degrade_type); + break; + case 
SHARE_DISPLAY_BUF: + sscanf(value, "%d", &ps_app_ctx->u4_share_disp_buf); + break; + case LOOPBACK: + sscanf(value, "%d", &ps_app_ctx->loopback); + break; + case DISPLAY: +#if defined(SDL_DISPLAY) || defined(FBDEV_DISPLAY) || defined(INTEL_CE5300) || defined(IOS_DISPLAY) + sscanf(value, "%d", &ps_app_ctx->display); +#else + ps_app_ctx->display = 0; +#endif + break; + case FULLSCREEN: + sscanf(value, "%d", &ps_app_ctx->full_screen); + break; + case FPS: + sscanf(value, "%d", &ps_app_ctx->fps); + if(ps_app_ctx->fps <= 0) + ps_app_ctx->fps = DEFAULT_FPS; + break; + case MAX_WD: + sscanf(value, "%d", &ps_app_ctx->max_wd); + break; + case MAX_HT: + sscanf(value, "%d", &ps_app_ctx->max_ht); + break; + case MAX_LEVEL: + sscanf(value, "%d", &ps_app_ctx->max_level); + break; + case ARCH: + if((strcmp(value, "ARM_NONEON")) == 0) + ps_app_ctx->e_arch = ARCH_ARM_NONEON; + else if((strcmp(value, "ARM_A9Q")) == 0) + ps_app_ctx->e_arch = ARCH_ARM_A9Q; + else if((strcmp(value, "ARM_A7")) == 0) + ps_app_ctx->e_arch = ARCH_ARM_A7; + else if((strcmp(value, "ARM_A5")) == 0) + ps_app_ctx->e_arch = ARCH_ARM_A5; + else if((strcmp(value, "ARM_NEONINTR")) == 0) + ps_app_ctx->e_arch = ARCH_ARM_NEONINTR; + else if((strcmp(value, "X86_GENERIC")) == 0) + ps_app_ctx->e_arch = ARCH_X86_GENERIC; + else if((strcmp(value, "X86_SSSE3")) == 0) + ps_app_ctx->e_arch = ARCH_X86_SSSE3; + else if((strcmp(value, "X86_SSE42")) == 0) + ps_app_ctx->e_arch = ARCH_X86_SSE42; + else if((strcmp(value, "X86_AVX2")) == 0) + ps_app_ctx->e_arch = ARCH_X86_AVX2; + else if((strcmp(value, "MIPS_GENERIC")) == 0) + ps_app_ctx->e_arch = ARCH_MIPS_GENERIC; + else if((strcmp(value, "MIPS_32")) == 0) + ps_app_ctx->e_arch = ARCH_MIPS_32; + else if((strcmp(value, "ARMV8_GENERIC")) == 0) + ps_app_ctx->e_arch = ARCH_ARMV8_GENERIC; + else + { + printf("\nInvalid Arch. 
Setting it to ARM_A9Q\n"); + ps_app_ctx->e_arch = ARCH_ARM_A9Q; + } + + break; + case SOC: + if((strcmp(value, "GENERIC")) == 0) + ps_app_ctx->e_soc = SOC_GENERIC; + else if((strcmp(value, "HISI_37X")) == 0) + ps_app_ctx->e_soc = SOC_HISI_37X; + else + { + ps_app_ctx->e_soc = atoi(value); +/* + printf("\nInvalid SOC. Setting it to GENERIC\n"); + ps_app_ctx->e_soc = SOC_GENERIC; +*/ + } + break; + case PICLEN: + sscanf(value, "%d", &ps_app_ctx->u4_piclen_flag); + break; + + case PICLEN_FILE: + sscanf(value, "%s", ps_app_ctx->ac_piclen_fname); + break; + case DISABLE_DEBLOCK_LEVEL: + sscanf(value, "%d", &ps_app_ctx->u4_disable_dblk_level); + break; + + case INVALID: + default: + printf("Ignoring argument : %s\n", argument); + break; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : read_cfg_file */ +/* */ +/* Description : Reads arguments from a configuration file */ +/* */ +/* */ +/* Inputs : ps_app_ctx : Application context */ +/* fp_cfg_file : Configuration file handle */ +/* Globals : */ +/* Processing : Parses the arguments and fills in the application context*/ +/* */ +/* Outputs : Arguments parsed */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +void read_cfg_file(vid_dec_ctx_t *ps_app_ctx, FILE *fp_cfg_file) +{ + + CHAR line[STRLENGTH]; + CHAR description[STRLENGTH]; + CHAR value[STRLENGTH]; + CHAR argument[STRLENGTH]; + void *ret; + while(0 == feof(fp_cfg_file)) + { + line[0] = '\0'; + ret = fgets(line, STRLENGTH, fp_cfg_file); + if(NULL == ret) + break; + argument[0] = '\0'; + /* Reading Input File Name */ + sscanf(line, "%s %s %s", argument, value, description); + if(argument[0] == '\0') + continue; + + parse_argument(ps_app_ctx, argument, value); + } + + +} + +/*! 
+************************************************************************** +* \if Function name : dispq_producer_dequeue \endif +* +* \brief +* This function gets a free buffer index where display data can be written +* This is a blocking call and can be exited by setting quit to true in +* the application context +* +* \param[in] ps_app_ctx : Pointer to application context +* +* \return +* returns Next free buffer index for producer +* +* \author +* Ittiam +* +************************************************************************** +*/ +WORD32 dispq_producer_dequeue(vid_dec_ctx_t *ps_app_ctx) +{ + WORD32 idx; + + /* If there is no free buffer wait */ + + while(((ps_app_ctx->disp_q_wr_idx + 1) % NUM_DISPLAY_BUFFERS) == ps_app_ctx->disp_q_rd_idx) + { + + ithread_msleep(1); + + if(ps_app_ctx->quit) + return(-1); + } + + idx = ps_app_ctx->disp_q_wr_idx; + return (idx); +} + +/*! +************************************************************************** +* \if Function name : dispq_producer_queue \endif +* +* \brief +* This function adds buffer which can be displayed +* +* \param[in] ps_app_ctx : Pointer to application context +* +* \return +* returns Next free buffer index for producer +* +* \author +* Ittiam +* +************************************************************************** +*/ +WORD32 dispq_producer_queue(vid_dec_ctx_t *ps_app_ctx) +{ + ps_app_ctx->disp_q_wr_idx++; + if(ps_app_ctx->disp_q_wr_idx == NUM_DISPLAY_BUFFERS) + ps_app_ctx->disp_q_wr_idx = 0; + + return (0); +} +/*! 
+************************************************************************** +* \if Function name : dispq_consumer_dequeue \endif +* +* \brief +* This function gets a free buffer index where display data can be written +* This is a blocking call and can be exited by setting quit to true in +* the application context +* +* \param[in] ps_app_ctx : Pointer to application context +* +* \return +* returns Next free buffer index for producer +* +* \author +* Ittiam +* +************************************************************************** +*/ +WORD32 dispq_consumer_dequeue(vid_dec_ctx_t *ps_app_ctx) +{ + WORD32 idx; + + /* If there is no free buffer wait */ + + while(ps_app_ctx->disp_q_wr_idx == ps_app_ctx->disp_q_rd_idx) + { + + ithread_msleep(1); + + if(ps_app_ctx->quit) + return(-1); + } + + idx = ps_app_ctx->disp_q_rd_idx; + return (idx); +} + +/*! +************************************************************************** +* \if Function name : dispq_producer_queue \endif +* +* \brief +* This function adds buffer which can be displayed +* +* \param[in] ps_app_ctx : Pointer to application context +* +* \return +* returns Next free buffer index for producer +* +* \author +* Ittiam +* +************************************************************************** +*/ +WORD32 dispq_consumer_queue(vid_dec_ctx_t *ps_app_ctx) +{ + ps_app_ctx->disp_q_rd_idx++; + if(ps_app_ctx->disp_q_rd_idx == NUM_DISPLAY_BUFFERS) + ps_app_ctx->disp_q_rd_idx = 0; + + return (0); +} + +/*****************************************************************************/ +/* */ +/* Function Name : display_thread */ +/* */ +/* Description : Thread to display the frame */ +/* */ +/* */ +/* Inputs : pv_ctx : Application context */ +/* */ +/* Globals : */ +/* Processing : Wait for a buffer to get produced by decoder and display */ +/* that frame */ +/* */ +/* Outputs : */ +/* Returns : None */ +/* */ +/* Issues : Pause followed by quit is making some deadlock condn */ +/* If decoder was lagging 
initially and then fasten up, */ +/* display will also go at faster rate till it reaches */ +/* equilibrium wrt the initial time */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 05 2013 100578 Initial Version */ +/* */ +/*****************************************************************************/ + +WORD32 display_thread(void *pv_ctx) +{ + vid_dec_ctx_t *ps_app_ctx = (vid_dec_ctx_t *) pv_ctx; + + + UWORD32 frm_duration; /* in us */ + UWORD32 current_time; + UWORD32 expected_time; + TIMER s_end_timer; + TIMER s_first_frame_time; + UWORD32 first_frame_displayed; + +#ifdef WINDOWS_TIMER + TIMER frequency; +#endif + +#ifdef WINDOWS_TIMER + QueryPerformanceFrequency ( &frequency); +#endif + first_frame_displayed = 0; + expected_time = 0; + frm_duration = 1000000/ps_app_ctx->fps; + + /* Init display and allocate display buffers */ + ps_app_ctx->pv_disp_ctx = (void *)ps_app_ctx->disp_init(ps_app_ctx->u4_pic_wd, + ps_app_ctx->u4_pic_ht, + ps_app_ctx->i4_screen_wd, + ps_app_ctx->i4_screen_ht, + ps_app_ctx->max_wd, + ps_app_ctx->max_ht, + ps_app_ctx->full_screen, + &ps_app_ctx->quit, + &ps_app_ctx->paused); + ps_app_ctx->alloc_disp_buffers(ps_app_ctx->pv_disp_ctx); + + ps_app_ctx->display_init_done = 1; + + while(1) + { + WORD32 rd_idx; + + rd_idx = dispq_consumer_dequeue(ps_app_ctx); + if (ps_app_ctx->quit) + break; + + ps_app_ctx->display_buffer(ps_app_ctx->pv_disp_ctx, rd_idx); + + if(0 == first_frame_displayed) + { + GETTIME(&s_first_frame_time); + first_frame_displayed = 1; + } + + /*********************************************************************/ + /* Sleep based on the expected time of arrival of current buffer and */ + /* the Current frame */ + /*********************************************************************/ + + GETTIME(&s_end_timer); + ELAPSEDTIME(s_first_frame_time,s_end_timer,current_time,frequency); + + /* time in micro second */ + expected_time += frm_duration; + + //printf("current_time %d expected_time %d diff 
%d \n", current_time, expected_time, (expected_time - current_time)); + /* sleep for the diff. in time */ + if(current_time < expected_time) + ps_app_ctx->disp_usleep((expected_time - current_time)); + else + expected_time += (current_time - expected_time); + + dispq_consumer_queue(ps_app_ctx); + + } + + + while(0 == ps_app_ctx->display_deinit_flag) + { + ps_app_ctx->disp_usleep(1000); + } + ps_app_ctx->disp_deinit(ps_app_ctx->pv_disp_ctx); + + /* destroy the display thread */ + ithread_exit(ps_app_ctx->display_thread_handle); + + return 0; +} + +void output_write_stall(CHAR *fname, UWORD32 cur_frm_idx) +{ + const UWORD8 threshold = 64; + CHAR past_fname[1000]; + FILE *fp_fast_file = NULL; + + if (cur_frm_idx >= threshold) + { + sprintf(past_fname, fname, cur_frm_idx - threshold); + do + { + fp_fast_file = fopen(past_fname,"rb"); + if (fp_fast_file != NULL) + { + fclose(fp_fast_file); + /* Wait until the resource is released by a third party app*/ + ithread_msleep(5); + } + else + break; + } while(1); + } +} + +void flush_output(iv_obj_t *codec_obj, + vid_dec_ctx_t *ps_app_ctx, + ivd_out_bufdesc_t *ps_out_buf, + UWORD8 *pu1_bs_buf, + UWORD32 *pu4_op_frm_ts, + FILE *ps_op_file, + FILE *ps_op_chksum_file, + UWORD32 u4_ip_frm_ts, + UWORD32 u4_bytes_remaining) +{ + WORD32 ret; + + do + { + + ivd_ctl_flush_ip_t s_ctl_ip; + ivd_ctl_flush_op_t s_ctl_op; + + if(*pu4_op_frm_ts >= (ps_app_ctx->u4_max_frm_ts + ps_app_ctx->disp_delay)) + break; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH; + s_ctl_ip.u4_size = sizeof(ivd_ctl_flush_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_flush_op_t); + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + + if(ret != IV_SUCCESS) + { + printf("Error in Setting the decoder in flush mode\n"); + } + + if(IV_SUCCESS == ret) + { + ivd_video_decode_ip_t s_video_decode_ip; + ivd_video_decode_op_t s_video_decode_op; + + s_video_decode_ip.e_cmd = IVD_CMD_VIDEO_DECODE; + 
s_video_decode_ip.u4_ts = u4_ip_frm_ts; + s_video_decode_ip.pv_stream_buffer = pu1_bs_buf; + s_video_decode_ip.u4_num_Bytes = u4_bytes_remaining; + s_video_decode_ip.u4_size = sizeof(ivd_video_decode_ip_t); + s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[0] = + ps_out_buf->u4_min_out_buf_size[0]; + s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[1] = + ps_out_buf->u4_min_out_buf_size[1]; + s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[2] = + ps_out_buf->u4_min_out_buf_size[2]; + + s_video_decode_ip.s_out_buffer.pu1_bufs[0] = + ps_out_buf->pu1_bufs[0]; + s_video_decode_ip.s_out_buffer.pu1_bufs[1] = + ps_out_buf->pu1_bufs[1]; + s_video_decode_ip.s_out_buffer.pu1_bufs[2] = + ps_out_buf->pu1_bufs[2]; + s_video_decode_ip.s_out_buffer.u4_num_bufs = + ps_out_buf->u4_num_bufs; + + s_video_decode_op.u4_size = sizeof(ivd_video_decode_op_t); + + /*****************************************************************************/ + /* API Call: Video Decode */ + /*****************************************************************************/ + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_decode_ip, + (void *)&s_video_decode_op); + + if(1 == s_video_decode_op.u4_output_present) + { + CHAR cur_fname[1000]; + CHAR *extn = NULL; + /* The objective is to dump the decoded frames into separate files instead of + * dumping all the frames in one common file. 
Also, the number of dumped frames + * at any given instance of time cannot exceed 'frame_memory' + */ + if(ps_app_ctx->u4_file_save_flag) + { + /* Locate the position of extension yuv */ + extn = strstr(ps_app_ctx->ac_op_fname,"%d"); + if (extn != NULL) + { + output_write_stall(ps_app_ctx->ac_op_fname,*pu4_op_frm_ts); + /* Generate output file names */ + sprintf(cur_fname,ps_app_ctx->ac_op_fname,*pu4_op_frm_ts); + /* Open Output file */ + ps_op_file = fopen(cur_fname,"wb"); + if (NULL == ps_op_file) + { + CHAR ac_error_str[STRLENGTH]; + sprintf(ac_error_str, "Could not open output file %s", + cur_fname); + + codec_exit(ac_error_str); + } + } + } + + dump_output(ps_app_ctx, &(s_video_decode_op.s_disp_frm_buf), + s_video_decode_op.u4_disp_buf_id, ps_op_file, + ps_op_chksum_file, + *pu4_op_frm_ts, ps_app_ctx->u4_file_save_flag, + ps_app_ctx->u4_chksum_save_flag); + if (extn != NULL) + fclose(ps_op_file); + (*pu4_op_frm_ts)++; + } + } + } + while(IV_SUCCESS == ret); + +} + +#ifdef X86_MINGW +void sigsegv_handler() +{ + printf("Segmentation fault, Exiting.. 
\n"); + exit(-1); +} +#endif + +UWORD32 default_get_stride(void) +{ + return 0; +} + + +IV_COLOR_FORMAT_T default_get_color_fmt(void) +{ + return IV_YUV_420P; +} +/*****************************************************************************/ +/* */ +/* Function Name : main */ +/* */ +/* Description : Application to demonstrate codec API */ +/* */ +/* */ +/* Inputs : argc - Number of arguments */ +/* argv[] - Arguments */ +/* Globals : */ +/* Processing : Shows how to use create, process, control and delete */ +/* */ +/* Outputs : Codec output in a file */ +/* Returns : */ +/* */ +/* Issues : Assumes both PROFILE_ENABLE to be */ +/* defined for multithread decode-display working */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* 09 05 2013 100578 Multithread decode-display */ +/*****************************************************************************/ +#ifdef IOS +int h264dec_main(char * homedir,char *documentdir, int screen_wd, int screen_ht) +#else +int main(WORD32 argc, CHAR *argv[]) +#endif +{ + CHAR ac_cfg_fname[STRLENGTH]; + FILE *fp_cfg_file = NULL; + FILE *ps_piclen_file = NULL; + FILE *ps_ip_file = NULL; + FILE *ps_op_file = NULL; + FILE *ps_op_chksum_file = NULL; + WORD32 ret; + CHAR ac_error_str[STRLENGTH]; + vid_dec_ctx_t s_app_ctx; + UWORD8 *pu1_bs_buf; + + ivd_out_bufdesc_t *ps_out_buf; + UWORD32 u4_num_bytes_dec = 0; + UWORD32 file_pos = 0; + IV_API_CALL_STATUS_T e_dec_status; + UWORD32 u4_ip_frm_ts = 0, u4_op_frm_ts = 0; + + WORD32 u4_bytes_remaining = 0; + void *pv_mem_rec_location; + UWORD32 u4_num_mem_recs; + UWORD32 i; + UWORD32 u4_ip_buf_len; + UWORD32 frm_cnt = 0; + WORD32 total_bytes_comsumed; + UWORD32 max_op_frm_ts; + +#ifdef PROFILE_ENABLE + UWORD32 u4_tot_cycles = 0; + UWORD32 u4_tot_fmt_cycles = 0; + UWORD32 peak_window[PEAK_WINDOW_SIZE]; + UWORD32 peak_window_idx = 0; + UWORD32 peak_avg_max = 0; +#ifdef INTEL_CE5300 + UWORD32 time_consumed = 0; + UWORD32 
bytes_consumed = 0; +#endif +#endif + +#ifdef WINDOWS_TIMER + TIMER frequency; +#endif + WORD32 width = 0, height = 0; + iv_obj_t *codec_obj; +#if defined(GPU_BUILD) && !defined(X86) +// int ioctl_init(); +// ioctl_init(); +#endif + +#ifdef X86_MINGW + //For getting printfs without any delay + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); +#endif +#ifdef IOS + sprintf(filename_trace, "%s/iostrace.txt", homedir ); + printf("\ntrace file name = %s",filename_trace); +#endif + +#ifdef X86_MINGW + { + signal(SIGSEGV, sigsegv_handler); + } +#endif + + +#ifndef IOS + /* Usage */ + if(argc < 2) + { + printf("Using test.cfg as configuration file \n"); + strcpy(ac_cfg_fname, "test.cfg"); + } + else if(argc == 2) + { + strcpy(ac_cfg_fname, argv[1]); + } + +#else + strcpy(ac_cfg_fname, "test.cfg"); + +#endif + + + /***********************************************************************/ + /* Initialize Application parameters */ + /***********************************************************************/ + + strcpy(s_app_ctx.ac_ip_fname, "\0"); + s_app_ctx.dump_q_wr_idx = 0; + s_app_ctx.dump_q_rd_idx = 0; + s_app_ctx.display_thread_created = 0; + s_app_ctx.disp_q_wr_idx = 0; + s_app_ctx.disp_q_rd_idx = 0; + s_app_ctx.disp_delay = 0; + s_app_ctx.loopback = 0; + s_app_ctx.display = 0; + s_app_ctx.full_screen = 0; + s_app_ctx.u4_piclen_flag = 0; + s_app_ctx.fps = DEFAULT_FPS; + file_pos = 0; + total_bytes_comsumed = 0; + u4_ip_frm_ts = 0; + u4_op_frm_ts = 0; +#ifdef PROFILE_ENABLE + memset(peak_window, 0, sizeof(WORD32) * PEAK_WINDOW_SIZE); +#endif + s_app_ctx.u4_share_disp_buf = DEFAULT_SHARE_DISPLAY_BUF; + s_app_ctx.u4_num_cores = DEFAULT_NUM_CORES; + s_app_ctx.i4_degrade_type = 0; + s_app_ctx.i4_degrade_pics = 0; + s_app_ctx.max_wd = 0; + s_app_ctx.max_ht = 0; + s_app_ctx.max_level = 0; + s_app_ctx.e_arch = ARCH_ARM_A9Q; + s_app_ctx.e_soc = SOC_GENERIC; + + s_app_ctx.u4_strd = STRIDE; + + s_app_ctx.display_thread_handle = 
malloc(ithread_get_handle_size()); + s_app_ctx.quit = 0; + s_app_ctx.paused = 0; + //s_app_ctx.u4_output_present = 0; + + s_app_ctx.get_stride = &default_get_stride; + + s_app_ctx.get_color_fmt = &default_get_color_fmt; + + /* Set function pointers for display */ +#ifdef SDL_DISPLAY + s_app_ctx.disp_init = &sdl_disp_init; + s_app_ctx.alloc_disp_buffers = &sdl_alloc_disp_buffers; + s_app_ctx.display_buffer = &sdl_display; + s_app_ctx.set_disp_buffers = &sdl_set_disp_buffers; + s_app_ctx.disp_deinit = &sdl_disp_deinit; + s_app_ctx.disp_usleep = &sdl_disp_usleep; + s_app_ctx.get_color_fmt = &sdl_get_color_fmt; + s_app_ctx.get_stride = &sdl_get_stride; +#endif + +#ifdef FBDEV_DISPLAY + s_app_ctx.disp_init = &fbd_disp_init; + s_app_ctx.alloc_disp_buffers = &fbd_alloc_disp_buffers; + s_app_ctx.display_buffer = &fbd_display; + s_app_ctx.set_disp_buffers = &fbd_set_disp_buffers; + s_app_ctx.disp_deinit = &fbd_disp_deinit; + s_app_ctx.disp_usleep = &fbd_disp_usleep; + s_app_ctx.get_color_fmt = &fbd_get_color_fmt; + s_app_ctx.get_stride = &fbd_get_stride; +#endif + +#ifdef INTEL_CE5300 + s_app_ctx.disp_init = &gdl_disp_init; + s_app_ctx.alloc_disp_buffers = &gdl_alloc_disp_buffers; + s_app_ctx.display_buffer = &gdl_display; + s_app_ctx.set_disp_buffers = &gdl_set_disp_buffers; + s_app_ctx.disp_deinit = &gdl_disp_deinit; + s_app_ctx.disp_usleep = &gdl_disp_usleep; + s_app_ctx.get_color_fmt = &gdl_get_color_fmt; + s_app_ctx.get_stride = &gdl_get_stride; +#endif + +#ifdef IOS_DISPLAY + s_app_ctx.disp_init = &ios_disp_init; + s_app_ctx.alloc_disp_buffers = &ios_alloc_disp_buffers; + s_app_ctx.display_buffer = &ios_display; + s_app_ctx.set_disp_buffers = &ios_set_disp_buffers; + s_app_ctx.disp_deinit = &ios_disp_deinit; + s_app_ctx.disp_usleep = &ios_disp_usleep; + s_app_ctx.get_color_fmt = &ios_get_color_fmt; + s_app_ctx.get_stride = &ios_get_stride; +#endif + + s_app_ctx.display_deinit_flag = 0; + s_app_ctx.e_output_chroma_format = IV_YUV_420SP_UV; + 
/*************************************************************************/ + /* Parse arguments */ + /*************************************************************************/ + +#ifndef IOS + /* Read command line arguments */ + if(argc > 2) + { + for(i = 1; i < (UWORD32)argc; i += 2) + { + if(CONFIG == get_argument(argv[i])) + { + strcpy(ac_cfg_fname, argv[i + 1]); + if((fp_cfg_file = fopen(ac_cfg_fname, "r")) == NULL) + { + sprintf(ac_error_str, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error_str); + } + read_cfg_file(&s_app_ctx, fp_cfg_file); + fclose(fp_cfg_file); + } + else + { + parse_argument(&s_app_ctx, argv[i], argv[i + 1]); + } + } + } + else + { + if((fp_cfg_file = fopen(ac_cfg_fname, "r")) == NULL) + { + sprintf(ac_error_str, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error_str); + } + read_cfg_file(&s_app_ctx, fp_cfg_file); + fclose(fp_cfg_file); + } +#else + sprintf(filename_with_path, "%s/%s", homedir, ac_cfg_fname); + if((fp_cfg_file = fopen(filename_with_path, "r")) == NULL) + { + sprintf(ac_error_str, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error_str); + + } + read_cfg_file(&s_app_ctx, fp_cfg_file); + fclose(fp_cfg_file); + +#endif +#ifdef PRINT_PICSIZE + /* If the binary is used for only getting number of bytes in each picture, then disable the following features */ + s_app_ctx.u4_piclen_flag = 0; + s_app_ctx.u4_file_save_flag = 0; + s_app_ctx.u4_chksum_save_flag = 0; + s_app_ctx.i4_degrade_pics = 0; + s_app_ctx.i4_degrade_type = 0; + s_app_ctx.loopback = 0; + s_app_ctx.u4_share_disp_buf = 0; + s_app_ctx.display = 0; +#endif + + /* If display is enabled, then turn off shared mode and get color format that is supported by display */ + if(1 == s_app_ctx.display) + { + s_app_ctx.u4_share_disp_buf = 0; + s_app_ctx.e_output_chroma_format = s_app_ctx.get_color_fmt(); + } + if(strcmp(s_app_ctx.ac_ip_fname, "\0") == 0) + { + printf("\nNo input file given for 
decoding\n"); + exit(-1); + } + + + + /***********************************************************************/ + /* create the file object for input file */ + /***********************************************************************/ +#ifdef IOS + sprintf(filename_with_path, "%s/%s", homedir, s_app_ctx.ac_ip_fname); + ps_ip_file = fopen(filename_with_path, "rb"); +#else + ps_ip_file = fopen(s_app_ctx.ac_ip_fname, "rb"); +#endif + if(NULL == ps_ip_file) + { + sprintf(ac_error_str, "Could not open input file %s", + s_app_ctx.ac_ip_fname); + codec_exit(ac_error_str); + } + /***********************************************************************/ + /* create the file object for input file */ + /***********************************************************************/ + if(1 == s_app_ctx.u4_piclen_flag) + { +#ifdef IOS + sprintf(filename_with_path, "%s/%s", homedir, s_app_ctx.ac_piclen_fname); + ps_piclen_file = fopen(filename_with_path, "rb"); +#else + ps_piclen_file = fopen(s_app_ctx.ac_piclen_fname, "rb"); +#endif + if(NULL == ps_piclen_file) + { + sprintf(ac_error_str, "Could not open piclen file %s", + s_app_ctx.ac_piclen_fname); + codec_exit(ac_error_str); + } + } + + /***********************************************************************/ + /* create the file object for output file */ + /***********************************************************************/ + + /* If the filename does not contain %d, then output will be dumped to + a single file and it is opened here */ + if((1 == s_app_ctx.u4_file_save_flag) && (strstr(s_app_ctx.ac_op_fname,"%d") == NULL)) + { +#ifdef IOS + sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctx.ac_op_fname); + ps_op_file = fopen(filename_with_path,"wb"); +#else + ps_op_file = fopen(s_app_ctx.ac_op_fname, "wb"); +#endif + + if(NULL == ps_op_file) + { + sprintf(ac_error_str, "Could not open output file %s", + s_app_ctx.ac_op_fname); + codec_exit(ac_error_str); + } + } + + 
/***********************************************************************/ + /* create the file object for check sum file */ + /***********************************************************************/ + if(1 == s_app_ctx.u4_chksum_save_flag) + { +#if IOS + sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctx.ac_op_chksum_fname); + ps_op_chksum_file = fopen(filename_with_path,"wb"); +#else + ps_op_chksum_file = fopen(s_app_ctx.ac_op_chksum_fname, "wb"); +#endif + if(NULL == ps_op_chksum_file) + { + sprintf(ac_error_str, "Could not open check sum file %s", + s_app_ctx.ac_op_chksum_fname); + codec_exit(ac_error_str); + } + } + /***********************************************************************/ + /* Create decoder instance */ + /***********************************************************************/ + { + + ps_out_buf = (ivd_out_bufdesc_t *)malloc(sizeof(ivd_out_bufdesc_t)); + + { + iv_num_mem_rec_ip_t s_no_of_mem_rec_query_ip; + iv_num_mem_rec_op_t s_no_of_mem_rec_query_op; + + s_no_of_mem_rec_query_ip.u4_size = sizeof(s_no_of_mem_rec_query_ip); + s_no_of_mem_rec_query_op.u4_size = sizeof(s_no_of_mem_rec_query_op); + s_no_of_mem_rec_query_ip.e_cmd = IV_CMD_GET_NUM_MEM_REC; + + /*****************************************************************************/ + /* API Call: Get Number of Mem Records */ + /*****************************************************************************/ + e_dec_status = ivd_api_function( + NULL, (void*)&s_no_of_mem_rec_query_ip, + (void*)&s_no_of_mem_rec_query_op); + if(IV_SUCCESS != e_dec_status) + { + sprintf(ac_error_str, "Error in get mem records"); + codec_exit(ac_error_str); + } + + u4_num_mem_recs = s_no_of_mem_rec_query_op.u4_num_mem_rec; + } + + pv_mem_rec_location = malloc(u4_num_mem_recs * sizeof(iv_mem_rec_t)); + if(pv_mem_rec_location == NULL) + { + sprintf(ac_error_str, "Allocation failure for mem_rec_location"); + codec_exit(ac_error_str); + + } + + { + ih264d_fill_mem_rec_ip_t s_fill_mem_rec_ip; + 
ih264d_fill_mem_rec_op_t s_fill_mem_rec_op; + iv_mem_rec_t *ps_mem_rec; + UWORD32 total_size; + + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.e_cmd = + IV_CMD_FILL_NUM_MEM_REC; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location = + (iv_mem_rec_t *)pv_mem_rec_location; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd = + (s_app_ctx.max_wd == 0) ? MAX_FRAME_WIDTH : s_app_ctx.max_wd; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht = + (s_app_ctx.max_ht == 0) ? MAX_FRAME_HEIGHT : s_app_ctx.max_ht; + s_fill_mem_rec_ip.i4_level = (s_app_ctx.max_level == 0) ? MAX_LEVEL_SUPPORTED : s_app_ctx.max_level; + s_fill_mem_rec_ip.u4_num_ref_frames = MAX_REF_FRAMES; + s_fill_mem_rec_ip.u4_num_reorder_frames = MAX_REORDER_FRAMES; + s_fill_mem_rec_ip.u4_share_disp_buf = s_app_ctx.u4_share_disp_buf; + s_fill_mem_rec_ip.e_output_format = + (IV_COLOR_FORMAT_T)s_app_ctx.e_output_chroma_format; + s_fill_mem_rec_ip.u4_num_extra_disp_buf = EXTRA_DISP_BUFFERS; + + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_size = + sizeof(ih264d_fill_mem_rec_ip_t); + s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_size = + sizeof(ih264d_fill_mem_rec_op_t); + + ps_mem_rec = (iv_mem_rec_t *)pv_mem_rec_location; + for(i = 0; i < u4_num_mem_recs; i++) + ps_mem_rec[i].u4_size = sizeof(iv_mem_rec_t); + + /*****************************************************************************/ + /* API Call: Fill Mem Records */ + /*****************************************************************************/ + + e_dec_status = ivd_api_function(NULL, + (void *)&s_fill_mem_rec_ip, + (void *)&s_fill_mem_rec_op); + + u4_num_mem_recs = + s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_num_mem_rec_filled; + + if(IV_SUCCESS != e_dec_status) + { + sprintf(ac_error_str, "Error in fill mem records: %x",s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_error_code); + codec_exit(ac_error_str); + } + + ps_mem_rec = (iv_mem_rec_t *)pv_mem_rec_location; + total_size = 0; + for(i = 0; i < u4_num_mem_recs; i++) + { + 
ps_mem_rec->pv_base = ih264a_aligned_malloc(ps_mem_rec->u4_mem_alignment, + ps_mem_rec->u4_mem_size); + if(ps_mem_rec->pv_base == NULL) + { + sprintf(ac_error_str, + "\nAllocation failure for mem record id %d i4_size %d\n", + i, ps_mem_rec->u4_mem_size); + codec_exit(ac_error_str); + + } + total_size += ps_mem_rec->u4_mem_size; + ps_mem_rec++; + } + printf("\nTotal memory for codec %d\n", total_size); + } + /*****************************************************************************/ + /* API Call: Initialize the Decoder */ + /*****************************************************************************/ + { + ih264d_init_ip_t s_init_ip; + ih264d_init_op_t s_init_op; + void *fxns = &ivd_api_function; + iv_mem_rec_t *mem_tab; + + mem_tab = (iv_mem_rec_t*)pv_mem_rec_location; + s_init_ip.s_ivd_init_ip_t.e_cmd = (IVD_API_COMMAND_TYPE_T)IV_CMD_INIT; + s_init_ip.s_ivd_init_ip_t.pv_mem_rec_location = mem_tab; + s_init_ip.s_ivd_init_ip_t.u4_frm_max_wd = (s_app_ctx.max_wd == 0) ? MAX_FRAME_WIDTH : s_app_ctx.max_wd; + s_init_ip.s_ivd_init_ip_t.u4_frm_max_ht = (s_app_ctx.max_ht == 0) ? MAX_FRAME_HEIGHT : s_app_ctx.max_ht; + s_init_ip.i4_level = (s_app_ctx.max_level == 0) ? 
MAX_LEVEL_SUPPORTED : s_app_ctx.max_level; + s_init_ip.u4_num_ref_frames = MAX_REF_FRAMES; + s_init_ip.u4_num_reorder_frames = MAX_REORDER_FRAMES; + s_init_ip.u4_share_disp_buf = s_app_ctx.u4_share_disp_buf; + s_init_ip.u4_num_extra_disp_buf = EXTRA_DISP_BUFFERS; + s_init_ip.s_ivd_init_ip_t.u4_num_mem_rec = u4_num_mem_recs; + s_init_ip.s_ivd_init_ip_t.e_output_format = + (IV_COLOR_FORMAT_T)s_app_ctx.e_output_chroma_format; + s_init_ip.s_ivd_init_ip_t.u4_size = sizeof(ih264d_init_ip_t); + s_init_op.s_ivd_init_op_t.u4_size = sizeof(ih264d_init_op_t); + + codec_obj = (iv_obj_t*)mem_tab[0].pv_base; + codec_obj->pv_fxns = fxns; + codec_obj->u4_size = sizeof(iv_obj_t); + + s_app_ctx.cocodec_obj = codec_obj; + + ret = ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_init_ip, + (void *)&s_init_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "Error in Init %8x\n", + s_init_op.s_ivd_init_op_t.u4_error_code); + codec_exit(ac_error_str); + } + + /*****************************************************************************/ + /* Input and output buffer allocation */ + /*****************************************************************************/ + { + + ivd_ctl_getbufinfo_ip_t s_ctl_ip; + ivd_ctl_getbufinfo_op_t s_ctl_op; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO; + s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t); + ret = ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "Error in Get Buf Info %x", s_ctl_op.u4_error_code); + codec_exit(ac_error_str); + } + + /* Allocate input buffer */ + u4_ip_buf_len = s_ctl_op.u4_min_in_buf_size[0]; + pu1_bs_buf = (UWORD8 *)malloc(u4_ip_buf_len); + + if(pu1_bs_buf == NULL) + { + sprintf(ac_error_str, + "\nAllocation failure for input buffer of i4_size %d", + u4_ip_buf_len); + codec_exit(ac_error_str); + } + s_app_ctx.num_disp_buf = 
s_ctl_op.u4_num_disp_bufs; + /* Allocate output buffer only if display buffers are not shared */ + /* Or if shared and output is 420P */ + if((0 == s_app_ctx.u4_share_disp_buf) || (IV_YUV_420P == s_app_ctx.e_output_chroma_format)) + { + UWORD32 outlen; + ps_out_buf->u4_min_out_buf_size[0] = + s_ctl_op.u4_min_out_buf_size[0]; + ps_out_buf->u4_min_out_buf_size[1] = + s_ctl_op.u4_min_out_buf_size[1]; + ps_out_buf->u4_min_out_buf_size[2] = + s_ctl_op.u4_min_out_buf_size[2]; + + outlen = s_ctl_op.u4_min_out_buf_size[0]; + if(s_ctl_op.u4_min_num_out_bufs > 1) + outlen += s_ctl_op.u4_min_out_buf_size[1]; + + if(s_ctl_op.u4_min_num_out_bufs > 2) + outlen += s_ctl_op.u4_min_out_buf_size[2]; + + ps_out_buf->pu1_bufs[0] = (UWORD8 *)malloc(outlen); + if(ps_out_buf->pu1_bufs[0] == NULL) + { + sprintf(ac_error_str, + "\nAllocation failure for output buffer of i4_size %d", + outlen); + codec_exit(ac_error_str); + } + + if(s_ctl_op.u4_min_num_out_bufs > 1) + ps_out_buf->pu1_bufs[1] = ps_out_buf->pu1_bufs[0] + + (s_ctl_op.u4_min_out_buf_size[0]); + + if(s_ctl_op.u4_min_num_out_bufs > 2) + ps_out_buf->pu1_bufs[2] = ps_out_buf->pu1_bufs[1] + + (s_ctl_op.u4_min_out_buf_size[1]); + + ps_out_buf->u4_num_bufs = s_ctl_op.u4_min_num_out_bufs; + } + + } + } + + } + + + /*************************************************************************/ + /* set num of cores */ + /*************************************************************************/ + { + + ih264d_ctl_set_num_cores_ip_t s_ctl_set_cores_ip; + ih264d_ctl_set_num_cores_op_t s_ctl_set_cores_op; + + s_ctl_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_set_cores_ip.e_sub_cmd =(IVD_CONTROL_API_COMMAND_TYPE_T) IH264D_CMD_CTL_SET_NUM_CORES; + s_ctl_set_cores_ip.u4_num_cores = s_app_ctx.u4_num_cores; + s_ctl_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t); + s_ctl_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t); + + ret = ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_ctl_set_cores_ip, + (void 
*)&s_ctl_set_cores_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "\nError in setting number of cores"); + codec_exit(ac_error_str); + } + + } + + /*************************************************************************/ + /* set processsor */ + /*************************************************************************/ + { + + ih264d_ctl_set_processor_ip_t s_ctl_set_num_processor_ip; + ih264d_ctl_set_processor_op_t s_ctl_set_num_processor_op; + + s_ctl_set_num_processor_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_set_num_processor_ip.e_sub_cmd =(IVD_CONTROL_API_COMMAND_TYPE_T) IH264D_CMD_CTL_SET_PROCESSOR; + s_ctl_set_num_processor_ip.u4_arch = s_app_ctx.e_arch; + s_ctl_set_num_processor_ip.u4_soc = s_app_ctx.e_soc; + s_ctl_set_num_processor_ip.u4_size = sizeof(ih264d_ctl_set_processor_ip_t); + s_ctl_set_num_processor_op.u4_size = sizeof(ih264d_ctl_set_processor_op_t); + + ret = ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_ctl_set_num_processor_ip, + (void *)&s_ctl_set_num_processor_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "\nError in setting Processor type"); + codec_exit(ac_error_str); + } + + } + + + /*****************************************************************************/ + /* Decode header to get width and height and buffer sizes */ + /*****************************************************************************/ + { + + ivd_ctl_set_config_ip_t s_ctl_ip; + ivd_ctl_set_config_op_t s_ctl_op; + + ivd_video_decode_ip_t s_video_decode_ip; + ivd_video_decode_op_t s_video_decode_op; + + s_ctl_ip.u4_disp_wd = STRIDE; + if(1 == s_app_ctx.display) + s_ctl_ip.u4_disp_wd = s_app_ctx.get_stride(); + + s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE; + s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; + s_ctl_ip.e_vid_dec_mode = IVD_DECODE_HEADER; + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t); + + ret = 
ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, + "\nError in setting the codec in header decode mode"); + codec_exit(ac_error_str); + } + + do + { + WORD32 numbytes; + if(0 == s_app_ctx.u4_piclen_flag) + { + fseek(ps_ip_file, file_pos, SEEK_SET); + numbytes = u4_ip_buf_len; + } + else + { + WORD32 entries; + entries = fscanf(ps_piclen_file, "%d\n", &numbytes); + if(1 != entries) + numbytes = u4_ip_buf_len; + } + + u4_bytes_remaining = fread(pu1_bs_buf, sizeof(UWORD8), numbytes, + ps_ip_file); + + if(0 == u4_bytes_remaining) + { + sprintf(ac_error_str, "\nUnable to read from input file"); + codec_exit(ac_error_str); + } + + s_video_decode_ip.e_cmd = IVD_CMD_VIDEO_DECODE; + s_video_decode_ip.u4_ts = u4_ip_frm_ts; + s_video_decode_ip.pv_stream_buffer = pu1_bs_buf; + s_video_decode_ip.u4_num_Bytes = u4_bytes_remaining; + s_video_decode_ip.u4_size = sizeof(ivd_video_decode_ip_t); + s_video_decode_op.u4_size = sizeof(ivd_video_decode_op_t); + + /*****************************************************************************/ + /* API Call: Header Decode */ + /*****************************************************************************/ + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_decode_ip, + (void *)&s_video_decode_op); + + if(ret != IV_SUCCESS) + { + printf("Error in header decode %x\n", s_video_decode_op.u4_error_code); + // codec_exit(ac_error_str); + } + + u4_num_bytes_dec = s_video_decode_op.u4_num_bytes_consumed; +#ifndef PROFILE_ENABLE + printf("%d\n",s_video_decode_op.u4_num_bytes_consumed); +#endif + file_pos += u4_num_bytes_dec; + total_bytes_comsumed += u4_num_bytes_dec; + }while(ret != IV_SUCCESS); + + /* copy pic_wd and pic_ht to initialize buffers */ + s_app_ctx.u4_pic_wd = s_video_decode_op.u4_pic_wd; + s_app_ctx.u4_pic_ht = s_video_decode_op.u4_pic_ht; + +#if IOS_DISPLAY + s_app_ctx.i4_screen_wd = screen_wd; + s_app_ctx.i4_screen_ht = screen_ht; 
+#endif + + /* Create display thread and wait for the display buffers to be initialized */ + if(1 == s_app_ctx.display) + { + if(0 == s_app_ctx.display_thread_created) + { + s_app_ctx.display_init_done = 0; + ithread_create(s_app_ctx.display_thread_handle, NULL, + (void *) &display_thread, (void *) &s_app_ctx); + s_app_ctx.display_thread_created = 1; + + while(1) + { + if(s_app_ctx.display_init_done) + break; + + ithread_msleep(1); + } + } + + s_app_ctx.u4_strd = s_app_ctx.get_stride(); + } + } + + /*************************************************************************/ + /* Get actual number of output buffers requried, which is dependent */ + /* on ps_bitstrm properties such as width, height and level etc */ + /* This is needed mainly for shared display mode */ + /*************************************************************************/ + //if(1 == s_app_ctx.u4_share_disp_buf) + { + ivd_ctl_getbufinfo_ip_t s_ctl_ip; + ivd_ctl_getbufinfo_op_t s_ctl_op; + WORD32 outlen = 0; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO; + s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t); + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "Error in Get Buf Info %x", s_ctl_op.u4_error_code); + codec_exit(ac_error_str); + } + +#ifdef APP_EXTRA_BUFS + s_app_ctx.disp_delay = EXTRA_DISP_BUFFERS; + s_ctl_op.u4_num_disp_bufs += EXTRA_DISP_BUFFERS; +#endif + + /*****************************************************************************/ + /* API Call: Allocate display buffers for display buffer shared case */ + /*****************************************************************************/ + + for(i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) + { + + s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[0] = + s_ctl_op.u4_min_out_buf_size[0]; + s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[1] = + 
s_ctl_op.u4_min_out_buf_size[1]; + s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[2] = + s_ctl_op.u4_min_out_buf_size[2]; + + outlen = s_ctl_op.u4_min_out_buf_size[0]; + if(s_ctl_op.u4_min_num_out_bufs > 1) + outlen += s_ctl_op.u4_min_out_buf_size[1]; + + if(s_ctl_op.u4_min_num_out_bufs > 2) + outlen += s_ctl_op.u4_min_out_buf_size[2]; + + s_app_ctx.s_disp_buffers[i].pu1_bufs[0] = (UWORD8 *)malloc(outlen); + + if(s_app_ctx.s_disp_buffers[i].pu1_bufs[0] == NULL) + { + sprintf(ac_error_str, + "\nAllocation failure for output buffer of i4_size %d", + outlen); + codec_exit(ac_error_str); + } + + if(s_ctl_op.u4_min_num_out_bufs > 1) + s_app_ctx.s_disp_buffers[i].pu1_bufs[1] = + s_app_ctx.s_disp_buffers[i].pu1_bufs[0] + + (s_ctl_op.u4_min_out_buf_size[0]); + + if(s_ctl_op.u4_min_num_out_bufs > 2) + s_app_ctx.s_disp_buffers[i].pu1_bufs[2] = + s_app_ctx.s_disp_buffers[i].pu1_bufs[1] + + (s_ctl_op.u4_min_out_buf_size[1]); + + s_app_ctx.s_disp_buffers[i].u4_num_bufs = + s_ctl_op.u4_min_num_out_bufs; + } + s_app_ctx.num_disp_buf = s_ctl_op.u4_num_disp_bufs; + + /*****************************************************************************/ + /* API Call: Send the allocated display buffers to codec */ + /*****************************************************************************/ + { + ivd_set_display_frame_ip_t s_set_display_frame_ip; + ivd_set_display_frame_op_t s_set_display_frame_op; + + s_set_display_frame_ip.e_cmd = IVD_CMD_SET_DISPLAY_FRAME; + s_set_display_frame_ip.u4_size = sizeof(ivd_set_display_frame_ip_t); + s_set_display_frame_op.u4_size = sizeof(ivd_set_display_frame_op_t); + + s_set_display_frame_ip.num_disp_bufs = s_app_ctx.num_disp_buf; + + memcpy(&(s_set_display_frame_ip.s_disp_buffer), + &(s_app_ctx.s_disp_buffers), + s_ctl_op.u4_num_disp_bufs * sizeof(ivd_out_bufdesc_t)); + + ret = ivd_api_function((iv_obj_t *)codec_obj, + (void *)&s_set_display_frame_ip, + (void *)&s_set_display_frame_op); + + if(IV_SUCCESS != ret) + { + sprintf(ac_error_str, "Error in 
Set display frame"); + codec_exit(ac_error_str); + } + + } + + } + + /*************************************************************************/ + /* Get frame dimensions for display buffers such as x_offset,y_offset */ + /* etc. This information might be needed to set display buffer */ + /* offsets in case of shared display buffer mode */ + /*************************************************************************/ + { + + ih264d_ctl_get_frame_dimensions_ip_t s_ctl_get_frame_dimensions_ip; + ih264d_ctl_get_frame_dimensions_op_t s_ctl_get_frame_dimensions_op; + + s_ctl_get_frame_dimensions_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_get_frame_dimensions_ip.e_sub_cmd = + (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_GET_BUFFER_DIMENSIONS; + s_ctl_get_frame_dimensions_ip.u4_size = + sizeof(ih264d_ctl_get_frame_dimensions_ip_t); + s_ctl_get_frame_dimensions_op.u4_size = + sizeof(ih264d_ctl_get_frame_dimensions_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_get_frame_dimensions_ip, + (void *)&s_ctl_get_frame_dimensions_op); + if(IV_SUCCESS != ret) + { + sprintf(ac_error_str, "Error in Get buffer Dimensions"); + codec_exit(ac_error_str); + } + +/* + printf("Frame offsets due to padding\n"); + printf("s_ctl_get_frame_dimensions_op.x_offset[0] %d s_ctl_get_frame_dimensions_op.y_offset[0] %d\n", + s_ctl_get_frame_dimensions_op.u4_x_offset[0], + s_ctl_get_frame_dimensions_op.u4_y_offset[0]); +*/ + } + + + + /*************************************************************************/ + /* Set the decoder in frame decode mode. 
It was set in header decode */ + /* mode earlier */ + /*************************************************************************/ + { + + ivd_ctl_set_config_ip_t s_ctl_ip; + ivd_ctl_set_config_op_t s_ctl_op; + + s_ctl_ip.u4_disp_wd = STRIDE; + if(1 == s_app_ctx.display) + s_ctl_ip.u4_disp_wd = s_app_ctx.get_stride(); + s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE; + + s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; + s_ctl_ip.e_vid_dec_mode = IVD_DECODE_FRAME; + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t); + + s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, (void *)&s_ctl_op); + + if(IV_SUCCESS != ret) + { + sprintf(ac_error_str, "Error in Set Parameters"); + //codec_exit(ac_error_str); + } + + } + /*************************************************************************/ + /* If required disable deblocking and sao at given level */ + /*************************************************************************/ + + set_degrade(codec_obj, s_app_ctx.i4_degrade_type, s_app_ctx.i4_degrade_pics); +#ifdef WINDOWS_TIMER + QueryPerformanceFrequency ( &frequency); +#endif +#ifndef PRINT_PICSIZE + get_version(codec_obj); +#endif + max_op_frm_ts = (s_app_ctx.u4_max_frm_ts > 0)? 
(s_app_ctx.u4_max_frm_ts + s_app_ctx.disp_delay): 0xffffffff; + while(u4_op_frm_ts < max_op_frm_ts) + { + +#ifdef TEST_FLUSH + if(u4_ip_frm_ts == FLUSH_FRM_CNT) + { + ivd_ctl_flush_ip_t s_ctl_ip; + ivd_ctl_flush_op_t s_ctl_op; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH; + s_ctl_ip.u4_size = sizeof(ivd_ctl_flush_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_flush_op_t); + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + + if(ret != IV_SUCCESS) + { + printf("Error in Setting the decoder in flush mode\n"); + } +// file_pos = 0; + +// fseek(ps_ip_file, file_pos, SEEK_SET); + + } +#endif + if(u4_ip_frm_ts < s_app_ctx.num_disp_buf) + { + release_disp_frame(codec_obj, u4_ip_frm_ts); + } + + + /*************************************************************************/ + /* set num of cores */ + /*************************************************************************/ +#ifdef DYNAMIC_NUMCORES + { + + ih264d_ctl_set_num_cores_ip_t s_ctl_set_cores_ip; + ih264d_ctl_set_num_cores_op_t s_ctl_set_cores_op; + + s_ctl_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_set_cores_ip.e_sub_cmd = IH264D_CMD_CTL_SET_NUM_CORES; + s_ctl_set_cores_ip.u4_num_cores = 1 + 3 * (u4_ip_frm_ts % 2); + s_ctl_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t); + s_ctl_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_set_cores_ip, + (void *)&s_ctl_set_cores_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "\nError in setting number of cores"); + codec_exit(ac_error_str); + } + + } +#endif + /***********************************************************************/ + /* Seek the file to start of current frame, this is equavelent of */ + /* having a parcer which tells the start of current frame */ + /***********************************************************************/ + { + WORD32 numbytes; + + if(0 == s_app_ctx.u4_piclen_flag) + { 
+ fseek(ps_ip_file, file_pos, SEEK_SET); + numbytes = u4_ip_buf_len; + } + else + { + WORD32 entries; + entries = fscanf(ps_piclen_file, "%d\n", &numbytes); + if(1 != entries) + numbytes = u4_ip_buf_len; + } + + u4_bytes_remaining = fread(pu1_bs_buf, sizeof(UWORD8), + numbytes, ps_ip_file); + + if(u4_bytes_remaining == 0) + { + if(1 == s_app_ctx.loopback) + { + file_pos = 0; + if(0 == s_app_ctx.u4_piclen_flag) + { + fseek(ps_ip_file, file_pos, SEEK_SET); + numbytes = u4_ip_buf_len; + } + else + { + WORD32 entries; + entries = fscanf(ps_piclen_file, "%d\n", &numbytes); + if(1 != entries) + numbytes = u4_ip_buf_len; + } + + + u4_bytes_remaining = fread(pu1_bs_buf, sizeof(UWORD8), + numbytes, ps_ip_file); + } + else + break; + } + } + + /*********************************************************************/ + /* Following calls can be enabled at diffent times */ + /*********************************************************************/ +#if ENABLE_DEGRADE + if(u4_op_frm_ts >= 10000) + disable_deblocking(codec_obj, 4); + + if(u4_op_frm_ts == 30000) + enable_deblocking(codec_obj); + + if(u4_op_frm_ts == 10000) + enable_skippb_frames(codec_obj); + + if(u4_op_frm_ts == 60000) + disable_skippb_frames(codec_obj); + + if(u4_op_frm_ts == 30000) + enable_skipb_frames(codec_obj); + + if(u4_op_frm_ts == 60000) + disable_skipb_frames(codec_obj); +#endif + + + { + ivd_video_decode_ip_t s_video_decode_ip; + ivd_video_decode_op_t s_video_decode_op; +#ifdef PROFILE_ENABLE + UWORD32 s_elapsed_time; + TIMER s_start_timer; + TIMER s_end_timer; +#endif + + + s_video_decode_ip.e_cmd = IVD_CMD_VIDEO_DECODE; + s_video_decode_ip.u4_ts = u4_ip_frm_ts; + s_video_decode_ip.pv_stream_buffer = pu1_bs_buf; + s_video_decode_ip.u4_num_Bytes = u4_bytes_remaining; + s_video_decode_ip.u4_size = sizeof(ivd_video_decode_ip_t); + s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[0] = + ps_out_buf->u4_min_out_buf_size[0]; + s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[1] = + 
ps_out_buf->u4_min_out_buf_size[1]; + s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[2] = + ps_out_buf->u4_min_out_buf_size[2]; + + s_video_decode_ip.s_out_buffer.pu1_bufs[0] = + ps_out_buf->pu1_bufs[0]; + s_video_decode_ip.s_out_buffer.pu1_bufs[1] = + ps_out_buf->pu1_bufs[1]; + s_video_decode_ip.s_out_buffer.pu1_bufs[2] = + ps_out_buf->pu1_bufs[2]; + s_video_decode_ip.s_out_buffer.u4_num_bufs = + ps_out_buf->u4_num_bufs; + s_video_decode_op.u4_size = sizeof(ivd_video_decode_op_t); + + /* Get display buffer pointers */ + if(1 == s_app_ctx.display) + { + WORD32 wr_idx; + + wr_idx = dispq_producer_dequeue(&s_app_ctx); + + if(s_app_ctx.quit) + break; + + s_app_ctx.set_disp_buffers(s_app_ctx.pv_disp_ctx, wr_idx, + &s_video_decode_ip.s_out_buffer.pu1_bufs[0], + &s_video_decode_ip.s_out_buffer.pu1_bufs[1], + &s_video_decode_ip.s_out_buffer.pu1_bufs[2]); + } + + /*****************************************************************************/ + /* API Call: Video Decode */ + /*****************************************************************************/ + + GETTIME(&s_start_timer); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_decode_ip, + (void *)&s_video_decode_op); + + + GETTIME(&s_end_timer); + ELAPSEDTIME(s_start_timer,s_end_timer,s_elapsed_time,frequency); +#ifdef PROFILE_ENABLE + { + UWORD32 peak_avg, id; + u4_tot_cycles += s_elapsed_time; + peak_window[peak_window_idx++] = s_elapsed_time; + if(peak_window_idx == PEAK_WINDOW_SIZE) + peak_window_idx = 0; + peak_avg = 0; + for(id = 0; id < PEAK_WINDOW_SIZE; id++) + { + peak_avg += peak_window[id]; + } + peak_avg /= PEAK_WINDOW_SIZE; + if(peak_avg > peak_avg_max) + peak_avg_max = peak_avg; + frm_cnt++; + + printf("FrameNum: %4d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d Output: %2d NumBytes: %6d \n", + frm_cnt, s_elapsed_time, u4_tot_cycles / frm_cnt, peak_avg_max, s_video_decode_op.u4_output_present, s_video_decode_op.u4_num_bytes_consumed); + + } +#ifdef INTEL_CE5300 + 
time_consumed += s_elapsed_time; + bytes_consumed += s_video_decode_op.u4_num_bytes_consumed; + if (!(frm_cnt % (s_app_ctx.fps))) + { + time_consumed = time_consumed/s_app_ctx.fps; + printf("Average decode time(micro sec) for the last second = %6d\n",time_consumed); + printf("Average bitrate(kb) for the last second = %6d\n",(bytes_consumed * 8) / 1024); + time_consumed = 0; + bytes_consumed = 0; + + } +#endif +#else + printf("%d\n",s_video_decode_op.u4_num_bytes_consumed); +#endif + + if(ret != IV_SUCCESS) + { + printf("Error in video Frame decode : ret %x Error %x\n", ret, + s_video_decode_op.u4_error_code); + } + + if((IV_SUCCESS != ret) && + ((s_video_decode_op.u4_error_code & 0xFF) == IVD_RES_CHANGED)) + { + ivd_ctl_reset_ip_t s_ctl_ip; + ivd_ctl_reset_op_t s_ctl_op; + + flush_output(codec_obj, &s_app_ctx, ps_out_buf, + pu1_bs_buf, &u4_op_frm_ts, + ps_op_file, ps_op_chksum_file, + u4_ip_frm_ts, u4_bytes_remaining); + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET; + s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + if(IV_SUCCESS != ret) + { + sprintf(ac_error_str, "Error in Reset"); + codec_exit(ac_error_str); + } + /*************************************************************************/ + /* set num of cores */ + /*************************************************************************/ + { + + ih264d_ctl_set_num_cores_ip_t s_ctl_set_cores_ip; + ih264d_ctl_set_num_cores_op_t s_ctl_set_cores_op; + + s_ctl_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_set_cores_ip.e_sub_cmd =(IVD_CONTROL_API_COMMAND_TYPE_T) IH264D_CMD_CTL_SET_NUM_CORES; + s_ctl_set_cores_ip.u4_num_cores = s_app_ctx.u4_num_cores; + s_ctl_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t); + s_ctl_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t); + + ret = ivd_api_function((iv_obj_t*)codec_obj, (void 
*)&s_ctl_set_cores_ip, + (void *)&s_ctl_set_cores_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "\nError in setting number of cores"); + codec_exit(ac_error_str); + } + + } + /*************************************************************************/ + /* set processsor */ + /*************************************************************************/ + + { + + ih264d_ctl_set_processor_ip_t s_ctl_set_num_processor_ip; + ih264d_ctl_set_processor_op_t s_ctl_set_num_processor_op; + + s_ctl_set_num_processor_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_set_num_processor_ip.e_sub_cmd =(IVD_CONTROL_API_COMMAND_TYPE_T) IH264D_CMD_CTL_SET_PROCESSOR; + s_ctl_set_num_processor_ip.u4_arch = s_app_ctx.e_arch; + s_ctl_set_num_processor_ip.u4_soc = s_app_ctx.e_soc; + s_ctl_set_num_processor_ip.u4_size = sizeof(ih264d_ctl_set_processor_ip_t); + s_ctl_set_num_processor_op.u4_size = sizeof(ih264d_ctl_set_processor_op_t); + + ret = ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_ctl_set_num_processor_ip, + (void *)&s_ctl_set_num_processor_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "\nError in setting Processor type"); + codec_exit(ac_error_str); + } + + } + + } + + + if((1 == s_app_ctx.display) && + (1 == s_video_decode_op.u4_output_present)) + { + dispq_producer_queue(&s_app_ctx); + } + + if(IV_B_FRAME == s_video_decode_op.e_pic_type) + s_app_ctx.b_pic_present |= 1; + + u4_num_bytes_dec = s_video_decode_op.u4_num_bytes_consumed; + + file_pos += u4_num_bytes_dec; + total_bytes_comsumed += u4_num_bytes_dec; + u4_ip_frm_ts++; + + + if(1 == s_video_decode_op.u4_output_present) + { + + CHAR cur_fname[1000]; + CHAR *extn = NULL; + /* The objective is to dump the decoded frames into separate files instead of + * dumping all the frames in one common file. 
Also, the number of dumped frames + * at any given instance of time cannot exceed 'frame_memory' + */ + if(s_app_ctx.u4_file_save_flag) + { + /* Locate the position of extension yuv */ + extn = strstr(s_app_ctx.ac_op_fname,"%d"); + if (extn != NULL) + { + output_write_stall(s_app_ctx.ac_op_fname,u4_op_frm_ts); + /* Generate output file names */ + sprintf(cur_fname,s_app_ctx.ac_op_fname,u4_op_frm_ts); + /* Open Output file */ + ps_op_file = fopen(cur_fname,"wb"); + if (NULL == ps_op_file) + { + sprintf(ac_error_str, "Could not open output file %s", + cur_fname); + + codec_exit(ac_error_str); + } + } + } + + width = s_video_decode_op.s_disp_frm_buf.u4_y_wd; + height = s_video_decode_op.s_disp_frm_buf.u4_y_ht; + dump_output(&s_app_ctx, &(s_video_decode_op.s_disp_frm_buf), + s_video_decode_op.u4_disp_buf_id, ps_op_file, + ps_op_chksum_file, + u4_op_frm_ts, s_app_ctx.u4_file_save_flag, + s_app_ctx.u4_chksum_save_flag); + + u4_op_frm_ts++; + if (extn != NULL) + fclose(ps_op_file); + + } + else + { + if((s_video_decode_op.u4_error_code >> IVD_FATALERROR) & 1) + { + printf("Fatal error\n"); + break; + } + } + + } + } + + /***********************************************************************/ + /* To get the last decoded frames, call process with NULL input */ + /***********************************************************************/ + flush_output(codec_obj, &s_app_ctx, ps_out_buf, + pu1_bs_buf, &u4_op_frm_ts, + ps_op_file, ps_op_chksum_file, + u4_ip_frm_ts, u4_bytes_remaining); + + /* set disp_end u4_flag */ + s_app_ctx.quit = 1; + + +#ifdef PROFILE_ENABLE + printf("Summary\n"); + printf("Input filename : %s\n", s_app_ctx.ac_ip_fname); + printf("Output Width : %-4d\n", width); + printf("Output Height : %-4d\n", height); + + if(frm_cnt) + { + double avg = u4_tot_cycles / frm_cnt; + double bytes_avg = total_bytes_comsumed / frm_cnt; + double bitrate = (bytes_avg * 8 * s_app_ctx.fps)/1000000; + printf("Bitrate @ %2d fps(mbps) : %-6.2f\n", s_app_ctx.fps, bitrate); + 
printf("Average decode time(micro sec) : %-6d\n", (WORD32)avg); + printf("Avg Peak decode time(%2d frames) : %-6d\n", PEAK_WINDOW_SIZE, (WORD32)peak_avg_max); + avg = (u4_tot_cycles + u4_tot_fmt_cycles)* 1.0 / frm_cnt; + + if(0 == s_app_ctx.u4_share_disp_buf) + printf("FPS achieved (with format conv) : %-3.2f\n", 1000000/avg); + else + printf("FPS achieved : %-3.2f\n", 1000000/avg); + } +#endif + /***********************************************************************/ + /* Clear the decoder, close all the files, free all the memory */ + /***********************************************************************/ + if(1 == s_app_ctx.display) + { + s_app_ctx.display_deinit_flag = 1; + /* wait for display to finish */ + if(s_app_ctx.display_thread_created) + { + ithread_join(s_app_ctx.display_thread_handle, NULL); + } + free(s_app_ctx.display_thread_handle); + } + + { + iv_retrieve_mem_rec_ip_t s_retrieve_dec_ip; + iv_retrieve_mem_rec_op_t s_retrieve_dec_op; + s_retrieve_dec_ip.pv_mem_rec_location = (iv_mem_rec_t *)pv_mem_rec_location; + + s_retrieve_dec_ip.e_cmd = IV_CMD_RETRIEVE_MEMREC; + s_retrieve_dec_ip.u4_size = sizeof(iv_retrieve_mem_rec_ip_t); + s_retrieve_dec_op.u4_size = sizeof(iv_retrieve_mem_rec_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_retrieve_dec_ip, + (void *)&s_retrieve_dec_op); + + if(IV_SUCCESS != ret) + { + sprintf(ac_error_str, "Error in Retrieve Memrec"); + codec_exit(ac_error_str); + } + + { + iv_mem_rec_t *ps_mem_rec; + UWORD16 u2_i; + + u4_num_mem_recs = s_retrieve_dec_op.u4_num_mem_rec_filled; + + ps_mem_rec = s_retrieve_dec_ip.pv_mem_rec_location; + + for(u2_i = 0; u2_i < u4_num_mem_recs; u2_i++) + { + ih264a_aligned_free(ps_mem_rec->pv_base); + ps_mem_rec++; + } + free(s_retrieve_dec_ip.pv_mem_rec_location); + } + + } + /***********************************************************************/ + /* Close all the files and free all the memory */ + 
/***********************************************************************/ + { + fclose(ps_ip_file); + + if((1 == s_app_ctx.u4_file_save_flag) && (strstr(s_app_ctx.ac_op_fname,"%d") == NULL)) + { + fclose(ps_op_file); + } + if(1 == s_app_ctx.u4_chksum_save_flag) + { + fclose(ps_op_chksum_file); + } + + } + + if(0 == s_app_ctx.u4_share_disp_buf) + { + free(ps_out_buf->pu1_bufs[0]); + } + + for(i = 0; i < s_app_ctx.num_disp_buf; i++) + { + free(s_app_ctx.s_disp_buffers[i].pu1_bufs[0]); + } + + free(ps_out_buf); + free(pu1_bs_buf); + + return (0); +} diff --git a/test/encoder.mk b/test/encoder.mk new file mode 100755 index 0000000..9a0980e --- /dev/null +++ b/test/encoder.mk @@ -0,0 +1,13 @@ +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_MODULE := avcenc +LOCAL_MODULE_TAGS := optional + +LOCAL_CFLAGS := -DPROFILE_ENABLE -DARM -DMD5_DISABLE -fPIC -pie +LOCAL_C_INCLUDES += $(LOCAL_PATH)/../encoder $(LOCAL_PATH)/../common $(LOCAL_PATH)/encoder/ +LOCAL_SRC_FILES := encoder/main.c encoder/psnr.c encoder/input.c encoder/output.c encoder/recon.c +LOCAL_STATIC_LIBRARIES := libavcenc + +include $(BUILD_EXECUTABLE) diff --git a/test/encoder/app.h b/test/encoder/app.h new file mode 100755 index 0000000..8c409b8 --- /dev/null +++ b/test/encoder/app.h @@ -0,0 +1,348 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : app.h */ +/* */ +/* Description : This file contains all the necessary structure and */ +/* enumeration definitions needed for the Application */ +/* */ +/* List of Functions : */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 26 08 2010 Ittiam Draft */ +/* */ +/*****************************************************************************/ + +#ifndef _APP_H_ +#define _APP_H_ + +#include "iv2.h" +#include "ive2.h" +#ifdef WINDOWS_TIMER +#include <windows.h> +#endif +/*****************************************************************************/ +/* Function Macros */ +/*****************************************************************************/ +#define MAX(a, b) ((a) > (b))? (a) : (b) +#define MIN(a, b) ((a) < (b))? 
(a) : (b) + +#define ALIGN16(x) ((((x) + 15) >> 4) << 4) + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + +#define DEFAULT_NUM_INPUT_BUFS 1 +#define DEFAULT_MAX_INPUT_BUFS 32 + +#define DEFAULT_NUM_OUTPUT_BUFS 1 +#define DEFAULT_MAX_OUTPUT_BUFS 32 + +#define DEFAULT_NUM_RECON_BUFS 1 +#define DEFAULT_MAX_RECON_BUFS DEFAULT_NUM_RECON_BUFS + + +#define LEN_STATUS_BUFFER (10 * 1024) +#define MAX_VBV_BUFF_SIZE (120 * 16384) +#define MAX_NUM_IO_BUFS 3 + +#define DEFAULT_MAX_REF_FRM 1 +#define DEFAULT_MAX_REORDER_FRM 0 +#define DEFAULT_QP_MIN 0 +#define DEFAULT_QP_MAX 51 +#define DEFAULT_MAX_BITRATE 20000000 +#define DEFAULT_MAX_SRCH_RANGE_X 256 +#define DEFAULT_MAX_SRCH_RANGE_Y 256 +#define DEFAULT_MAX_FRAMERATE 120000 +#define DEFAULT_NUM_CORES 1 +#define DEFAULT_NUM_CORES_PRE_ENC 0 +#define DEFAULT_FPS 30 +#define DEFAULT_ENC_SPEED 100 + +#define DEFAULT_MEM_REC_CNT 0 +#define DEFAULT_RECON_ENABLE 0 +#define DEFAULT_CHKSUM_ENABLE 0 +#define DEFAULT_START_FRM 0 +#define DEFAULT_NUM_FRMS 0xFFFFFFFF +#define DEFAULT_INP_COLOR_FMT IV_YUV_420SP_UV +#define DEFAULT_RECON_COLOR_FMT IV_YUV_420P +#define DEFAULT_LOOPBACK 0 +#define DEFAULT_SRC_FRAME_RATE 30 +#define DEFAULT_TGT_FRAME_RATE 30 +#define DEFAULT_MAX_WD 1920 +#define DEFAULT_MAX_HT 1920 +#define DEFAULT_MAX_LEVEL 50 +#define DEFAULT_STRIDE 0 +#define DEFAULT_WD 0 +#define DEFAULT_HT 0 +#define DEFAULT_PSNR_ENABLE 0 +#define DEFAULT_ME_SPEED 100 +#define DEFAULT_ENABLE_FAST_SAD 0 +#define DEFAULT_ENABLE_ALT_REF 0 +#define DEFAULT_RC 1 +#define DEFAULT_BITRATE 6000000 +#define DEFAULT_I_QP 25 +#define DEFAULT_I_QP_MAX DEFAULT_QP_MAX +#define DEFAULT_I_QP_MIN 0 +#define DEFAULT_P_QP 28 +#define DEFAULT_P_QP_MAX DEFAULT_QP_MAX +#define DEFAULT_P_QP_MIN 0 +#define DEFAULT_B_QP 28 +#define DEFAULT_B_QP_MAX DEFAULT_QP_MAX +#define DEFAULT_B_QP_MIN 0 +#define DEFAULT_AIR 0 +#define 
DEFAULT_AIR_REFRESH_PERIOD 30 +#define DEFAULT_SRCH_RNG_X 64 +#define DEFAULT_SRCH_RNG_Y 48 +#define DEFAULT_I_INTERVAL 30 +#define DEFAULT_IDR_INTERVAL 1000 +#define DEFAULT_B_FRAMES 0 +#define DEFAULT_DISABLE_DEBLK_LEVEL 0 +#define DEFAULT_HPEL 1 +#define DEFAULT_QPEL 1 +#define DEFAULT_I4 1 +#define DEFAULT_EPROFILE IV_PROFILE_BASE +#define DEFAULT_SLICE_MODE 0 +#define DEFAULT_SLICE_PARAM 256 +#define DEFAULT_ARCH ARCH_ARM_A9Q +#define STRLENGTH 500 + + +/*****************************************************************************/ +/* profile Macros */ +/*****************************************************************************/ +#ifdef PROFILE_ENABLE + #ifdef WINDOWS_TIMER + typedef LARGE_INTEGER TIMER; + #else + //#ifdef X86_MINGW + typedef struct timeval TIMER; + //#endif + #endif +#endif + +#ifdef PROFILE_ENABLE + #ifdef WINDOWS_TIMER + #define GETTIME(timer) QueryPerformanceCounter(timer); + #else + //#ifdef X86_MINGW + #define GETTIME(timer) gettimeofday(timer,NULL); + //#endif + #endif + + #ifdef WINDOWS_TIMER + #define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) \ + { \ + TIMER s_temp_time; \ + s_temp_time.LowPart = s_end_timer.LowPart - s_start_timer.LowPart ; \ + s_elapsed_time = (UWORD32) ( ((DOUBLE)s_temp_time.LowPart / (DOUBLE)frequency.LowPart ) * 1000000); \ + } + #else + //#ifdef X86_MINGW + #define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) \ + s_elapsed_time = ((s_end_timer.tv_sec - s_start_timer.tv_sec) * 1000000) + (s_end_timer.tv_usec - s_start_timer.tv_usec); + //#endif + #endif + +#else + #define GETTIME(timer) + #define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) +#endif + + +/*****************************************************************************/ +/* Structure definitions */ +/*****************************************************************************/ +typedef struct +{ + UWORD8 *pu1_buf; + UWORD32 u4_buf_size; + UWORD32 u4_timestamp_low; + UWORD32 
u4_timestamp_high; + UWORD32 u4_is_free; + void *pv_mb_info; + void *pv_pic_info; +}input_buf_t; + +typedef struct +{ + UWORD8 *pu1_buf; + UWORD32 u4_buf_size; + UWORD32 u4_timestamp_low; + UWORD32 u4_timestamp_high; + UWORD32 u4_is_free; +}output_buf_t; + +typedef struct +{ + UWORD8 *pu1_buf; + UWORD32 u4_buf_size; + UWORD32 u4_timestamp_low; + UWORD32 u4_timestamp_high; + UWORD32 u4_is_free; +}recon_buf_t; + +typedef struct +{ + iv_obj_t *ps_enc; + iv_mem_rec_t *ps_mem_rec; + UWORD32 u4_num_mem_rec; + UWORD32 u4_recon_enable; + UWORD32 u4_chksum_enable; + UWORD32 u4_mb_info_type; + UWORD32 u4_pic_info_type; + UWORD32 u4_mb_info_size; + UWORD32 u4_pic_info_size; + UWORD32 u4_start_frm; + UWORD32 u4_max_num_frms; + UWORD32 u4_total_bytes; + UWORD32 u4_pics_cnt; + IV_COLOR_FORMAT_T e_inp_color_fmt; + IV_COLOR_FORMAT_T e_recon_color_fmt; + IV_ARCH_T e_arch; + IV_SOC_T e_soc; + + WORD32 header_generated; + void *pv_codec_obj; + + UWORD32 u4_num_cores; + UWORD32 u4_pre_enc_me; + UWORD32 u4_pre_enc_ipe; + CHAR ac_ip_fname[STRLENGTH]; + CHAR ac_op_fname[STRLENGTH]; + CHAR ac_recon_fname[STRLENGTH]; + CHAR ac_chksum_fname[STRLENGTH]; + CHAR ac_mb_info_fname[STRLENGTH]; + CHAR ac_pic_info_fname[STRLENGTH]; + + + FILE *fp_ip; + FILE *fp_op; + FILE *fp_recon; + FILE *fp_chksum; + FILE *fp_psnr_ip; + FILE *fp_mb_info; + FILE *fp_pic_info; + FILE *fp_dump_op; + + + UWORD32 u4_loopback; + UWORD32 u4_max_frame_rate; + UWORD32 u4_src_frame_rate; + UWORD32 u4_tgt_frame_rate; + UWORD32 u4_max_wd; + UWORD32 u4_max_ht; + UWORD32 u4_max_level; + + UWORD32 u4_strd; + + UWORD32 u4_wd; + UWORD32 u4_ht; + + UWORD32 u4_psnr_enable; + + + UWORD32 u4_enc_speed; + UWORD32 u4_me_speed; + UWORD32 u4_enable_fast_sad; + UWORD32 u4_enable_alt_ref; + UWORD32 u4_rc; + UWORD32 u4_max_bitrate; + UWORD32 u4_bitrate; + UWORD32 u4_i_qp,u4_i_qp_max,u4_i_qp_min; + UWORD32 u4_p_qp,u4_p_qp_max,u4_p_qp_min; + UWORD32 u4_b_qp,u4_b_qp_max,u4_b_qp_min; + UWORD32 u4_air; + UWORD32 u4_air_refresh_period; + UWORD32 
u4_srch_rng_x; + UWORD32 u4_srch_rng_y; + UWORD32 u4_i_interval; + UWORD32 u4_idr_interval; + UWORD32 u4_b_frames; + UWORD32 u4_disable_deblk_level; + UWORD32 u4_hpel; + UWORD32 u4_qpel; + UWORD32 u4_enable_intra_4x4; + IV_PROFILE_T e_profile; + + UWORD32 u4_slice_mode; + UWORD32 u4_slice_param; + + void *pv_input_thread_handle; + void *pv_output_thread_handle; + void *pv_recon_thread_handle; + + ih264e_ctl_getbufinfo_op_t s_get_buf_info_op; + input_buf_t as_input_buf[DEFAULT_MAX_INPUT_BUFS]; + output_buf_t as_output_buf[DEFAULT_MAX_OUTPUT_BUFS]; + recon_buf_t as_recon_buf[DEFAULT_MAX_RECON_BUFS]; + + DOUBLE adbl_psnr[3]; + UWORD32 u4_psnr_cnt; + UWORD8 *pu1_psnr_buf; + UWORD8 u4_psnr_buf_size; + + UWORD32 u4_vbv_buffer_delay; + UWORD32 u4_vbv_buf_size; + + TIMER enc_start_time; + TIMER enc_last_time; + WORD32 avg_time; + + +} app_ctxt_t; + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ +void codec_exit(CHAR *pc_err_message); +void allocate_input(app_ctxt_t *ps_app_ctxt); +void allocate_output(app_ctxt_t *ps_app_ctxt); +void allocate_recon(app_ctxt_t *ps_app_ctxt); + +IV_STATUS_T read_input(FILE *fp, iv_raw_buf_t *ps_raw_buf); +IV_STATUS_T write_recon(FILE *fp, iv_raw_buf_t *ps_raw_buf); +IV_STATUS_T write_output(FILE *fp, UWORD8 *pu1_buf, WORD32 num_bytes); + +IV_STATUS_T read_mb_info(app_ctxt_t *ps_app_ctxt, void *pv_mb_info); +IV_STATUS_T read_pic_info(app_ctxt_t *ps_app_ctxt, void *pv_pic_info); + +void * ih264a_aligned_malloc(WORD32 alignment, WORD32 size); +void ih264a_aligned_free(void *pv_buf); + +void free_input(app_ctxt_t *ps_app_ctxt); +void free_recon(app_ctxt_t *ps_app_ctxt); +void free_output(app_ctxt_t *ps_app_ctxt); + +void init_raw_buf_descr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_raw_buf, UWORD8 *pu1_buf, IV_COLOR_FORMAT_T e_color_fmt); + +#ifndef MD5_DISABLE +void calc_md5_cksum(UWORD8 
*pu1_inbuf,UWORD32 u4_stride,UWORD32 u4_width,UWORD32 u4_height,UWORD8 *pu1_cksum_p ); +#else +#define calc_md5_cksum(a, b, c, d, e) +#endif + +#endif /* _APP_H_ */ diff --git a/test/encoder/input.c b/test/encoder/input.c new file mode 100755 index 0000000..aa52b45 --- /dev/null +++ b/test/encoder/input.c @@ -0,0 +1,312 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <string.h> +#include <sys/time.h> + +/* User include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264e.h" +#include "app.h" + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + + +/*****************************************************************************/ +/* Macros */ +/*****************************************************************************/ + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +IV_STATUS_T read_pic_info(app_ctxt_t *ps_app_ctxt, void *pv_pic_info) +{ + IV_STATUS_T ret = IV_SUCCESS; + WORD32 size, bytes; + + switch(ps_app_ctxt->u4_pic_info_type) + { + case 1: + size = sizeof(ih264e_pic_info1_t); + ps_app_ctxt->u4_pic_info_size = sizeof(ih264e_pic_info1_t); + break; + case 2: + size = sizeof(ih264e_pic_info2_t); + ps_app_ctxt->u4_pic_info_size = sizeof(ih264e_pic_info2_t); + break; + default: + size = 0; + break; + } + + bytes = fread(pv_pic_info, 1, size, ps_app_ctxt->fp_pic_info); + if(bytes != size) + ret = IV_FAIL; + + return ret; +} + +IV_STATUS_T read_mb_info(app_ctxt_t *ps_app_ctxt, void *pv_mb_info) +{ + IV_STATUS_T ret = IV_SUCCESS; + WORD32 num_mbs; + WORD32 size; + WORD32 bytes; + + num_mbs = ALIGN16(ps_app_ctxt->u4_wd) * ALIGN16(ps_app_ctxt->u4_ht); + num_mbs /= 256; + + switch(ps_app_ctxt->u4_mb_info_type) + { + case 1: + size = sizeof(ih264e_mb_info1_t) * num_mbs; + ps_app_ctxt->u4_mb_info_size = 
sizeof(ih264e_mb_info1_t); + break; + case 2: + size = sizeof(ih264e_mb_info2_t) * num_mbs; + ps_app_ctxt->u4_mb_info_size = sizeof(ih264e_mb_info2_t); + break; + case 3: + size = sizeof(ih264e_mb_info3_t) * num_mbs; + ps_app_ctxt->u4_mb_info_size = sizeof(ih264e_mb_info3_t); + break; + case 4: + size = sizeof(ih264e_mb_info4_t) * num_mbs; + ps_app_ctxt->u4_mb_info_size = sizeof(ih264e_mb_info4_t); + break; + default: + size = 0; + break; + } + + bytes = fread(pv_mb_info, 1, size, ps_app_ctxt->fp_mb_info); + if(bytes != size) + ret = IV_FAIL; + + return ret; +} + +IV_STATUS_T read_input(FILE *fp, iv_raw_buf_t *ps_raw_buf) +{ + WORD32 bytes; + WORD32 wd, ht, strd; + UWORD8 *pu1_buf; + WORD32 i; + WORD32 comp; + WORD32 num_comp; + + if (IV_YUV_422ILE == ps_raw_buf->e_color_fmt) + { + wd = ps_raw_buf->au4_wd[0]; + ht = ps_raw_buf->au4_ht[0]; + strd = ps_raw_buf->au4_strd[0]; + pu1_buf = ps_raw_buf->apv_bufs[0]; + + for(i = 0; i < ht; i++) + { + bytes = fread(pu1_buf, sizeof(UWORD8), wd, fp); + if(bytes != wd ) + { + return(IV_FAIL); + } + pu1_buf += strd; + } + } + else + { + num_comp = 2; + + if(IV_YUV_420P == ps_raw_buf->e_color_fmt) + num_comp = 3; + + for(comp = 0; comp < num_comp; comp++) + { + wd = ps_raw_buf->au4_wd[comp]; + ht = ps_raw_buf->au4_ht[comp]; + strd = ps_raw_buf->au4_strd[comp]; + pu1_buf = ps_raw_buf->apv_bufs[comp]; + + for(i = 0; i < ht; i++) + { + bytes = fread(pu1_buf, sizeof(UWORD8), wd, fp); + if(bytes != wd) + { + return(IV_FAIL); + } + pu1_buf += strd; + } + } + } + return IV_SUCCESS; +} + + +IV_STATUS_T dump_input(FILE *fp, iv_raw_buf_t *ps_raw_buf) +{ + WORD32 bytes; + WORD32 wd, ht, strd; + UWORD8 *pu1_buf; + WORD32 i; + WORD32 comp; + WORD32 num_comp; + + if (IV_YUV_422ILE == ps_raw_buf->e_color_fmt) + { + wd = ps_raw_buf->au4_wd[0]; + ht = ps_raw_buf->au4_ht[0]; + strd = ps_raw_buf->au4_strd[0]; + pu1_buf = ps_raw_buf->apv_bufs[0]; + + for(i = 0; i < ht; i++) + { + bytes = fwrite(pu1_buf, sizeof(UWORD8), wd, fp); + if(bytes != wd ) + 
{ + return(IV_FAIL); + } + pu1_buf += strd; + } + } + else + { + num_comp = 2; + + if(IV_YUV_420P == ps_raw_buf->e_color_fmt) + num_comp = 3; + + for(comp = 0; comp < num_comp; comp++) + { + wd = ps_raw_buf->au4_wd[comp]; + ht = ps_raw_buf->au4_ht[comp]; + strd = ps_raw_buf->au4_strd[comp]; + pu1_buf = ps_raw_buf->apv_bufs[comp]; + + for(i = 0; i < ht; i++) + { + bytes = fwrite(pu1_buf, sizeof(UWORD8), wd, fp); + if(bytes != wd) + { + return(IV_FAIL); + } + pu1_buf += strd; + } + } + } + return IV_SUCCESS; +} + +void allocate_input(app_ctxt_t *ps_app_ctxt) +{ + + WORD32 num_bufs; + WORD32 pic_size; + WORD32 luma_size; + WORD32 chroma_size; + WORD32 num_mbs; + WORD32 i; + UWORD8 *pu1_buf[3]; + + ih264e_ctl_getbufinfo_op_t *ps_get_buf_info_op = &ps_app_ctxt->s_get_buf_info_op; + + num_bufs = MAX(DEFAULT_NUM_INPUT_BUFS, ps_get_buf_info_op->s_ive_op.u4_min_inp_bufs); + num_bufs = MIN(DEFAULT_MAX_INPUT_BUFS, num_bufs); + + /* Size of buffer */ + luma_size = ps_get_buf_info_op->s_ive_op.au4_min_in_buf_size[0]; + chroma_size = ps_get_buf_info_op->s_ive_op.au4_min_in_buf_size[1]+ + ps_get_buf_info_op->s_ive_op.au4_min_in_buf_size[2]; + + pic_size = luma_size + chroma_size; + + num_mbs = ALIGN16(ps_app_ctxt->u4_max_wd) * ALIGN16(ps_app_ctxt->u4_max_ht); + num_mbs /= 256; + + /* Memset the input buffer array to set is_free to 0 */ + memset(ps_app_ctxt->as_input_buf, 0, sizeof(input_buf_t) * DEFAULT_MAX_INPUT_BUFS); + + for(i = 0; i < num_bufs; i++) + { + pu1_buf[0] = (UWORD8 *)ih264a_aligned_malloc(16, pic_size); + if(NULL == pu1_buf[0]) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Allocation failed for input buffer of size %d\n", + pic_size); + codec_exit(ac_error); + } + ps_app_ctxt->as_input_buf[i].pu1_buf = pu1_buf[0]; + + pu1_buf[0] = (UWORD8 *)ih264a_aligned_malloc(16, num_mbs * sizeof(ih264e_mb_info_t)); + if(NULL == pu1_buf[0]) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Allocation failed for mb info buffer of size %d\n", + (WORD32)(num_mbs * 
sizeof(ih264e_mb_info_t))); + codec_exit(ac_error); + } + ps_app_ctxt->as_input_buf[i].pv_mb_info = pu1_buf[0]; + pu1_buf[0] = (UWORD8 *)ih264a_aligned_malloc(16, sizeof(ih264e_pic_info2_t)); + if(NULL == pu1_buf[0]) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Allocation failed for pic info buffer of size %d\n", + (WORD32) sizeof(ih264e_pic_info2_t)); + codec_exit(ac_error); + } + ps_app_ctxt->as_input_buf[i].pv_pic_info = pu1_buf[0]; + ps_app_ctxt->as_input_buf[i].u4_buf_size = pic_size; + ps_app_ctxt->as_input_buf[i].u4_is_free = 1; + } + return; +} + + +void free_input(app_ctxt_t *ps_app_ctxt) +{ + + WORD32 num_bufs; + WORD32 i; + + num_bufs = MAX(DEFAULT_NUM_INPUT_BUFS, ps_app_ctxt->s_get_buf_info_op.s_ive_op.u4_min_inp_bufs); + num_bufs = MIN(DEFAULT_MAX_INPUT_BUFS, num_bufs); + + for(i = 0; i < num_bufs; i++) + { + ih264a_aligned_free(ps_app_ctxt->as_input_buf[i].pu1_buf); + ih264a_aligned_free(ps_app_ctxt->as_input_buf[i].pv_mb_info); + ih264a_aligned_free(ps_app_ctxt->as_input_buf[i].pv_pic_info); + } + return; +} + diff --git a/test/encoder/main.c b/test/encoder/main.c new file mode 100755 index 0000000..b02958a --- /dev/null +++ b/test/encoder/main.c @@ -0,0 +1,2512 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <assert.h> +#include <string.h> +#include <sys/time.h> +#include <malloc.h> + +#ifdef WINDOWS_TIMER +#include "windows.h" +#endif +/* User include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264e.h" +#include "app.h" +#include "psnr.h" + +/* Function declarations */ +#ifndef MD5_DISABLE +void calc_md5_cksum(UWORD8 *pu1_inbuf,UWORD32 u4_stride,UWORD32 u4_width,UWORD32 u4_height,UWORD8 *pu1_cksum_p ); +#else +#define calc_md5_cksum(a, b, c, d, e) +#endif + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ +typedef enum +{ + INVALID, + HELP, + INPUT_FILE, + OUTPUT_FILE, + RECON_FILE, + RECON_ENABLE, + CHKSUM_ENABLE, + CHKSUM_FILE, + INPUT_CHROMA_FORMAT, + RECON_CHROMA_FORMAT, + MAX_WD, + MAX_HT, + WD, + HT, + MAX_LEVEL, + ENC_SPEED, + ME_SPEED, + START_FRM, + NUM_FRMS, + MAX_FRAMERATE, + SRC_FRAMERATE, + TGT_FRAMERATE, + RC, + MAX_BITRATE, + BITRATE, + I_QP, + P_QP, + B_QP, + I_QP_MAX, + P_QP_MAX, + B_QP_MAX, + I_QP_MIN, + P_QP_MIN, + B_QP_MIN, + AIR, + AIR_REFRESH_PERIOD, + ARCH, + SOC, + NUMCORES, + PRE_ENC_ME, + PRE_ENC_IPE, + HPEL, + QPEL, + SRCH_RNG_X, + SRCH_RNG_Y, + I_INTERVAL, + IDR_INTERVAL, + B_FRMS, + DISABLE_DBLK, + PROFILE, + FAST_SAD, + ALT_REF, + DISABLE_DEBLOCK_LEVEL, + PSNR, + SLICE_MODE, + SLICE_PARAM, + CONFIG, + LOOPBACK, + VBV_DELAY, + VBV_SIZE, + INTRA_4x4_ENABLE, + MB_INFO_FILE, + MB_INFO_TYPE, + PIC_INFO_FILE, + PIC_INFO_TYPE, +} 
ARGUMENT_T; + +typedef struct +{ + CHAR argument_shortname[8]; + CHAR argument_name[128]; + ARGUMENT_T argument; + CHAR description[512]; +} argument_t; + +static const argument_t argument_mapping[] = + { + { "--", "--help", HELP, "Print this help\n" }, + { "-i", "--input", INPUT_FILE, "Input file\n" }, + { "-o", "--output", OUTPUT_FILE, "Output file\n" }, + { "--", "--recon_enable", RECON_ENABLE, "Recon enable flag\n" }, + { "-r", "--recon", RECON_FILE, "Recon file \n" }, + { "--", "--input_chroma_format", INPUT_CHROMA_FORMAT, + "Input Chroma format Supported values YUV_420P, YUV_420SP_UV, YUV_420SP_VU\n" }, + { "--", "--recon_chroma_format", RECON_CHROMA_FORMAT, + "Recon Chroma format Supported values YUV_420P, YUV_420SP_UV, YUV_420SP_VU\n" }, + { "-w", "--width", WD, "Width of input file\n" }, + { "-h", "--height", HT, "Height file\n" }, + { "--", "--start_frame", START_FRM, "Starting frame number\n" }, + { "-f", "--num_frames", NUM_FRMS, "Number of frames to be encoded\n" }, + { "--", "--rc", RC, "Rate control mode 0: Constant Qp, 1: Storage, 2: CBR non low delay, 3: CBR low delay \n" }, + { "--", "--max_framerate", MAX_FRAMERATE, "Maximum frame rate \n" }, + { "--", "--tgt_framerate", TGT_FRAMERATE, "Target frame rate \n" }, + { "--", "--src_framerate", SRC_FRAMERATE, "Source frame rate \n" }, + { "--", "--i_interval", I_INTERVAL, "Intra frame interval \n" }, + { "--", "--idr_interval", IDR_INTERVAL, "IDR frame interval \n" }, + { "--", "--bframes", B_FRMS, "Consecutive B frames \n" }, + { "--", "--speed", ENC_SPEED, "Encoder speed preset 0 (slowest) and 100 (fastest)\n" }, + { "--", "--me_speed", ME_SPEED, "Encoder speed preset 0 (slowest) and 100 (fastest)\n" }, + { "--", "--fast_sad", FAST_SAD, " Flag for faster sad execution\n" }, + { "--", "--alt_ref", ALT_REF , "Flag to enable alternate refernce frames"}, + { "--", "--hpel", HPEL, "Flag to enable/disable Quarter pel estimation \n" }, + { "--", "--qpel", QPEL, "Flag to enable/disable Quarter pel 
estimation \n" }, + { "--", "--disable_deblock_level", DISABLE_DEBLOCK_LEVEL, + "Disable deblock level - 0 : Enables deblock completely, 1: enables for I and 8th frame , 2: Enables for I only, 3 : disables completely\n" }, + { "--", "--search_range_x", SRCH_RNG_X, "Search range for X \n" }, + { "--", "--search_range_y", SRCH_RNG_Y, "Search range for Y \n" }, + { "--", "--psnr", PSNR, "Enable PSNR computation (Disable while benchmarking performance) \n" }, + { "--", "--pre_enc_me", PRE_ENC_ME, "Flag to enable/disable Pre Enc Motion Estimation\n" }, + { "--", "--pre_enc_ipe", PRE_ENC_IPE, "Flag to enable/disable Pre Enc Intra prediction Estimation\n" }, + { "-n", "--num_cores", NUMCORES, "Number of cores to be used\n" }, + { "--", "--adaptive_intra_refresh", AIR ,"Adaptive Intra Refresh enable/disable\n"}, + { "--", "--air_refresh_period", AIR_REFRESH_PERIOD,"adaptive intra refresh period\n"}, + { "--", "--slice", SLICE_MODE, "Slice mode- 0 :No slice, 1: Bytes per slice, 2: MB/CTB per slice \n" }, + { "--", "--slice_param", SLICE_PARAM, "Slice param value based on slice mode. Slice mode of 1 implies number of bytes per slice, 2 implies number of MBs/CTBs, for 0 value is neglected \n" }, + { "--", "--max_wd", MAX_WD, "Maximum width (Default: 1920) \n" }, + { "--", "--max_ht", MAX_HT, "Maximum height (Default: 1088)\n" }, + { "--", "--max_level", MAX_LEVEL, "Maximum Level (Default: 50)\n" }, + { "--", "--arch", ARCH, "Set Architecture. Supported values ARM_NONEON, ARM_A9Q, ARM_A7, ARM_A5, ARM_NEONINTR, X86_GENERIC, X86_SSSE3, X86_SSE4 \n" }, + { "--", "--soc", SOC, "Set SOC. 
Supported values GENERIC, HISI_37X \n" }, + { "--", "--chksum", CHKSUM_FILE, "Save Check sum file for recon data\n" }, + { "--", "--chksum_enable", CHKSUM_ENABLE, "Recon MD5 Checksum file\n"}, + { "-c", "--config", CONFIG, "config file (Default: enc.cfg)\n" }, + { "--", "--loopback", LOOPBACK, "Enable encoding in a loop\n" }, + { "--", "--profile", PROFILE, "Profile mode: Supported values BASE, MAIN, HIGH\n" }, + { "--", "--max_bitrate", MAX_BITRATE, "Max bitrate\n"}, + { "--", "--bitrate", BITRATE, "Target bitrate\n"}, + { "--", "--qp_i", I_QP, "QP for I frames\n"}, + { "--", "--qp_p", P_QP, "QP for P frames\n"}, + { "--", "--qp_b", B_QP, "QP for B frames\n"}, + { "--", "--qp_i_max", I_QP_MAX, "Max QP for I frames\n"}, + { "--", "--qp_p_max", P_QP_MAX, "Max QP for P frames\n"}, + { "--", "--qp_b_max", B_QP_MAX, "Max QP for B frames\n"}, + { "--", "--qp_i_min", I_QP_MIN, "Min QP for I frames\n"}, + { "--", "--qp_p_min", P_QP_MIN, "Min QP for P frames\n"}, + { "--", "--qp_b_min", B_QP_MIN, "Min QP for B frames\n"}, + { "--", "--vbv_delay", VBV_DELAY, "VBV buffer delay\n"}, + { "--", "--vbv_size", VBV_SIZE, "VBV buffer size\n"}, + { "-i4", "--intra_4x4_enable", INTRA_4x4_ENABLE, "Intra 4x4 enable \n" }, + { "--", "--mb_info_file", MB_INFO_FILE, "MB info file\n"}, + { "--", "--mb_info_type", MB_INFO_TYPE, "MB info type\n"}, + { "--", "--pic_info_file", PIC_INFO_FILE, "Pic info file\n"}, + { "--", "--pic_info_type", PIC_INFO_TYPE, "Pic info type\n"}, + }; + + + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ + + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + + +#if(defined X86) && (defined X86_MINGW) +/*****************************************************************************/ +/* 
Function to print library calls */ +/*****************************************************************************/ +/*****************************************************************************/ +/* */ +/* Function Name : memalign */ +/* */ +/* Description : Returns malloc data. Ideally should return aligned memory*/ +/* support alignment will be added later */ +/* */ +/* Inputs : alignment */ +/* size */ +/* Globals : */ +/* Processing : */ +/* */ +/* Outputs : */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +void * ih264a_aligned_malloc(WORD32 alignment, WORD32 size) +{ + return _aligned_malloc(size, alignment); +} + +void ih264a_aligned_free(void *pv_buf) +{ + _aligned_free(pv_buf); + return; +} + +#elif IOS + +void * ih264a_aligned_malloc(WORD32 alignment, WORD32 size) +{ + return malloc(size); +} + +void ih264a_aligned_free(void *pv_buf) +{ + free(pv_buf); + return; +} + +#else + +void * ih264a_aligned_malloc(WORD32 alignment, WORD32 size) +{ + return memalign(alignment, size); +} + +void ih264a_aligned_free(void *pv_buf) +{ + free(pv_buf); + return; +} + +#endif + +/*****************************************************************************/ +/* */ +/* Function Name : codec_exit */ +/* */ +/* Description : handles unrecoverable errors */ +/* Inputs : Error message */ +/* Globals : None */ +/* Processing : Prints error message to console and exits. 
*/ +/* Outputs : Error message to the console */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 07 06 2006 Sankar Creation */ +/* */ +/*****************************************************************************/ +void codec_exit(CHAR *pc_err_message) +{ + printf("%s\n", pc_err_message); + exit(-1); +} + +/*****************************************************************************/ +/* */ +/* Function Name : codec_exit */ +/* */ +/* Description : handles unrecoverable errors */ +/* Inputs : Error message */ +/* Globals : None */ +/* Processing : Prints error message to console and exits. */ +/* Outputs : Error mesage to the console */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 07 06 2006 Sankar Creation */ +/* */ +/*****************************************************************************/ +IV_COLOR_FORMAT_T get_chroma_fmt(CHAR *value) +{ + IV_COLOR_FORMAT_T e_chroma_format; + if((strcmp(value, "YUV_420P")) == 0) + e_chroma_format = IV_YUV_420P; + else if((strcmp(value, "YUV_422ILE")) == 0) + e_chroma_format = IV_YUV_422ILE; + else if((strcmp(value, "RGB_565")) == 0) + e_chroma_format = IV_RGB_565; + else if((strcmp(value, "RGBA_8888")) == 0) + e_chroma_format = IV_RGBA_8888; + else if((strcmp(value, "YUV_420SP_UV")) == 0) + e_chroma_format = IV_YUV_420SP_UV; + else if((strcmp(value, "YUV_420SP_VU")) == 0) + e_chroma_format = IV_YUV_420SP_VU; + else + { + printf("\nInvalid colour format setting it to IV_YUV_420P\n"); + e_chroma_format = IV_YUV_420P; + } + return e_chroma_format; +} + +/*****************************************************************************/ +/* */ +/* Function Name : codec_exit */ +/* */ +/* Description : handles unrecoverable errors */ +/* Inputs : Error message */ +/* Globals : None */ +/* Processing : Prints error message 
to console and exits. */ +/* Outputs : Error mesage to the console */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 07 06 2006 Sankar Creation */ +/* */ +/*****************************************************************************/ +IVE_SPEED_CONFIG get_speed_preset(CHAR *value) +{ + IVE_SPEED_CONFIG e_enc_speed_preset; + if((strcmp(value, "CONFIG")) == 0) + e_enc_speed_preset = IVE_CONFIG; + else if((strcmp(value, "SLOWEST")) == 0) + e_enc_speed_preset = IVE_SLOWEST; + else if((strcmp(value, "NORMAL")) == 0) + e_enc_speed_preset = IVE_NORMAL; + else if((strcmp(value, "FAST")) == 0) + e_enc_speed_preset = IVE_FAST; + else if((strcmp(value, "HIGH_SPEED")) == 0) + e_enc_speed_preset = IVE_HIGH_SPEED; + else if((strcmp(value, "FASTEST")) == 0) + e_enc_speed_preset = IVE_FASTEST; + else + { + printf("\nInvalid speed preset, setting it to IVE_FASTEST\n"); + e_enc_speed_preset = IVE_FASTEST; + } + return e_enc_speed_preset; +} + +/*****************************************************************************/ +/* */ +/* Function Name : print_usage */ +/* */ +/* Description : Prints argument format */ +/* */ +/* */ +/* Inputs : */ +/* Globals : */ +/* Processing : Prints argument format */ +/* */ +/* Outputs : */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +void print_usage(void) +{ + WORD32 i = 0; + WORD32 num_entries = sizeof(argument_mapping) / sizeof(argument_t); + printf("\nUsage:\n"); + while(i < num_entries) + { + printf("%-32s\t %s", argument_mapping[i].argument_name, + argument_mapping[i].description); + i++; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : get_argument */ +/* */ +/* 
Description : Gets argument for a given string */ +/* */ +/* */ +/* Inputs : name */ +/* Globals : */ +/* Processing : Searches the given string in the array and returns */ +/* appropriate argument ID */ +/* */ +/* Outputs : Argument ID */ +/* Returns : Argument ID */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ +ARGUMENT_T get_argument(CHAR *name) +{ + WORD32 i = 0; + WORD32 num_entries = sizeof(argument_mapping) / sizeof(argument_t); + while(i < num_entries) + { + if((0 == strcmp(argument_mapping[i].argument_name, name)) || + ((0 == strcmp(argument_mapping[i].argument_shortname, name)) && + (0 != strcmp(argument_mapping[i].argument_shortname, "--")))) + { + return argument_mapping[i].argument; + } + i++; + } + return INVALID; +} + +/*****************************************************************************/ +/* */ +/* Function Name : get_argument */ +/* */ +/* Description : Gets argument for a given string */ +/* */ +/* */ +/* Inputs : name */ +/* Globals : */ +/* Processing : Searches the given string in the array and returns */ +/* appropriate argument ID */ +/* */ +/* Outputs : Argument ID */ +/* Returns : Argument ID */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ +void parse_argument(app_ctxt_t *ps_app_ctxt, CHAR *argument, CHAR *value) +{ + ARGUMENT_T arg; + + arg = get_argument(argument); + switch(arg) + { + case HELP: + print_usage(); + exit(-1); + break; + case SLICE_MODE: + sscanf(value, "%d", &ps_app_ctxt->u4_slice_mode); + break; + case SLICE_PARAM: + sscanf(value, "%d", &ps_app_ctxt->u4_slice_param); + break; + case INPUT_FILE: + sscanf(value, "%s", ps_app_ctxt->ac_ip_fname); + break; + + 
case OUTPUT_FILE: + sscanf(value, "%s", ps_app_ctxt->ac_op_fname); + break; + + case RECON_FILE: + sscanf(value, "%s", ps_app_ctxt->ac_recon_fname); + break; + + case RECON_ENABLE: + sscanf(value, "%d", &ps_app_ctxt->u4_recon_enable); + break; + + case CHKSUM_FILE: + sscanf(value, "%s", ps_app_ctxt->ac_chksum_fname); + break; + + case CHKSUM_ENABLE: + sscanf(value, "%d", &ps_app_ctxt->u4_chksum_enable); + break; + + case MB_INFO_FILE: + sscanf(value, "%s", ps_app_ctxt->ac_mb_info_fname); + break; + + case MB_INFO_TYPE: + sscanf(value, "%d", &ps_app_ctxt->u4_mb_info_type); + break; + + case PIC_INFO_FILE: + sscanf(value, "%s", ps_app_ctxt->ac_pic_info_fname); + break; + + case PIC_INFO_TYPE: + sscanf(value, "%d", &ps_app_ctxt->u4_pic_info_type); + break; + + case INPUT_CHROMA_FORMAT: + ps_app_ctxt->e_inp_color_fmt = get_chroma_fmt(value); + break; + + case RECON_CHROMA_FORMAT: + ps_app_ctxt->e_recon_color_fmt = get_chroma_fmt(value); + break; + + case MAX_WD: + sscanf(value, "%d", &ps_app_ctxt->u4_max_wd); + break; + + case MAX_HT: + sscanf(value, "%d", &ps_app_ctxt->u4_max_ht); + break; + + case WD: + sscanf(value, "%d", &ps_app_ctxt->u4_wd); + break; + + case HT: + sscanf(value, "%d", &ps_app_ctxt->u4_ht); + break; + + case MAX_LEVEL: + sscanf(value, "%d", &ps_app_ctxt->u4_max_level); + break; + + case ENC_SPEED: + ps_app_ctxt->u4_enc_speed = get_speed_preset(value); + break; + + case ME_SPEED: + sscanf(value, "%d", &ps_app_ctxt->u4_me_speed); + break; + + case START_FRM: + sscanf(value, "%d", &ps_app_ctxt->u4_start_frm); + break; + + case NUM_FRMS: + sscanf(value, "%d", &ps_app_ctxt->u4_max_num_frms); + break; + + case MAX_FRAMERATE: + sscanf(value, "%d", &ps_app_ctxt->u4_max_frame_rate); + if(ps_app_ctxt->u4_max_frame_rate <= 0) + ps_app_ctxt->u4_max_frame_rate = DEFAULT_MAX_FRAMERATE; + break; + + case SRC_FRAMERATE: + sscanf(value, "%d", &ps_app_ctxt->u4_src_frame_rate); + if(ps_app_ctxt->u4_src_frame_rate <= 0) + ps_app_ctxt->u4_src_frame_rate = 
DEFAULT_SRC_FRAME_RATE; + break; + + case TGT_FRAMERATE: + sscanf(value, "%d", &ps_app_ctxt->u4_tgt_frame_rate); + if(ps_app_ctxt->u4_tgt_frame_rate <= 0) + ps_app_ctxt->u4_tgt_frame_rate = DEFAULT_TGT_FRAME_RATE; + break; + + case RC: + sscanf(value, "%d", &ps_app_ctxt->u4_rc); + break; + + case MAX_BITRATE: + sscanf(value, "%d", &ps_app_ctxt->u4_max_bitrate); + break; + + case BITRATE: + sscanf(value, "%d", &ps_app_ctxt->u4_bitrate); + break; + + case I_QP: + sscanf(value, "%d", &ps_app_ctxt->u4_i_qp); + break; + + case I_QP_MAX: + sscanf(value, "%d", &ps_app_ctxt->u4_i_qp_max); + break; + + case I_QP_MIN: + sscanf(value, "%d", &ps_app_ctxt->u4_i_qp_min); + break; + + case P_QP: + sscanf(value, "%d", &ps_app_ctxt->u4_p_qp); + break; + + case P_QP_MAX: + sscanf(value, "%d", &ps_app_ctxt->u4_p_qp_max); + break; + + case P_QP_MIN: + sscanf(value, "%d", &ps_app_ctxt->u4_p_qp_min); + break; + + case B_QP: + sscanf(value, "%d", &ps_app_ctxt->u4_b_qp); + break; + + case B_QP_MAX: + sscanf(value, "%d", &ps_app_ctxt->u4_b_qp_max); + break; + + case B_QP_MIN: + sscanf(value, "%d", &ps_app_ctxt->u4_b_qp_min); + break; + + case AIR: + sscanf(value, "%d", &ps_app_ctxt->u4_air); + break; + + case ARCH: + if((strcmp(value, "ARM_NONEON")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_NONEON; + else if((strcmp(value, "ARM_A9Q")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_A9Q; + else if((strcmp(value, "ARM_A7")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_A7; + else if((strcmp(value, "ARM_A5")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_A5; + else if((strcmp(value, "ARM_NEONINTR")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_NEONINTR; + else if((strcmp(value, "X86_GENERIC")) == 0) + ps_app_ctxt->e_arch = ARCH_X86_GENERIC; + else if((strcmp(value, "X86_SSSE3")) == 0) + ps_app_ctxt->e_arch = ARCH_X86_SSSE3; + else if((strcmp(value, "X86_SSE42")) == 0) + ps_app_ctxt->e_arch = ARCH_X86_SSE42; + else if((strcmp(value, "ARM_A53")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_A53; + else if((strcmp(value, "ARM_A57")) 
== 0) + ps_app_ctxt->e_arch = ARCH_ARM_A57; + else if((strcmp(value, "ARM_V8_NEON")) == 0) + ps_app_ctxt->e_arch = ARCH_ARM_V8_NEON; + else + { + printf("\nInvalid Arch. Setting it to ARM_A9Q\n"); + ps_app_ctxt->e_arch = ARCH_ARM_A9Q; + } + + break; + case SOC: + if((strcmp(value, "GENERIC")) == 0) + ps_app_ctxt->e_soc = SOC_GENERIC; + else if((strcmp(value, "HISI_37X")) == 0) + ps_app_ctxt->e_soc = SOC_HISI_37X; + else + { + ps_app_ctxt->e_soc = SOC_GENERIC; + } + break; + + case NUMCORES: + sscanf(value, "%d", &ps_app_ctxt->u4_num_cores); + break; + + case LOOPBACK: + sscanf(value, "%d", &ps_app_ctxt->u4_loopback); + break; + + case PRE_ENC_ME: + sscanf(value, "%d", &ps_app_ctxt->u4_pre_enc_me); + break; + + case PRE_ENC_IPE: + sscanf(value, "%d", &ps_app_ctxt->u4_pre_enc_ipe); + break; + + case HPEL: + sscanf(value, "%d", &ps_app_ctxt->u4_hpel); + break; + + case QPEL: + sscanf(value, "%d", &ps_app_ctxt->u4_qpel); + break; + + case SRCH_RNG_X: + sscanf(value, "%d", &ps_app_ctxt->u4_srch_rng_x); + break; + + case SRCH_RNG_Y: + sscanf(value, "%d", &ps_app_ctxt->u4_srch_rng_y); + break; + + case I_INTERVAL: + sscanf(value, "%d", &ps_app_ctxt->u4_i_interval); + break; + + case IDR_INTERVAL: + sscanf(value, "%d", &ps_app_ctxt->u4_idr_interval); + break; + + case B_FRMS: + sscanf(value, "%d", &ps_app_ctxt->u4_b_frames); + break; + + case DISABLE_DEBLOCK_LEVEL: + sscanf(value, "%d", &ps_app_ctxt->u4_disable_deblk_level); + break; + + case VBV_DELAY: + sscanf(value, "%d", &ps_app_ctxt->u4_vbv_buffer_delay); + break; + + case VBV_SIZE: + sscanf(value, "%d", &ps_app_ctxt->u4_vbv_buf_size); + break; + + case FAST_SAD: + sscanf(value, "%d", &ps_app_ctxt->u4_enable_fast_sad); + break; + + case ALT_REF: + sscanf(value, "%d", &ps_app_ctxt->u4_enable_alt_ref); + break; + + case AIR_REFRESH_PERIOD: + sscanf(value, "%d", &ps_app_ctxt->u4_air_refresh_period); + break; + + case PROFILE: + if((strcmp(value, "BASE")) == 0) + ps_app_ctxt->e_profile = IV_PROFILE_BASE; + else 
if((strcmp(value, "MAIN")) == 0) + ps_app_ctxt->e_profile = IV_PROFILE_MAIN; + else if((strcmp(value, "HIGH")) == 0) + ps_app_ctxt->e_profile = IV_PROFILE_HIGH; + else + { + printf("\nInvalid profile. Setting it to BASE\n"); + ps_app_ctxt->e_profile = IV_PROFILE_BASE; + } + break; + + case PSNR: + sscanf(value, "%d", &ps_app_ctxt->u4_psnr_enable); + break; + + case INTRA_4x4_ENABLE: + sscanf(value, "%d", &ps_app_ctxt->u4_enable_intra_4x4); + break; + + + case INVALID: + default: + printf("Ignoring argument : %s\n", argument); + break; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : read_cfg_file */ +/* */ +/* Description : Reads arguments from a configuration file */ +/* */ +/* */ +/* Inputs : ps_app_ctxt : Application context */ +/* fp_cfg_file : Configuration file handle */ +/* Globals : */ +/* Processing : Parses the arguments and fills in the application context*/ +/* */ +/* Outputs : Arguments parsed */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ +void read_cfg_file(app_ctxt_t *ps_app_ctxt, FILE *fp_cfg) +{ + CHAR line[STRLENGTH]; + CHAR description[STRLENGTH]; + CHAR value[STRLENGTH]; + CHAR argument[STRLENGTH]; + + while(0 == (feof(fp_cfg))) + { + line[0] = '\0'; + fgets(line, STRLENGTH, fp_cfg); + argument[0] = '\0'; + /* Reading Input File Name */ + sscanf(line, "%s %s %s", argument, value, description); + if(argument[0] == '\0') + continue; + + parse_argument(ps_app_ctxt, argument, value); + } +} + +void invalid_argument_exit(CHAR *pc_err_message) +{ + print_usage(); + codec_exit(pc_err_message); +} + +void validate_params(app_ctxt_t *ps_app_ctxt) +{ + CHAR ac_error[STRLENGTH]; + + if(ps_app_ctxt->ac_ip_fname[0] == '\0') + { + invalid_argument_exit("Specify input file"); + } + 
if(ps_app_ctxt->ac_op_fname[0] == '\0') + { + invalid_argument_exit("Specify output file"); + } + if((1 == ps_app_ctxt->u4_recon_enable) && (ps_app_ctxt->ac_recon_fname[0] == '\0')) + { + invalid_argument_exit("Specify recon file"); + } + if((1 == ps_app_ctxt->u4_chksum_enable) && (ps_app_ctxt->ac_chksum_fname[0] == '\0')) + { + invalid_argument_exit("Specify checksum file"); + } + if(0 >= (WORD32)ps_app_ctxt->u4_wd) + { + sprintf(ac_error, "Invalid width: %d", ps_app_ctxt->u4_wd); + invalid_argument_exit(ac_error); + } + if(0 >= (WORD32)ps_app_ctxt->u4_ht) + { + sprintf(ac_error, "Invalid height: %d", ps_app_ctxt->u4_ht); + invalid_argument_exit(ac_error); + } + + if(0 == (WORD32)ps_app_ctxt->u4_max_num_frms) + { + sprintf(ac_error, "Invalid number of frames to be encoded: %d", ps_app_ctxt->u4_max_num_frms); + invalid_argument_exit(ac_error); + } + + return; +} + +void init_default_params(app_ctxt_t *ps_app_ctxt) +{ + + ps_app_ctxt->ps_enc = NULL; + ps_app_ctxt->ps_mem_rec = NULL; + ps_app_ctxt->u4_num_mem_rec = DEFAULT_MEM_REC_CNT; + ps_app_ctxt->u4_recon_enable = DEFAULT_RECON_ENABLE; + ps_app_ctxt->u4_chksum_enable = DEFAULT_CHKSUM_ENABLE; + ps_app_ctxt->u4_mb_info_type = 0; + ps_app_ctxt->u4_pic_info_type = 0; + ps_app_ctxt->u4_mb_info_size = 0; + ps_app_ctxt->u4_pic_info_size = 0; + ps_app_ctxt->u4_start_frm = DEFAULT_START_FRM; + ps_app_ctxt->u4_max_num_frms = DEFAULT_NUM_FRMS; + ps_app_ctxt->u4_total_bytes = 0; + ps_app_ctxt->u4_pics_cnt = 0; + ps_app_ctxt->e_inp_color_fmt = DEFAULT_INP_COLOR_FMT; + ps_app_ctxt->e_recon_color_fmt = DEFAULT_RECON_COLOR_FMT; + ps_app_ctxt->e_arch = DEFAULT_ARCH; + ps_app_ctxt->e_soc = SOC_GENERIC; + ps_app_ctxt->header_generated = 0; + ps_app_ctxt->pv_codec_obj = NULL; + ps_app_ctxt->u4_num_cores = DEFAULT_NUM_CORES; + ps_app_ctxt->u4_pre_enc_me = 0; + ps_app_ctxt->u4_pre_enc_ipe = 0; + ps_app_ctxt->ac_ip_fname[0] = '\0'; + ps_app_ctxt->ac_op_fname[0] = '\0'; + ps_app_ctxt->ac_recon_fname[0] = '\0'; + 
ps_app_ctxt->ac_chksum_fname[0] = '\0'; + ps_app_ctxt->ac_mb_info_fname[0] = '\0'; + ps_app_ctxt->fp_ip = NULL; + ps_app_ctxt->fp_op = NULL; + ps_app_ctxt->fp_recon = NULL; + ps_app_ctxt->fp_chksum = NULL; + ps_app_ctxt->fp_psnr_ip = NULL; + ps_app_ctxt->fp_mb_info = NULL; + ps_app_ctxt->fp_pic_info = NULL; + ps_app_ctxt->u4_loopback = DEFAULT_LOOPBACK; + ps_app_ctxt->u4_max_frame_rate = DEFAULT_MAX_FRAMERATE; + ps_app_ctxt->u4_src_frame_rate = DEFAULT_SRC_FRAME_RATE; + ps_app_ctxt->u4_tgt_frame_rate = DEFAULT_TGT_FRAME_RATE; + ps_app_ctxt->u4_max_wd = DEFAULT_MAX_WD; + ps_app_ctxt->u4_max_ht = DEFAULT_MAX_HT; + ps_app_ctxt->u4_max_level = DEFAULT_MAX_LEVEL; + ps_app_ctxt->u4_strd = DEFAULT_STRIDE; + ps_app_ctxt->u4_wd = DEFAULT_WD; + ps_app_ctxt->u4_ht = DEFAULT_HT; + ps_app_ctxt->u4_psnr_enable = DEFAULT_PSNR_ENABLE; + ps_app_ctxt->u4_enc_speed = IVE_FASTEST; + ps_app_ctxt->u4_me_speed = DEFAULT_ME_SPEED; + ps_app_ctxt->u4_enable_fast_sad = DEFAULT_ENABLE_FAST_SAD; + ps_app_ctxt->u4_enable_alt_ref = DEFAULT_ENABLE_ALT_REF; + ps_app_ctxt->u4_rc = DEFAULT_RC; + ps_app_ctxt->u4_max_bitrate = DEFAULT_MAX_BITRATE; + ps_app_ctxt->u4_bitrate = DEFAULT_BITRATE; + ps_app_ctxt->u4_i_qp = DEFAULT_I_QP; + ps_app_ctxt->u4_p_qp = DEFAULT_P_QP; + ps_app_ctxt->u4_b_qp = DEFAULT_B_QP; + ps_app_ctxt->u4_i_qp_min = DEFAULT_QP_MIN; + ps_app_ctxt->u4_i_qp_max = DEFAULT_QP_MAX; + ps_app_ctxt->u4_p_qp_min = DEFAULT_QP_MIN; + ps_app_ctxt->u4_p_qp_max = DEFAULT_QP_MAX; + ps_app_ctxt->u4_b_qp_min = DEFAULT_QP_MIN; + ps_app_ctxt->u4_b_qp_max = DEFAULT_QP_MAX; + ps_app_ctxt->u4_air = DEFAULT_AIR; + ps_app_ctxt->u4_air_refresh_period = DEFAULT_AIR_REFRESH_PERIOD; + ps_app_ctxt->u4_srch_rng_x = DEFAULT_SRCH_RNG_X; + ps_app_ctxt->u4_srch_rng_y = DEFAULT_SRCH_RNG_Y; + ps_app_ctxt->u4_i_interval = DEFAULT_I_INTERVAL; + ps_app_ctxt->u4_idr_interval = DEFAULT_IDR_INTERVAL; + ps_app_ctxt->u4_b_frames = DEFAULT_B_FRAMES; + ps_app_ctxt->u4_disable_deblk_level = DEFAULT_DISABLE_DEBLK_LEVEL; + 
ps_app_ctxt->u4_hpel = DEFAULT_HPEL; + ps_app_ctxt->u4_qpel = DEFAULT_QPEL; + ps_app_ctxt->u4_enable_intra_4x4 = DEFAULT_I4; + ps_app_ctxt->e_profile = DEFAULT_EPROFILE; + ps_app_ctxt->u4_slice_mode = DEFAULT_SLICE_MODE; + ps_app_ctxt->u4_slice_param = DEFAULT_SLICE_PARAM; + ps_app_ctxt->pv_input_thread_handle = NULL; + ps_app_ctxt->pv_output_thread_handle = NULL; + ps_app_ctxt->pv_recon_thread_handle = NULL; + ps_app_ctxt->u4_vbv_buf_size = 0; + ps_app_ctxt->u4_vbv_buffer_delay = 1000; + ps_app_ctxt->adbl_psnr[0] = 0.0; + ps_app_ctxt->adbl_psnr[1] = 0.0; + ps_app_ctxt->adbl_psnr[2] = 0.0; + ps_app_ctxt->u4_psnr_cnt = 0; + ps_app_ctxt->pu1_psnr_buf = NULL; + ps_app_ctxt->u4_psnr_buf_size = 0; + + return; +} + +void set_dimensions(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + ih264e_ctl_set_dimensions_ip_t s_frame_dimensions_ip; + ih264e_ctl_set_dimensions_op_t s_frame_dimensions_op; + IV_STATUS_T status; + + s_frame_dimensions_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_frame_dimensions_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_DIMENSIONS; + + s_frame_dimensions_ip.s_ive_ip.u4_ht = ps_app_ctxt->u4_ht; + s_frame_dimensions_ip.s_ive_ip.u4_wd = ps_app_ctxt->u4_wd; + s_frame_dimensions_ip.s_ive_ip.u4_strd = ps_app_ctxt->u4_strd; + + s_frame_dimensions_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_frame_dimensions_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_frame_dimensions_ip.s_ive_ip.u4_size = + sizeof(ih264e_ctl_set_dimensions_ip_t); + s_frame_dimensions_op.s_ive_op.u4_size = + sizeof(ih264e_ctl_set_dimensions_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc, + &s_frame_dimensions_ip, + &s_frame_dimensions_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to set frame dimensions = 0x%x\n", + s_frame_dimensions_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + +void set_frame_rate(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, + 
UWORD32 u4_timestamp_high) +{ + ih264e_ctl_set_frame_rate_ip_t s_frame_rate_ip; + ih264e_ctl_set_frame_rate_op_t s_frame_rate_op; + IV_STATUS_T status; + + s_frame_rate_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_frame_rate_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_FRAMERATE; + + s_frame_rate_ip.s_ive_ip.u4_src_frame_rate = + ps_app_ctxt->u4_src_frame_rate; + s_frame_rate_ip.s_ive_ip.u4_tgt_frame_rate = + ps_app_ctxt->u4_tgt_frame_rate; + + s_frame_rate_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_frame_rate_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_frame_rate_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_frame_rate_ip_t); + s_frame_rate_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_frame_rate_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc,&s_frame_rate_ip,&s_frame_rate_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to set frame rate = 0x%x\n", + s_frame_rate_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + + +void set_ipe_params(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + ih264e_ctl_set_ipe_params_ip_t s_ipe_params_ip; + ih264e_ctl_set_ipe_params_op_t s_ipe_params_op; + IV_STATUS_T status; + + s_ipe_params_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_ipe_params_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_IPE_PARAMS; + + s_ipe_params_ip.s_ive_ip.u4_enable_intra_4x4 = ps_app_ctxt->u4_enable_intra_4x4; + s_ipe_params_ip.s_ive_ip.u4_enc_speed_preset = ps_app_ctxt->u4_enc_speed; + + s_ipe_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_ipe_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_ipe_params_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_ipe_params_ip_t); + s_ipe_params_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_ipe_params_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc,&s_ipe_params_ip,&s_ipe_params_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to 
set ipe params = 0x%x\n", + s_ipe_params_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + +void set_bit_rate(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high) +{ + ih264e_ctl_set_bitrate_ip_t s_bitrate_ip; + ih264e_ctl_set_bitrate_op_t s_bitrate_op; + IV_STATUS_T status; + + s_bitrate_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_bitrate_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_BITRATE; + + s_bitrate_ip.s_ive_ip.u4_target_bitrate = ps_app_ctxt->u4_bitrate; + + s_bitrate_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_bitrate_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_bitrate_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_bitrate_ip_t); + s_bitrate_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_bitrate_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc,&s_bitrate_ip,&s_bitrate_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to set bit rate = 0x%x\n", + s_bitrate_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + + +void set_frame_type(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high, + IV_PICTURE_CODING_TYPE_T e_frame_type) +{ + ih264e_ctl_set_frame_type_ip_t s_frame_type_ip; + ih264e_ctl_set_frame_type_op_t s_frame_type_op; + IV_STATUS_T status; + + s_frame_type_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_frame_type_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_FRAMETYPE; + + s_frame_type_ip.s_ive_ip.e_frame_type = e_frame_type; + + s_frame_type_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_frame_type_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_frame_type_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_frame_type_ip_t); + s_frame_type_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_frame_type_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc,&s_frame_type_ip,&s_frame_type_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to set frame type = 
0x%x\n", + s_frame_type_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + +void set_qp(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high) +{ + ih264e_ctl_set_qp_ip_t s_qp_ip; + ih264e_ctl_set_qp_op_t s_qp_op; + IV_STATUS_T status; + + s_qp_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_qp_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_QP; + + s_qp_ip.s_ive_ip.u4_i_qp = ps_app_ctxt->u4_i_qp; + s_qp_ip.s_ive_ip.u4_i_qp_max = ps_app_ctxt->u4_i_qp_max; + s_qp_ip.s_ive_ip.u4_i_qp_min = ps_app_ctxt->u4_i_qp_min; + + s_qp_ip.s_ive_ip.u4_p_qp = ps_app_ctxt->u4_p_qp; + s_qp_ip.s_ive_ip.u4_p_qp_max = ps_app_ctxt->u4_p_qp_max; + s_qp_ip.s_ive_ip.u4_p_qp_min = ps_app_ctxt->u4_p_qp_min; + + s_qp_ip.s_ive_ip.u4_b_qp = ps_app_ctxt->u4_b_qp; + s_qp_ip.s_ive_ip.u4_b_qp_max = ps_app_ctxt->u4_b_qp_max; + s_qp_ip.s_ive_ip.u4_b_qp_min = ps_app_ctxt->u4_b_qp_min; + + s_qp_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_qp_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_qp_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_qp_ip_t); + s_qp_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_qp_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc,&s_qp_ip,&s_qp_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to set qp 0x%x\n", + s_qp_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + +void set_enc_mode(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high, + IVE_ENC_MODE_T e_enc_mode) +{ + IV_STATUS_T status; + + ih264e_ctl_set_enc_mode_ip_t s_enc_mode_ip; + ih264e_ctl_set_enc_mode_op_t s_enc_mode_op; + + s_enc_mode_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_enc_mode_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_ENC_MODE; + + s_enc_mode_ip.s_ive_ip.e_enc_mode = e_enc_mode; + + s_enc_mode_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_enc_mode_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_enc_mode_ip.s_ive_ip.u4_size = 
sizeof(ih264e_ctl_set_enc_mode_ip_t); + s_enc_mode_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_enc_mode_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc, &s_enc_mode_ip, + &s_enc_mode_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to set in header encode mode = 0x%x\n", + s_enc_mode_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + + +void set_vbv_params(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + ih264e_ctl_set_vbv_params_ip_t s_vbv_ip; + ih264e_ctl_set_vbv_params_op_t s_vbv_op; + IV_STATUS_T status; + + s_vbv_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_vbv_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_VBV_PARAMS; + + s_vbv_ip.s_ive_ip.u4_vbv_buf_size = ps_app_ctxt->u4_vbv_buf_size; + s_vbv_ip.s_ive_ip.u4_vbv_buffer_delay = + ps_app_ctxt->u4_vbv_buffer_delay; + + s_vbv_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_vbv_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_vbv_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_vbv_params_ip_t); + s_vbv_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_vbv_params_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc,&s_vbv_ip,&s_vbv_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to set VBC params = 0x%x\n", + s_vbv_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + +void set_air_params(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + ih264e_ctl_set_air_params_ip_t s_air_ip; + ih264e_ctl_set_air_params_op_t s_air_op; + IV_STATUS_T status; + + s_air_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_air_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_AIR_PARAMS; + + s_air_ip.s_ive_ip.e_air_mode = ps_app_ctxt->u4_air; + s_air_ip.s_ive_ip.u4_air_refresh_period = ps_app_ctxt->u4_air_refresh_period; + + s_air_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_air_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + 
s_air_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_air_params_ip_t); + s_air_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_air_params_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc,&s_air_ip,&s_air_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to set air params = 0x%x\n", + s_air_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + +void set_me_params(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + IV_STATUS_T status; + + ih264e_ctl_set_me_params_ip_t s_me_params_ip; + ih264e_ctl_set_me_params_op_t s_me_params_op; + + s_me_params_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_me_params_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_ME_PARAMS; + + s_me_params_ip.s_ive_ip.u4_enable_fast_sad = ps_app_ctxt->u4_enable_fast_sad; + s_me_params_ip.s_ive_ip.u4_enable_alt_ref = ps_app_ctxt->u4_enable_alt_ref; + + s_me_params_ip.s_ive_ip.u4_enable_hpel = ps_app_ctxt->u4_hpel; + s_me_params_ip.s_ive_ip.u4_enable_qpel = ps_app_ctxt->u4_qpel; + s_me_params_ip.s_ive_ip.u4_me_speed_preset = ps_app_ctxt->u4_me_speed; + s_me_params_ip.s_ive_ip.u4_srch_rng_x = ps_app_ctxt->u4_srch_rng_x; + s_me_params_ip.s_ive_ip.u4_srch_rng_y = ps_app_ctxt->u4_srch_rng_y; + + s_me_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_me_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_me_params_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_me_params_ip_t); + s_me_params_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_me_params_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc, &s_me_params_ip, + &s_me_params_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to set me params = 0x%x\n", + s_me_params_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + + +void set_gop_params(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + IV_STATUS_T status; + + ih264e_ctl_set_gop_params_ip_t 
s_gop_params_ip; + ih264e_ctl_set_gop_params_op_t s_gop_params_op; + + s_gop_params_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_gop_params_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_GOP_PARAMS; + + s_gop_params_ip.s_ive_ip.u4_i_frm_interval = ps_app_ctxt->u4_i_interval; + s_gop_params_ip.s_ive_ip.u4_idr_frm_interval = ps_app_ctxt->u4_idr_interval; + s_gop_params_ip.s_ive_ip.u4_num_b_frames = ps_app_ctxt->u4_b_frames; + + s_gop_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_gop_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_gop_params_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_gop_params_ip_t); + s_gop_params_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_gop_params_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc, &s_gop_params_ip, + &s_gop_params_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to set ME params = 0x%x\n", + s_gop_params_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + +void set_profile_params(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + IV_STATUS_T status; + + ih264e_ctl_set_profile_params_ip_t s_profile_params_ip; + ih264e_ctl_set_profile_params_op_t s_profile_params_op; + + s_profile_params_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_profile_params_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_PROFILE_PARAMS; + + s_profile_params_ip.s_ive_ip.e_profile = ps_app_ctxt->e_profile; + + s_profile_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_profile_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_profile_params_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_profile_params_ip_t); + s_profile_params_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_profile_params_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc, &s_profile_params_ip, + &s_profile_params_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to set profile params = 0x%x\n", + 
s_profile_params_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + +void set_deblock_params(app_ctxt_t *ps_app_ctxt, + UWORD32 u4_timestamp_low, + UWORD32 u4_timestamp_high) +{ + IV_STATUS_T status; + + ih264e_ctl_set_deblock_params_ip_t s_deblock_params_ip; + ih264e_ctl_set_deblock_params_op_t s_deblock_params_op; + + s_deblock_params_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_deblock_params_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_DEBLOCK_PARAMS; + + s_deblock_params_ip.s_ive_ip.u4_disable_deblock_level = + ps_app_ctxt->u4_disable_deblk_level; + + s_deblock_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; + s_deblock_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; + + s_deblock_params_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_deblock_params_ip_t); + s_deblock_params_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_deblock_params_op_t); + + status = ih264e_api_function(ps_app_ctxt->ps_enc, &s_deblock_params_ip, + &s_deblock_params_op); + if(status != IV_SUCCESS) + { + CHAR ac_error[STRLENGTH]; + sprintf(ac_error, "Unable to enable/disable deblock params = 0x%x\n", + s_deblock_params_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + return; +} + +#define PEAK_WINDOW_SIZE 8 + +void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) +{ + ih264e_video_encode_ip_t ih264e_video_encode_ip; + ih264e_video_encode_op_t ih264e_video_encode_op; + + ive_video_encode_ip_t *ps_video_encode_ip = &ih264e_video_encode_ip.s_ive_ip; + ive_video_encode_op_t *ps_video_encode_op = &ih264e_video_encode_op.s_ive_op; + + iv_raw_buf_t *ps_inp_raw_buf = &ps_video_encode_ip->s_inp_buf; + + IV_STATUS_T status = IV_SUCCESS; + + WORD32 i, read_failed = 0, is_last = 0, buff_size = 0, num_bytes = 0; + UWORD32 u4_total_time = 0; + UWORD8 *pu1_buf = NULL; + UWORD32 u4_timestamp_low, u4_timestamp_high; + void *pv_mb_info = NULL, *pv_pic_info = NULL; + + TIMER curtime ; +#ifdef WINDOWS_TIMER + TIMER frequency; +#endif + WORD32 
peak_window[PEAK_WINDOW_SIZE] = {0}; + WORD32 peak_window_idx = 0; + WORD32 peak_avg_max = 0, timetaken = 0; + iv_raw_buf_t s_inp_buf, s_recon_buf; + CHAR ac_error[STRLENGTH]; + WORD32 end_of_frames=0; + + u4_timestamp_low = 0; + u4_timestamp_high = 0; + + /*************************************************************************/ + /* Allocate I/O Buffers */ + /*************************************************************************/ + allocate_input(ps_app_ctxt); + allocate_output(ps_app_ctxt); + allocate_recon(ps_app_ctxt); + + /* init psnr */ + init_psnr(ps_app_ctxt); + + /* open file pointers */ + ps_app_ctxt->fp_ip = fopen(ps_app_ctxt->ac_ip_fname, "rb"); + if(NULL == ps_app_ctxt->fp_ip) + { + sprintf(ac_error, "Unable to open input file for reading: %s", ps_app_ctxt->ac_ip_fname); + invalid_argument_exit(ac_error); + } + + ps_app_ctxt->fp_op = fopen(ps_app_ctxt->ac_op_fname, "wb"); + if(NULL == ps_app_ctxt->fp_op) + { + sprintf(ac_error, "Unable to open output file for writing: %s", ps_app_ctxt->ac_op_fname); + invalid_argument_exit(ac_error); + } + + if(1 == ps_app_ctxt->u4_recon_enable) + { + ps_app_ctxt->fp_recon = fopen(ps_app_ctxt->ac_recon_fname, "wb"); + if(NULL == ps_app_ctxt->fp_recon) + { + sprintf(ac_error, "Unable to open recon file for writing: %s", ps_app_ctxt->ac_recon_fname); + invalid_argument_exit(ac_error); + } + } + + if(1 == ps_app_ctxt->u4_chksum_enable) + { + ps_app_ctxt->fp_chksum = fopen(ps_app_ctxt->ac_chksum_fname, "wb"); + if(NULL == ps_app_ctxt->fp_chksum) + { + sprintf(ac_error, "Unable to open checksum file for writing: %s", ps_app_ctxt->ac_chksum_fname); + invalid_argument_exit(ac_error); + } + } + +#if 0 //Input buffer dump + //if(1 == ps_app_ctxt->u4_psnr_enable) + { + ps_app_ctxt->fp_dump_op = fopen("D:\\dump\\inp.yuv", "wb"); + if(NULL == ps_app_ctxt->fp_dump_op) + { + sprintf(ac_error, "Unable to open output file for input dump: %s", "D:\\dump\\inp.yuv"); + invalid_argument_exit(ac_error); + } + } +#endif //Input buffer 
dump + + /* If PSNR is enabled, open input file again and hold a different file pointer + * This makes it easy to compute PSNR without adding dependency between input and recon threads + */ + if(1 == ps_app_ctxt->u4_psnr_enable) + { + ps_app_ctxt->fp_psnr_ip = fopen(ps_app_ctxt->ac_ip_fname, "rb"); + if(NULL == ps_app_ctxt->fp_psnr_ip) + { + sprintf(ac_error, "Unable to open input file for reading: %s", ps_app_ctxt->ac_ip_fname); + invalid_argument_exit(ac_error); + } + } + + if(0 != ps_app_ctxt->u4_mb_info_type) + { + ps_app_ctxt->fp_mb_info = fopen(ps_app_ctxt->ac_mb_info_fname, "rb"); + if(NULL == ps_app_ctxt->fp_mb_info) + { + sprintf(ac_error, "Unable to open MB info file for reading: %s", ps_app_ctxt->ac_mb_info_fname); + invalid_argument_exit(ac_error); + } + } + if (ps_app_ctxt->u4_pic_info_type) + { + ps_app_ctxt->fp_pic_info = fopen(ps_app_ctxt->ac_pic_info_fname, "rb"); + if(NULL == ps_app_ctxt->fp_pic_info) + { + sprintf(ac_error, "Unable to open Pic info file for reading: %s", ps_app_ctxt->ac_pic_info_fname); + invalid_argument_exit(ac_error); + } + } + + GETTIME(&ps_app_ctxt->enc_start_time); + ps_app_ctxt->enc_last_time = ps_app_ctxt->enc_start_time; + + while(1) + { + + + + + + /******************************************************************************/ + /****************** Input Initialization **************************************/ + /******************************************************************************/ + + for(i = 0; i < DEFAULT_MAX_INPUT_BUFS; i++) + { + if(ps_app_ctxt->as_input_buf[i].u4_is_free) + { + pu1_buf = ps_app_ctxt->as_input_buf[i].pu1_buf; + pv_mb_info = ps_app_ctxt->as_input_buf[i].pv_mb_info; + pv_pic_info = ps_app_ctxt->as_input_buf[i].pv_pic_info; + ps_app_ctxt->as_input_buf[i].u4_is_free = 0; + break; + } + } + + ps_video_encode_ip->u4_size = sizeof(ih264e_video_encode_ip_t); + ps_video_encode_op->u4_size = sizeof(ih264e_video_encode_op_t); + + ps_video_encode_ip->e_cmd = IVE_CMD_VIDEO_ENCODE; + 
ps_video_encode_ip->pv_bufs = pu1_buf; + ps_video_encode_ip->pv_mb_info = pv_mb_info; + ps_video_encode_ip->pv_pic_info = pv_pic_info; + ps_video_encode_ip->u4_pic_info_type = ps_app_ctxt->u4_pic_info_type; + /* + * Since the buffers are used for reading, + * And after each row we have a stride we nned to calculate + * the luma size according to the stride + */ + ps_inp_raw_buf->e_color_fmt = ps_app_ctxt->e_inp_color_fmt; + + /* Initialize for 420SP */ + if(IV_YUV_420SP_UV == ps_app_ctxt->e_inp_color_fmt|| + IV_YUV_420SP_VU == ps_app_ctxt->e_inp_color_fmt) + { + /*init luma buffer*/ + ps_inp_raw_buf->apv_bufs[0] = pu1_buf; + + /*Init chroma buffer*/ + pu1_buf += (ps_app_ctxt->u4_strd) * ALIGN16(ps_app_ctxt->u4_ht); + ps_inp_raw_buf->apv_bufs[1] = pu1_buf; + + ps_inp_raw_buf->au4_wd[0] = ps_app_ctxt->u4_wd; + ps_inp_raw_buf->au4_wd[1] = ps_app_ctxt->u4_wd; + + ps_inp_raw_buf->au4_ht[0] = ps_app_ctxt->u4_ht; + ps_inp_raw_buf->au4_ht[1] = ps_app_ctxt->u4_ht / 2; + + ps_inp_raw_buf->au4_strd[0] = ps_app_ctxt->u4_strd; + ps_inp_raw_buf->au4_strd[1] = ps_app_ctxt->u4_strd; + } + else if(IV_YUV_420P == ps_app_ctxt->e_inp_color_fmt) + { + /* init buffers */ + ps_inp_raw_buf->apv_bufs[0] = pu1_buf; + pu1_buf += (ps_app_ctxt->u4_strd) * ALIGN16(ps_app_ctxt->u4_ht); + ps_inp_raw_buf->apv_bufs[1] = pu1_buf; + pu1_buf += (ps_app_ctxt->u4_strd >> 1) * (ALIGN16(ps_app_ctxt->u4_ht) >> 1); + ps_inp_raw_buf->apv_bufs[2] = pu1_buf; + + ps_inp_raw_buf->au4_wd[0] = ps_app_ctxt->u4_wd; + ps_inp_raw_buf->au4_wd[1] = ps_app_ctxt->u4_wd / 2; + ps_inp_raw_buf->au4_wd[2] = ps_app_ctxt->u4_wd / 2; + + ps_inp_raw_buf->au4_ht[0] = ps_app_ctxt->u4_ht; + ps_inp_raw_buf->au4_ht[1] = ps_app_ctxt->u4_ht / 2; + ps_inp_raw_buf->au4_ht[2] = ps_app_ctxt->u4_ht / 2; + + ps_inp_raw_buf->au4_strd[0] = ps_app_ctxt->u4_strd; + ps_inp_raw_buf->au4_strd[1] = ps_app_ctxt->u4_strd / 2; + ps_inp_raw_buf->au4_strd[2] = ps_app_ctxt->u4_strd / 2; + + } + else if(IV_YUV_422ILE == ps_app_ctxt->e_inp_color_fmt) + { + 
/*init luma buffer*/ + ps_inp_raw_buf->apv_bufs[0] = pu1_buf; + + ps_inp_raw_buf->au4_wd[0] = ps_app_ctxt->u4_wd * 2; + + ps_inp_raw_buf->au4_ht[0] = ps_app_ctxt->u4_ht; + + ps_inp_raw_buf->au4_strd[0] = ps_app_ctxt->u4_strd *2; + } + + while(1) + { + IV_STATUS_T mb_info_status = IV_SUCCESS, pic_info_status = IV_SUCCESS; + read_failed = 0; + status = read_input(ps_app_ctxt->fp_ip, ps_inp_raw_buf); + if (ps_app_ctxt->u4_mb_info_type != 0) + { + mb_info_status = read_mb_info(ps_app_ctxt, pv_mb_info); + } + if (ps_app_ctxt->u4_pic_info_type != 0) + { + pic_info_status = read_pic_info(ps_app_ctxt, pv_pic_info); + } + if((IV_SUCCESS != status) || (IV_SUCCESS != mb_info_status) + || (IV_SUCCESS != pic_info_status)) + { + if(0 == ps_app_ctxt->u4_loopback) + { + is_last = 1; + read_failed = 1; + + break; + } + else + fseek(ps_app_ctxt->fp_ip, 0, SEEK_SET); + } + else + break; + } + + /******************************************************************************/ + /****************** Output Initialization *************************************/ + /******************************************************************************/ + + for(i = 0; i < DEFAULT_MAX_OUTPUT_BUFS; i++) + { + if(ps_app_ctxt->as_output_buf[i].u4_is_free) + { + pu1_buf = ps_app_ctxt->as_output_buf[i].pu1_buf; + buff_size = ps_app_ctxt->as_output_buf[i].u4_buf_size; + ps_app_ctxt->as_output_buf[i].u4_is_free = 0; + break; + } + } + ps_video_encode_ip->s_out_buf.pv_buf = pu1_buf; + ps_video_encode_ip->s_out_buf.u4_bytes = 0; + ps_video_encode_ip->s_out_buf.u4_bufsize = buff_size; + + /******************************************************************************/ + /****************** Recon Initialization **************************************/ + /******************************************************************************/ + init_raw_buf_descr(ps_app_ctxt, &s_recon_buf, ps_app_ctxt->as_recon_buf[0].pu1_buf, ps_app_ctxt->e_recon_color_fmt); + + if(ps_app_ctxt->u4_psnr_enable) + 
init_raw_buf_descr(ps_app_ctxt, &s_inp_buf, ps_app_ctxt->pu1_psnr_buf, ps_app_ctxt->e_inp_color_fmt); + + ps_video_encode_ip->s_recon_buf = s_recon_buf; + + /******************************************************************************/ + /************************* Un Initialized *************************************/ + /******************************************************************************/ + if(0 == ps_app_ctxt->u4_loopback) + { + /* If input file is read completely and loopback is not enabled, + * then exit the loop */ + if(feof(ps_app_ctxt->fp_ip)) + { + is_last = 1; + } + } + + + /* If last frame, send input null to get back encoded frames */ + if ( is_last == 1 || ((ps_app_ctxt->u4_max_num_frms) <= u4_timestamp_low) ) + { + is_last = 1; + ps_inp_raw_buf->apv_bufs[0] = NULL; + ps_inp_raw_buf->apv_bufs[1] = NULL; + ps_inp_raw_buf->apv_bufs[2] = NULL; + end_of_frames = 1; + } + + ps_video_encode_ip->u4_is_last = is_last; + ps_video_encode_ip->u4_mb_info_type = ps_app_ctxt->u4_mb_info_type; + ps_video_encode_ip->u4_pic_info_type = ps_app_ctxt->u4_pic_info_type;; + ps_video_encode_op->s_out_buf.pv_buf= 0; + ps_video_encode_ip->u4_timestamp_high = u4_timestamp_high; + ps_video_encode_ip->u4_timestamp_low = u4_timestamp_low; + + + GETTIME(&ps_app_ctxt->enc_last_time); + + status = ih264e_api_function(ps_enc, &ih264e_video_encode_ip, &ih264e_video_encode_op); + + if (IV_SUCCESS != status) + { + printf("Encode Frame failed = 0x%x\n", ih264e_video_encode_op.s_ive_op.u4_error_code); + break; + } + +#ifdef WINDOWS_TIMER + QueryPerformanceFrequency ( &frequency); +#endif + GETTIME(&curtime); + ELAPSEDTIME(ps_app_ctxt->enc_last_time, curtime, timetaken, frequency); + ps_app_ctxt->enc_last_time = curtime; + +#ifdef PROFILE_ENABLE + { + WORD32 peak_avg, id; + u4_total_time += timetaken; + peak_window[peak_window_idx++] = timetaken; + if(peak_window_idx == PEAK_WINDOW_SIZE) + peak_window_idx = 0; + peak_avg = 0; + for(id = 0; id < PEAK_WINDOW_SIZE; id++) + { + 
peak_avg += peak_window[id]; + } + peak_avg /= PEAK_WINDOW_SIZE; + if (peak_avg > peak_avg_max) + peak_avg_max = peak_avg; + } +#endif + + /******************************************************************************/ + /****************** Writing Output ********************************************/ + /******************************************************************************/ + num_bytes = 0; + /* Break if all the encoded frames are taken from encoder */ + if(1 == end_of_frames && 0 == ps_video_encode_op->output_present) + { + break; + } + if(1 == ps_video_encode_op->output_present) + { + num_bytes = ps_video_encode_op->s_out_buf.u4_bytes; + buff_size = ps_video_encode_op->s_out_buf.u4_bufsize; + pu1_buf = (UWORD8*)ps_video_encode_op->s_out_buf.pv_buf; + + status = write_output(ps_app_ctxt->fp_op, pu1_buf, num_bytes); + if(IV_SUCCESS != status) + { + printf("Error: Unable to write to output file\n"); + break; + } + + /* Reuse of freed input buffer */ + for(i = 0; i < DEFAULT_MAX_INPUT_BUFS; i++) + { + if(ps_app_ctxt->as_input_buf[i].pu1_buf == ps_video_encode_op->s_inp_buf.apv_bufs[0]) + { + ps_app_ctxt->as_input_buf[i].u4_is_free = 1; + break; + } + } + + /* Reuse of freed output buffer */ + for(i = 0; i < DEFAULT_MAX_OUTPUT_BUFS; i++) + { + if(ps_app_ctxt->as_output_buf[i].pu1_buf == ps_video_encode_op->s_out_buf.pv_buf) + { + ps_app_ctxt->as_output_buf[i].u4_is_free = 1; + break; + } + } + } + + if (ps_video_encode_op->dump_recon == 1) + { + ps_app_ctxt->u4_pics_cnt++; + + ps_app_ctxt->avg_time = u4_total_time / ps_app_ctxt->u4_pics_cnt; + if (ps_app_ctxt->u4_psnr_enable == 0) + { + UWORD8 u1_pic_type[][5] = { "IDR", "I", "P","NA" }; + WORD32 lookup_idx = 0; + + if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_IDR_FRAME) + { + lookup_idx = 0; + } + else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_I_FRAME) + { + lookup_idx = 1; + } + else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_P_FRAME) + { + 
lookup_idx = 2; + } + else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_NA_FRAME) + { + lookup_idx = 3; + } + + printf("[%s] PicNum %4d Bytes Generated %6d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d\n", u1_pic_type[lookup_idx], ps_app_ctxt->u4_pics_cnt, num_bytes, timetaken, ps_app_ctxt->avg_time, peak_avg_max); + } + + ps_app_ctxt->u4_total_bytes += num_bytes; + + /******************************************************************************/ + /****************** Writing Recon ********************************************/ + /******************************************************************************/ + if(1 == ps_video_encode_op->output_present) + { + s_recon_buf = ps_video_encode_op->s_recon_buf; + + /* Dump recon when enabled, and output bytes != 0*/ + if(ps_app_ctxt->u4_recon_enable) + { + status = write_recon(ps_app_ctxt->fp_recon, &s_recon_buf); + if(IV_SUCCESS != status) + { + printf("Error: Unable to write to recon file\n"); + break; + } + } + + + if(ps_app_ctxt->u4_psnr_enable) + { + read_input(ps_app_ctxt->fp_psnr_ip, &s_inp_buf); + compute_psnr(ps_app_ctxt, &s_recon_buf, &s_inp_buf); + } + + + if(ps_app_ctxt->u4_chksum_enable) + { + WORD32 comp; + WORD32 num_comp; + num_comp = 2; + if(IV_YUV_420P == s_recon_buf.e_color_fmt) + num_comp = 3; + + for(comp = 0; comp < num_comp; comp++ ) + { + UWORD8 au1_chksum[16]; + + calc_md5_cksum((UWORD8 *)s_recon_buf.apv_bufs[comp], + s_recon_buf.au4_strd[comp], + s_recon_buf.au4_wd[comp], + s_recon_buf.au4_ht[comp], + au1_chksum); + + fwrite(au1_chksum, sizeof(UWORD8), 16, ps_app_ctxt->fp_chksum); + } + } + + + } + } + else + { + if (ps_app_ctxt->u4_psnr_enable == 0) + { + UWORD8 u1_pic_type[][5] = { "IDR", "I", "P", "NA" }; + WORD32 lookup_idx = 0; + + if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_IDR_FRAME) + { + lookup_idx = 0; + } + else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_I_FRAME) + { + lookup_idx = 1; + } + else if 
(ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_P_FRAME) + { + lookup_idx = 2; + } + else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_NA_FRAME) + { + lookup_idx = 3; + } + + printf("[%s] PicNum %4d Bytes Generated %6d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d\n", u1_pic_type[lookup_idx], ps_app_ctxt->u4_pics_cnt, num_bytes, timetaken, ps_app_ctxt->avg_time, peak_avg_max); + } + else + { + read_input(ps_app_ctxt->fp_psnr_ip, &s_inp_buf); + } + } +#if 0 //Input buffer dump + /*Dump input buffers to a file*/ + dump_input(ps_app_ctxt->fp_dump_op, ps_inp_raw_buf); +#endif //Input buffer dump + + if(is_last) + break; + + u4_timestamp_low++; + } + + /* Pic count is 1 more than actual num frames encoded, beacause last call is to just get the output */ + ps_app_ctxt->u4_pics_cnt--; + + if(ps_app_ctxt->u4_psnr_enable) + { + print_average_psnr(ps_app_ctxt); + } + + /* house keeping operations */ + fclose(ps_app_ctxt->fp_ip); + fclose(ps_app_ctxt->fp_op); + if(1 == ps_app_ctxt->u4_recon_enable) + { + fclose(ps_app_ctxt->fp_recon); + } + if(1 == ps_app_ctxt->u4_chksum_enable) + { + fclose(ps_app_ctxt->fp_chksum); + } + if(1 == ps_app_ctxt->u4_psnr_enable) + { + fclose(ps_app_ctxt->fp_psnr_ip); + } + + if(0 != ps_app_ctxt->u4_mb_info_type) + { + fclose(ps_app_ctxt->fp_mb_info); + } + if (ps_app_ctxt->u4_pic_info_type) + { + fclose(ps_app_ctxt->fp_pic_info); + } + + free_input(ps_app_ctxt); + free_output(ps_app_ctxt); + free_recon(ps_app_ctxt); +} + +/*****************************************************************************/ +/* */ +/* Function Name : main */ +/* */ +/* Description : Application to demonstrate codec API */ +/* */ +/* */ +/* Inputs : argc - Number of arguments */ +/* argv[] - Arguments */ +/* Globals : */ +/* Processing : Shows how to use create, process, control and delete */ +/* */ +/* Outputs : Codec output in a file */ +/* Returns : */ +/* */ +/* Issues : Assumes both PROFILE_ENABLE to be */ +/* defined for 
multithread decode-display working */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 20 11 2013 100189 Initial Version */ +/*****************************************************************************/ +#ifdef IOS +int h264enc_main(char * homedir) +#else +int main(int argc, char *argv[]) +#endif +{ + /* Config Parameters for Encoding */ + app_ctxt_t s_app_ctxt; + + /* error string */ + CHAR ac_error[STRLENGTH]; + + /* config file name */ + CHAR ac_cfg_fname[STRLENGTH]; + + /* error status */ + IV_STATUS_T status = IV_SUCCESS; + + /* temp var */ + CHAR filename_with_path[STRLENGTH]; + WORD32 num_mem_recs; + iv_obj_t *ps_enc; + WORD32 i; + FILE *fp_cfg = NULL; + +#ifdef X86_MINGW + + /* For getting printfs without any delay in eclipse */ + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + +#endif + + init_default_params(&s_app_ctxt); + +#ifndef IOS + + /* Usage */ + if(argc < 2) + { + printf("Using enc.cfg as configuration file \n"); + strcpy(ac_cfg_fname, "enc.cfg"); + } + else if(argc == 2) + { + strcpy(ac_cfg_fname, argv[1]); + } + +#endif + + /*************************************************************************/ + /* Parse arguments */ + /*************************************************************************/ + +#ifndef IOS + + /* Read command line arguments */ + if(argc > 2) + { + for(i = 1; i < argc; i += 2) + { + if(CONFIG == get_argument(argv[i])) + { + strcpy(ac_cfg_fname, argv[i + 1]); + if((fp_cfg = fopen(ac_cfg_fname, "r")) == NULL) + { + sprintf(ac_error, + "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error); + } + read_cfg_file(&s_app_ctxt, fp_cfg); + fclose(fp_cfg); + } + else + { + parse_argument(&s_app_ctxt, argv[i], argv[i + 1]); + } + } + } + else + { + if((fp_cfg = fopen(ac_cfg_fname, "r")) == NULL) + { + sprintf(ac_error, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error); + } + read_cfg_file(&s_app_ctxt, fp_cfg); + 
fclose(fp_cfg); + } + +#else + + sprintf(filename_with_path, "%s/%s", homedir, "enc.cfg"); + if((fp_cfg = fopen(filename_with_path, "r")) == NULL) + { + sprintf(ac_error, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error); + + } + read_cfg_file(&s_app_ctxt, fp_cfg); + fclose(fp_cfg); + +#endif + + + validate_params(&s_app_ctxt); + + + /*************************************************************************/ + /* Getting Number of MemRecords */ + /*************************************************************************/ + { + ih264e_num_mem_rec_ip_t s_num_mem_rec_ip; + ih264e_num_mem_rec_op_t s_num_mem_rec_op; + + s_num_mem_rec_ip.s_ive_ip.u4_size = sizeof(ih264e_num_mem_rec_ip_t); + s_num_mem_rec_op.s_ive_op.u4_size = sizeof(ih264e_num_mem_rec_op_t); + + s_num_mem_rec_ip.s_ive_ip.e_cmd = IV_CMD_GET_NUM_MEM_REC; + + status = ih264e_api_function(0, &s_num_mem_rec_ip, &s_num_mem_rec_op); + + if(status != IV_SUCCESS) + { + sprintf(ac_error, "Get number of memory records failed = 0x%x\n", s_num_mem_rec_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + + s_app_ctxt.u4_num_mem_rec = num_mem_recs = s_num_mem_rec_op.s_ive_op.u4_num_mem_rec; + } + + /* Allocate array to hold memory records */ + s_app_ctxt.ps_mem_rec = (iv_mem_rec_t *) malloc(num_mem_recs * sizeof(iv_mem_rec_t)); + if(NULL == s_app_ctxt.ps_mem_rec) + { + + sprintf(ac_error, "Unable to allocate memory for hold memory records: Size %d", (WORD32)(num_mem_recs * sizeof(iv_mem_rec_t))); + codec_exit(ac_error); + } + + { + iv_mem_rec_t *ps_mem_rec; + ps_mem_rec = s_app_ctxt.ps_mem_rec; + for(i = 0; i < num_mem_recs; i++) + { + ps_mem_rec->u4_size = sizeof(iv_mem_rec_t); + ps_mem_rec->pv_base = NULL; + ps_mem_rec->u4_mem_size = 0; + ps_mem_rec->u4_mem_alignment = 0; + ps_mem_rec->e_mem_type = IV_NA_MEM_TYPE; + + ps_mem_rec++; + } + } + + /*************************************************************************/ + /* Getting MemRecords Attributes */ + 
/*************************************************************************/ + { + ih264e_fill_mem_rec_ip_t s_fill_mem_rec_ip; + ih264e_fill_mem_rec_op_t s_fill_mem_rec_op; + + s_fill_mem_rec_ip.s_ive_ip.u4_size = sizeof(ih264e_fill_mem_rec_ip_t); + s_fill_mem_rec_op.s_ive_op.u4_size = sizeof(ih264e_fill_mem_rec_op_t); + + s_fill_mem_rec_ip.s_ive_ip.e_cmd = IV_CMD_FILL_NUM_MEM_REC; + s_fill_mem_rec_ip.s_ive_ip.ps_mem_rec = s_app_ctxt.ps_mem_rec; + s_fill_mem_rec_ip.s_ive_ip.u4_num_mem_rec = s_app_ctxt.u4_num_mem_rec; + s_fill_mem_rec_ip.s_ive_ip.u4_max_wd = s_app_ctxt.u4_max_wd; + s_fill_mem_rec_ip.s_ive_ip.u4_max_ht = s_app_ctxt.u4_max_ht; + s_fill_mem_rec_ip.s_ive_ip.u4_max_level = s_app_ctxt.u4_max_level; + s_fill_mem_rec_ip.s_ive_ip.e_color_format = DEFAULT_INP_COLOR_FMT; + s_fill_mem_rec_ip.s_ive_ip.u4_max_ref_cnt = DEFAULT_MAX_REF_FRM; + s_fill_mem_rec_ip.s_ive_ip.u4_max_reorder_cnt = DEFAULT_MAX_REORDER_FRM; + s_fill_mem_rec_ip.s_ive_ip.u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X; + s_fill_mem_rec_ip.s_ive_ip.u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y; + + status = ih264e_api_function(0, &s_fill_mem_rec_ip, &s_fill_mem_rec_op); + + if(status != IV_SUCCESS) + { + sprintf(ac_error, "Fill memory records failed = 0x%x\n", + s_fill_mem_rec_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + } + + /*************************************************************************/ + /* Allocating Memory for Mem Records */ + /*************************************************************************/ + { + WORD32 total_size; + iv_mem_rec_t *ps_mem_rec; + total_size = 0; + + ps_mem_rec = s_app_ctxt.ps_mem_rec; + for(i = 0; i < num_mem_recs; i++) + { + ps_mem_rec->pv_base = ih264a_aligned_malloc(ps_mem_rec->u4_mem_alignment, + ps_mem_rec->u4_mem_size); + if(ps_mem_rec->pv_base == NULL) + { + sprintf(ac_error, "Allocation failure for mem record id %d size %d\n", + i, ps_mem_rec->u4_mem_size); + codec_exit(ac_error); + } + total_size += ps_mem_rec->u4_mem_size; + + 
ps_mem_rec++; + } + printf("\nTotal memory for codec %d\n", total_size); + } + + + /*************************************************************************/ + /* Codec Instance Creation */ + /*************************************************************************/ + { + ih264e_init_ip_t s_init_ip; + ih264e_init_op_t s_init_op; + + ps_enc = s_app_ctxt.ps_mem_rec[0].pv_base; + ps_enc->u4_size = sizeof(iv_obj_t); + ps_enc->pv_fxns = ih264e_api_function; + s_app_ctxt.ps_enc = ps_enc; + + s_init_ip.s_ive_ip.u4_size = sizeof(ih264e_init_ip_t); + s_init_op.s_ive_op.u4_size = sizeof(ih264e_init_op_t); + + s_init_ip.s_ive_ip.e_cmd = IV_CMD_INIT; + s_init_ip.s_ive_ip.u4_num_mem_rec = s_app_ctxt.u4_num_mem_rec; + s_init_ip.s_ive_ip.ps_mem_rec = s_app_ctxt.ps_mem_rec; + s_init_ip.s_ive_ip.u4_max_wd = s_app_ctxt.u4_max_wd; + s_init_ip.s_ive_ip.u4_max_ht = s_app_ctxt.u4_max_ht; + s_init_ip.s_ive_ip.u4_max_ref_cnt = DEFAULT_MAX_REF_FRM; + s_init_ip.s_ive_ip.u4_max_reorder_cnt = DEFAULT_MAX_REORDER_FRM; + s_init_ip.s_ive_ip.u4_max_level = s_app_ctxt.u4_max_level; + s_init_ip.s_ive_ip.e_inp_color_fmt = s_app_ctxt.e_inp_color_fmt; + if(s_app_ctxt.u4_recon_enable || s_app_ctxt.u4_psnr_enable || s_app_ctxt.u4_chksum_enable) + { + s_init_ip.s_ive_ip.u4_enable_recon = 1; + } + else + { + s_init_ip.s_ive_ip.u4_enable_recon = 0; + } + s_init_ip.s_ive_ip.e_recon_color_fmt = s_app_ctxt.e_recon_color_fmt; + s_init_ip.s_ive_ip.e_rc_mode = s_app_ctxt.u4_rc; + s_init_ip.s_ive_ip.u4_max_framerate = s_app_ctxt.u4_max_frame_rate; + s_init_ip.s_ive_ip.u4_max_bitrate = s_app_ctxt.u4_max_bitrate; + s_init_ip.s_ive_ip.u4_max_num_bframes = DEFAULT_B_FRAMES; + s_init_ip.s_ive_ip.e_content_type = IV_PROGRESSIVE; + s_init_ip.s_ive_ip.u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X; + s_init_ip.s_ive_ip.u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y; + s_init_ip.s_ive_ip.e_slice_mode = s_app_ctxt.u4_slice_mode; + s_init_ip.s_ive_ip.u4_slice_param = s_app_ctxt.u4_slice_param; + s_init_ip.s_ive_ip.e_arch = 
s_app_ctxt.e_arch; + s_init_ip.s_ive_ip.e_soc = s_app_ctxt.e_soc; + + status = ih264e_api_function(ps_enc, &s_init_ip, &s_init_op); + + if(status != IV_SUCCESS) + { + sprintf(ac_error, "Init memory records failed = 0x%x\n", + s_init_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + } + + /*************************************************************************/ + /* set processor details */ + /*************************************************************************/ + { + ih264e_ctl_set_num_cores_ip_t s_ctl_set_num_cores_ip; + ih264e_ctl_set_num_cores_op_t s_ctl_set_num_cores_op; + s_ctl_set_num_cores_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_ctl_set_num_cores_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_NUM_CORES; + s_ctl_set_num_cores_ip.s_ive_ip.u4_num_cores = s_app_ctxt.u4_num_cores; + s_ctl_set_num_cores_ip.s_ive_ip.u4_timestamp_high = 0; + s_ctl_set_num_cores_ip.s_ive_ip.u4_timestamp_low = 0; + s_ctl_set_num_cores_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_num_cores_ip_t); + + s_ctl_set_num_cores_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_num_cores_op_t); + + status = ih264e_api_function(ps_enc, (void *) &s_ctl_set_num_cores_ip, + (void *) &s_ctl_set_num_cores_op); + if(status != IV_SUCCESS) + { + sprintf(ac_error, "Unable to set processor params = 0x%x\n", + s_ctl_set_num_cores_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + + } + + /*************************************************************************/ + /* Get Codec Version */ + /*************************************************************************/ + { + ih264e_ctl_getversioninfo_ip_t s_ctl_set_getversioninfo_ip; + ih264e_ctl_getversioninfo_op_t s_ctl_set_getversioninfo_op; + CHAR ac_version_string[STRLENGTH]; + s_ctl_set_getversioninfo_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_ctl_set_getversioninfo_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_GETVERSION; + s_ctl_set_getversioninfo_ip.s_ive_ip.pu1_version = (UWORD8 *)ac_version_string; + 
s_ctl_set_getversioninfo_ip.s_ive_ip.u4_version_bufsize = sizeof(ac_version_string); + s_ctl_set_getversioninfo_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_getversioninfo_ip_t); + s_ctl_set_getversioninfo_op.s_ive_op.u4_size = sizeof(ih264e_ctl_getversioninfo_op_t); + + status = ih264e_api_function(ps_enc, (void *) &s_ctl_set_getversioninfo_ip, + (void *) &s_ctl_set_getversioninfo_op); + if(status != IV_SUCCESS) + { + sprintf(ac_error, "Unable to get codec version = 0x%x\n", + s_ctl_set_getversioninfo_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + printf("CODEC VERSION %s\n", ac_version_string); + } + + /*************************************************************************/ + /* Get I/O Buffer Requirement */ + /*************************************************************************/ + { + ih264e_ctl_getbufinfo_ip_t s_get_buf_info_ip; + ih264e_ctl_getbufinfo_op_t s_get_buf_info_op; + + s_get_buf_info_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_getbufinfo_ip_t); + s_get_buf_info_op.s_ive_op.u4_size = sizeof(ih264e_ctl_getbufinfo_op_t); + + s_get_buf_info_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL; + s_get_buf_info_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_GETBUFINFO; + s_get_buf_info_ip.s_ive_ip.u4_max_ht = s_app_ctxt.u4_max_ht; + s_get_buf_info_ip.s_ive_ip.u4_max_wd = s_app_ctxt.u4_max_wd; + s_get_buf_info_ip.s_ive_ip.e_inp_color_fmt = s_app_ctxt.e_inp_color_fmt; + + status = ih264e_api_function(ps_enc, &s_get_buf_info_ip, &s_get_buf_info_op); + + if (status != IV_SUCCESS) + { + sprintf(ac_error, "Unable to get I/O buffer requirements = 0x%x\n", + s_get_buf_info_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + s_app_ctxt.s_get_buf_info_op = s_get_buf_info_op; + } + + /*****************************************************************************/ + /* Add the following initializations based on the parameters in context */ + /*****************************************************************************/ + + + 
/*****************************************************************************/ + /* Video control Set Frame dimensions */ + /*****************************************************************************/ + s_app_ctxt.u4_strd = ALIGN16(s_app_ctxt.u4_wd); + set_dimensions(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set Frame rates */ + /*****************************************************************************/ + set_frame_rate(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set IPE Params */ + /*****************************************************************************/ + set_ipe_params(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set Bitrate */ + /*****************************************************************************/ + set_bit_rate(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set QP */ + /*****************************************************************************/ + set_qp(&s_app_ctxt,0,0); + + /*****************************************************************************/ + /* Video control Set AIR params */ + /*****************************************************************************/ + set_air_params(&s_app_ctxt,0,0); + + /*****************************************************************************/ + /* Video control Set VBV params */ + /*****************************************************************************/ + set_vbv_params(&s_app_ctxt,0,0); + + /*****************************************************************************/ + /* Video control Set Motion estimation params */ + /*****************************************************************************/ + set_me_params(&s_app_ctxt,0,0); + + 
/*****************************************************************************/ + /* Video control Set GOP params */ + /*****************************************************************************/ + set_gop_params(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set Deblock params */ + /*****************************************************************************/ + set_deblock_params(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set Profile params */ + /*****************************************************************************/ + set_profile_params(&s_app_ctxt, 0, 0); + + /*****************************************************************************/ + /* Video control Set in Encode header mode */ + /*****************************************************************************/ + set_enc_mode(&s_app_ctxt, 0, 0, IVE_ENC_MODE_PICTURE); + +#ifdef IOS + /* Correct file paths */ + sprintf(filename_with_path, "%s/%s", homedir, s_app_ctxt.ac_ip_fname); + strcpy (s_app_ctxt.ac_ip_fname, filename_with_path); + + sprintf(filename_with_path, "%s/%s", homedir, s_app_ctxt.ac_op_fname); + strcpy (s_app_ctxt.ac_op_fname, filename_with_path); + + sprintf(filename_with_path, "%s/%s", homedir, s_app_ctxt.ac_recon_fname); + strcpy (s_app_ctxt.ac_recon_fname, filename_with_path); + + sprintf(filename_with_path, "%s/%s", homedir, s_app_ctxt.ac_chksum_fname); + strcpy (s_app_ctxt.ac_chksum_fname, filename_with_path); + + sprintf(filename_with_path, "%s/%s", homedir, s_app_ctxt.ac_mb_info_fname); + strcpy (s_app_ctxt.ac_mb_info_fname, filename_with_path); + + sprintf(filename_with_path, "%s/%s", homedir, s_app_ctxt.ac_pic_info_fname); + strcpy (s_app_ctxt.ac_pic_info_fname, filename_with_path); +#endif + + /*************************************************************************/ + /* begin encoding */ + 
/*************************************************************************/ + + synchronous_encode(ps_enc, &s_app_ctxt); + + { + DOUBLE bytes_per_frame; + DOUBLE bytes_per_second; + WORD32 achieved_bitrate; + + if(s_app_ctxt.u4_pics_cnt != 0) + bytes_per_frame = (s_app_ctxt.u4_total_bytes) / (s_app_ctxt.u4_pics_cnt); + else + bytes_per_frame = 0; + + bytes_per_second = (bytes_per_frame * s_app_ctxt.u4_tgt_frame_rate); + + achieved_bitrate = bytes_per_second * 8; + + printf("\nEncoding Completed\n"); + printf("Summary\n"); + printf("Input filename : %s\n", s_app_ctxt.ac_ip_fname); + printf("Output filename : %s\n", s_app_ctxt.ac_op_fname); + printf("Output Width : %-4d\n", s_app_ctxt.u4_wd); + printf("Output Height : %-4d\n", s_app_ctxt.u4_ht); + printf("Target Bitrate (bps) : %-4d\n", s_app_ctxt.u4_bitrate); + printf("Achieved Bitrate (bps) : %-4d\n", achieved_bitrate); + printf("Average Time per Frame : %-4d\n", s_app_ctxt.avg_time); + printf("Achieved FPS : %-4.2f\n", 1000000.0 / s_app_ctxt.avg_time); + } + + + /*************************************************************************/ + /* Close Codec Instance */ + /*************************************************************************/ + { + ih264e_retrieve_mem_rec_ip_t s_retrieve_mem_ip; + ih264e_retrieve_mem_rec_op_t s_retrieve_mem_op; + iv_mem_rec_t *ps_mem_rec; + s_retrieve_mem_ip.s_ive_ip.u4_size = + sizeof(ih264e_retrieve_mem_rec_ip_t); + s_retrieve_mem_op.s_ive_op.u4_size = + sizeof(ih264e_retrieve_mem_rec_op_t); + + s_retrieve_mem_ip.s_ive_ip.e_cmd = IV_CMD_RETRIEVE_MEMREC; + s_retrieve_mem_ip.s_ive_ip.ps_mem_rec = s_app_ctxt.ps_mem_rec; + + status = ih264e_api_function(ps_enc, &s_retrieve_mem_ip, + &s_retrieve_mem_op); + + if(status != IV_SUCCESS) + { + sprintf(ac_error, "Unable to retrieve memory records = 0x%x\n", + s_retrieve_mem_op.s_ive_op.u4_error_code); + codec_exit(ac_error); + } + + /* Free memory records */ + ps_mem_rec = s_app_ctxt.ps_mem_rec; + for(i = 0; i < num_mem_recs; i++) + { + 
ih264a_aligned_free(ps_mem_rec->pv_base); + ps_mem_rec++; + } + + free(s_app_ctxt.ps_mem_rec); + + } + + return 0; +} + + +#ifdef ANDROID_NDK +int raise(int a) +{ + printf("Divide by zero\n"); + return 0; +} +void __aeabi_assert(const char *assertion, const char *file, unsigned int line) +{ + return; +} +#endif diff --git a/test/encoder/output.c b/test/encoder/output.c new file mode 100755 index 0000000..e0f27dd --- /dev/null +++ b/test/encoder/output.c @@ -0,0 +1,109 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <sys/time.h>
+/* User include files */
+
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "app.h"
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+#define PEAK_WINDOW_SIZE 8
+/*****************************************************************************/
+/* Macros                                                                    */
+/*****************************************************************************/
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : write_output                                             */
+/*                                                                           */
+/*  Description   : Writes num_bytes bytes from pu1_buf to fp and flushes fp */
+/*                                                                           */
+/*  Inputs        : fp        - Stream to write to                           */
+/*                  pu1_buf   - Bytes to write                               */
+/*                  num_bytes - Number of bytes to write                     */
+/*                                                                           */
+/*  Returns       : IV_SUCCESS if all bytes were written and flushed         */
+/*                  IV_FAIL on bad arguments, short write or flush error     */
+/*                                                                           */
+/*****************************************************************************/
+IV_STATUS_T write_output(FILE *fp, UWORD8 *pu1_buf, WORD32 num_bytes)
+{
+    size_t bytes_to_write;
+
+    /* A negative count would silently become a huge size_t inside fwrite() */
+    if((NULL == fp) || (num_bytes < 0))
+    {
+        return IV_FAIL;
+    }
+
+    bytes_to_write = (size_t)num_bytes;
+
+    if(bytes_to_write > 0)
+    {
+        if(NULL == pu1_buf)
+        {
+            return IV_FAIL;
+        }
+
+        /* Compare in size_t; narrowing fwrite()'s result could hide a short write */
+        if(fwrite(pu1_buf, sizeof(UWORD8), bytes_to_write, fp) != bytes_to_write)
+        {
+            return IV_FAIL;
+        }
+    }
+
+    /* Data may still sit in the stdio buffer; a failed flush is a failed write */
+    if(0 != fflush(fp))
+    {
+        return IV_FAIL;
+    }
+
+    return IV_SUCCESS;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : allocate_output                                          */
+/*                                                                           */
+/*  Description   : Allocates 16-byte aligned output buffers and stores      */
+/*                  them in as_output_buf, marked free. The buffer count is  */
+/*                  max(DEFAULT_NUM_OUTPUT_BUFS, codec minimum), capped at   */
+/*                  DEFAULT_MAX_OUTPUT_BUFS. Unused slots stay zeroed.       */
+/*                                                                           */
+/*  Inputs        : ps_app_ctxt - Application context; s_get_buf_info_op     */
+/*                                must already be filled in                  */
+/*                                                                           */
+/*  Returns       : None. Calls codec_exit() if an allocation fails          */
+/*                                                                           */
+/*****************************************************************************/
+void allocate_output(app_ctxt_t *ps_app_ctxt)
+{
+    WORD32 num_bufs;
+    WORD32 i;
+    WORD32 buf_size;
+
+    num_bufs = MAX(DEFAULT_NUM_OUTPUT_BUFS, ps_app_ctxt->s_get_buf_info_op.s_ive_op.u4_min_out_bufs);
+    num_bufs = MIN(DEFAULT_MAX_OUTPUT_BUFS, num_bufs);
+
+    buf_size = ps_app_ctxt->s_get_buf_info_op.s_ive_op.au4_min_out_buf_size[0];
+
+    /* Zero every slot first: entries at index >= num_bufs keep a NULL buffer */
+    /* and u4_is_free == 0, so they are never picked up as usable buffers     */
+    memset(ps_app_ctxt->as_output_buf, 0, sizeof(output_buf_t) * DEFAULT_MAX_OUTPUT_BUFS);
+
+    for(i = 0; i < num_bufs; i++)
+    {
+        UWORD8 *pu1_buf = (UWORD8 *)ih264a_aligned_malloc(16, buf_size);
+
+        if(NULL == pu1_buf)
+        {
+            CHAR ac_error[STRLENGTH];
+
+            /* Bounded formatting: never write past ac_error */
+            snprintf(ac_error, sizeof(ac_error),
+                     "Allocation failed for output buffer of size %d\n",
+                     buf_size);
+            codec_exit(ac_error);
+        }
+
+        ps_app_ctxt->as_output_buf[i].pu1_buf = pu1_buf;
+        ps_app_ctxt->as_output_buf[i].u4_buf_size = buf_size;
+        ps_app_ctxt->as_output_buf[i].u4_is_free = 1;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : free_output                                              */
+/*                                                                           */
+/*  Description   : Releases the buffers created by allocate_output() and    */
+/*                  clears each slot so no stale pointer is left behind      */
+/*                                                                           */
+/*  Inputs        : ps_app_ctxt - Application context owning the buffers     */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*****************************************************************************/
+void free_output(app_ctxt_t *ps_app_ctxt)
+{
+    WORD32 num_bufs;
+    WORD32 i;
+
+    /* Must mirror the count computed in allocate_output() */
+    num_bufs = MAX(DEFAULT_NUM_OUTPUT_BUFS, ps_app_ctxt->s_get_buf_info_op.s_ive_op.u4_min_out_bufs);
+    num_bufs = MIN(DEFAULT_MAX_OUTPUT_BUFS, num_bufs);
+
+    for(i = 0; i < num_bufs; i++)
+    {
+        /* NOTE(review): ih264a_aligned_free(NULL) safety is not visible here, */
+        /* so already-cleared slots are skipped explicitly                     */
+        if(NULL != ps_app_ctxt->as_output_buf[i].pu1_buf)
+        {
+            ih264a_aligned_free(ps_app_ctxt->as_output_buf[i].pu1_buf);
+        }
+
+        /* Reset the slot: a freed buffer must not look available, and a */
+        /* second call to free_output() must not free it again           */
+        ps_app_ctxt->as_output_buf[i].pu1_buf = NULL;
+        ps_app_ctxt->as_output_buf[i].u4_buf_size = 0;
+        ps_app_ctxt->as_output_buf[i].u4_is_free = 0;
+    }
+}
+
diff --git a/test/encoder/psnr.c b/test/encoder/psnr.c
new file mode 100755
index 0000000..c9bb6a1
--- /dev/null
+++ b/test/encoder/psnr.c
@@ -0,0 +1,242 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <sys/time.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "app.h"
+#include "psnr.h"
+
+/*****************************************************************************
+ * Function Name : init_psnr
+ *
+ * Description   : Initialize PSNR for the Y, U, V component
+ *
+ * Inputs        : ps_app_ctxt - application context
+ *
+ * Outputs       : adbl_psnr[0..2] and u4_psnr_cnt of the context are set
+ *                 to zero
+ *
+ * Returns       : None
+ *
+ * Revision History:
+ *
+ *     DD MM YYYY   Author(s)   Changes (Describe the changes made)
+ *     28 12 2005   Ittiam      Draft
+ *****************************************************************************/
+void init_psnr(app_ctxt_t *ps_app_ctxt)
+{
+    WORD32 comp;
+
+    for(comp = 0; comp < 3; comp++)
+    {
+        ps_app_ctxt->adbl_psnr[comp] = 0;
+    }
+    ps_app_ctxt->u4_psnr_cnt = 0;
+}
+
+/* Returns non-zero when chroma of ps_buf is one interleaved plane (UV or VU) */
+static WORD32 psnr_is_semi_planar(const iv_raw_buf_t *ps_buf)
+{
+    return (IV_YUV_420SP_UV == ps_buf->e_color_fmt)
+                    || (IV_YUV_420SP_VU == ps_buf->e_color_fmt);
+}
+
+/*
+ * Returns the address of the first sample of component comp (0 = Y, 1 = U,
+ * 2 = V) in ps_buf. *pi4_incr receives the distance in bytes between two
+ * consecutive samples of that component, *pi4_strd the distance in bytes
+ * between two rows.
+ */
+static UWORD8 *psnr_plane_start(const iv_raw_buf_t *ps_buf,
+                                WORD32 comp,
+                                WORD32 *pi4_incr,
+                                WORD32 *pi4_strd)
+{
+    WORD32 u_first;
+    WORD32 offset;
+
+    if((0 == comp) || !psnr_is_semi_planar(ps_buf))
+    {
+        /* Luma, or a chroma plane of its own: samples are contiguous */
+        *pi4_incr = 1;
+        *pi4_strd = ps_buf->au4_strd[comp];
+        return (UWORD8 *)ps_buf->apv_bufs[comp];
+    }
+
+    /* Interleaved chroma lives in plane 1; entries of plane 2 are unused.  */
+    /* The first byte of every pair is U for 420SP_UV and V for 420SP_VU.   */
+    u_first = (IV_YUV_420SP_UV == ps_buf->e_color_fmt);
+    if(1 == comp)
+        offset = u_first ? 0 : 1;
+    else
+        offset = u_first ? 1 : 0;
+
+    *pi4_incr = 2;
+    *pi4_strd = ps_buf->au4_strd[1];
+    return (UWORD8 *)ps_buf->apv_bufs[1] + offset;
+}
+
+/*****************************************************************************
+ * Function Name : compute_psnr
+ *
+ * Description   : Computes the PSNR for the Y, U, V component
+ *
+ * Inputs        : ps_app_ctxt - application context holding the running
+ *                               PSNR sums and the picture counter
+ *                 ps_buf1     - first picture; its dimensions decide how
+ *                               many samples are compared
+ *                 ps_buf2     - second picture
+ *                 Each buffer may be planar or semi-planar (UV or VU)
+ *                 independently of the other one.
+ *
+ * Processing    : For every component the mean squared difference of the
+ *                 two pictures is converted with 20 * log10(255 / sqrt(mse)).
+ *                 Identical components are reported as 100.
+ *
+ * Outputs       : Prints one "PicNum" line with the three values, adds them
+ *                 to adbl_psnr[] and increments u4_psnr_cnt.
+ *
+ * Returns       : None
+ *
+ * Issues        : For a semi-planar buffer the width of plane 1 is taken to
+ *                 be the number of interleaved bytes per row, i.e. twice the
+ *                 number of samples per chroma component.
+ *                 NOTE(review): this matches init_raw_buf_descr() in
+ *                 recon.c - confirm for buffers filled in elsewhere.
+ *
+ * Revision History:
+ *
+ *     DD MM YYYY   Author(s)   Changes (Describe the changes made)
+ *     28 12 2005   Ittiam      Draft
+ *****************************************************************************/
+void compute_psnr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_buf1, iv_raw_buf_t *ps_buf2)
+{
+    static const char * const apc_comp_name[3] = { "Y :", "U :", "V :" };
+    WORD32 i, j;
+    WORD32 comp;
+    DOUBLE df_psnr[3];
+
+    printf("\nPicNum %4d\t ", ps_app_ctxt->u4_psnr_cnt);
+
+    for(comp = 0; comp < 3; comp++)
+    {
+        UWORD8 *pu1_buf1, *pu1_buf2;
+        WORD32 incr1, incr2;
+        WORD32 strd1, strd2;
+        WORD32 wd, ht;
+
+        pu1_buf1 = psnr_plane_start(ps_buf1, comp, &incr1, &strd1);
+        pu1_buf2 = psnr_plane_start(ps_buf2, comp, &incr2, &strd2);
+
+        /* Number of samples to compare, always taken from the first buffer */
+        if((comp > 0) && psnr_is_semi_planar(ps_buf1))
+        {
+            wd = ps_buf1->au4_wd[1] / 2;
+            ht = ps_buf1->au4_ht[1];
+        }
+        else
+        {
+            wd = ps_buf1->au4_wd[comp];
+            ht = ps_buf1->au4_ht[comp];
+        }
+
+        df_psnr[comp] = 0;
+        for(i = 0; i < ht; i++)
+        {
+            for(j = 0; j < wd; j++)
+            {
+                WORD32 diff;
+
+                diff = (*pu1_buf1 - *pu1_buf2);
+                pu1_buf1 += incr1;
+                pu1_buf2 += incr2;
+                df_psnr[comp] += diff * diff;
+            }
+            /* Skip from the end of the bytes just walked to the next row */
+            pu1_buf1 += strd1 - incr1 * wd;
+            pu1_buf2 += strd2 - incr2 * wd;
+        }
+
+        /* An empty component keeps a zero error instead of dividing 0 by 0 */
+        if((wd > 0) && (ht > 0))
+            df_psnr[comp] /= ((DOUBLE)wd * ht);
+
+        if(df_psnr[comp])
+            df_psnr[comp] = 20 * log10(255 / sqrt(df_psnr[comp]));
+        else
+            df_psnr[comp] = 100;
+
+        ps_app_ctxt->adbl_psnr[comp] += df_psnr[comp];
+
+        printf("%s", apc_comp_name[comp]);
+        printf("%2.2f\t", df_psnr[comp]);
+    }
+
+    ps_app_ctxt->u4_psnr_cnt++;
+}
+
+
+/*****************************************************************************
+ * Function Name : print_average_psnr
+ *
+ * Description   : Computes the average PSNR for the Y, U, V component
+ *
+ * Inputs        : ps_app_ctxt - application context holding the PSNR sums
+ *                               and the number of pictures accumulated
+ *
+ * Outputs       : Prints the three averages, one per line
+ *
+ * Returns       : None
+ *
+ * Issues        : With u4_psnr_cnt equal to zero the division is 0.0 / 0,
+ *                 so the printed values are not numbers.
+ *
+ * Revision History:
+ *
+ *     DD MM YYYY   Author(s)   Changes (Describe the changes made)
+ *     28 12 2005   Ittiam      Draft
+ *****************************************************************************/
+void print_average_psnr(app_ctxt_t *ps_app_ctxt)
+{
+    printf("\n");
+
+    printf("Avg PSNR Y : %-2.2f\n", (ps_app_ctxt->adbl_psnr[0] / ps_app_ctxt->u4_psnr_cnt));
+    printf("Avg PSNR U : %-2.2f\n", (ps_app_ctxt->adbl_psnr[1] / ps_app_ctxt->u4_psnr_cnt));
+    printf("Avg PSNR V : %-2.2f\n", (ps_app_ctxt->adbl_psnr[2] / ps_app_ctxt->u4_psnr_cnt));
+}
+
diff --git a/test/encoder/psnr.h b/test/encoder/psnr.h
new file mode 100755
index 0000000..fd388cf
--- /dev/null
+++ b/test/encoder/psnr.h
@@ -0,0 +1,62 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/****************************************************************************/ +/* */ +/* File Name : psnr.h */ +/* */ +/* Description : Contains functions for psnr computation */ +/* */ +/* List of Functions : init_psnr */ +/* compute_psnr */ +/* print_average_psnr */ +/* Issues / Problems : This header has no #include lines and does not declare app_ctxt_t or iv_raw_buf_t itself, so the including file must make both types visible first */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes) */ +/* */ +/****************************************************************************/ +#ifndef PSNR_H +#define PSNR_H + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ +void init_psnr(app_ctxt_t *ps_app_ctxt); + +void compute_psnr(app_ctxt_t *ps_app_ctxt, + iv_raw_buf_t *ps_buf1, + iv_raw_buf_t *ps_buf2); + +void print_average_psnr(app_ctxt_t *ps_app_ctxt); + +#if COMPUTE_PSNR /* GET_AVERAGE_PSNR_Y(ctx) calls print_average_psnr(ctx) when COMPUTE_PSNR is non-zero and expands to the constant 0 otherwise */ + +#define GET_AVERAGE_PSNR_Y(print) print_average_psnr(print) + +#else /* COMPUTE_PSNR */ + +#define GET_AVERAGE_PSNR_Y(print) 0 + +#endif /* COMPUTE_PSNR */ + +#endif /* PSNR_H */ + + diff --git a/test/encoder/recon.c b/test/encoder/recon.c new file mode 100755 index 0000000..7fd0f5c --- /dev/null +++ b/test/encoder/recon.c @@ -0,0 +1,221 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <sys/time.h>
+/* User include files */
+
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "app.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* Macros */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+/*
+ * Writes the picture described by ps_raw_buf to fp, row by row, without any
+ * padding between rows. Three planes are written for IV_YUV_420P, two (luma
+ * and one chroma plane) for every other colour format.
+ *
+ * Returns IV_SUCCESS when all rows were written and flushed, IV_FAIL on the
+ * first short write or on a failed flush.
+ */
+IV_STATUS_T write_recon(FILE *fp, iv_raw_buf_t *ps_raw_buf)
+{
+    WORD32 num_comp;
+    WORD32 comp;
+
+    num_comp = (IV_YUV_420P == ps_raw_buf->e_color_fmt) ? 3 : 2;
+
+    for(comp = 0; comp < num_comp; comp++)
+    {
+        size_t wd = ps_raw_buf->au4_wd[comp];
+        WORD32 ht = ps_raw_buf->au4_ht[comp];
+        size_t strd = ps_raw_buf->au4_strd[comp];
+        UWORD8 *pu1_buf = (UWORD8 *)ps_raw_buf->apv_bufs[comp];
+        WORD32 i;
+
+        /* If stride is not initialized, then use width as stride */
+        if(0 == strd)
+            strd = wd;
+
+        for(i = 0; i < ht; i++)
+        {
+            if(fwrite(pu1_buf, sizeof(UWORD8), wd, fp) != wd)
+            {
+                return(IV_FAIL);
+            }
+            /* Rows are strd bytes apart in memory, of which wd are pixels */
+            pu1_buf += strd;
+        }
+    }
+
+    if(0 != fflush(fp))
+        return IV_FAIL;
+
+    return IV_SUCCESS;
+}
+
+/*
+ * Returns a 16-byte aligned buffer of size bytes. When the allocation fails
+ * codec_exit() is called with a message naming pc_what.
+ * NOTE(review): codec_exit() is presumably not expected to return - confirm.
+ */
+static UWORD8 *recon_alloc_or_exit(WORD32 size, const char *pc_what)
+{
+    UWORD8 *pu1_buf = (UWORD8 *)ih264a_aligned_malloc(16, size);
+
+    if(NULL == pu1_buf)
+    {
+        CHAR ac_error[STRLENGTH];
+
+        /* Bounded formatting: never write past ac_error */
+        snprintf(ac_error, sizeof(ac_error),
+                 "Allocation failed for %s buffer of size %d\n",
+                 pc_what, size);
+        codec_exit(ac_error);
+    }
+    return pu1_buf;
+}
+
+/*
+ * Allocates DEFAULT_NUM_RECON_BUFS reconstruction buffers and, when
+ * u4_psnr_enable is set, one more buffer of the same size for PSNR.
+ *
+ * Every buffer holds one 4:2:0 picture of the maximum dimensions, with both
+ * dimensions rounded up by ALIGN16(): luma plus two chroma planes of a
+ * quarter of the luma size each.
+ */
+void allocate_recon(app_ctxt_t *ps_app_ctxt)
+{
+    WORD32 num_bufs;
+    WORD32 pic_size;
+    WORD32 luma_size;
+    WORD32 chroma_size;
+    WORD32 i;
+
+    num_bufs = DEFAULT_NUM_RECON_BUFS;
+
+    /* Size of buffer for YUV420/420SP */
+    luma_size = ALIGN16(ps_app_ctxt->u4_max_wd) * ALIGN16(ps_app_ctxt->u4_max_ht);
+    chroma_size = (luma_size) / 4;
+    pic_size = luma_size + chroma_size * 2;
+
+    for(i = 0; i < num_bufs; i++)
+    {
+        ps_app_ctxt->as_recon_buf[i].pu1_buf = recon_alloc_or_exit(pic_size, "recon");
+        ps_app_ctxt->as_recon_buf[i].u4_buf_size = pic_size;
+        ps_app_ctxt->as_recon_buf[i].u4_is_free = 1;
+    }
+
+    if(ps_app_ctxt->u4_psnr_enable)
+    {
+        ps_app_ctxt->pu1_psnr_buf = recon_alloc_or_exit(pic_size, "psnr");
+        ps_app_ctxt->u4_psnr_buf_size = pic_size;
+    }
+    return;
+}
+
+/*
+ * Releases everything obtained by allocate_recon(). Each pointer is cleared
+ * after it is released so that no stale pointer stays in the context.
+ */
+void free_recon(app_ctxt_t *ps_app_ctxt)
+{
+    WORD32 num_bufs;
+    WORD32 i;
+
+    num_bufs = DEFAULT_NUM_RECON_BUFS;
+
+    for(i = 0; i < num_bufs; i++)
+    {
+        ih264a_aligned_free(ps_app_ctxt->as_recon_buf[i].pu1_buf);
+        ps_app_ctxt->as_recon_buf[i].pu1_buf = NULL;
+    }
+
+    if(ps_app_ctxt->u4_psnr_enable)
+    {
+        ih264a_aligned_free(ps_app_ctxt->pu1_psnr_buf);
+        ps_app_ctxt->pu1_psnr_buf = NULL;
+    }
+    return;
+}
+
+
+
+/*
+ * Fills ps_raw_buf so that it describes a picture of the current u4_wd x
+ * u4_ht stored contiguously in pu1_buf.
+ *
+ * Plane 0 starts at pu1_buf, plane 1 right after the luma area (whose size
+ * uses both dimensions rounded up by ALIGN16()). For IV_YUV_420P plane 2
+ * follows a quarter of the luma size later and both chroma planes are half
+ * the width and height. For every other format plane 1 is described as full
+ * width and half height and plane 2 is left empty.
+ */
+void init_raw_buf_descr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_raw_buf, UWORD8 *pu1_buf, IV_COLOR_FORMAT_T e_color_fmt)
+{
+    WORD32 luma_size;
+    WORD32 chroma_size;
+
+    /* All the pointers and dimensions are initialized here
+     * to support change in resolution from the application */
+    luma_size = ALIGN16(ps_app_ctxt->u4_wd) * ALIGN16(ps_app_ctxt->u4_ht);
+    chroma_size = (luma_size) / 4;
+
+    ps_raw_buf->e_color_fmt = e_color_fmt;
+
+    ps_raw_buf->apv_bufs[0] = pu1_buf;
+    ps_raw_buf->apv_bufs[1] = pu1_buf + luma_size;
+
+    ps_raw_buf->au4_wd[0] = ps_app_ctxt->u4_wd;
+    ps_raw_buf->au4_ht[0] = ps_app_ctxt->u4_ht;
+    ps_raw_buf->au4_strd[0] = ps_app_ctxt->u4_wd;
+
+    if(IV_YUV_420P == e_color_fmt)
+    {
+        ps_raw_buf->apv_bufs[2] = pu1_buf + luma_size + chroma_size;
+
+        ps_raw_buf->au4_wd[1] = ps_app_ctxt->u4_wd / 2;
+        ps_raw_buf->au4_wd[2] = ps_app_ctxt->u4_wd / 2;
+
+        ps_raw_buf->au4_ht[1] = ps_app_ctxt->u4_ht / 2;
+        ps_raw_buf->au4_ht[2] = ps_app_ctxt->u4_ht / 2;
+
+        ps_raw_buf->au4_strd[1] = ps_app_ctxt->u4_wd / 2;
+        ps_raw_buf->au4_strd[2] = ps_app_ctxt->u4_wd / 2;
+    }
+    else
+    {
+        /* Initialize for 420SP */
+        ps_raw_buf->apv_bufs[2] = NULL;
+
+        ps_raw_buf->au4_wd[1] = ps_app_ctxt->u4_wd;
+        ps_raw_buf->au4_wd[2] = 0;
+
+        ps_raw_buf->au4_ht[1] = ps_app_ctxt->u4_ht / 2;
+        ps_raw_buf->au4_ht[2] = 0;
+
+        ps_raw_buf->au4_strd[1] = ps_app_ctxt->u4_wd;
+        ps_raw_buf->au4_strd[2] = 0;
+    }
+
+    /* If stride is not initialized, then use width as stride */
+    if(0 == ps_raw_buf->au4_strd[0])
+    {
+        ps_raw_buf->au4_strd[0] = ps_raw_buf->au4_wd[0];
+        ps_raw_buf->au4_strd[1] = ps_raw_buf->au4_wd[1];
+        ps_raw_buf->au4_strd[2] = ps_raw_buf->au4_wd[2];
+    }
+
+    ps_raw_buf->u4_size = sizeof(iv_raw_buf_t);
+    return;
+}
+
+